diff --git a/Assets/dll/ares64_interpreter.wbx.zst b/Assets/dll/ares64_interpreter.wbx.zst index 14a13797ff..8a13eada30 100644 Binary files a/Assets/dll/ares64_interpreter.wbx.zst and b/Assets/dll/ares64_interpreter.wbx.zst differ diff --git a/Assets/dll/ares64_recompiler.wbx.zst b/Assets/dll/ares64_recompiler.wbx.zst index 4dc8291d07..53d50d2c0d 100644 Binary files a/Assets/dll/ares64_recompiler.wbx.zst and b/Assets/dll/ares64_recompiler.wbx.zst differ diff --git a/waterbox/ares64/BizInterface.cpp b/waterbox/ares64/BizInterface.cpp index f5103bc0d9..a29315dfad 100644 --- a/waterbox/ares64/BizInterface.cpp +++ b/waterbox/ares64/BizInterface.cpp @@ -746,7 +746,7 @@ static u8 PeekFunc(u64 address) } ares::Nintendo64::Thread unused; - return ares::Nintendo64::bus.read(addr, unused); + return ares::Nintendo64::bus.read(addr, unused, nullptr); } static void SysBusAccess(u8* buffer, u64 address, u64 count, bool write) @@ -755,7 +755,7 @@ static void SysBusAccess(u8* buffer, u64 address, u64 count, bool write) { ares::Nintendo64::Thread unused; while (count--) - ares::Nintendo64::bus.write(address++, *buffer++, unused); + ares::Nintendo64::bus.write(address++, *buffer++, unused, nullptr); } else { diff --git a/waterbox/ares64/ares/LICENSE b/waterbox/ares64/ares/LICENSE old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/ares.cpp b/waterbox/ares64/ares/ares/ares/ares.cpp old mode 100644 new mode 100755 index 46d6c7bbef..4c7515497b --- a/waterbox/ares64/ares/ares/ares/ares.cpp +++ b/waterbox/ares64/ares/ares/ares/ares.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -8,4 +9,13 @@ namespace ares { Platform* platform = nullptr; bool _runAhead = false; +const string Name = "ares"; +const string Version = "138"; +const string Copyright = "ares team, Near"; +const string License = "ISC"; +const string LicenseURI = "https://opensource.org/licenses/ISC"; +const string Website = "ares-emu.net"; +const string WebsiteURI = 
"https://ares-emu.net/"; +const u32 SerializerSignature = 0x31545342; //"BST1" (little-endian) + } diff --git a/waterbox/ares64/ares/ares/ares/ares.hpp b/waterbox/ares64/ares/ares/ares/ares.hpp old mode 100644 new mode 100755 index ccebe32628..3bd4ccbd84 --- a/waterbox/ares64/ares/ares/ares/ares.hpp +++ b/waterbox/ares64/ares/ares/ares/ares.hpp @@ -2,7 +2,6 @@ #include #include - #include #include @@ -42,16 +41,14 @@ using namespace nall; using namespace nall::primitives; namespace ares { - static const string Name = "ares"; - static const string Version = "133"; - static const string Copyright = "ares team, Near"; - static const string License = "ISC"; - static const string LicenseURI = "https://opensource.org/licenses/ISC"; - static const string Website = "ares-emu.net"; - static const string WebsiteURI = "https://ares-emu.net/"; - - //incremented only when serialization format changes - static const u32 SerializerSignature = 0x31545342; //"BST1" (little-endian) + extern const string Name; + extern const string Version; + extern const string Copyright; + extern const string License; + extern const string LicenseURI; + extern const string Website; + extern const string WebsiteURI; + extern const u32 SerializerSignature; namespace VFS { using Pak = shared_pointer; diff --git a/waterbox/ares64/ares/ares/ares/debug/debug.cpp b/waterbox/ares64/ares/ares/ares/debug/debug.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/debug/debug.hpp b/waterbox/ares64/ares/ares/ares/debug/debug.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/inline.hpp b/waterbox/ares64/ares/ares/ares/inline.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.cpp b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.cpp old mode 100644 new mode 100755 index 14f914c989..4998459b05 --- a/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.cpp +++ 
b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.cpp @@ -1,21 +1,32 @@ #include +#if !defined(PLATFORM_MACOS) +#define STATIC_ALLOCATION +#endif + namespace ares::Memory { constexpr u32 fixedBufferSize = 8_MiB; -#if defined(PLATFORM_MACOS) -//dynamic allocation for unsupported platforms -FixedAllocator::FixedAllocator() { - _allocator.resize(fixedBufferSize, bump_allocator::executable); -} -#else -alignas(4096) u8 fixedBuffer[fixedBufferSize]; +#if defined(STATIC_ALLOCATION) +u8 fixedBuffer[fixedBufferSize + 64_KiB]; +#endif FixedAllocator::FixedAllocator() { - _allocator.resize(sizeof(fixedBuffer), 0, fixedBuffer); + u8* buffer = nullptr; + + #if defined(STATIC_ALLOCATION) + //align to 64 KiB (maximum page size of any supported OS) + auto offset = -(uintptr)fixedBuffer % 64_KiB; + //set protection to executable + if(memory::protect(fixedBuffer + offset, fixedBufferSize, true)) { + //use static allocation + buffer = fixedBuffer + offset; + } + #endif + + _allocator.resize(fixedBufferSize, bump_allocator::executable, buffer); } -#endif auto FixedAllocator::get() -> bump_allocator& { static FixedAllocator allocator; diff --git a/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.hpp b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.hpp old mode 100644 new mode 100755 index 09d8d85e44..e276322938 --- a/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.hpp +++ b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.hpp @@ -1,14 +1,14 @@ -#pragma once - -namespace ares::Memory { - -struct FixedAllocator { - static auto get() -> bump_allocator&; - -private: - FixedAllocator(); - - bump_allocator _allocator; -}; - -} +#pragma once + +namespace ares::Memory { + +struct FixedAllocator { + static auto get() -> bump_allocator&; + +private: + FixedAllocator(); + + bump_allocator _allocator; +}; + +} diff --git a/waterbox/ares64/ares/ares/ares/memory/memory.hpp b/waterbox/ares64/ares/ares/ares/memory/memory.hpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/ares/ares/memory/readable.hpp b/waterbox/ares64/ares/ares/ares/memory/readable.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/memory/writable.hpp b/waterbox/ares64/ares/ares/ares/memory/writable.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/attribute.hpp b/waterbox/ares64/ares/ares/ares/node/attribute.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/audio/audio.hpp b/waterbox/ares64/ares/ares/ares/node/audio/audio.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/audio/stream.cpp b/waterbox/ares64/ares/ares/ares/node/audio/stream.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/audio/stream.hpp b/waterbox/ares64/ares/ares/ares/node/audio/stream.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/class.hpp b/waterbox/ares64/ares/ares/ares/node/class.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/component/component.hpp b/waterbox/ares64/ares/ares/ares/node/component/component.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/component/real-time-clock.hpp b/waterbox/ares64/ares/ares/ares/node/component/real-time-clock.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/debugger.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/debugger.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/graphics.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/graphics.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/memory.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/memory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/properties.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/properties.hpp old 
mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp old mode 100644 new mode 100755 index 0c354c3b2a..31a778cc51 --- a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp +++ b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp @@ -71,9 +71,7 @@ struct Instruction : Tracer { if(!enabled()) return; if(_omitted) { - PlatformLog({ - "[Omitted: ", _omitted, "]\n"} - ); + PlatformLog(shared(), {"[Omitted: ", _omitted, "]"}); _omitted = 0; } @@ -84,7 +82,7 @@ struct Instruction : Tracer { context, " ", extra }; - PlatformLog({output.strip(), "\n"}); + PlatformLog(shared(), {output.strip()}); } auto serialize(string& output, string depth) -> void override { diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/notification.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/notification.hpp old mode 100644 new mode 100755 index a9f7331d38..95a3489807 --- a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/notification.hpp +++ b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/notification.hpp @@ -6,12 +6,7 @@ struct Notification : Tracer { auto notify(const string& message = {}) -> void { if(!enabled()) return; - - if(message) { - PlatformLog({_component, " ", _name, ": ", message, "\n"}); - } else { - PlatformLog({_component, " ", _name, "\n"}); - } + PlatformLog(shared(), message); } protected: diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/tracer.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/tracer.hpp old mode 100644 new mode 100755 index 82f38e201a..7ded508506 --- a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/tracer.hpp +++ b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/tracer.hpp @@ -6,24 +6,40 @@ struct Tracer : Debugger { } auto component() const -> string { return _component; } - auto enabled() const -> bool { return _enabled; } + auto 
enabled() const -> bool { return file() || terminal(); } + auto prefix() const -> bool { return _prefix; } + auto terminal() const -> bool { return _terminal; } + auto file() const -> bool { return _file; } + auto autoLineBreak() const -> bool { return _autoLineBreak; } + auto setToggle(function toggle) -> void { _toggle = toggle; } auto setComponent(string component) -> void { _component = component; } - auto setEnabled(bool enabled) -> void { _enabled = enabled; } + auto setPrefix(bool prefix) -> void { _prefix = prefix; } + auto setTerminal(bool terminal) -> void { _terminal = terminal; if(_toggle) _toggle(); } + auto setFile(bool file) -> void { _file = file; if(_toggle) _toggle(); } + auto setAutoLineBreak(bool autoLineBreak) -> void { _autoLineBreak = autoLineBreak; } auto serialize(string& output, string depth) -> void override { Debugger::serialize(output, depth); output.append(depth, " component: ", _component, "\n"); - output.append(depth, " enabled: ", _enabled, "\n"); + output.append(depth, " prefix: ", _prefix, "\n"); + output.append(depth, " terminal: ", _terminal, "\n"); + output.append(depth, " file: ", _file, "\n"); } auto unserialize(Markup::Node node) -> void override { Debugger::unserialize(node); _component = node["component"].string(); - _enabled = node["enabled"].boolean(); + _prefix = node["prefix"].boolean(); + _terminal = node["terminal"].boolean(); + _file = node["file"].boolean(); } protected: + function _toggle; string _component; - bool _enabled = false; + bool _prefix = false; + bool _terminal = false; + bool _file = false; + bool _autoLineBreak = true; }; diff --git a/waterbox/ares64/ares/ares/ares/node/input/axis.hpp b/waterbox/ares64/ares/ares/ares/node/input/axis.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/input/button.hpp b/waterbox/ares64/ares/ares/ares/node/input/button.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/input/input.hpp 
b/waterbox/ares64/ares/ares/ares/node/input/input.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/input/rumble.hpp b/waterbox/ares64/ares/ares/ares/node/input/rumble.hpp old mode 100644 new mode 100755 index ea83472d3c..7d0c5371f8 --- a/waterbox/ares64/ares/ares/ares/node/input/rumble.hpp +++ b/waterbox/ares64/ares/ares/ares/node/input/rumble.hpp @@ -5,7 +5,9 @@ struct Rumble : Input { auto weakValue() const -> u16 { return _weak; } auto strongValue() const -> u16 { return _strong; } - auto setValues(u16 weak, u16 strong) -> void { _weak = weak; _strong = strong; } + auto setValues(u16 strong, u16 weak) -> void { _weak = weak; _strong = strong; } + auto setWeak(u16 weak) -> void { _weak = weak; } + auto setStrong(u16 strong) -> void { _strong = strong; } // For systems with binary motors auto enable() const -> bool { return _weak > 0 || _strong > 0; } diff --git a/waterbox/ares64/ares/ares/ares/node/input/trigger.hpp b/waterbox/ares64/ares/ares/ares/node/input/trigger.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/node.cpp b/waterbox/ares64/ares/ares/ares/node/node.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/node.hpp b/waterbox/ares64/ares/ares/ares/node/node.hpp old mode 100644 new mode 100755 index e91ed880d4..d381bee0a7 --- a/waterbox/ares64/ares/ares/ares/node/node.hpp +++ b/waterbox/ares64/ares/ares/ares/node/node.hpp @@ -94,7 +94,7 @@ namespace ares::Core { // forward declarations static auto PlatformAttach(Node::Object) -> void; static auto PlatformDetach(Node::Object) -> void; - static auto PlatformLog(string_view) -> void; + static auto PlatformLog(Node::Debugger::Tracer::Tracer tracer, string_view) -> void; #include #include diff --git a/waterbox/ares64/ares/ares/ares/node/object.hpp b/waterbox/ares64/ares/ares/ares/node/object.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/peripheral.hpp 
b/waterbox/ares64/ares/ares/ares/node/peripheral.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/port.hpp b/waterbox/ares64/ares/ares/ares/node/port.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/setting/boolean.hpp b/waterbox/ares64/ares/ares/ares/node/setting/boolean.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/setting/integer.hpp b/waterbox/ares64/ares/ares/ares/node/setting/integer.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/setting/natural.hpp b/waterbox/ares64/ares/ares/ares/node/setting/natural.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/setting/real.hpp b/waterbox/ares64/ares/ares/ares/node/setting/real.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/setting/setting.hpp b/waterbox/ares64/ares/ares/ares/node/setting/setting.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/setting/string.hpp b/waterbox/ares64/ares/ares/ares/node/setting/string.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/system.hpp b/waterbox/ares64/ares/ares/ares/node/system.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/video/screen.cpp b/waterbox/ares64/ares/ares/ares/node/video/screen.cpp old mode 100644 new mode 100755 index 19fe96a044..39e613e42f --- a/waterbox/ares64/ares/ares/ares/node/video/screen.cpp +++ b/waterbox/ares64/ares/ares/ares/node/video/screen.cpp @@ -25,11 +25,15 @@ Screen::~Screen() { auto Screen::main(uintptr_t) -> void { while(!_kill) { - usleep(1); - if(_frame) { + unique_lock lock(_frameMutex); + + auto timeout = std::chrono::milliseconds(10); + if(_frameCondition.wait_for(lock, timeout, [&] { return _frame.load(); })) { refresh(); _frame = false; } + + if(_kill) break; } } @@ -68,6 +72,15 @@ auto Screen::setRefresh(function 
refresh) -> void { _refresh = refresh; } +auto Screen::refreshRateHint(double pixelFrequency, int dotsPerLine, int linesPerFrame) -> void { + refreshRateHint(1.0f / ((double)(dotsPerLine * linesPerFrame) / pixelFrequency)); +} + +auto Screen::refreshRateHint(double refreshRate) -> void { + lock_guard lock(_mutex); + platform->refreshRateHint(refreshRate); +} + auto Screen::setViewport(u32 x, u32 y, u32 width, u32 height) -> void { lock_guard lock(_mutex); _viewportX = x; @@ -76,6 +89,11 @@ auto Screen::setViewport(u32 x, u32 y, u32 width, u32 height) -> void { _viewportHeight = height; } +auto Screen::setOverscan(bool overscan) -> void { + lock_guard lock(_mutex); + _overscan = overscan; +} + auto Screen::setSize(u32 width, u32 height) -> void { lock_guard lock(_mutex); _width = width; @@ -171,10 +189,12 @@ auto Screen::frame() -> void { lock_guard lock(_mutex); _inputA.swap(_inputB); - _frame = true; if constexpr(!ares::Video::Threaded) { refresh(); _frame = false; + } else { + _frame = true; + _frameCondition.notify_one(); } } @@ -228,14 +248,18 @@ auto Screen::refresh() -> void { } } - if(_colorBleed) { + if (_colorBleed) { n32 mask = 1 << 24 | 1 << 16 | 1 << 8 | 1 << 0; - for(u32 y : range(height)) { + for (u32 y : range(height)) { auto target = output + y * width; - for(u32 x : range(width)) { - auto a = target[x]; - auto b = target[x + (x != width - 1)]; - target[x] = (a + b - ((a ^ b) & mask)) >> 1; + for (u32 x : range(0, width, _colorBleedWidth)) { + for (u32 offset = 0; offset < _colorBleedWidth && (x + offset) < width; ++offset) { + u32 next = x + _colorBleedWidth; + if (next + offset >= width) next = x; + auto a = target[x + offset]; + auto b = target[next + offset]; + target[x + offset] = (a + b - ((a ^ b) & mask)) >> 1; + } } } } diff --git a/waterbox/ares64/ares/ares/ares/node/video/screen.hpp b/waterbox/ares64/ares/ares/ares/node/video/screen.hpp old mode 100644 new mode 100755 index bcc13d1dc5..11e8e985ea --- 
a/waterbox/ares64/ares/ares/ares/node/video/screen.hpp +++ b/waterbox/ares64/ares/ares/ares/node/video/screen.hpp @@ -16,6 +16,7 @@ struct Screen : Video { auto scaleY() const -> f64 { return _scaleY; } auto aspectX() const -> f64 { return _aspectX; } auto aspectY() const -> f64 { return _aspectY; } + auto overscan() const -> bool { return _overscan; } auto colors() const -> u32 { return _colors; } auto pixels(bool frame = 0) -> array_span; @@ -33,10 +34,13 @@ struct Screen : Video { auto setRefresh(function refresh) -> void; auto setViewport(u32 x, u32 y, u32 width, u32 height) -> void; + auto refreshRateHint(double refreshRate) -> void; + auto refreshRateHint(double pixelFrequency, int dotsPerLine, int linesPerFrame) -> void; auto setSize(u32 width, u32 height) -> void; auto setScale(f64 scaleX, f64 scaleY) -> void; auto setAspect(f64 aspectX, f64 aspectY) -> void; + auto setOverscan(bool overscan) -> void; auto setSaturation(f64 saturation) -> void; auto setGamma(f64 gamma) -> void; @@ -44,6 +48,7 @@ struct Screen : Video { auto setFillColor(u32 fillColor) -> void; auto setColorBleed(bool colorBleed) -> void; + auto setColorBleedWidth(u32 width) -> void; auto setInterframeBlending(bool interframeBlending) -> void; auto setRotation(u32 rotation) -> void; @@ -78,7 +83,9 @@ protected: f64 _luminance = 1.0; u32 _fillColor = 0; bool _colorBleed = false; + bool _colorBleedWidth = 1; bool _interframeBlending = false; + bool _overscan = true; u32 _rotation = 0; //counter-clockwise (90 = left, 270 = right) function _color; @@ -92,6 +99,8 @@ protected: //unserialized: nall::thread _thread; recursive_mutex _mutex; + mutex _frameMutex; + condition_variable _frameCondition; atomic _kill = false; atomic _frame = false; function _refresh; diff --git a/waterbox/ares64/ares/ares/ares/node/video/sprite.cpp b/waterbox/ares64/ares/ares/ares/node/video/sprite.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/video/sprite.hpp 
b/waterbox/ares64/ares/ares/ares/node/video/sprite.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/node/video/video.hpp b/waterbox/ares64/ares/ares/ares/node/video/video.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/platform.hpp b/waterbox/ares64/ares/ares/ares/platform.hpp old mode 100644 new mode 100755 index c2e670f4ac..93d1cb34fd --- a/waterbox/ares64/ares/ares/ares/platform.hpp +++ b/waterbox/ares64/ares/ares/ares/platform.hpp @@ -15,11 +15,13 @@ struct Platform { virtual auto detach(Node::Object) -> void {} virtual auto pak(Node::Object) -> shared_pointer { return {}; } virtual auto event(Event) -> void {} - virtual auto log(string_view message) -> void {} + virtual auto log(Node::Debugger::Tracer::Tracer, string_view message) -> void {} virtual auto status(string_view message) -> void {} virtual auto video(Node::Video::Screen, const u32* data, u32 pitch, u32 width, u32 height) -> void {} + virtual auto refreshRateHint(double refreshRate) -> void {} virtual auto audio(Node::Audio::Stream) -> void {} virtual auto input(Node::Input::Input) -> void {} + virtual auto cheat(u32 addr) -> maybe { return nothing; } virtual auto time() -> n64 { return ::time(0); } }; @@ -31,5 +33,5 @@ namespace ares::Core { // forward declarations auto PlatformAttach(Node::Object node) -> void { if(platform && node->name()) platform->attach(node); } auto PlatformDetach(Node::Object node) -> void { if(platform && node->name()) platform->detach(node); } - auto PlatformLog(string_view text) -> void { if(platform) platform->log(text); } + auto PlatformLog(Node::Debugger::Tracer::Tracer node, string_view text) -> void { if(platform) platform->log(node, text); } } diff --git a/waterbox/ares64/ares/ares/ares/random.hpp b/waterbox/ares64/ares/ares/ares/random.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/GNUmakefile b/waterbox/ares64/ares/ares/ares/resource/GNUmakefile old mode 100644 
new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/icon.png b/waterbox/ares64/ares/ares/ares/resource/icon.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/icon@2x.png b/waterbox/ares64/ares/ares/ares/resource/icon@2x.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/logo.png b/waterbox/ares64/ares/ares/ares/resource/logo.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/logo@2x.png b/waterbox/ares64/ares/ares/ares/resource/logo@2x.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/resource.bml b/waterbox/ares64/ares/ares/ares/resource/resource.bml old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/resource.cpp b/waterbox/ares64/ares/ares/ares/resource/resource.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/resource.hpp b/waterbox/ares64/ares/ares/ares/resource/resource.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-blue.png b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-blue.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-green.png b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-green.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-red.png b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-red.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-0.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-1.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-1.png old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-2.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-2.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/headphones.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/headphones.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/initialized.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/initialized.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/low-battery.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/low-battery.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-0.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-1.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-1.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/powered-on.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/powered-on.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/sleeping.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/sleeping.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a0.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a1.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a1.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a2.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a2.png old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b0.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b1.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b1.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b2.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b2.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b3.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b3.png old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/scheduler/scheduler.cpp b/waterbox/ares64/ares/ares/ares/scheduler/scheduler.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/scheduler/scheduler.hpp b/waterbox/ares64/ares/ares/ares/scheduler/scheduler.hpp old mode 100644 new mode 100755 index 51dd625dfc..0f9d67026c --- a/waterbox/ares64/ares/ares/ares/scheduler/scheduler.hpp +++ b/waterbox/ares64/ares/ares/ares/scheduler/scheduler.hpp @@ -41,7 +41,7 @@ private: vector _threads; bool _synchronize = false; - friend class Thread; + friend struct Thread; }; extern Scheduler scheduler; diff --git a/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp b/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/ares/scheduler/thread.hpp b/waterbox/ares64/ares/ares/ares/scheduler/thread.hpp old mode 100644 new mode 100755 index f40941f6ec..382d88df4f --- a/waterbox/ares64/ares/ares/ares/scheduler/thread.hpp +++ b/waterbox/ares64/ares/ares/ares/scheduler/thread.hpp @@ -46,5 +46,5 @@ protected: u64 _scalar = 0; u64 _clock = 0; - friend class Scheduler; + friend struct Scheduler; }; diff --git a/waterbox/ares64/ares/ares/ares/types.hpp b/waterbox/ares64/ares/ares/ares/types.hpp old mode 100644 new mode 100755 
diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/disassembler.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/disassembler.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/instruction.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/instruction.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/instructions.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/instructions.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/memory.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/memory.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/serialization.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.hpp b/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/timer.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/timer.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/accuracy.hpp b/waterbox/ares64/ares/ares/n64/accuracy.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/ai/ai.cpp b/waterbox/ares64/ares/ares/n64/ai/ai.cpp old mode 100644 new mode 100755 index 1b64e26993..89b5f61da3 --- a/waterbox/ares64/ares/ares/n64/ai/ai.cpp +++ b/waterbox/ares64/ares/ares/n64/ai/ai.cpp @@ -38,7 +38,7 @@ auto AI::sample(f64& left, f64& right) -> void { if(io.dmaLength[0] && io.dmaEnable) { io.dmaAddress[0].bit(13,23) += io.dmaAddressCarry; - auto data = rdram.ram.read(io.dmaAddress[0]); + auto 
data = rdram.ram.read(io.dmaAddress[0], "AI"); auto l = s16(data >> 16); auto r = s16(data >> 0); left = l / 32768.0; @@ -50,8 +50,9 @@ auto AI::sample(f64& left, f64& right) -> void { } if(!io.dmaLength[0]) { if(--io.dmaCount) { - io.dmaAddress[0] = io.dmaAddress[1]; - io.dmaLength [0] = io.dmaLength [1]; + io.dmaAddress[0] = io.dmaAddress[1]; + io.dmaLength [0] = io.dmaLength [1]; + io.dmaOriginPc[0] = io.dmaOriginPc[1]; mi.raise(MI::IRQ::AI); } } diff --git a/waterbox/ares64/ares/ares/n64/ai/ai.hpp b/waterbox/ares64/ares/ares/n64/ai/ai.hpp old mode 100644 new mode 100755 index aee1602da8..e6ff587e52 --- a/waterbox/ares64/ares/ares/n64/ai/ai.hpp +++ b/waterbox/ares64/ares/ares/n64/ai/ai.hpp @@ -38,6 +38,7 @@ struct AI : Thread, Memory::RCP { n1 dmaAddressCarry; n18 dmaLength[2]; n2 dmaCount; + u64 dmaOriginPc[2]; n14 dacRate; n4 bitRate; } io; diff --git a/waterbox/ares64/ares/ares/n64/ai/debugger.cpp b/waterbox/ares64/ares/ares/n64/ai/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/ai/io.cpp b/waterbox/ares64/ares/ares/n64/ai/io.cpp old mode 100644 new mode 100755 index d72a3ef408..ce4694685b --- a/waterbox/ares64/ares/ares/n64/ai/io.cpp +++ b/waterbox/ares64/ares/ares/n64/ai/io.cpp @@ -1,5 +1,5 @@ auto AI::readWord(u32 address, Thread& thread) -> u32 { - address = (address & 0xfffff) >> 2; + address = (address & 0x1f) >> 2; n32 data; if(address != 3) { @@ -22,7 +22,7 @@ auto AI::readWord(u32 address, Thread& thread) -> u32 { } auto AI::writeWord(u32 address, u32 data_, Thread& thread) -> void { - address = (address & 0xfffff) >> 2; + address = (address & 0x1f) >> 2; n32 data = data_; if(address == 0) { @@ -38,6 +38,7 @@ auto AI::writeWord(u32 address, u32 data_, Thread& thread) -> void { if(io.dmaCount < 2) { if(io.dmaCount == 0) mi.raise(MI::IRQ::AI); io.dmaLength[io.dmaCount] = length; + io.dmaOriginPc[io.dmaCount] = cpu.ipu.pc; io.dmaCount++; } } diff --git a/waterbox/ares64/ares/ares/n64/ai/serialization.cpp 
b/waterbox/ares64/ares/ares/n64/ai/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cartridge/cartridge.cpp b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.cpp old mode 100644 new mode 100755 index 9826499ec4..14a1f78804 --- a/waterbox/ares64/ares/ares/n64/cartridge/cartridge.cpp +++ b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.cpp @@ -47,7 +47,12 @@ auto Cartridge::connect() -> void { rtc.load(); - isviewer.ram.allocate(64_KiB); + if(rom.size <= 0x03fe'ffff) { + isviewer.ram.allocate(64_KiB); + isviewer.tracer = node->append("ISViewer", "Cartridge"); + isviewer.tracer->setAutoLineBreak(false); + isviewer.tracer->setTerminal(true); + } debugger.load(node); diff --git a/waterbox/ares64/ares/ares/n64/cartridge/cartridge.hpp b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.hpp old mode 100644 new mode 100755 index 22825ba5e3..9efbf30a4a --- a/waterbox/ares64/ares/ares/n64/cartridge/cartridge.hpp +++ b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.hpp @@ -40,8 +40,12 @@ struct Cartridge { } flash; struct ISViewer : Memory::PI { Memory::Writable ram; //unserialized + Node::Debugger::Tracer::Notification tracer; + + auto enabled() -> bool { return ram.size; } //isviewer.cpp + auto messageChar(char c) -> void; auto readHalf(u32 address) -> u16; auto writeHalf(u32 address, u16 data) -> void; auto readWord(u32 address) -> u32; diff --git a/waterbox/ares64/ares/ares/n64/cartridge/debugger.cpp b/waterbox/ares64/ares/ares/n64/cartridge/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cartridge/flash.cpp b/waterbox/ares64/ares/ares/n64/cartridge/flash.cpp old mode 100644 new mode 100755 index cfcbe2bf72..32402929d3 --- a/waterbox/ares64/ares/ares/n64/cartridge/flash.cpp +++ b/waterbox/ares64/ares/ares/n64/cartridge/flash.cpp @@ -85,7 +85,8 @@ auto Cartridge::Flash::writeWord(u32 address, u64 data) -> void { } if(mode == Mode::Write) { for(u32 index = 0; index < 128; index += 2) 
{ - u16 half = rdram.ram.read(source + index); + // FIXME: this is obviously wrong, the flash can't access RDRAM + u16 half = rdram.ram.read(source + index, "Flash"); Memory::Writable::write(offset + index, half); } } diff --git a/waterbox/ares64/ares/ares/n64/cartridge/isviewer.cpp b/waterbox/ares64/ares/ares/n64/cartridge/isviewer.cpp old mode 100644 new mode 100755 index f7df89abfa..bb38b3e74d --- a/waterbox/ares64/ares/ares/n64/cartridge/isviewer.cpp +++ b/waterbox/ares64/ares/ares/n64/cartridge/isviewer.cpp @@ -8,6 +8,11 @@ auto Cartridge::ISViewer::readWord(u32 address) -> u32 { return ram.read(address); } +auto Cartridge::ISViewer::messageChar(char c) -> void { + if(!tracer->enabled()) return; + tracer->notify(c); +} + auto Cartridge::ISViewer::writeHalf(u32 address, u16 data) -> void { address = (address & 0xffff); @@ -23,7 +28,7 @@ auto Cartridge::ISViewer::writeHalf(u32 address, u16 data) -> void { // functional. for(auto address : range(data)) { char c = ram.read(0x20 + address); - fputc(c, stdout); + messageChar(c); } return; } diff --git a/waterbox/ares64/ares/ares/n64/cartridge/joybus.cpp b/waterbox/ares64/ares/ares/n64/cartridge/joybus.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cartridge/rtc.cpp b/waterbox/ares64/ares/ares/n64/cartridge/rtc.cpp old mode 100644 new mode 100755 index 66530fa593..3045949181 --- a/waterbox/ares64/ares/ares/n64/cartridge/rtc.cpp +++ b/waterbox/ares64/ares/ares/n64/cartridge/rtc.cpp @@ -1,4 +1,3 @@ - auto Cartridge::RTC::power(bool reset) -> void { if(present) run(!status.bit(7)); } @@ -11,8 +10,16 @@ auto Cartridge::RTC::load() -> void { present = 1; n64 timestamp = ram.read(24); if(!~timestamp) { - ram.fill(0); - ram.write(21, 1); + time_t t = (time_t)0; + struct tm tmm = *gmtime(&t); + ram.write(16, BCD::encode(tmm.tm_sec)); + ram.write(17, BCD::encode(tmm.tm_min)); + ram.write(18, BCD::encode(tmm.tm_hour) | 0x80); + ram.write(19, BCD::encode(tmm.tm_mday)); + ram.write(20, 
BCD::encode(tmm.tm_wday)); + ram.write(21, BCD::encode(tmm.tm_mon + 1)); + ram.write(22, BCD::encode(tmm.tm_year % 100)); + ram.write(23, BCD::encode(tmm.tm_year / 100)); } timestamp = platform->time() - timestamp; @@ -45,26 +52,42 @@ auto Cartridge::RTC::running() -> bool { } auto Cartridge::RTC::advance(int nsec) -> void { - struct tm tmm = {}; - tmm.tm_sec = BCD::decode(ram.read(16)); - tmm.tm_min = BCD::decode(ram.read(17)); - tmm.tm_hour = BCD::decode(ram.read(18) & 0x7f); - tmm.tm_mday = BCD::decode(ram.read(19)); - tmm.tm_mon = BCD::decode(ram.read(21)) - 1; - tmm.tm_year = BCD::decode(ram.read(22)) + 100 * BCD::decode(ram.read(23)); - time_t t = mktime(&tmm); + auto seconds = BCD::decode(ram.read(16)); + auto minutes = BCD::decode(ram.read(17)); + auto hours = BCD::decode(ram.read(18) & 0x7f); + auto day = BCD::decode(ram.read(19)); + auto wday = BCD::decode(ram.read(20)); + auto month = BCD::decode(ram.read(21)); + auto year = BCD::decode(ram.read(22)) + 100 * BCD::decode(ram.read(23)); - t += nsec; + while(nsec--) { + if(++seconds == 60) { + seconds = 0; + if(++minutes == 60) { + minutes = 0; + if(++hours == 24) { + hours = 0; + if(++wday == 7) wday = 0; + if(++day > chrono::daysInMonth(month, year)) { + day = 1; + if(++month == 13) { + month = 1; + year++; + } + } + } + } + } + } - tmm = *localtime(&t); - ram.write(16, BCD::encode(tmm.tm_sec)); - ram.write(17, BCD::encode(tmm.tm_min)); - ram.write(18, BCD::encode(tmm.tm_hour) | 0x80); - ram.write(19, BCD::encode(tmm.tm_mday)); - ram.write(20, BCD::encode(tmm.tm_wday)); - ram.write(21, BCD::encode(tmm.tm_mon + 1)); - ram.write(22, BCD::encode(tmm.tm_year % 100)); - ram.write(23, BCD::encode(tmm.tm_year / 100)); + ram.write(16, BCD::encode(seconds)); + ram.write(17, BCD::encode(minutes)); + ram.write(18, BCD::encode(hours) | 0x80); + ram.write(19, BCD::encode(day)); + ram.write(20, BCD::encode(wday)); + ram.write(21, BCD::encode(month)); + ram.write(22, BCD::encode(year % 100)); + ram.write(23, 
BCD::encode(year / 100)); } auto Cartridge::RTC::read(u2 block, n8* data) -> void { diff --git a/waterbox/ares64/ares/ares/n64/cartridge/serialization.cpp b/waterbox/ares64/ares/ares/n64/cartridge/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cartridge/slot.cpp b/waterbox/ares64/ares/ares/n64/cartridge/slot.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cartridge/slot.hpp b/waterbox/ares64/ares/ares/n64/cartridge/slot.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cic/cic.cpp b/waterbox/ares64/ares/ares/n64/cic/cic.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cic/cic.hpp b/waterbox/ares64/ares/ares/n64/cic/cic.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cic/commands.cpp b/waterbox/ares64/ares/ares/n64/cic/commands.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cic/io.cpp b/waterbox/ares64/ares/ares/n64/cic/io.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cic/serialization.cpp b/waterbox/ares64/ares/ares/n64/cic/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/controller.cpp b/waterbox/ares64/ares/ares/n64/controller/controller.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/controller.hpp b/waterbox/ares64/ares/ares/n64/controller/controller.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.cpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.cpp old mode 100644 new mode 100755 index 837cb588d0..f3c96424c8 --- a/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.cpp +++ b/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.cpp @@ -8,7 +8,7 @@ Gamepad::Gamepad(Node::Port parent) { port->setAllocate([&](auto name) { return allocate(name); }); 
port->setConnect([&] { return connect(); }); port->setDisconnect([&] { return disconnect(); }); - port->setSupported({"Controller Pak", "Rumble Pak"}); + port->setSupported({"Controller Pak", "Rumble Pak", "Transfer Pak"}); x = node->append ("X-Axis"); y = node->append ("Y-Axis"); @@ -166,9 +166,7 @@ auto Gamepad::comm(n8 send, n8 recv, n8 input[], n8 output[]) -> n2 { if(transferPak) { u16 address = (input[1] << 8 | input[2] << 0) & ~31; if(pif.addressCRC(address) == (n5)input[2]) { - for(u32 index : range(recv - 1)) { - output[index] = transferPak.read(address++); - } + for(u32 index : range(recv - 1)) output[index] = transferPak.read(address++); output[recv - 1] = pif.dataCRC({&output[0], recv - 1u}); valid = 1; } diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.hpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc.hpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc1.hpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc1.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc3.hpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc3.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc5.hpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/mbc/mbc5.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/transfer-pak.hpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/transfer-pak.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/mouse/mouse.cpp b/waterbox/ares64/ares/ares/n64/controller/mouse/mouse.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/mouse/mouse.hpp 
b/waterbox/ares64/ares/ares/n64/controller/mouse/mouse.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/port.cpp b/waterbox/ares64/ares/ares/n64/controller/port.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/controller/port.hpp b/waterbox/ares64/ares/ares/n64/controller/port.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/algorithms.cpp b/waterbox/ares64/ares/ares/n64/cpu/algorithms.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/context.cpp b/waterbox/ares64/ares/ares/n64/cpu/context.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/cpu.cpp b/waterbox/ares64/ares/ares/n64/cpu/cpu.cpp old mode 100644 new mode 100755 index 7bb06317ff..0994f4a1d1 --- a/waterbox/ares64/ares/ares/n64/cpu/cpu.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/cpu.cpp @@ -1,4 +1,5 @@ #include +#include namespace ares::Nintendo64 { @@ -63,7 +64,7 @@ auto CPU::synchronize() -> void { case Queue::SI_DMA_Write: return si.dmaWrite(); case Queue::SI_BUS_Write: return si.writeFinished(); case Queue::RTC_Tick: return cartridge.rtc.tick(); - case Queue::DD_Clock_Tick: return dd.rtcTickClock(); + case Queue::DD_Clock_Tick: return dd.rtc.tickClock(); case Queue::DD_MECHA_Response: return dd.mechaResponse(); case Queue::DD_BM_Request: return dd.bmRequest(); case Queue::DD_Motor_Mode: return dd.motorChange(); @@ -90,6 +91,10 @@ auto CPU::instruction() -> void { step(1 * 2); return exception.nmi(); } + if (scc.sysadFrozen) { + step(1 * 2); + return; + } if constexpr(Accuracy::CPU::Recompiler) { // Fast path: attempt to lookup previously compiled blocks with devirtualizeFast @@ -104,26 +109,30 @@ auto CPU::instruction() -> void { } if (auto address = devirtualize(ipu.pc)) { - auto block = recompiler.block(ipu.pc, *address); + auto block = recompiler.block(ipu.pc, *address, false); block->execute(*this); } } if 
constexpr(Accuracy::CPU::Interpreter) { - pipeline.address = ipu.pc; auto data = fetch(ipu.pc); if (!data) return; - pipeline.instruction = *data; - debugger.instruction(); + instructionPrologue(*data); decoderEXECUTE(); instructionEpilogue(); } } +auto CPU::instructionPrologue(u32 instruction) -> void { + pipeline.address = ipu.pc; + pipeline.instruction = instruction; + debugger.instruction(); +} + auto CPU::instructionEpilogue() -> s32 { if constexpr(Accuracy::CPU::Recompiler) { //simulates timings without performing actual icache loads - icache.step(ipu.pc, devirtualizeFast(ipu.pc)); + icache.step(ipu.pc, devirtualizeFast(ipu.pc)); } ipu.r[0].u64 = 0; @@ -168,9 +177,7 @@ auto CPU::power(bool reset) -> void { if constexpr(Accuracy::CPU::Recompiler) { auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(4_MiB); - memory::jitprotect(false); - recompiler.allocator.resize(4_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer); - memory::jitprotect(true); + recompiler.allocator.resize(4_MiB, bump_allocator::executable, buffer); recompiler.reset(); } } diff --git a/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp b/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp old mode 100644 new mode 100755 index 82ba1fdc81..6abad3678c --- a/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp +++ b/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp @@ -36,6 +36,7 @@ struct CPU : Thread { auto synchronize() -> void; auto instruction() -> void; + auto instructionPrologue(u32 instruction) -> void; auto instructionEpilogue() -> s32; auto power(bool reset) -> void; @@ -147,26 +148,12 @@ struct CPU : Thread { cpu.step(48 * 2); valid = 1; tag = address & ~0x0000'0fff; - words[0] = cpu.busRead(tag | index | 0x00); - words[1] = cpu.busRead(tag | index | 0x04); - words[2] = cpu.busRead(tag | index | 0x08); - words[3] = cpu.busRead(tag | index | 0x0c); - words[4] = cpu.busRead(tag | index | 0x10); - words[5] = cpu.busRead(tag | index | 0x14); - words[6] = cpu.busRead(tag | index | 0x18); - words[7] = 
cpu.busRead(tag | index | 0x1c); + cpu.busReadBurst(tag | index, words); } auto writeBack(CPU& cpu) -> void { cpu.step(48 * 2); - cpu.busWrite(tag | index | 0x00, words[0]); - cpu.busWrite(tag | index | 0x04, words[1]); - cpu.busWrite(tag | index | 0x08, words[2]); - cpu.busWrite(tag | index | 0x0c, words[3]); - cpu.busWrite(tag | index | 0x10, words[4]); - cpu.busWrite(tag | index | 0x14, words[5]); - cpu.busWrite(tag | index | 0x18, words[6]); - cpu.busWrite(tag | index | 0x1c, words[7]); + cpu.busWriteBurst(tag | index, words); } auto read(u32 address) const -> u32 { return words[address >> 2 & 7]; } @@ -186,19 +173,22 @@ struct CPU : Thread { template auto write(u32 vaddr, u32 address, u64 data) -> void; auto power(bool reset) -> void; + auto readDebug(u32 vaddr, u32 address) -> u8; + //8KB struct Line { auto hit(u32 address) const -> bool; - template auto fill(u32 address, u64 data) -> void; auto fill(u32 address) -> void; auto writeBack() -> void; template auto read(u32 address) const -> u64; template auto write(u32 address, u64 data) -> void; bool valid; - bool dirty; + u16 dirty; u32 tag; u16 index; + u64 fillPc; + u64 dirtyPc; union { u8 bytes[16]; u16 halfs[8]; @@ -242,11 +232,12 @@ struct CPU : Thread { } entry[TLB::Entries]; //tlb.cpp - auto load(u64 vaddr) -> Match; - auto load(u64 vaddr, const Entry& entry) -> Match; + auto load(u64 vaddr, bool noExceptions = false) -> Match; + auto load(u64 vaddr, const Entry& entry, bool noExceptions = false) -> maybe; + auto loadFast(u64 vaddr) -> Match; auto store(u64 vaddr) -> Match; - auto store(u64 vaddr, const Entry& entry) -> Match; + auto store(u64 vaddr, const Entry& entry) -> maybe; struct TlbCache { ; static constexpr int entries = 4; @@ -292,15 +283,20 @@ struct CPU : Thread { auto segment(u64 vaddr) -> Context::Segment; auto devirtualize(u64 vaddr) -> maybe; alwaysinline auto devirtualizeFast(u64 vaddr) -> u64; + auto devirtualizeDebug(u64 vaddr) -> u64; auto fetch(u64 vaddr) -> maybe; template auto 
busWrite(u32 address, u64 data) -> void; template auto busRead(u32 address) -> u64; + template auto busWriteBurst(u32 address, u32 *data) -> void; + template auto busReadBurst(u32 address, u32 *data) -> void; template auto read(u64 vaddr) -> maybe; template auto write(u64 vaddr, u64 data, bool alignedError=true) -> bool; template auto vaddrAlignedError(u64 vaddr, bool write) -> bool; auto addressException(u64 vaddr) -> void; + auto readDebug(u64 vaddr) -> u8; + //serialization.cpp auto serialize(serializer&) -> void; @@ -631,6 +627,7 @@ struct CPU : Thread { //other n64 latch; n1 nmiPending; + n1 sysadFrozen; } scc; //interpreter-scc.cpp @@ -655,7 +652,7 @@ struct CPU : Thread { struct Coprocessor { static constexpr u8 revision = 0x00; - static constexpr u8 implementation = 0x0b; + static constexpr u8 implementation = 0x0a; } coprocessor; struct ControlStatus { @@ -690,7 +687,9 @@ struct CPU : Thread { //interpreter-fpu.cpp float_env fenv; - template auto fgr(u32) -> T&; + template auto fgr_t(u32) -> T&; + template auto fgr_s(u32) -> T&; + template auto fgr_d(u32) -> T&; auto getControlRegisterFPU(n5) -> u32; auto setControlRegisterFPU(n5, n32) -> void; template auto checkFPUExceptions() -> bool; @@ -701,8 +700,10 @@ struct CPU : Thread { auto fpeInvalidOperation() -> bool; auto fpeUnimplemented() -> bool; auto fpuCheckStart() -> bool; - auto fpuCheckInput(f32& f) -> bool; - auto fpuCheckInput(f64& f) -> bool; + template + auto fpuCheckInput(T& f) -> bool; + template + auto fpuCheckInputs(T& f1, T& f2) -> bool; auto fpuCheckOutput(f32& f) -> bool; auto fpuCheckOutput(f64& f) -> bool; auto fpuClearCause() -> void; @@ -894,10 +895,10 @@ struct CPU : Thread { } auto pool(u32 address) -> Pool*; - auto block(u32 vaddr, u32 address) -> Block*; + auto block(u32 vaddr, u32 address, bool singleInstruction = false) -> Block*; auto fastFetchBlock(u32 address) -> Block*; - auto emit(u32 vaddr, u32 address) -> Block*; + auto emit(u32 vaddr, u32 address, bool singleInstruction = 
false) -> Block*; auto emitEXECUTE(u32 instruction) -> bool; auto emitSPECIAL(u32 instruction) -> bool; auto emitREGIMM(u32 instruction) -> bool; @@ -905,6 +906,7 @@ struct CPU : Thread { auto emitFPU(u32 instruction) -> bool; auto emitCOP2(u32 instruction) -> bool; + bool callInstructionPrologue = false; bump_allocator allocator; Pool* pools[1 << 21]; //2_MiB * sizeof(void*) == 16_MiB } recompiler{*this}; diff --git a/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp b/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp old mode 100644 new mode 100755 index 5fa69aa3d3..96befd01c7 --- a/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp @@ -2,63 +2,19 @@ auto CPU::DataCache::Line::hit(u32 address) const -> bool { return valid && tag == (address & ~0x0000'0fff); } -template auto CPU::DataCache::Line::fill(u32 address, u64 data) -> void { - cpu.step(40 * 2); - valid = 1; - dirty = 1; - tag = address & ~0x0000'0fff; - //read words according to critical doubleword first scheme - switch(address & 8) { - case 0: - if constexpr(Size != Dual) { - words[0] = cpu.busRead(tag | index | 0x0); - words[1] = cpu.busRead(tag | index | 0x4); - } - write(address, data); - words[2] = cpu.busRead(tag | index | 0x8); - words[3] = cpu.busRead(tag | index | 0xc); - break; - case 8: - if constexpr(Size != Dual) { - words[2] = cpu.busRead(tag | index | 0x8); - words[3] = cpu.busRead(tag | index | 0xc); - } - write(address, data); - words[0] = cpu.busRead(tag | index | 0x0); - words[1] = cpu.busRead(tag | index | 0x4); - break; - } -} - auto CPU::DataCache::Line::fill(u32 address) -> void { cpu.step(40 * 2); - valid = 1; - dirty = 0; - tag = address & ~0x0000'0fff; - //read words according to critical doubleword first scheme - switch(address & 8) { - case 0: - words[0] = cpu.busRead(tag | index | 0x0); - words[1] = cpu.busRead(tag | index | 0x4); - words[2] = cpu.busRead(tag | index | 0x8); - words[3] = cpu.busRead(tag | index | 0xc); - break; - case 8: - 
words[2] = cpu.busRead(tag | index | 0x8); - words[3] = cpu.busRead(tag | index | 0xc); - words[0] = cpu.busRead(tag | index | 0x0); - words[1] = cpu.busRead(tag | index | 0x4); - break; - } + valid = 1; + dirty = 0; + tag = address & ~0x0000'0fff; + fillPc = cpu.ipu.pc; + cpu.busReadBurst(tag | index, words); } auto CPU::DataCache::Line::writeBack() -> void { cpu.step(40 * 2); dirty = 0; - cpu.busWrite(tag | index | 0x0, words[0]); - cpu.busWrite(tag | index | 0x4, words[1]); - cpu.busWrite(tag | index | 0x8, words[2]); - cpu.busWrite(tag | index | 0xc, words[3]); + cpu.busWriteBurst(tag | index, words); } auto CPU::DataCache::line(u32 vaddr) -> Line& { @@ -86,7 +42,8 @@ auto CPU::DataCache::Line::write(u32 address, u64 data) -> void { words[address >> 2 & 2 | 0] = data >> 32; words[address >> 2 & 2 | 1] = data >> 0; } - dirty = 1; + dirty |= ((1 << Size) - 1) << (address & 0xF); + dirtyPc = cpu.ipu.pc; } template @@ -101,12 +58,21 @@ auto CPU::DataCache::read(u32 vaddr, u32 address) -> u64 { return line.read(address); } +auto CPU::DataCache::readDebug(u32 vaddr, u32 address) -> u8 { + auto& line = this->line(vaddr); + if(!line.hit(address)) { + Thread dummyThread{}; + return bus.read(address, dummyThread, "Ares Debugger"); + } + return line.read(address); +} + template auto CPU::DataCache::write(u32 vaddr, u32 address, u64 data) -> void { auto& line = this->line(vaddr); if(!line.hit(address)) { if(line.valid && line.dirty) line.writeBack(); - return line.fill(address, data); + line.fill(address); } else { cpu.step(1 * 2); } @@ -123,3 +89,6 @@ auto CPU::DataCache::power(bool reset) -> void { for(auto& word : line.words) word = 0; } } + +template +auto CPU::DataCache::Line::write(u32 address, u64 data) -> void; diff --git a/waterbox/ares64/ares/ares/n64/cpu/debugger.cpp b/waterbox/ares64/ares/ares/n64/cpu/debugger.cpp old mode 100644 new mode 100755 index dc284e5b15..3a061cc9d9 --- a/waterbox/ares64/ares/ares/n64/cpu/debugger.cpp +++ 
b/waterbox/ares64/ares/ares/n64/cpu/debugger.cpp @@ -2,6 +2,12 @@ auto CPU::Debugger::load(Node::Object parent) -> void { tracer.instruction = parent->append("Instruction", "CPU"); tracer.instruction->setAddressBits(64, 2); tracer.instruction->setDepth(64); + if constexpr(Accuracy::CPU::Recompiler) { + tracer.instruction->setToggle([&] { + cpu.recompiler.reset(); + cpu.recompiler.callInstructionPrologue = tracer.instruction->enabled(); + }); + } tracer.exception = parent->append("Exception", "CPU"); tracer.interrupt = parent->append("Interrupt", "CPU"); diff --git a/waterbox/ares64/ares/ares/n64/cpu/disassembler.cpp b/waterbox/ares64/ares/ares/n64/cpu/disassembler.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/exceptions.cpp b/waterbox/ares64/ares/ares/n64/cpu/exceptions.cpp old mode 100644 new mode 100755 index 310662f496..00189db5c1 --- a/waterbox/ares64/ares/ares/n64/cpu/exceptions.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/exceptions.cpp @@ -1,6 +1,11 @@ auto CPU::Exception::trigger(u32 code, u32 coprocessor, bool tlbMiss) -> void { self.debugger.exception(code); + if(code != 0) { + auto sig = (code == 2 || code == 3) ? GDB::Signal::SEGV : GDB::Signal::TRAP; + GDB::server.reportSignal(sig, self.ipu.pc); + } + u64 vectorBase = !self.scc.status.vectorLocation ? 
(s32)0x8000'0000 : (s32)0xbfc0'0200; u16 vectorOffset = 0x0180; diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-cop2.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-cop2.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-fpu.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-fpu.cpp old mode 100644 new mode 100755 index 796d263606..5da902a278 --- a/waterbox/ares64/ares/ares/n64/cpu/interpreter-fpu.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/interpreter-fpu.cpp @@ -7,7 +7,7 @@ auto CPU::FPU::setFloatingPointMode(bool mode) -> void { } } -template<> auto CPU::fgr(u32 index) -> s32& { +template<> auto CPU::fgr_t(u32 index) -> s32& { if(scc.status.floatingPointMode) { return fpu.r[index].s32; } else if(index & 1) { @@ -17,21 +17,41 @@ template<> auto CPU::fgr(u32 index) -> s32& { } } -template<> auto CPU::fgr(u32 index) -> u32& { - return (u32&)fgr(index); +template<> auto CPU::fgr_s(u32 index) -> s32& { + if(scc.status.floatingPointMode) { + return fpu.r[index].s32; + } else { + return fpu.r[index & ~1].s32; + } } -template<> auto CPU::fgr(u32 index) -> f32& { +template<> auto CPU::fgr_d(u32 index) -> s32& { + fpu.r[index].s32h = 0; + return fpu.r[index].s32; +} + +template<> auto CPU::fgr_t(u32 index) -> u32& { + return (u32&)fgr_t(index); +} + +template<> auto CPU::fgr_t(u32 index) -> f32& { + return fpu.r[index].f32; +} + +template<> auto CPU::fgr_d(u32 index) -> f32& { + fpu.r[index].f32h = 0; + return fpu.r[index].f32; +} + +template<> auto CPU::fgr_s(u32 index) -> f32& { if(scc.status.floatingPointMode) { return fpu.r[index].f32; - } else if(index & 1) { - return fpu.r[index & ~1].f32h; } else { return fpu.r[index & ~1].f32; } } -template<> auto CPU::fgr(u32 index) -> s64& { +template<> auto CPU::fgr_t(u32 index) -> s64& { if(scc.status.floatingPointMode) { return fpu.r[index].s64; } else { @@ -39,11 +59,31 @@ template<> auto CPU::fgr(u32 index) -> s64& { } } -template<> auto CPU::fgr(u32 index) -> u64& 
{ - return (u64&)fgr(index); +template<> auto CPU::fgr_d(u32 index) -> s64& { + return fpu.r[index].s64; } -template<> auto CPU::fgr(u32 index) -> f64& { +template<> auto CPU::fgr_s(u32 index) -> s64& { + return fgr_t(index); +} + +template<> auto CPU::fgr_t(u32 index) -> u64& { + return (u64&)fgr_t(index); +} + +template<> auto CPU::fgr_s(u32 index) -> u64& { + return fgr_t(index); +} + +template<> auto CPU::fgr_t(u32 index) -> f64& { + return fpu.r[index].f64; +} + +template<> auto CPU::fgr_d(u32 index) -> f64& { + return fgr_t(index); +} + +template<> auto CPU::fgr_s(u32 index) -> f64& { if(scc.status.floatingPointMode) { return fpu.r[index].f64; } else { @@ -210,8 +250,9 @@ auto CPU::checkFPUExceptions() -> bool { #define CHECK_FPE_IMPL(type, res, operation, convert) \ fenv.clearExcept(); \ - type res = [&]() noinline -> type { return operation; }(); \ - if (checkFPUExceptions()) return; + volatile type v##res = [&]() noinline -> type { return operation; }(); \ + if (checkFPUExceptions()) return; \ + type res = v##res; #define CHECK_FPE(type, res, operation) CHECK_FPE_IMPL(type, res, operation, false) #define CHECK_FPE_CONV(type, res, operation) CHECK_FPE_IMPL(type, res, operation, true) @@ -240,7 +281,9 @@ auto CPU::fpuCheckStart() -> bool { return true; } -auto CPU::fpuCheckInput(f32& f) -> bool { +template +auto CPU::fpuCheckInput(T& f) -> bool { + static_assert(std::is_same_v || std::is_same_v); switch (fpclassify(f)) { case FP_SUBNORMAL: if(fpeUnimplemented()) return exception.floatingPoint(), false; @@ -253,19 +296,23 @@ auto CPU::fpuCheckInput(f32& f) -> bool { return true; } -auto CPU::fpuCheckInput(f64& f) -> bool { - switch (fpclassify(f)) { - case FP_SUBNORMAL: +template +auto CPU::fpuCheckInputs(T& f1, T& f2) -> bool { + static_assert(std::is_same_v || std::is_same_v); + int cl1 = fpclassify(f1), cl2 = fpclassify(f2); + if((cl1 == FP_NAN && !qnan(f1)) || (cl2 == FP_NAN && !qnan(f2))) { if(fpeUnimplemented()) return exception.floatingPoint(), false; 
- return true; - case FP_NAN: - if(qnan(f) ? fpeInvalidOperation() : fpeUnimplemented()) - return exception.floatingPoint(), false; - return true; + } + if(cl1 == FP_SUBNORMAL || cl2 == FP_SUBNORMAL) { + if(fpeUnimplemented()) return exception.floatingPoint(), false; + } + if((cl1 == FP_NAN && qnan(f1)) || (cl2 == FP_NAN && qnan(f2))) { + if(fpeInvalidOperation()) return exception.floatingPoint(), false; } return true; } + template auto fpuFlushResult(T f, u32 roundMode) -> T { @@ -364,9 +411,9 @@ auto CPU::fpuCheckInputConv(f64& f) -> bool { } #define CF fpu.csr.compare -#define FD(type) fgr(fd) -#define FS(type) fgr(fs) -#define FT(type) fgr(ft) +#define FD(type) fgr_d(fd) +#define FS(type) fgr_s(fs) +#define FT(type) fgr_t(ft) auto CPU::BC1(bool value, bool likely, s16 imm) -> void { if(!fpuCheckStart()) return; @@ -416,8 +463,7 @@ auto CPU::FABS_D(u8 fd, u8 fs) -> void { auto CPU::FADD_S(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; f32 ffs = FS(f32), fft = FT(f32); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f32, ffd, FS(f32) + FT(f32)); if(!fpuCheckOutput(ffd)) return; FD(f32) = ffd; @@ -427,8 +473,7 @@ auto CPU::FADD_S(u8 fd, u8 fs, u8 ft) -> void { auto CPU::FADD_D(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; auto ffs = FS(f64), fft = FT(f64); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f64, ffd, ffs + fft); if(!fpuCheckOutput(ffd)) return; FD(f64) = ffd; @@ -758,8 +803,7 @@ auto CPU::FCVT_W_D(u8 fd, u8 fs) -> void { auto CPU::FDIV_S(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; auto ffs = FS(f32), fft = FT(f32); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f32, ffd, ffs / fft); if(!fpuCheckOutput(ffd)) return; FD(f32) = ffd; @@ -769,8 +813,7 @@ auto CPU::FDIV_S(u8 fd, u8 fs, u8 ft) -> void { auto 
CPU::FDIV_D(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; auto ffs = FS(f64), fft = FT(f64); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f64, ffd, ffs / fft); if(!fpuCheckOutput(ffd)) return; FD(f64) = ffd; @@ -814,8 +857,7 @@ auto CPU::FFLOOR_W_D(u8 fd, u8 fs) -> void { } auto CPU::FMOV_S(u8 fd, u8 fs) -> void { - if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); - FD(f32) = FS(f32); + return FMOV_D(fd, fs); } auto CPU::FMOV_D(u8 fd, u8 fs) -> void { @@ -826,8 +868,7 @@ auto CPU::FMOV_D(u8 fd, u8 fs) -> void { auto CPU::FMUL_S(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; auto ffs = FS(f32), fft = FT(f32); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f32, ffd, ffs * fft); if(!fpuCheckOutput(ffd)) return; FD(f32) = ffd; @@ -837,8 +878,7 @@ auto CPU::FMUL_S(u8 fd, u8 fs, u8 ft) -> void { auto CPU::FMUL_D(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; auto ffs = FS(f64), fft = FT(f64); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f64, ffd, ffs * fft); if(!fpuCheckOutput(ffd)) return; FD(f64) = ffd; @@ -926,8 +966,7 @@ auto CPU::FSQRT_D(u8 fd, u8 fs) -> void { auto CPU::FSUB_S(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; auto ffs = FS(f32), fft = FT(f32); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f32, ffd, ffs - fft); if(!fpuCheckOutput(ffd)) return; FD(f32) = ffd; @@ -937,8 +976,7 @@ auto CPU::FSUB_S(u8 fd, u8 fs, u8 ft) -> void { auto CPU::FSUB_D(u8 fd, u8 fs, u8 ft) -> void { if(!fpuCheckStart()) return; auto ffs = FS(f64), fft = FT(f64); - if(!fpuCheckInput(ffs)) return; - if(!fpuCheckInput(fft)) return; + if(!fpuCheckInputs(ffs, fft)) return; CHECK_FPE(f64, ffd, ffs - fft); if(!fpuCheckOutput(ffd)) 
return; FD(f64) = ffd; @@ -995,14 +1033,14 @@ auto CPU::LWC1(u8 ft, cr64& rs, s16 imm) -> void { if(auto data = read(rs.u64 + imm)) FT(u32) = *data; } -auto CPU::MFC1(r64& rt, u8 fs) -> void { +auto CPU::MFC1(r64& rt, u8 ft) -> void { if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); - rt.u64 = FS(s32); + rt.u64 = FT(s32); } -auto CPU::MTC1(cr64& rt, u8 fs) -> void { +auto CPU::MTC1(cr64& rt, u8 ft) -> void { if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); - FS(s32) = rt.u32; + FT(s32) = rt.u32; } auto CPU::SDC1(u8 ft, cr64& rs, s16 imm) -> void { diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/cpu/memory.cpp b/waterbox/ares64/ares/ares/n64/cpu/memory.cpp old mode 100644 new mode 100755 index dc0d44ec3c..e319d26360 --- a/waterbox/ares64/ares/ares/n64/cpu/memory.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/memory.cpp @@ -140,14 +140,28 @@ auto CPU::devirtualizeFast(u64 vaddr) -> u64 { return devirtualizeCache.pbase = 0; } +auto CPU::devirtualizeDebug(u64 vaddr) -> u64 { + return devirtualizeFast(vaddr); // this wrapper preserves the inlining of 'devirtualizeFast' +} + template inline auto CPU::busWrite(u32 address, u64 data) -> void { - bus.write(address, data, *this); + bus.write(address, data, *this, "CPU"); +} + +template +inline auto CPU::busWriteBurst(u32 address, u32 *data) -> void { + bus.writeBurst(address, data, *this); } template inline auto CPU::busRead(u32 address) -> u64 { - return bus.read(address, *this); + return bus.read(address, *this, "CPU"); +} + +template +inline 
auto CPU::busReadBurst(u32 address, u32 *data) -> void { + return bus.readBurst(address, data, *this); } auto CPU::fetch(u64 vaddr) -> maybe { @@ -185,6 +199,8 @@ auto CPU::fetch(u64 vaddr) -> maybe { template auto CPU::read(u64 vaddr) -> maybe { if(vaddrAlignedError(vaddr, false)) return nothing; + GDB::server.reportMemRead(vaddr, Size); + switch(segment(vaddr)) { case Context::Segment::Unused: step(1 * 2); @@ -215,10 +231,37 @@ auto CPU::read(u64 vaddr) -> maybe { unreachable; } +auto CPU::readDebug(u64 vaddr) -> u8 { + Thread dummyThread{}; + + switch(segment(vaddr)) { + case Context::Segment::Unused: return 0; + case Context::Segment::Mapped: + if(auto match = tlb.load(vaddr, true)) { + if(match.cache) return dcache.readDebug(vaddr, match.address & context.physMask); + return bus.read(match.address & context.physMask, dummyThread, "Ares Debugger"); + } + return 0; + case Context::Segment::Cached: + return dcache.readDebug(vaddr, vaddr & 0x1fff'ffff); + case Context::Segment::Cached32: + return dcache.readDebug(vaddr, vaddr & 0xffff'ffff); + case Context::Segment::Direct: + return bus.read(vaddr & 0x1fff'ffff, dummyThread, "Ares Debugger"); + case Context::Segment::Direct32: + return bus.read(vaddr & 0xffff'ffff, dummyThread, "Ares Debugger"); + } + + unreachable; +} + template auto CPU::write(u64 vaddr0, u64 data, bool alignedError) -> bool { if(alignedError && vaddrAlignedError(vaddr0, true)) return false; u64 vaddr = vaddr0 & ~((u64)Size - 1); + + GDB::server.reportMemWrite(vaddr0, Size); + switch(segment(vaddr)) { case Context::Segment::Unused: step(1 * 2); diff --git a/waterbox/ares64/ares/ares/n64/cpu/recompiler.cpp b/waterbox/ares64/ares/ares/n64/cpu/recompiler.cpp old mode 100644 new mode 100755 index 030a2f0957..7c870ae0bc --- a/waterbox/ares64/ares/ares/n64/cpu/recompiler.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/recompiler.cpp @@ -1,12 +1,17 @@ auto CPU::Recompiler::pool(u32 address) -> Pool* { auto& pool = pools[address >> 8 & 0x1fffff]; - 
if(!pool) pool = (Pool*)allocator.acquire(sizeof(Pool)); + if(!pool) { + pool = (Pool*)allocator.acquire(sizeof(Pool)); + memory::jitprotect(false); + *pool = {}; + memory::jitprotect(true); + } return pool; } -auto CPU::Recompiler::block(u32 vaddr, u32 address) -> Block* { +auto CPU::Recompiler::block(u32 vaddr, u32 address, bool singleInstruction) -> Block* { if(auto block = pool(address)->blocks[address >> 2 & 0x3f]) return block; - auto block = emit(vaddr, address); + auto block = emit(vaddr, address, singleInstruction); pool(address)->blocks[address >> 2 & 0x3f] = block; memory::jitprotect(true); return block; @@ -18,12 +23,10 @@ auto CPU::Recompiler::fastFetchBlock(u32 address) -> Block* { return nullptr; } -auto CPU::Recompiler::emit(u32 vaddr, u32 address) -> Block* { +auto CPU::Recompiler::emit(u32 vaddr, u32 address, bool singleInstruction) -> Block* { if(unlikely(allocator.available() < 1_MiB)) { print("CPU allocator flush\n"); - memory::jitprotect(false); - allocator.release(bump_allocator::zero_fill); - memory::jitprotect(true); + allocator.release(); reset(); } @@ -33,7 +36,11 @@ auto CPU::Recompiler::emit(u32 vaddr, u32 address) -> Block* { Thread thread; bool hasBranched = 0; while(true) { - u32 instruction = bus.read(address, thread); + u32 instruction = bus.read(address, thread, "Ares Recompiler"); + if(callInstructionPrologue) { + mov32(reg(1), imm(instruction)); + call(&CPU::instructionPrologue); + } bool branched = emitEXECUTE(instruction); if(unlikely(instruction == 0x1000'ffff //beq 0,0, || instruction == (2 << 26 | vaddr >> 2 & 0x3ff'ffff))) { //j @@ -44,7 +51,7 @@ auto CPU::Recompiler::emit(u32 vaddr, u32 address) -> Block* { call(&CPU::instructionEpilogue); vaddr += 4; address += 4; - if(hasBranched || (address & 0xfc) == 0) break; //block boundary + if(hasBranched || (address & 0xfc) == 0 || singleInstruction) break; //block boundary hasBranched = branched; testJumpEpilog(); } diff --git 
a/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp b/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp old mode 100644 new mode 100755 index 48f9d03cfd..fc6880f582 --- a/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp @@ -117,6 +117,7 @@ auto CPU::serialize(serializer& s) -> void { s(scc.epcError); s(scc.latch); s(scc.nmiPending); + s(scc.sysadFrozen); for(auto& r : fpu.r) s(r.u64); s(fpu.csr.roundMode); diff --git a/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp b/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp old mode 100644 new mode 100755 index 94fec4cfa9..d596179a09 --- a/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp @@ -1,39 +1,40 @@ -auto CPU::TLB::load(u64 vaddr, const Entry& entry) -> Match { +auto CPU::TLB::load(u64 vaddr, const Entry& entry, bool noExceptions) -> maybe { + if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) return nothing; + if((vaddr & entry.addressMaskHi) != entry.virtualAddress) return nothing; + if(vaddr >> 62 != entry.region) return nothing; bool lo = vaddr & entry.addressSelect; if(!entry.valid[lo]) { + if(noExceptions)return Match{false}; + self.addressException(vaddr); self.debugger.tlbLoadInvalid(vaddr); self.exception.tlbLoadInvalid(); - return {false}; + return Match{false}; } physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo); self.debugger.tlbLoad(vaddr, physicalAddress); - return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; + return Match{true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; } -auto CPU::TLB::load(u64 vaddr) -> Match { +auto CPU::TLB::load(u64 vaddr, bool noExceptions) -> Match { for(auto& entry : this->tlbCache.entry) { if(!entry.entry) continue; - if(!entry.entry->globals && entry.entry->addressSpaceID != self.scc.tlb.addressSpaceID) continue; - if((vaddr & entry.entry->addressMaskHi) != entry.entry->virtualAddress) continue; - if(vaddr >> 62 != 
entry.entry->region) continue; - if(auto match = load(vaddr, *entry.entry)) { + if(auto match = load(vaddr, *entry.entry, noExceptions)) { entry.frequency++; - return match; + return *match; } } for(auto& entry : this->entry) { - if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue; - if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue; - if(vaddr >> 62 != entry.region) continue; - if(auto match = load(vaddr, entry)) { + if(auto match = load(vaddr, entry, noExceptions)) { this->tlbCache.insert(entry); - return match; + return *match; } } + if(noExceptions)return {false}; + self.addressException(vaddr); self.debugger.tlbLoadMiss(vaddr); self.exception.tlbLoadMiss(); @@ -56,46 +57,41 @@ auto CPU::TLB::loadFast(u64 vaddr) -> Match { return {false, 0, 0}; } -auto CPU::TLB::store(u64 vaddr, const Entry& entry) -> Match { +auto CPU::TLB::store(u64 vaddr, const Entry& entry) -> maybe { + if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) return nothing; + if((vaddr & entry.addressMaskHi) != entry.virtualAddress) return nothing; + if(vaddr >> 62 != entry.region) return nothing; bool lo = vaddr & entry.addressSelect; if(!entry.valid[lo]) { self.addressException(vaddr); self.debugger.tlbStoreInvalid(vaddr); self.exception.tlbStoreInvalid(); - return {false}; + return Match{false}; } if(!entry.dirty[lo]) { self.addressException(vaddr); self.debugger.tlbModification(vaddr); self.exception.tlbModification(); - return {false}; + return Match{false}; } physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo); self.debugger.tlbStore(vaddr, physicalAddress); - return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; + return Match{true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; } auto CPU::TLB::store(u64 vaddr) -> Match { for(auto& entry : this->tlbCache.entry) { if(!entry.entry) continue; - if(!entry.entry->globals && entry.entry->addressSpaceID != self.scc.tlb.addressSpaceID) 
continue; - if((vaddr & entry.entry->addressMaskHi) != entry.entry->virtualAddress) continue; - if(vaddr >> 62 != entry.entry->region) continue; - if(auto match = store(vaddr, *entry.entry)) { entry.frequency++; - return match; + return *match; } } for(auto& entry : this->entry) { - if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue; - if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue; - if(vaddr >> 62 != entry.region) continue; - if(auto match = store(vaddr, entry)) { this->tlbCache.insert(entry); - return match; + return *match; } } diff --git a/waterbox/ares64/ares/ares/n64/dd/controller.cpp b/waterbox/ares64/ares/ares/n64/dd/controller.cpp old mode 100644 new mode 100755 index aaa6384f4d..8c9810cd9c --- a/waterbox/ares64/ares/ares/n64/dd/controller.cpp +++ b/waterbox/ares64/ares/ares/n64/dd/controller.cpp @@ -144,22 +144,22 @@ auto DD::command(n16 command) -> void { } } break; case Command::SetRTCYearMonth: { - rtc.write(0, io.data); + rtc.ram.write(0, io.data); } break; case Command::SetRTCDayHour: { - rtc.write(2, io.data); + rtc.ram.write(2, io.data); } break; case Command::SetRTCMinuteSecond: { - rtc.write(4, io.data); + rtc.ram.write(4, io.data); } break; case Command::GetRTCYearMonth: { - io.data = rtc.read(0); + io.data = rtc.ram.read(0); } break; case Command::GetRTCDayHour: { - io.data = rtc.read(2); + io.data = rtc.ram.read(2); } break; case Command::GetRTCMinuteSecond: { - io.data = rtc.read(4); + io.data = rtc.ram.read(4); } break; case Command::SetLEDBlinkRate: { if (io.data.bit(24,31) != 0) ctl.ledOnTime = io.data.bit(24,31); @@ -187,7 +187,11 @@ auto DD::command(n16 command) -> void { auto DD::mechaResponse() -> void { if(state.seek) { state.seek = 0; - motorActive(); + if (io.status.diskPresent) { + motorActive(); + } else { + motorStop(); + } } io.status.busyState = 0; raise(IRQ::MECHA); diff --git a/waterbox/ares64/ares/ares/n64/dd/dd.cpp b/waterbox/ares64/ares/ares/n64/dd/dd.cpp old mode 100644 
new mode 100755 index 316e53903e..76336b8646 --- a/waterbox/ares64/ares/ares/n64/dd/dd.cpp +++ b/waterbox/ares64/ares/ares/n64/dd/dd.cpp @@ -24,13 +24,14 @@ auto DD::load(Node::Object parent) -> void { c2s.allocate(0x400); ds.allocate(0x100); ms.allocate(0x40); - rtc.allocate(0x10); // TODO: Detect correct CIC from ipl rom if(auto fp = system.pak->read("64dd.ipl.rom")) { iplrom.load(fp); } + rtc.load(); + debugger.load(parent->append("Nintendo 64DD")); } @@ -79,9 +80,9 @@ auto DD::connect() -> void { if(auto fp = pak->read("program.disk")) { disk.allocate(fp->size()); disk.load(fp); + io.status.diskChanged = 1; + io.status.diskPresent = 1; } - - rtcLoad(); } auto DD::disconnect() -> void { @@ -89,7 +90,34 @@ auto DD::disconnect() -> void { save(); pak.reset(); + disk.reset(); information = {}; + + if(iplrom) { + string id; + id.append((char)iplrom.read(0x3b)); + id.append((char)iplrom.read(0x3c)); + id.append((char)iplrom.read(0x3d)); + id.append((char)iplrom.read(0x3e)); + if(id.match("NDDJ")) dd.information.cic = "CIC-NUS-8303"; + if(id.match("NDDE")) dd.information.cic = "CIC-NUS-DDUS"; + if(id.match("NDXJ")) dd.information.cic = "CIC-NUS-8401"; + } + + io.status.diskPresent = 0; + + //Deal with cases when the disk is removed while in use + if(io.status.busyState) { + //MECHA + io.status.mechaError = 1; + } + + if(io.bm.start) { + //BM + io.bm.start = 0; + io.bm.error = 1; + } + motorStop(); } auto DD::save() -> void { @@ -100,7 +128,7 @@ auto DD::save() -> void { } #endif - rtcSave(); + rtc.save(); } auto DD::power(bool reset) -> void { @@ -114,6 +142,9 @@ auto DD::power(bool reset) -> void { state = {}; io.status.resetState = 1; + io.status.diskChanged = 1; + if(disk) io.status.diskPresent = 1; + io.id = 3; if(dd.information.cic.match("CIC-NUS-8401")) io.id = 4; diff --git a/waterbox/ares64/ares/ares/n64/dd/dd.hpp b/waterbox/ares64/ares/ares/n64/dd/dd.hpp old mode 100644 new mode 100755 index d6b9f2a7e8..ff94407e74 --- a/waterbox/ares64/ares/ares/n64/dd/dd.hpp 
+++ b/waterbox/ares64/ares/ares/n64/dd/dd.hpp @@ -11,7 +11,6 @@ struct DD : Memory::PI
{ Memory::Writable c2s; Memory::Writable ds; Memory::Writable ms; - Memory::Writable rtc; Memory::Writable disk; Memory::Writable error; @@ -27,6 +26,21 @@ struct DD : Memory::PI
{ } tracer; } debugger; + struct RTC { + Memory::Writable ram; + + //rtc.cpp + auto load() -> void; + auto reset() -> void; + auto save() -> void; + auto serialize(serializer& s) -> void; + auto tick(u32 offset) -> void; + auto tickClock() -> void; + auto tickSecond() -> void; + auto valid() -> bool; + auto daysInMonth(u8 month, u8 year) -> u8; + } rtc; + auto title() const -> string { return information.title; } auto cic() const -> string { return information.cic; } @@ -59,13 +73,6 @@ struct DD : Memory::PI
{ auto motorStop() -> void; auto motorChange() -> void; - //rtc.cpp - auto rtcLoad() -> void; - auto rtcSave() -> void; - auto rtcTick(u32 offset) -> void; - auto rtcTickClock() -> void; - auto rtcTickSecond() -> void; - //io.cpp auto readHalf(u32 address) -> u16; auto writeHalf(u32 address, u16 data) -> void; @@ -125,6 +132,7 @@ private: n1 writeProtect; n1 mechaError; n1 diskChanged; + n1 diskPresent; } status; n16 currentTrack; diff --git a/waterbox/ares64/ares/ares/n64/dd/debugger.cpp b/waterbox/ares64/ares/ares/n64/dd/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/dd/drive.cpp b/waterbox/ares64/ares/ares/n64/dd/drive.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/dd/io.cpp b/waterbox/ares64/ares/ares/n64/dd/io.cpp old mode 100644 new mode 100755 index a85269edd9..6d8423f181 --- a/waterbox/ares64/ares/ares/n64/dd/io.cpp +++ b/waterbox/ares64/ares/ares/n64/dd/io.cpp @@ -20,7 +20,7 @@ auto DD::readHalf(u32 address) -> u16 { data.bit(4) = io.status.spindleMotorStopped; data.bit(6) = io.status.resetState; data.bit(7) = io.status.busyState; - data.bit(8) = (bool)disk; //disk present + data.bit(8) = io.status.diskPresent; data.bit(9) = irq.mecha.line; data.bit(10) = irq.bm.line; data.bit(11) = io.bm.error; @@ -57,7 +57,7 @@ auto DD::readHalf(u32 address) -> u16 { data.bit(0,7) = io.error.sector; data.bit(8) = io.error.selfStop; data.bit(9) = io.error.clockUnlock; - data.bit(10) = ~(bool)disk; //no disk + data.bit(10) = ~io.status.diskPresent; //no disk data.bit(11) = io.error.offTrack; data.bit(12) = io.error.overrun; data.bit(13) = io.error.spindle; @@ -123,7 +123,6 @@ auto DD::readHalf(u32 address) -> u16 { if(address == 36) { } - debugger.io(Read, address, data); return data; } @@ -195,7 +194,7 @@ auto DD::writeHalf(u32 address, u16 data_) -> void { //ASIC_HARD_RESET if(address == 16) { - if((data >> 16) == 0xAAAA) { + if(data == 0xAAAA) { power(true); } } @@ -242,18 +241,20 @@ auto 
DD::writeHalf(u32 address, u16 data_) -> void { //ASIC_TEST_PIN_SEL if(address == 36) { } - - debugger.io(Write, address, data); } auto DD::readWord(u32 address) -> u32 { + address = (address & 0x7f); n32 data; data.bit(16,31) = readHalf(address + 0); data.bit( 0,15) = readHalf(address + 2); + debugger.io(Read, address >> 2, data); return (u32)data; } auto DD::writeWord(u32 address, u32 data) -> void { + address = (address & 0x7f); writeHalf(address + 0, data >> 16); writeHalf(address + 2, data & 0xffff); + debugger.io(Write, address >> 2, data); } diff --git a/waterbox/ares64/ares/ares/n64/dd/rtc.cpp b/waterbox/ares64/ares/ares/n64/dd/rtc.cpp old mode 100644 new mode 100755 index 961a5a85ea..4fa2fea0a8 --- a/waterbox/ares64/ares/ares/n64/dd/rtc.cpp +++ b/waterbox/ares64/ares/ares/n64/dd/rtc.cpp @@ -1,75 +1,119 @@ -auto DD::rtcLoad() -> void { +auto DD::RTC::load() -> void { + ram.allocate(0x10); #if false if(auto fp = system.pak->read("time.rtc")) { - rtc.load(fp); + ram.load(fp); } #endif + //byte 0 to 7 = raw rtc time (last updated, only 6 bytes are used) n64 check = 0; - for(auto n : range(8)) check.byte(n) = rtc.read(n); + for(auto n : range(8)) check.byte(n) = ram.read(n); if(!~check) return; //new save file + //check for invalid time info, if invalid, set time info to something invalid and ignore the rest + if (!valid()) { + for(auto n : range(8)) ram.write(n, 0xff); + return; + } + + //byte 8 to 15 = timestamp of when the last save was made n64 timestamp = 0; - for(auto n : range(8)) timestamp.byte(n) = rtc.read(8 + n); + for(auto n : range(8)) timestamp.byte(n) = ram.read(8 + n); if(!~timestamp) return; //new save file + //update based on the amount of time that has passed since the last save timestamp = platform->time() - timestamp; - while(timestamp--) rtcTickSecond(); + while(timestamp--) tickSecond(); } -auto DD::rtcSave() -> void { - n64 timestamp = platform->time(); - for(auto n : range(8)) rtc.write(8 + n, timestamp.byte(n)); +auto DD::RTC::reset() 
-> void { + ram.reset(); +} +auto DD::RTC::save() -> void { #if false + n64 timestamp = platform->time(); + for(auto n : range(8)) ram.write(8 + n, timestamp.byte(n)); + if(auto fp = system.pak->write("time.rtc")) { - rtc.save(fp); + ram.save(fp); } #endif } -auto DD::rtcTick(u32 offset) -> void { - u8 n = rtc.read(offset); - if((++n & 0xf) > 9) n = (n & 0xf0) + 0x10; - if((n & 0xf0) > 0x90) n = 0; - rtc.write(offset, n); +auto DD::RTC::serialize(serializer& s) -> void { + s(ram); } -auto DD::rtcTickClock() -> void { - rtcTickSecond(); +auto DD::RTC::tick(u32 offset) -> void { + u8 n = ram.read(offset); + if((++n & 0xf) > 9) n = (n & 0xf0) + 0x10; + if((n & 0xf0) > 0x90) n = 0; + ram.write(offset, n); +} + +auto DD::RTC::tickClock() -> void { + tickSecond(); queue.remove(Queue::DD_Clock_Tick); queue.insert(Queue::DD_Clock_Tick, 187'500'000); } -auto DD::rtcTickSecond() -> void { +auto DD::RTC::tickSecond() -> void { + if (!valid()) return; + //second - rtcTick(5); - if(rtc.read(5) < 0x60) return; - rtc.write(5, 0); + tick(5); + if(ram.read(5) < 0x60) return; + ram.write(5, 0); //minute - rtcTick(4); - if(rtc.read(4) < 0x60) return; - rtc.write(4, 0); + tick(4); + if(ram.read(4) < 0x60) return; + ram.write(4, 0); //hour - rtcTick(3); - if(rtc.read(3) < 0x24) return; - rtc.write(3, 0); + tick(3); + if(ram.read(3) < 0x24) return; + ram.write(3, 0); //day - u32 daysInMonth[12] = {0x31, 0x28, 0x31, 0x30, 0x31, 0x30, 0x31, 0x31, 0x30, 0x31, 0x30, 0x31}; - if(rtc.read(0) && !(BCD::decode(rtc.read(0)) % 4)) daysInMonth[1]++; - - rtcTick(2); - if(rtc.read(2) <= daysInMonth[BCD::decode(rtc.read(1))-1]) return; - rtc.write(2, 1); + tick(2); + if(ram.read(2) <= BCD::encode(chrono::daysInMonth(BCD::decode(ram.read(1)), BCD::decode(ram.read(0))))) return; + ram.write(2, 1); //month - rtcTick(1); - if(rtc.read(1) < 0x12) return; - rtc.write(1, 1); + tick(1); + if(ram.read(1) <= 0x12) return; + ram.write(1, 1); //year - rtcTick(0); + tick(0); } + +auto DD::RTC::valid() -> bool { + 
//check validity of ram rtc data (if it's BCD valid or not) + for(auto n : range(6)) { + if((ram.read(n) & 0x0f) >= 0x0a) return false; + } + + //check for valid values of each byte + //year + if(ram.read(0) >= 0xa0) return false; + //second + if(ram.read(5) >= 0x60) return false; + //minute + if(ram.read(4) >= 0x60) return false; + //hour + if(ram.read(3) >= 0x24) return false; + //month + if(ram.read(1) > 0x12) return false; + if(ram.read(1) < 1) return false; + //day + if(ram.read(2) < 1) return false; + if(ram.read(2) > BCD::encode(chrono::daysInMonth(BCD::decode(ram.read(1)), BCD::decode(ram.read(0))))) return false; + + //everything is valid + return true; +} + diff --git a/waterbox/ares64/ares/ares/n64/dd/serialization.cpp b/waterbox/ares64/ares/ares/n64/dd/serialization.cpp old mode 100644 new mode 100755 index f9047a15f6..0d5c8d41ed --- a/waterbox/ares64/ares/ares/n64/dd/serialization.cpp +++ b/waterbox/ares64/ares/ares/n64/dd/serialization.cpp @@ -1,4 +1,6 @@ auto DD::serialize(serializer& s) -> void { + s(rtc); + s(irq.bm.line); s(irq.bm.mask); s(irq.mecha.line); diff --git a/waterbox/ares64/ares/ares/n64/memory/bus.hpp b/waterbox/ares64/ares/ares/n64/memory/bus.hpp old mode 100644 new mode 100755 index bff5a4e093..61dd10a29f --- a/waterbox/ares64/ares/ares/n64/memory/bus.hpp +++ b/waterbox/ares64/ares/ares/n64/memory/bus.hpp @@ -1,13 +1,13 @@ template -inline auto Bus::read(u32 address, Thread& thread) -> u64 { - static constexpr u64 unmapped = 0; - address &= 0x1fff'ffff - (Size - 1); +inline auto Bus::read(u32 address, Thread& thread, const char *peripheral) -> u64 { + static_assert(Size == Byte || Size == Half || Size == Word || Size == Dual); - if(address <= 0x007f'ffff) return rdram.ram.read(address); - if(address <= 0x03ef'ffff) return unmapped; + if(address <= 0x03ef'ffff) return rdram.ram.read(address, peripheral); if(address <= 0x03ff'ffff) return rdram.read(address, thread); + if(Size == Dual) return freezeDualRead(address), 0; if(address <= 
0x0407'ffff) return rsp.read(address, thread); - if(address <= 0x040f'ffff) return rsp.status.read(address, thread); + if(address <= 0x040b'ffff) return rsp.status.read(address, thread); + if(address <= 0x040f'ffff) return freezeUnmapped(address), 0; if(address <= 0x041f'ffff) return rdp.read(address, thread); if(address <= 0x042f'ffff) return rdp.io.read(address, thread); if(address <= 0x043f'ffff) return mi.read(address, thread); @@ -16,26 +16,49 @@ inline auto Bus::read(u32 address, Thread& thread) -> u64 { if(address <= 0x046f'ffff) return pi.read(address, thread); if(address <= 0x047f'ffff) return ri.read(address, thread); if(address <= 0x048f'ffff) return si.read(address, thread); - if(address <= 0x04ff'ffff) return unmapped; + if(address <= 0x04ff'ffff) return freezeUnmapped(address), 0; if(address <= 0x1fbf'ffff) return pi.read(address, thread); if(address <= 0x1fcf'ffff) return si.read(address, thread); if(address <= 0x7fff'ffff) return pi.read(address, thread); - return unmapped; + return freezeUnmapped(address), 0; } template -inline auto Bus::write(u32 address, u64 data, Thread& thread) -> void { - address &= 0x1fff'ffff - (Size - 1); +inline auto Bus::readBurst(u32 address, u32 *data, Thread& thread) -> void { + static_assert(Size == DCache || Size == ICache); + + if(address <= 0x03ef'ffff) return rdram.ram.readBurst(address, data, "CPU"); + if(address <= 0x03ff'ffff) { + // FIXME: not hardware validated, no idea of the behavior + data[0] = rdram.readWord(address | 0x0, thread); + data[1] = 0; + data[2] = 0; + data[3] = 0; + if constexpr(Size == ICache) { + data[4] = 0; + data[5] = 0; + data[6] = 0; + data[7] = 0; + } + return; + } + + return freezeUncached(address); +} + +template +inline auto Bus::write(u32 address, u64 data, Thread& thread, const char *peripheral) -> void { + static_assert(Size == Byte || Size == Half || Size == Word || Size == Dual); if constexpr(Accuracy::CPU::Recompiler) { cpu.recompiler.invalidate(address + 0); if constexpr(Size 
== Dual) cpu.recompiler.invalidate(address + 4); } - if(address <= 0x007f'ffff) return rdram.ram.write(address, data); - if(address <= 0x03ef'ffff) return; + if(address <= 0x03ef'ffff) return rdram.ram.write(address, data, peripheral); if(address <= 0x03ff'ffff) return rdram.write(address, data, thread); if(address <= 0x0407'ffff) return rsp.write(address, data, thread); - if(address <= 0x040f'ffff) return rsp.status.write(address, data, thread); + if(address <= 0x040b'ffff) return rsp.status.write(address, data, thread); + if(address <= 0x040f'ffff) return freezeUnmapped(address); if(address <= 0x041f'ffff) return rdp.write(address, data, thread); if(address <= 0x042f'ffff) return rdp.io.write(address, data, thread); if(address <= 0x043f'ffff) return mi.write(address, data, thread); @@ -44,9 +67,41 @@ inline auto Bus::write(u32 address, u64 data, Thread& thread) -> void { if(address <= 0x046f'ffff) return pi.write(address, data, thread); if(address <= 0x047f'ffff) return ri.write(address, data, thread); if(address <= 0x048f'ffff) return si.write(address, data, thread); - if(address <= 0x04ff'ffff) return; + if(address <= 0x04ff'ffff) return freezeUnmapped(address); if(address <= 0x1fbf'ffff) return pi.write(address, data, thread); if(address <= 0x1fcf'ffff) return si.write(address, data, thread); if(address <= 0x7fff'ffff) return pi.write(address, data, thread); - return; + return freezeUnmapped(address); +} + +template +inline auto Bus::writeBurst(u32 address, u32 *data, Thread& thread) -> void { + static_assert(Size == DCache || Size == ICache); + if constexpr(Accuracy::CPU::Recompiler) { + cpu.recompiler.invalidateRange(address, Size == DCache ? 
16 : 32); + } + + if(address <= 0x03ef'ffff) return rdram.ram.writeBurst(address, data, "CPU"); + if(address <= 0x03ff'ffff) { + // FIXME: not hardware validated, but a good guess + rdram.writeWord(address | 0x0, data[0], thread); + return; + } + + return freezeUncached(address); +} + +inline auto Bus::freezeUnmapped(u32 address) -> void { + debug(unusual, "[Bus::freezeUnmapped] CPU frozen because of access to RCP unmapped area: 0x", hex(address, 8L)); + cpu.scc.sysadFrozen = true; +} + +inline auto Bus::freezeUncached(u32 address) -> void { + debug(unusual, "[Bus::freezeUncached] CPU frozen because of cached access to non-RDRAM area: 0x", hex(address, 8L)); + cpu.scc.sysadFrozen = true; +} + +inline auto Bus::freezeDualRead(u32 address) -> void { + debug(unusual, "[Bus::freezeDualRead] CPU frozen because of 64-bit read from non-RDRAM area: 0x ", hex(address, 8L)); + cpu.scc.sysadFrozen = true; } diff --git a/waterbox/ares64/ares/ares/n64/memory/io.hpp b/waterbox/ares64/ares/ares/n64/memory/io.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/memory/lsb/readable.hpp b/waterbox/ares64/ares/ares/n64/memory/lsb/readable.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/memory/lsb/writable.hpp b/waterbox/ares64/ares/ares/n64/memory/lsb/writable.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/memory/memory.cpp b/waterbox/ares64/ares/ares/n64/memory/memory.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/memory/memory.hpp b/waterbox/ares64/ares/ares/n64/memory/memory.hpp old mode 100644 new mode 100755 index f1ce23dcba..7206e3d067 --- a/waterbox/ares64/ares/ares/n64/memory/memory.hpp +++ b/waterbox/ares64/ares/ares/n64/memory/memory.hpp @@ -31,8 +31,15 @@ namespace Memory { struct Bus { //bus.hpp - template auto read(u32 address, Thread& thread) -> u64; - template auto write(u32 address, u64 data, Thread& thread) -> void; + template auto read(u32 
address, Thread& thread, const char *peripheral) -> u64; + template auto write(u32 address, u64 data, Thread& thread, const char *peripheral) -> void; + + template auto readBurst(u32 address, u32* data, Thread& thread) -> void; + template auto writeBurst(u32 address, u32* data, Thread& thread) -> void; + + auto freezeUnmapped(u32 address) -> void; + auto freezeUncached(u32 address) -> void; + auto freezeDualRead(u32 address) -> void; }; extern Bus bus; diff --git a/waterbox/ares64/ares/ares/n64/memory/msb/readable.hpp b/waterbox/ares64/ares/ares/n64/memory/msb/readable.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/memory/msb/writable.hpp b/waterbox/ares64/ares/ares/n64/memory/msb/writable.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/mi/debugger.cpp b/waterbox/ares64/ares/ares/n64/mi/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/mi/io.cpp b/waterbox/ares64/ares/ares/n64/mi/io.cpp old mode 100644 new mode 100755 index 9806d558d2..5555b0e2ea --- a/waterbox/ares64/ares/ares/n64/mi/io.cpp +++ b/waterbox/ares64/ares/ares/n64/mi/io.cpp @@ -1,5 +1,5 @@ auto MI::readWord(u32 address, Thread& thread) -> u32 { - address = (address & 0xfffff) >> 2; + address = (address & 0xf) >> 2; n32 data; if(address == 0) { @@ -43,7 +43,7 @@ auto MI::readWord(u32 address, Thread& thread) -> u32 { } auto MI::writeWord(u32 address, u32 data_, Thread& thread) -> void { - address = (address & 0xfffff) >> 2; + address = (address & 0xf) >> 2; n32 data = data_; if(address == 0) { diff --git a/waterbox/ares64/ares/ares/n64/mi/mi.cpp b/waterbox/ares64/ares/ares/n64/mi/mi.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/mi/mi.hpp b/waterbox/ares64/ares/ares/n64/mi/mi.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/mi/serialization.cpp b/waterbox/ares64/ares/ares/n64/mi/serialization.cpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/ares/n64/n64.hpp b/waterbox/ares64/ares/ares/n64/n64.hpp old mode 100644 new mode 100755 index 7d579c69ef..d7d151a697 --- a/waterbox/ares64/ares/ares/n64/n64.hpp +++ b/waterbox/ares64/ares/ares/n64/n64.hpp @@ -23,13 +23,18 @@ using v128 = __m128i; #include #endif +// Include the GB core, we can use its cartridge emulation for Transfer Pak +#if defined(CORE_GB) +#include +#endif + namespace ares::Nintendo64 { auto enumerate() -> vector; auto load(Node::System& node, string name) -> bool; auto option(string name, string value) -> bool; enum : u32 { Read, Write }; - enum : u32 { Byte = 1, Half = 2, Word = 4, Dual = 8 }; + enum : u32 { Byte = 1, Half = 2, Word = 4, Dual = 8, DCache = 16, ICache = 32 }; struct Region { static inline auto NTSC() -> bool; @@ -91,8 +96,8 @@ namespace ares::Nintendo64 { #include #include #include - #include #include + #include #include #include #include diff --git a/waterbox/ares64/ares/ares/n64/pi/bus.hpp b/waterbox/ares64/ares/ares/n64/pi/bus.hpp old mode 100644 new mode 100755 index 4614edaedc..5473b833fb --- a/waterbox/ares64/ares/ares/n64/pi/bus.hpp +++ b/waterbox/ares64/ares/ares/n64/pi/bus.hpp @@ -2,11 +2,13 @@ inline auto PI::readWord(u32 address, Thread& thread) -> u32 { if(address <= 0x046f'ffff) return ioRead(address); if (unlikely(io.ioBusy)) { + debug(unusual, "[PI::readWord] PI read to 0x", hex(address, 8L), " will not behave as expected because PI writing is in progress"); thread.step(writeForceFinish() * 2); return io.busLatch; } thread.step(250 * 2); - return busRead(address); + io.busLatch = busRead(address); + return io.busLatch; } template @@ -42,13 +44,13 @@ inline auto PI::busRead(u32 address) -> u32 { if(cartridge.flash) return cartridge.flash.read(address); return unmapped; } - if(address <= 0x13fe'ffff) { - if(cartridge.rom ) return cartridge.rom.read(address); - return unmapped; + if(cartridge.isviewer.enabled() && address >= 0x13f0'0000 && address <= 0x13ff'ffff) { + return 
cartridge.isviewer.read(address); } - if(address <= 0x13ff'ffff) return cartridge.isviewer.read(address); - if(address <= 0x7fff'ffff) return unmapped; - return unmapped; //accesses here actually lock out the RCP + if(address <= 0x1000'0000 + cartridge.rom.size - 1) { + return cartridge.rom.read(address); + } + return unmapped; } inline auto PI::writeWord(u32 address, u32 data, Thread& thread) -> void { @@ -92,13 +94,16 @@ inline auto PI::busWrite(u32 address, u32 data) -> void { if(cartridge.flash) return cartridge.flash.write(address, data); return; } - if(address <= 0x13fe'ffff) { - if(cartridge.rom ) return cartridge.rom.write(address, data); - return; + if(address >= 0x13f0'0000 && address <= 0x13ff'ffff) { + if(cartridge.isviewer.enabled()) { + writeForceFinish(); //Debugging channel for homebrew, be gentle + return cartridge.isviewer.write(address, data); + } else { + debug(unhandled, "[PI::busWrite] attempt to write to ISViewer: ROM is too big so ISViewer is disabled"); + } } - if(address <= 0x13ff'ffff) { - writeForceFinish(); //Debugging channel for homebrew, be gentle - return cartridge.isviewer.write(address, data); + if(address <= 0x1000'0000 + cartridge.rom.size - 1) { + return cartridge.rom.write(address, data); } if(address <= 0x7fff'ffff) return; } diff --git a/waterbox/ares64/ares/ares/n64/pi/debugger.cpp b/waterbox/ares64/ares/ares/n64/pi/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/pi/dma.cpp b/waterbox/ares64/ares/ares/n64/pi/dma.cpp old mode 100644 new mode 100755 index 4f3a5c9024..5640ac8347 --- a/waterbox/ares64/ares/ares/n64/pi/dma.cpp +++ b/waterbox/ares64/ares/ares/n64/pi/dma.cpp @@ -1,49 +1,52 @@ auto PI::dmaRead() -> void { io.readLength = (io.readLength | 1) + 1; + + u32 lastCacheline = 0xffff'ffff; for(u32 address = 0; address < io.readLength; address += 2) { - u16 data = rdram.ram.read(io.dramAddress + address); + u16 data = rdram.ram.read(io.dramAddress + address, "PI DMA"); 
busWrite(io.pbusAddress + address, data); } } auto PI::dmaWrite() -> void { u8 mem[128]; - bool first_block = true; i32 length = io.writeLength+1; + i32 maxBlockSize = 128; + bool firstBlock = true; - io.writeLength = 0x7F; - if (length <= 8) io.writeLength -= io.dramAddress&7; + if constexpr(Accuracy::CPU::Recompiler) { + cpu.recompiler.invalidateRange(io.dramAddress, (length + 1) & ~1); + } while (length > 0) { - u32 dest = io.dramAddress & 0x7FFFFE; - i32 misalign = dest & 7; - i32 block_len = 128 - misalign; - i32 cur_len = min(length, block_len); + i32 misalign = io.dramAddress & 7; + i32 distEndOfRow = 0x800-(io.dramAddress&0x7ff); + i32 blockLen = min(maxBlockSize-misalign, distEndOfRow); + i32 curLen = min(length, blockLen); - length -= cur_len; - if (length.bit(0)) length += 1; - - i32 rom_len = (cur_len + 1) & ~1; - for (u32 i = 0; i < rom_len; i += 2) { + for (int i=0; i(io.pbusAddress); - mem[i + 0] = data >> 8; - mem[i + 1] = data & 0xFF; + mem[i+0] = data >> 8; + mem[i+1] = data >> 0; io.pbusAddress += 2; + length -= 2; } - if (first_block) { - if (cur_len == block_len-1) cur_len++; - cur_len = max(cur_len-misalign, 0); + if (firstBlock && curLen < 127-misalign) { + for (i32 i = 0; i < curLen-misalign; i++) { + rdram.ram.write(io.dramAddress++, mem[i], "PI DMA"); + } + } else { + for (i32 i = 0; i < curLen-misalign; i+=2) { + rdram.ram.write(io.dramAddress++, mem[i+0], "PI DMA"); + rdram.ram.write(io.dramAddress++, mem[i+1], "PI DMA"); + } } - if constexpr(Accuracy::CPU::Recompiler) { - cpu.recompiler.invalidateRange(io.dramAddress, cur_len); - } - for (u32 i = 0; i < cur_len; i++) - rdram.ram.write(io.dramAddress++, mem[i]); io.dramAddress = (io.dramAddress + 7) & ~7; - - first_block = false; + io.writeLength = curLen <= 8 ? 127-misalign : 127; + firstBlock = false; + maxBlockSize = distEndOfRow < 8 ? 
128-misalign : 128; } } diff --git a/waterbox/ares64/ares/ares/n64/pi/io.cpp b/waterbox/ares64/ares/ares/n64/pi/io.cpp old mode 100644 new mode 100755 index c131bf3a69..0884439b67 --- a/waterbox/ares64/ares/ares/n64/pi/io.cpp +++ b/waterbox/ares64/ares/ares/n64/pi/io.cpp @@ -1,5 +1,5 @@ auto PI::ioRead(u32 address) -> u32 { - address = (address & 0xfffff) >> 2; + address = (address & 0x3f) >> 2; n32 data; if(address == 0) { @@ -70,12 +70,20 @@ auto PI::ioRead(u32 address) -> u32 { data.bit(0,7) = bsd2.releaseDuration; } + if(address == 13) { + data.bit(0,31) = io.busLatch; + } + + if(address == 14) { + data.bit(0,31) = io.busLatch; + } + debugger.io(Read, address, data); return data; } auto PI::ioWrite(u32 address, u32 data_) -> void { - address = (address & 0xfffff) >> 2; + address = (address & 0x3f) >> 2; n32 data = data_; //only PI_STATUS can be written while PI is busy @@ -98,6 +106,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void { //PI_READ_LENGTH io.readLength = n24(data); io.dmaBusy = 1; + io.originPc = cpu.ipu.pc; queue.insert(Queue::PI_DMA_Read, dmaDuration(true)); dmaRead(); } @@ -106,6 +115,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void { //PI_WRITE_LENGTH io.writeLength = n24(data); io.dmaBusy = 1; + io.originPc = cpu.ipu.pc; queue.insert(Queue::PI_DMA_Write, dmaDuration(false)); dmaWrite(); } diff --git a/waterbox/ares64/ares/ares/n64/pi/pi.cpp b/waterbox/ares64/ares/ares/n64/pi/pi.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/pi/pi.hpp b/waterbox/ares64/ares/ares/n64/pi/pi.hpp old mode 100644 new mode 100755 index 240f177dd6..e824d5d739 --- a/waterbox/ares64/ares/ares/n64/pi/pi.hpp +++ b/waterbox/ares64/ares/ares/n64/pi/pi.hpp @@ -51,6 +51,7 @@ struct PI : Memory::RCP { n32 readLength; n32 writeLength; n32 busLatch; + u64 originPc; } io; struct BSD { diff --git a/waterbox/ares64/ares/ares/n64/pi/serialization.cpp b/waterbox/ares64/ares/ares/n64/pi/serialization.cpp old mode 100644 new mode 100755 diff 
--git a/waterbox/ares64/ares/ares/n64/pif/debugger.cpp b/waterbox/ares64/ares/ares/n64/pif/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/pif/hle.cpp b/waterbox/ares64/ares/ares/n64/pif/hle.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/pif/io.cpp b/waterbox/ares64/ares/ares/n64/pif/io.cpp old mode 100644 new mode 100755 index 4d1ac5478f..0b472a199d --- a/waterbox/ares64/ares/ares/n64/pif/io.cpp +++ b/waterbox/ares64/ares/ares/n64/pif/io.cpp @@ -23,21 +23,23 @@ auto PIF::readWord(u32 address) -> u32 { auto PIF::writeWord(u32 address, u32 data) -> void { writeInt(address, data); - return intA(Write, Size4); + intA(Write, Size4); + mainHLE(); } auto PIF::dmaRead(u32 address, u32 ramAddress) -> void { intA(Read, Size64); for(u32 offset = 0; offset < 64; offset += 4) { u32 data = readInt(address + offset); - rdram.ram.write(ramAddress + offset, data); + rdram.ram.write(ramAddress + offset, data, "SI DMA"); } } auto PIF::dmaWrite(u32 address, u32 ramAddress) -> void { for(u32 offset = 0; offset < 64; offset += 4) { - u32 data = rdram.ram.read(ramAddress + offset); + u32 data = rdram.ram.read(ramAddress + offset, "SI DMA"); writeInt(address + offset, data); } intA(Write, Size64); + mainHLE(); } diff --git a/waterbox/ares64/ares/ares/n64/pif/pif.cpp b/waterbox/ares64/ares/ares/n64/pif/pif.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/pif/pif.hpp b/waterbox/ares64/ares/ares/n64/pif/pif.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/pif/serialization.cpp b/waterbox/ares64/ares/ares/n64/pif/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rdp/debugger.cpp b/waterbox/ares64/ares/ares/n64/rdp/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rdp/io.cpp b/waterbox/ares64/ares/ares/n64/rdp/io.cpp old mode 100644 new mode 100755 index 992554bbc0..ead06fcde6 --- 
a/waterbox/ares64/ares/ares/n64/rdp/io.cpp +++ b/waterbox/ares64/ares/ares/n64/rdp/io.cpp @@ -1,5 +1,5 @@ auto RDP::readWord(u32 address, Thread& thread) -> u32 { - address = (address & 0xfffff) >> 2; + address = (address & 0x1f) >> 2; n32 data; if(address == 0) { @@ -57,7 +57,7 @@ auto RDP::readWord(u32 address, Thread& thread) -> u32 { } auto RDP::writeWord(u32 address, u32 data_, Thread& thread) -> void { - address = (address & 0xfffff) >> 2; + address = (address & 0x1f) >> 2; n32 data = data_; if(address == 0) { diff --git a/waterbox/ares64/ares/ares/n64/rdp/rdp.cpp b/waterbox/ares64/ares/ares/n64/rdp/rdp.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp b/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rdp/render.cpp b/waterbox/ares64/ares/ares/n64/rdp/render.cpp old mode 100644 new mode 100755 index 0495eb23f0..9ad19ea42f --- a/waterbox/ares64/ares/ares/n64/rdp/render.cpp +++ b/waterbox/ares64/ares/ares/n64/rdp/render.cpp @@ -57,7 +57,7 @@ auto RDP::render() -> void { command.current = command.end; return; - auto& memory = !command.source ? rdram.ram : rsp.dmem; + auto& memory = !command.source ? 
(Memory::Writable&)rdram.ram : (Memory::Writable&)rsp.dmem; auto fetch = [&]() -> u64 { u64 op = memory.readUnaligned(command.current); diff --git a/waterbox/ares64/ares/ares/n64/rdp/serialization.cpp b/waterbox/ares64/ares/ares/n64/rdp/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rdram/debugger.cpp b/waterbox/ares64/ares/ares/n64/rdram/debugger.cpp old mode 100644 new mode 100755 index 3829628d02..0aab188d87 --- a/waterbox/ares64/ares/ares/n64/rdram/debugger.cpp +++ b/waterbox/ares64/ares/ares/n64/rdram/debugger.cpp @@ -1,11 +1,32 @@ auto RDRAM::Debugger::load(Node::Object parent) -> void { memory.ram = parent->append("RDRAM"); - memory.ram->setSize(4_MiB + 4_MiB); + if(!system.expansionPak) { + memory.ram->setSize(4_MiB); + } else { + memory.ram->setSize(4_MiB + 4_MiB); + } + memory.ram->setRead([&](u32 address) -> u8 { - return rdram.ram.read(address); + return rdram.ram.read(address, "Ares Debugger"); }); memory.ram->setWrite([&](u32 address, u8 data) -> void { - return rdram.ram.write(address, data); + return rdram.ram.write(address, data, "Ares Debugger"); + }); + + memory.dcache = parent->append("DCache"); + memory.dcache->setSize(4_MiB + 4_MiB); + memory.dcache->setRead([&](u32 address) -> u8 { + u32 vaddr = address | 0x8000'0000; + return cpu.dcache.readDebug(vaddr, address); + }); + memory.dcache->setWrite([&](u32 address, u8 data) -> void { + u32 vaddr = address | 0x8000'0000; + auto& line = cpu.dcache.line(vaddr); + if(line.hit(address)) { + line.write(address, data); + } else { + rdram.ram.write(address, data, "Ares Debugger"); + } }); tracer.io = parent->append("I/O", "RDRAM"); @@ -38,3 +59,51 @@ auto RDRAM::Debugger::io(bool mode, u32 chipID, u32 address, u32 data) -> void { tracer.io->notify(message); } } + +auto RDRAM::Debugger::cacheErrorContext(string peripheral) -> string { + if(peripheral == "CPU") { + return { "\tCurrent CPU PC: 0x", hex(cpu.ipu.pc, 16L), "\n" }; + } + if(peripheral == "RSP DMA") 
{ + if(rsp.dma.current.originCpu) { + return { "\tRSP DMA started at CPU PC: 0x", hex(rsp.dma.current.originPc, 16L), "\n" }; + } else { + return { "\tRSP DMA started at RSP PC: 0x", hex(rsp.dma.current.originPc, 3L), "\n" }; + } + } + if(peripheral == "PI DMA") { + return { "\tPI DMA started at CPU PC: 0x", hex(pi.io.originPc, 16L), "\n" }; + } + if(peripheral == "AI DMA") { + return { "\tAI DMA started at CPU PC: 0x", hex(ai.io.dmaOriginPc[0], 16L), "\n" }; + } + return ""; +} + +auto RDRAM::Debugger::readWord(u32 address, int size, const char *peripheral) -> void { + if (system.homebrewMode && (address & ~15) != lastReadCacheline) { + lastReadCacheline = address & ~15; + auto& line = cpu.dcache.line(address); + u16 dirtyMask = ((1 << size) - 1) << (address & 0xF); + if (line.hit(address) && (line.dirty & dirtyMask)) { + string msg = { peripheral, " reading from RDRAM address 0x", hex(address), " which is modified in the cache (missing cache writeback?)\n"}; + msg.append(string{ "\tCacheline was loaded at CPU PC: 0x", hex(line.fillPc, 16L), "\n" }); + msg.append(string{ "\tCacheline was last written at CPU PC: 0x", hex(line.dirtyPc, 16L), "\n" }); + msg.append(cacheErrorContext(peripheral)); + debug(unusual, msg); + } + } +} + +auto RDRAM::Debugger::writeWord(u32 address, int size, u64 value, const char *peripheral) -> void { + if (system.homebrewMode && (address & ~15) != lastWrittenCacheline) { + lastWrittenCacheline = address & ~15; + auto& line = cpu.dcache.line(address); + if (line.hit(address)) { + string msg = { peripheral, " writing to RDRAM address 0x", hex(address), " which is cached (missing cache invalidation?)\n"}; + msg.append(string{ "\tCacheline was loaded at CPU PC: 0x", hex(line.fillPc, 16L), "\n" }); + msg.append(cacheErrorContext(peripheral)); + debug(unusual, msg); + } + } +} diff --git a/waterbox/ares64/ares/ares/n64/rdram/io.cpp b/waterbox/ares64/ares/ares/n64/rdram/io.cpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/ares/n64/rdram/rdram.cpp b/waterbox/ares64/ares/ares/n64/rdram/rdram.cpp old mode 100644 new mode 100755 index f17acd4843..bcbd53130e --- a/waterbox/ares64/ares/ares/n64/rdram/rdram.cpp +++ b/waterbox/ares64/ares/ares/n64/rdram/rdram.cpp @@ -10,11 +10,15 @@ RDRAM rdram; auto RDRAM::load(Node::Object parent) -> void { node = parent->append("RDRAM"); - //4_MiB internal - //4_MiB expansion pak - ram.allocate(4_MiB + 4_MiB); - - debugger.load(node); + if(!system.expansionPak) { + //4MB internal + ram.allocate(4_MiB); + } else { + //4MB internal + 4MB expansion pak + ram.allocate(4_MiB + 4_MiB); + } + + debugger.load(node); } auto RDRAM::unload() -> void { diff --git a/waterbox/ares64/ares/ares/n64/rdram/rdram.hpp b/waterbox/ares64/ares/ares/n64/rdram/rdram.hpp old mode 100644 new mode 100755 index 6b4d772ee0..0183159820 --- a/waterbox/ares64/ares/ares/n64/rdram/rdram.hpp +++ b/waterbox/ares64/ares/ares/n64/rdram/rdram.hpp @@ -4,26 +4,79 @@ struct RDRAM : Memory::RCP { Node::Object node; struct Writable : public Memory::Writable { + RDRAM& self; + + Writable(RDRAM& self) : self(self) {} + template - auto read(u32 address) -> u64 { + auto read(u32 address, const char *peripheral) -> u64 { if (address >= size) return 0; + if (peripheral && system.homebrewMode) { + self.debugger.readWord(address, Size, peripheral); + } return Memory::Writable::read(address); } template - auto write(u32 address, u64 value) -> void { + auto write(u32 address, u64 value, const char *peripheral) -> void { if (address >= size) return; + if (peripheral && system.homebrewMode) { + self.debugger.writeWord(address, Size, value, peripheral); + } Memory::Writable::write(address, value); } - } ram; + + template + auto writeBurst(u32 address, u32 *value, const char *peripheral) -> void { + if (address >= size) return; + Memory::Writable::write(address | 0x00, value[0]); + Memory::Writable::write(address | 0x04, value[1]); + Memory::Writable::write(address | 0x08, value[2]); + 
Memory::Writable::write(address | 0x0c, value[3]); + if (Size == ICache) { + Memory::Writable::write(address | 0x10, value[4]); + Memory::Writable::write(address | 0x14, value[5]); + Memory::Writable::write(address | 0x18, value[6]); + Memory::Writable::write(address | 0x1c, value[7]); + } + } + + template + auto readBurst(u32 address, u32 *value, const char *peripheral) -> void { + if (address >= size) { + value[0] = value[1] = value[2] = value[3] = 0; + if (Size == ICache) + value[4] = value[5] = value[6] = value[7] = 0; + return; + } + value[0] = Memory::Writable::read(address | 0x00); + value[1] = Memory::Writable::read(address | 0x04); + value[2] = Memory::Writable::read(address | 0x08); + value[3] = Memory::Writable::read(address | 0x0c); + if (Size == ICache) { + value[4] = Memory::Writable::read(address | 0x10); + value[5] = Memory::Writable::read(address | 0x14); + value[6] = Memory::Writable::read(address | 0x18); + value[7] = Memory::Writable::read(address | 0x1c); + } + } + + } ram{*this}; struct Debugger { + u32 lastReadCacheline = 0xffff'ffff; + u32 lastWrittenCacheline = 0xffff'ffff; + //debugger.cpp auto load(Node::Object) -> void; auto io(bool mode, u32 chipID, u32 address, u32 data) -> void; + auto readWord(u32 address, int size, const char *peripheral) -> void; + auto writeWord(u32 address, int size, u64 value, const char *peripheral) -> void; + auto cacheErrorContext(string peripheral) -> string; struct Memory { Node::Debugger::Memory ram; + Node::Debugger::Memory dcache; } memory; struct Tracer { diff --git a/waterbox/ares64/ares/ares/n64/rdram/serialization.cpp b/waterbox/ares64/ares/ares/n64/rdram/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/ri/debugger.cpp b/waterbox/ares64/ares/ares/n64/ri/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/ri/io.cpp b/waterbox/ares64/ares/ares/n64/ri/io.cpp old mode 100644 new mode 100755 index 6c88fbaac0..f6c37c8f1c --- 
a/waterbox/ares64/ares/ares/n64/ri/io.cpp +++ b/waterbox/ares64/ares/ares/n64/ri/io.cpp @@ -1,5 +1,5 @@ auto RI::readWord(u32 address, Thread& thread) -> u32 { - address = (address & 0xfffff) >> 2; + address = (address & 0x1f) >> 2; n32 data = 0; if(address == 0) { @@ -59,7 +59,7 @@ auto RI::readWord(u32 address, Thread& thread) -> u32 { } auto RI::writeWord(u32 address, u32 data_, Thread& thread) -> void { - address = (address & 0xfffff) >> 2; + address = (address & 0x1f) >> 2; n32 data = data_; if(address == 0) { diff --git a/waterbox/ares64/ares/ares/n64/ri/ri.cpp b/waterbox/ares64/ares/ares/n64/ri/ri.cpp old mode 100644 new mode 100755 index 1ebca7025c..5267965c4b --- a/waterbox/ares64/ares/ares/n64/ri/ri.cpp +++ b/waterbox/ares64/ares/ares/n64/ri/ri.cpp @@ -27,8 +27,8 @@ auto RI::power(bool reset) -> void { io.refresh = 0x0006'3634; //store RDRAM size result into memory - rdram.ram.write(0x318, rdram.ram.size); //CIC-NUS-6102 - rdram.ram.write(0x3f0, rdram.ram.size); //CIC-NUS-6105 + rdram.ram.write(0x318, rdram.ram.size, "IPL3"); //CIC-NUS-6102 + rdram.ram.write(0x3f0, rdram.ram.size, "IPL3"); //CIC-NUS-6105 } } diff --git a/waterbox/ares64/ares/ares/n64/ri/ri.hpp b/waterbox/ares64/ares/ares/n64/ri/ri.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/ri/serialization.cpp b/waterbox/ares64/ares/ares/n64/ri/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rsp/debugger.cpp b/waterbox/ares64/ares/ares/n64/rsp/debugger.cpp old mode 100644 new mode 100755 index 2214418493..6bb0afd11a --- a/waterbox/ares64/ares/ares/n64/rsp/debugger.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/debugger.cpp @@ -22,8 +22,23 @@ auto RSP::Debugger::load(Node::Object parent) -> void { tracer.instruction = parent->append("Instruction", "RSP"); tracer.instruction->setAddressBits(12, 2); tracer.instruction->setDepth(64); + if constexpr(Accuracy::RSP::Recompiler) { + tracer.instruction->setToggle([&] { + 
rsp.recompiler.reset(); + rsp.recompiler.callInstructionPrologue = tracer.instruction->enabled(); + }); + } tracer.io = parent->append("I/O", "RSP"); + + if (system.homebrewMode) { + for (auto& taintWord : taintMask.dmem) { + taintWord = {}; + } + for (auto& taintWord : taintMask.imem) { + taintWord = {}; + } + } } auto RSP::Debugger::unload() -> void { @@ -89,4 +104,81 @@ auto RSP::Debugger::ioStatus(bool mode, u32 address, u32 data) -> void { } } +auto RSP::Debugger::dmaReadWord(u32 rdramAddress, u32 pbusRegion, u32 pbusAddress) -> void { + if (system.homebrewMode) { + auto& line = cpu.dcache.line(rdramAddress); + u16 dmaMask = 0xff << (rdramAddress & 0xF); + auto& tm = !pbusRegion ? taintMask.dmem : taintMask.imem; + auto& taintWord = tm[pbusAddress >> 3]; + if (line.hit(rdramAddress) && (line.dirty & dmaMask)) { + taintWord.dirty = (line.dirty & dmaMask) >> (rdramAddress & 0x8); + taintWord.ctxDmaRdramAddress = rdramAddress & ~0x7; + taintWord.ctxDmaOriginPc = rsp.dma.current.originPc; + taintWord.ctxDmaOriginCpu = rsp.dma.current.originCpu; + taintWord.ctxCacheFillPc = line.fillPc; + taintWord.ctxCacheDirtyPc = line.dirtyPc; + } else { + taintWord.dirty = 0; + } + } +} + +auto RSP::Debugger::dmemReadWord(u12 address, int size, const char *peripheral) -> void { + if (system.homebrewMode) { + u8 readMask = ((1 << size) - 1) << (address & 0x7); + auto& taintWord = taintMask.dmem[address >> 3]; + if (taintWord.dirty & readMask) { + u32 rdramAddress = taintWord.ctxDmaRdramAddress + (address & 0x7); + string msg = { peripheral, " reading from DMEM address 0x", hex(address), " which contains a value which is not cache coherent\n"}; + msg.append(string{ "\tCurrent RSP PC: 0x", hex(rsp.ipu.pc, 3L), "\n" }); + msg.append(string{ "\tThe value read was previously written by RSP DMA from RDRAM address 0x", hex(rdramAddress, 8L), "\n" }); + if(taintWord.ctxDmaOriginCpu) { + msg.append(string{ "\tRSP DMA started at CPU PC: 0x", hex(taintWord.ctxDmaOriginPc, 16L), "\n" }); + 
} else { + msg.append(string{ "\tRSP DMA started at RSP PC: 0x", hex(taintWord.ctxDmaOriginPc, 3L), "\n" }); + } + msg.append(string{ "\tThe relative CPU cacheline was dirty (missing cache writeback?)\n" }); + msg.append(string{ "\tCacheline was last written at CPU PC: 0x", hex(taintWord.ctxCacheDirtyPc, 16L), "\n" }); + msg.append(string{ "\tCacheline was loaded at CPU PC: 0x", hex(taintWord.ctxCacheFillPc, 16L), "\n" }); + debug(unusual, msg); + taintWord.dirty = 0; + } + } +} + +auto RSP::Debugger::dmemReadUnalignedWord(u12 address, int size, const char *peripheral) -> void { + if (system.homebrewMode) { + u32 addressAlignedStart = address & ~7; + u32 addressAlignedEnd = address + size - 1 & ~7; + if(addressAlignedStart == addressAlignedEnd) { + dmemReadWord(address, size, "RSP"); + } else { + int sizeStart = addressAlignedEnd - address; + dmemReadWord(address, sizeStart, "RSP"); + dmemReadWord(address + sizeStart, size - sizeStart, "RSP"); + } + } +} + +auto RSP::Debugger::dmemWriteWord(u12 address, int size, u64 value) -> void { + if (system.homebrewMode) { + auto& taintWord = taintMask.dmem[address >> 3]; + taintWord.dirty &= ~(((1 << size) - 1) << (address & 0x7)); + } +} + +auto RSP::Debugger::dmemWriteUnalignedWord(u12 address, int size, u64 value) -> void { + if (system.homebrewMode) { + u32 addressAlignedStart = address & ~7; + u32 addressAlignedEnd = address + size - 1 & ~7; + if(addressAlignedStart == addressAlignedEnd) { + dmemWriteWord(address, size, value); + } else { + int sizeStart = addressAlignedEnd - address; + dmemWriteWord(address, sizeStart, value); + dmemWriteWord(address + sizeStart, size - sizeStart, value); + } + } +} + #undef rsp diff --git a/waterbox/ares64/ares/ares/n64/rsp/decoder.cpp b/waterbox/ares64/ares/ares/n64/rsp/decoder.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rsp/disassembler.cpp b/waterbox/ares64/ares/ares/n64/rsp/disassembler.cpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/ares/n64/rsp/dma.cpp b/waterbox/ares64/ares/ares/n64/rsp/dma.cpp old mode 100644 new mode 100755 index fbc00b7962..72b14eec4a --- a/waterbox/ares64/ares/ares/n64/rsp/dma.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/dma.cpp @@ -18,8 +18,11 @@ auto RSP::dmaTransferStep() -> void { } } for(u32 i = 0; i <= dma.current.length; i += 8) { - u64 data = rdram.ram.read(dma.current.dramAddress); + u64 data = rdram.ram.read(dma.current.dramAddress, nullptr); region.write(dma.current.pbusAddress, data); + if (system.homebrewMode) { + rsp.debugger.dmaReadWord(dma.current.dramAddress, dma.current.pbusRegion, dma.current.pbusAddress); + } dma.current.dramAddress += 8; dma.current.pbusAddress += 8; } @@ -27,7 +30,7 @@ auto RSP::dmaTransferStep() -> void { if(dma.busy.write) { for(u32 i = 0; i <= dma.current.length; i += 8) { u64 data = region.read(dma.current.pbusAddress); - rdram.ram.write(dma.current.dramAddress, data); + rdram.ram.write(dma.current.dramAddress, data, "RSP DMA"); dma.current.dramAddress += 8; dma.current.pbusAddress += 8; } diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter-ipu.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter-ipu.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter-scc.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter-scc.cpp old mode 100644 new mode 100755 index 672716aec7..528f87231b --- a/waterbox/ares64/ares/ares/n64/rsp/interpreter-scc.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/interpreter-scc.cpp @@ -1,9 +1,9 @@ auto RSP::MFC0(r32& rt, u8 rd) -> void { - if((rd & 8) == 0) rt.u32 = Nintendo64::rsp.ioRead ((rd & 7) << 2); + if((rd & 8) == 0) rt.u32 = Nintendo64::rsp.ioRead ((rd & 7) << 2, *this); if((rd & 8) != 0) rt.u32 = Nintendo64::rdp.readWord((rd & 7) << 2, *this); } auto RSP::MTC0(cr32& rt, u8 rd) -> void { - if((rd & 8) == 0) Nintendo64::rsp.ioWrite ((rd & 7) << 2, rt.u32); + if((rd & 8) == 0) Nintendo64::rsp.ioWrite ((rd & 7) << 2, rt.u32, *this); if((rd & 
8) != 0) Nintendo64::rdp.writeWord((rd & 7) << 2, rt.u32, *this); } diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp old mode 100644 new mode 100755 index 24cdf0b2ca..f9ed14db58 --- a/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp @@ -150,7 +150,7 @@ auto RSP::CTC2(cr32& rt, u8 rd) -> void { if constexpr(Accuracy::RSP::SIMD) { #if ARCHITECTURE_SUPPORTS_SSE4_1 - static const v128 mask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080); + static const v128 mask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080u); lo->v128 = _mm_cmpeq_epi8(_mm_and_si128(_mm_set1_epi8(~rt.u32 >> 0), mask), zero); hi->v128 = _mm_cmpeq_epi8(_mm_and_si128(_mm_set1_epi8(~rt.u32 >> 8), mask), zero); #endif diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/rsp/io.cpp b/waterbox/ares64/ares/ares/n64/rsp/io.cpp old mode 100644 new mode 100755 index 1daa5fccc0..b776f9d485 --- a/waterbox/ares64/ares/ares/n64/rsp/io.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/io.cpp @@ -3,11 +3,11 @@ auto RSP::readWord(u32 address, Thread& thread) -> u32 { if(address & 0x1000) return imem.read(address); else return dmem.read(address); } - return ioRead(address); + return ioRead(address, thread); } -auto RSP::ioRead(u32 address) -> u32 { - address = (address & 0x3ffff) >> 2; +auto RSP::ioRead(u32 address, Thread &thread) -> u32 { + address = (address & 0x1f) >> 2; n32 data; if(address == 0) { @@ -72,12 +72,11 @@ auto RSP::writeWord(u32 address, u32 data, Thread& thread) -> void { if(address & 0x1000) return recompiler.invalidate(address & 0xfff), imem.write(address, data); else return dmem.write(address, data); } - return ioWrite(address, data); + return ioWrite(address, data, 
thread); } -auto RSP::ioWrite(u32 address, u32 data_) -> void { - - address = (address & 0x3ffff) >> 2; +auto RSP::ioWrite(u32 address, u32 data_, Thread& thread) -> void { + address = (address & 0x1f) >> 2; n32 data = data_; if(address == 0) { @@ -96,8 +95,11 @@ auto RSP::ioWrite(u32 address, u32 data_) -> void { dma.pending.length.bit(3,11) = data.bit( 3,11); dma.pending.count = data.bit(12,19); dma.pending.skip.bit(3,11) = data.bit(23,31); + dma.pending.originCpu = &thread != this; + dma.pending.originPc = dma.pending.originCpu ? cpu.ipu.pc : (u64)rsp.ipu.r[31].u32; dma.full.read = 1; dma.full.write = 0; + // printf("RSP DMA Read: %08x => %08x %08x\n", dma.pending.dramAddress, dma.pending.pbusAddress, dma.pending.length); dmaTransferStart(); } @@ -106,6 +108,8 @@ auto RSP::ioWrite(u32 address, u32 data_) -> void { dma.pending.length.bit(3,11) = data.bit( 3,11); dma.pending.count = data.bit(12,19); dma.pending.skip.bit(3,11) = data.bit(23,31); + dma.pending.originCpu = &thread != this; + dma.pending.originPc = dma.pending.originCpu ? 
cpu.ipu.pc : (u64)rsp.ipu.r[31].u32; dma.full.write = 1; dma.full.read = 0; dmaTransferStart(); @@ -157,7 +161,7 @@ auto RSP::ioWrite(u32 address, u32 data_) -> void { } auto RSP::Status::readWord(u32 address, Thread& thread) -> u32 { - address = (address & 0x7ffff) >> 2; + address = (address & 0x1f) >> 2; n32 data; if(address == 0) { @@ -178,7 +182,7 @@ auto RSP::Status::readWord(u32 address, Thread& thread) -> u32 { } auto RSP::Status::writeWord(u32 address, u32 data_, Thread& thread) -> void { - address = (address & 0x7ffff) >> 2; + address = (address & 0x1f) >> 2; n32 data = data_; if(address == 0) { diff --git a/waterbox/ares64/ares/ares/n64/rsp/recompiler.cpp b/waterbox/ares64/ares/ares/n64/rsp/recompiler.cpp old mode 100644 new mode 100755 index f9f190fa99..85468c2b0e --- a/waterbox/ares64/ares/ares/n64/rsp/recompiler.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/recompiler.cpp @@ -61,9 +61,7 @@ auto RSP::Recompiler::block(u12 address) -> Block* { auto RSP::Recompiler::emit(u12 address) -> Block* { if(unlikely(allocator.available() < 1_MiB)) { print("RSP allocator flush\n"); - memory::jitprotect(false); - allocator.release(bump_allocator::zero_fill); - memory::jitprotect(true); + allocator.release(); reset(); } @@ -75,19 +73,27 @@ auto RSP::Recompiler::emit(u12 address) -> Block* { u12 start = address; bool hasBranched = 0; while(true) { - pipeline.begin(); u32 instruction = self.imem.read(address); + if(callInstructionPrologue) { + mov32(reg(1), imm(instruction)); + call(&RSP::instructionPrologue); + } + pipeline.begin(); OpInfo op0 = self.decoderEXECUTE(instruction); pipeline.issue(op0); bool branched = emitEXECUTE(instruction); if(!pipeline.singleIssue && !branched && u12(address + 4) != start) { - u32 instruction = self.imem.read(address + 4); + u32 instruction = self.imem.read(address + 4); OpInfo op1 = self.decoderEXECUTE(instruction); if(RSP::canDualIssue(op0, op1)) { mov32(reg(1), imm(0)); call(&RSP::instructionEpilogue); + if(callInstructionPrologue) 
{ + mov32(reg(1), imm(instruction)); + call(&RSP::instructionPrologue); + } address += 4; pipeline.issue(op1); branched = emitEXECUTE(instruction); diff --git a/waterbox/ares64/ares/ares/n64/rsp/rsp.cpp b/waterbox/ares64/ares/ares/n64/rsp/rsp.cpp old mode 100644 new mode 100755 index 821cd9437c..5edcc63d1f --- a/waterbox/ares64/ares/ares/n64/rsp/rsp.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/rsp.cpp @@ -43,12 +43,11 @@ auto RSP::instruction() -> void { } if constexpr(Accuracy::RSP::Interpreter) { + u32 instruction = imem.read(ipu.pc); + instructionPrologue(instruction); pipeline.begin(); - pipeline.address = ipu.pc; - pipeline.instruction = imem.read(pipeline.address); - OpInfo op0 = decoderEXECUTE(pipeline.instruction); + OpInfo op0 = decoderEXECUTE(instruction); pipeline.issue(op0); - debugger.instruction(); interpreterEXECUTE(); if(!pipeline.singleIssue && !op0.branch()) { @@ -57,10 +56,8 @@ auto RSP::instruction() -> void { if(canDualIssue(op0, op1)) { instructionEpilogue(0); - pipeline.address = ipu.pc; - pipeline.instruction = instruction; + instructionPrologue(instruction); pipeline.issue(op1); - debugger.instruction(); interpreterEXECUTE(); } } @@ -74,6 +71,12 @@ auto RSP::instruction() -> void { step(pipeline.clocks); } +auto RSP::instructionPrologue(u32 instruction) -> void { + pipeline.address = ipu.pc; + pipeline.instruction = instruction; + debugger.instruction(); +} + auto RSP::instructionEpilogue(u32 clocks) -> s32 { if constexpr(Accuracy::RSP::Recompiler) { step(clocks); @@ -142,9 +145,7 @@ auto RSP::power(bool reset) -> void { if constexpr(Accuracy::RSP::Recompiler) { auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(4_MiB); - memory::jitprotect(false); - recompiler.allocator.resize(4_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer); - memory::jitprotect(true); + recompiler.allocator.resize(64_MiB, bump_allocator::executable, buffer); recompiler.reset(); } diff --git a/waterbox/ares64/ares/ares/n64/rsp/rsp.hpp 
b/waterbox/ares64/ares/ares/n64/rsp/rsp.hpp old mode 100644 new mode 100755 index 3a6bd95fe2..cb4f2ea45c --- a/waterbox/ares64/ares/ares/n64/rsp/rsp.hpp +++ b/waterbox/ares64/ares/ares/n64/rsp/rsp.hpp @@ -2,7 +2,36 @@ struct RSP : Thread, Memory::RCP { Node::Object node; - Memory::Writable dmem; + struct Writable : public Memory::Writable { + RSP& self; + + Writable(RSP& self) : self(self) {} + + template + auto read(u32 address) -> u64 { + if (system.homebrewMode) self.debugger.dmemReadWord(address, Size, "RSP"); + return Memory::Writable::read(address); + } + + template + auto readUnaligned(u32 address) -> u64 { + if (system.homebrewMode) self.debugger.dmemReadUnalignedWord(address, Size, "RSP"); + return Memory::Writable::readUnaligned(address); + } + + template + auto write(u32 address, u64 value) -> void { + if (system.homebrewMode) self.debugger.dmemWriteWord(address, Size, value); + Memory::Writable::write(address, value); + } + + template + auto writeUnaligned(u32 address, u64 value) -> void { + if (system.homebrewMode) self.debugger.dmemWriteUnalignedWord(address, Size, value); + Memory::Writable::writeUnaligned(address, value); + } + + } dmem{*this}; Memory::Writable imem; struct Debugger { @@ -14,6 +43,23 @@ struct RSP : Thread, Memory::RCP { auto ioSCC(bool mode, u32 address, u32 data) -> void; auto ioStatus(bool mode, u32 address, u32 data) -> void; + auto dmaReadWord(u32 rdramAddress, u32 pbusRegion, u32 pbusAddress) -> void; + auto dmemReadWord(u12 address, int size, const char *peripheral) -> void; + auto dmemWriteWord(u12 address, int size, u64 value) -> void; + auto dmemReadUnalignedWord(u12 address, int size, const char *peripheral) -> void; + auto dmemWriteUnalignedWord(u12 address, int size, u64 value) -> void; + + struct TaintMask { + struct TaintWord { + u8 dirty; + u32 ctxDmaRdramAddress; + u64 ctxDmaOriginPc; + u1 ctxDmaOriginCpu; + u64 ctxCacheFillPc; + u64 ctxCacheDirtyPc; + } dmem[512], imem[512]; + } taintMask; + struct Memory { 
Node::Debugger::Memory dmem; Node::Debugger::Memory imem; @@ -32,6 +78,7 @@ struct RSP : Thread, Memory::RCP { auto main() -> void; auto instruction() -> void; + auto instructionPrologue(u32 instruction) -> void; auto instructionEpilogue(u32 clocks) -> s32; auto power(bool reset) -> void; @@ -165,8 +212,8 @@ struct RSP : Thread, Memory::RCP { //io.cpp auto readWord(u32 address, Thread& thread) -> u32; auto writeWord(u32 address, u32 data, Thread& thread) -> void; - auto ioRead(u32 address) -> u32; - auto ioWrite(u32 address, u32 data) -> void; + auto ioRead(u32 address, Thread& thread) -> u32; + auto ioWrite(u32 address, u32 data, Thread& thread) -> void; //serialization.cpp auto serialize(serializer&) -> void; @@ -179,7 +226,9 @@ struct RSP : Thread, Memory::RCP { n12 length; n12 skip; n8 count; - + n64 originPc; + n1 originCpu; + auto serialize(serializer&) -> void; } pending, current; @@ -512,6 +561,7 @@ struct RSP : Thread, Memory::RCP { return s <= e ? smask & emask : smask | emask; } + bool callInstructionPrologue = false; Pipeline pipeline; bump_allocator allocator; array context; diff --git a/waterbox/ares64/ares/ares/n64/rsp/serialization.cpp b/waterbox/ares64/ares/ares/n64/rsp/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/si/debugger.cpp b/waterbox/ares64/ares/ares/n64/si/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/si/dma.cpp b/waterbox/ares64/ares/ares/n64/si/dma.cpp old mode 100644 new mode 100755 index 35f13aeb2a..c2690ab2b0 --- a/waterbox/ares64/ares/ares/n64/si/dma.cpp +++ b/waterbox/ares64/ares/ares/n64/si/dma.cpp @@ -1,6 +1,8 @@ auto SI::dmaRead() -> void { pif.dmaRead(io.readAddress, io.dramAddress); io.dmaBusy = 0; + io.pchState = 0; + io.dmaState = 0; io.interrupt = 1; mi.raise(MI::IRQ::SI); } @@ -8,6 +10,8 @@ auto SI::dmaRead() -> void { auto SI::dmaWrite() -> void { pif.dmaWrite(io.writeAddress, io.dramAddress); io.dmaBusy = 0; + io.pchState = 0; + 
io.dmaState = 0; io.interrupt = 1; mi.raise(MI::IRQ::SI); } diff --git a/waterbox/ares64/ares/ares/n64/si/io.cpp b/waterbox/ares64/ares/ares/n64/si/io.cpp old mode 100644 new mode 100755 index eccc777b93..b415c05848 --- a/waterbox/ares64/ares/ares/n64/si/io.cpp +++ b/waterbox/ares64/ares/ares/n64/si/io.cpp @@ -9,7 +9,7 @@ auto SI::readWord(u32 address, Thread& thread) -> u32 { } auto SI::ioRead(u32 address) -> u32 { - address = (address & 0xfffff) >> 2; + address = (address & 0x1f) >> 2; n32 data; if(address == 0) { diff --git a/waterbox/ares64/ares/ares/n64/si/serialization.cpp b/waterbox/ares64/ares/ares/n64/si/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/si/si.cpp b/waterbox/ares64/ares/ares/n64/si/si.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/si/si.hpp b/waterbox/ares64/ares/ares/n64/si/si.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/system/serialization.cpp b/waterbox/ares64/ares/ares/n64/system/serialization.cpp old mode 100644 new mode 100755 index e7ba25d222..f12ef7fd31 --- a/waterbox/ares64/ares/ares/n64/system/serialization.cpp +++ b/waterbox/ares64/ares/ares/n64/system/serialization.cpp @@ -1,4 +1,4 @@ -static const string SerializerVersion = "v133.1"; +static const string SerializerVersion = "v134"; auto System::serialize(bool synchronize) -> serializer { serializer s; diff --git a/waterbox/ares64/ares/ares/n64/system/system.cpp b/waterbox/ares64/ares/ares/n64/system/system.cpp old mode 100644 new mode 100755 index cd1a9840b7..6b9e93577e --- a/waterbox/ares64/ares/ares/n64/system/system.cpp +++ b/waterbox/ares64/ares/ares/n64/system/system.cpp @@ -1,5 +1,7 @@ #include +#include + namespace ares::Nintendo64 { auto enumerate() -> vector { @@ -29,6 +31,8 @@ auto option(string name, string value) -> bool { if(vulkan.internalUpscale == 1) vulkan.supersampleScanout = false; vulkan.outputUpscale = vulkan.supersampleScanout ? 
1 : vulkan.internalUpscale; #endif + if(name == "Homebrew Mode") system.homebrewMode = value.boolean(); + if(name == "Expansion Pak") system.expansionPak = value.boolean(); return true; } diff --git a/waterbox/ares64/ares/ares/n64/system/system.hpp b/waterbox/ares64/ares/ares/n64/system/system.hpp old mode 100644 new mode 100755 index cae1e955fd..e3f7547dc4 --- a/waterbox/ares64/ares/ares/n64/system/system.hpp +++ b/waterbox/ares64/ares/ares/n64/system/system.hpp @@ -1,6 +1,8 @@ struct System { Node::System node; VFS::Pak pak; + bool homebrewMode = false; + bool expansionPak = true; enum class Region : u32 { NTSC, PAL }; @@ -31,6 +33,8 @@ private: bool dd = false; } information; + auto initDebugHooks() -> void; + //serialization.cpp auto serialize(serializer&, bool synchronize) -> void; }; diff --git a/waterbox/ares64/ares/ares/n64/vi/debugger.cpp b/waterbox/ares64/ares/ares/n64/vi/debugger.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/vi/io.cpp b/waterbox/ares64/ares/ares/n64/vi/io.cpp old mode 100644 new mode 100755 index 773378c272..4b3482a933 --- a/waterbox/ares64/ares/ares/n64/vi/io.cpp +++ b/waterbox/ares64/ares/ares/n64/vi/io.cpp @@ -1,5 +1,5 @@ auto VI::readWord(u32 address, Thread& thread) -> u32 { - address = (address & 0xfffff) >> 2; + address = (address & 0x3f) >> 2; n32 data; if(address == 0) { @@ -97,7 +97,7 @@ auto VI::readWord(u32 address, Thread& thread) -> u32 { } auto VI::writeWord(u32 address, u32 data_, Thread& thread) -> void { - address = (address & 0xfffff) >> 2; + address = (address & 0x3f) >> 2; n32 data = data_; #if defined(VULKAN) diff --git a/waterbox/ares64/ares/ares/n64/vi/serialization.cpp b/waterbox/ares64/ares/ares/n64/vi/serialization.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/ares/n64/vi/vi.cpp b/waterbox/ares64/ares/ares/n64/vi/vi.cpp old mode 100644 new mode 100755 index cd22eb7084..7f5aa6e6f0 --- a/waterbox/ares64/ares/ares/n64/vi/vi.cpp +++ 
b/waterbox/ares64/ares/ares/n64/vi/vi.cpp @@ -26,6 +26,7 @@ auto VI::load(Node::Object parent) -> void { #endif screen = node->append("Screen", width, height); screen->setRefresh({&VI::refresh, this}); + screen->refreshRateHint(Region::PAL() ? 50 : 60); // TODO: More accurate refresh rate hint screen->colors((1 << 24) + (1 << 15), [&](n32 color) -> n64 { if(color < (1 << 24)) { u64 a = 65535; @@ -159,7 +160,7 @@ auto VI::refresh() -> void { auto line = screen->pixels(1).data() + (dy - vscan_start) * hscan_len; u32 x0 = vi.io.xsubpixel + vi.io.xscale * (dx0 - vi.io.hstart); for(i32 dx = dx0; dx < dx1; dx++) { - u16 data = rdram.ram.read(address + (x0 >> 10) * 2); + u16 data = rdram.ram.read(address + (x0 >> 10) * 2, "VI"); line[dx - hscan_start] = 1 << 24 | data >> 1; x0 += vi.io.xscale; } @@ -177,7 +178,7 @@ auto VI::refresh() -> void { auto line = screen->pixels(1).data() + (dy - vscan_start) * hscan_len; u32 x0 = vi.io.xsubpixel + vi.io.xscale * (dx0 - vi.io.hstart); for(i32 dx = dx0; dx < dx1; dx++) { - u32 data = rdram.ram.read(address + (x0 >> 10) * 4); + u32 data = rdram.ram.read(address + (x0 >> 10) * 4, "VI"); line[dx - hscan_start] = data >> 8; x0 += vi.io.xscale; } diff --git a/waterbox/ares64/ares/ares/n64/vi/vi.hpp b/waterbox/ares64/ares/ares/n64/vi/vi.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/adaptive-array.hpp b/waterbox/ares64/ares/nall/adaptive-array.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/algorithm.hpp b/waterbox/ares64/ares/nall/algorithm.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/any.hpp b/waterbox/ares64/ares/nall/any.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/arguments.hpp b/waterbox/ares64/ares/nall/arguments.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/arithmetic.hpp b/waterbox/ares64/ares/nall/arithmetic.hpp old mode 100644 new mode 100755 index baf00a9fb2..4d0ebbf382 --- 
a/waterbox/ares64/ares/nall/arithmetic.hpp +++ b/waterbox/ares64/ares/nall/arithmetic.hpp @@ -3,6 +3,8 @@ //multi-precision arithmetic //warning: each size is quadratically more expensive than the size before it! +#include + #include #include #include diff --git a/waterbox/ares64/ares/nall/arithmetic/barrett.hpp b/waterbox/ares64/ares/nall/arithmetic/barrett.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/arithmetic/natural.hpp b/waterbox/ares64/ares/nall/arithmetic/natural.hpp old mode 100644 new mode 100755 index e90f2b3bf1..0cedfaee5a --- a/waterbox/ares64/ares/nall/arithmetic/natural.hpp +++ b/waterbox/ares64/ares/nall/arithmetic/natural.hpp @@ -1,3 +1,5 @@ +#include + #define ConcatenateType(Size) u##Size #define DeclareType(Size) ConcatenateType(Size) @@ -278,7 +280,7 @@ template alwaysinline auto ror(const Pair& lhs, const T& rhs) -> Pai return lhs >> rhs | lhs << (PairBits - rhs); } -#define EI /*typename =*/ enable_if_t::value> +#define EI enable_if_t::value> template auto& operator*= (T& lhs, const Pair& rhs) { return lhs = lhs * T(rhs); } template auto& operator/= (T& lhs, const Pair& rhs) { return lhs = lhs / T(rhs); } diff --git a/waterbox/ares64/ares/nall/arithmetic/unsigned.hpp b/waterbox/ares64/ares/nall/arithmetic/unsigned.hpp old mode 100644 new mode 100755 index bc82a4c09d..35e8a34b9e --- a/waterbox/ares64/ares/nall/arithmetic/unsigned.hpp +++ b/waterbox/ares64/ares/nall/arithmetic/unsigned.hpp @@ -2,34 +2,34 @@ namespace nall { -template::value>> +template::value>> inline auto upper(T value) -> T { return value >> sizeof(T) * 4; } -template::value>> +template::value>> inline auto lower(T value) -> T { static const T Mask = ~T(0) >> sizeof(T) * 4; return value & Mask; } -template::value>, typename = enable_if_t::value>> +template::value>, enable_if_t::value>> inline auto mul(T lhs, U rhs) -> uintmax { return lhs * rhs; } -template::value>> +template::value>> inline auto square(T value) -> uintmax { return value * value; 
} -template::value>> -inline auto rol(T lhs, U rhs) -> T { +template +inline auto rol(T lhs, U rhs, enable_if_t::value>* = 0) -> T { return lhs << rhs | lhs >> sizeof(T) * 8 - rhs; } -template::value>> -inline auto ror(T lhs, U rhs) -> T { +template +inline auto ror(T lhs, U rhs, enable_if_t::value>* = 0) -> T { return lhs >> rhs | lhs << sizeof(T) * 8 - rhs; } diff --git a/waterbox/ares64/ares/nall/array-span.hpp b/waterbox/ares64/ares/nall/array-span.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/array-view.hpp b/waterbox/ares64/ares/nall/array-view.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/array.hpp b/waterbox/ares64/ares/nall/array.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/atoi.hpp b/waterbox/ares64/ares/nall/atoi.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/bcd.hpp b/waterbox/ares64/ares/nall/bcd.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/beat/single/apply.hpp b/waterbox/ares64/ares/nall/beat/single/apply.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/beat/single/create.hpp b/waterbox/ares64/ares/nall/beat/single/create.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/bit.hpp b/waterbox/ares64/ares/nall/bit.hpp old mode 100644 new mode 100755 index b259c12201..3a50c130bb --- a/waterbox/ares64/ares/nall/bit.hpp +++ b/waterbox/ares64/ares/nall/bit.hpp @@ -78,6 +78,14 @@ namespace bit { return first; } + //return index of the last bit set (or zero of no bits are set) + //last(0b11000) == 4 + constexpr inline auto last(u64 x) -> u32 { + u32 i = 0; + while(x) { x >>= 1; i++; } + return i > 0 ? 
--i : i; + } + //round up to next highest single bit: //round(15) == 16, round(16) == 16, round(17) == 32 constexpr inline auto round(u64 x) -> u64 { diff --git a/waterbox/ares64/ares/nall/bump-allocator.hpp b/waterbox/ares64/ares/nall/bump-allocator.hpp old mode 100644 new mode 100755 index dbf1e52adc..7a4aac5789 --- a/waterbox/ares64/ares/nall/bump-allocator.hpp +++ b/waterbox/ares64/ares/nall/bump-allocator.hpp @@ -28,9 +28,6 @@ struct bump_allocator { reset(); if(buffer) { - if(flags & executable) { - memory::protect(buffer, capacity, true); - } if(flags & zero_fill) { memset(buffer, 0x00, capacity); } @@ -88,9 +85,9 @@ struct bump_allocator { _offset = nextOffset(size); //alignment } - auto tryAcquire(u32 size) -> u8* { + auto tryAcquire(u32 size, bool reserve = true) -> u8* { if((nextOffset(size)) > _capacity) return nullptr; - return acquire(size); + return reserve ? acquire(size) : acquire(); } private: diff --git a/waterbox/ares64/ares/nall/case-range.hpp b/waterbox/ares64/ares/nall/case-range.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/cd.hpp b/waterbox/ares64/ares/nall/cd.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/cd/crc16.hpp b/waterbox/ares64/ares/nall/cd/crc16.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/cd/edc.hpp b/waterbox/ares64/ares/nall/cd/edc.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/cd/efm.hpp b/waterbox/ares64/ares/nall/cd/efm.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/cd/rspc.hpp b/waterbox/ares64/ares/nall/cd/rspc.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/cd/scrambler.hpp b/waterbox/ares64/ares/nall/cd/scrambler.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/cd/session.hpp b/waterbox/ares64/ares/nall/cd/session.hpp old mode 100644 new mode 100755 index f8b826c5e1..7bff004d3d --- a/waterbox/ares64/ares/nall/cd/session.hpp +++ 
b/waterbox/ares64/ares/nall/cd/session.hpp @@ -268,7 +268,9 @@ struct Session { q[0] = track.control << 4 | 1; q[1] = BCD::encode(trackID); q[2] = BCD::encode(indexID); - auto msf = MSF(lba - track.indices[1].lba); + auto msf = indexID == 0 + ? MSF(track.indices[0].end - lba) + : MSF(lba - track.indices[1].lba); q[3] = BCD::encode(msf.minute); q[4] = BCD::encode(msf.second); q[5] = BCD::encode(msf.frame); diff --git a/waterbox/ares64/ares/nall/cd/sync.hpp b/waterbox/ares64/ares/nall/cd/sync.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/chrono.hpp b/waterbox/ares64/ares/nall/chrono.hpp old mode 100644 new mode 100755 index dec25cbd8d..49e83a6ef1 --- a/waterbox/ares64/ares/nall/chrono.hpp +++ b/waterbox/ares64/ares/nall/chrono.hpp @@ -25,6 +25,12 @@ inline auto benchmark(const function& f, u64 times = 1) -> void { print("[chrono::benchmark] ", (double)(end - start) / 1'000'000'000.0, "s\n"); } +inline auto daysInMonth(u32 month, u32 year) -> u8 { + u32 days = 30 + ((month + (month >> 3)) & 1); + if (month == 2) days -= (year % 4 == 0) ? 
1 : 2; + return days; +} + //exact date/time functions (from system epoch) struct timeinfo { diff --git a/waterbox/ares64/ares/nall/cipher/chacha20.hpp b/waterbox/ares64/ares/nall/cipher/chacha20.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/counting-sort.hpp b/waterbox/ares64/ares/nall/counting-sort.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/database/odbc.hpp b/waterbox/ares64/ares/nall/database/odbc.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/database/sqlite3.hpp b/waterbox/ares64/ares/nall/database/sqlite3.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/base.hpp b/waterbox/ares64/ares/nall/decode/base.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/base64.hpp b/waterbox/ares64/ares/nall/decode/base64.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/bmp.hpp b/waterbox/ares64/ares/nall/decode/bmp.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/bwt.hpp b/waterbox/ares64/ares/nall/decode/bwt.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/chd.hpp b/waterbox/ares64/ares/nall/decode/chd.hpp old mode 100644 new mode 100755 index b150787f68..bd09f64085 --- a/waterbox/ares64/ares/nall/decode/chd.hpp +++ b/waterbox/ares64/ares/nall/decode/chd.hpp @@ -31,7 +31,7 @@ struct CHD { }; auto load(const string& location) -> bool; - auto read(u32 sector) -> vector; + auto read(u32 sector) const -> vector; auto sectorCount() const -> u32; vector tracks; @@ -40,10 +40,10 @@ private: #if false chd_file* chd = nullptr; #endif - const int chd_sector_size = 2352 + 96; + static constexpr int chd_sector_size = 2352 + 96; size_t chd_hunk_size; - vector chd_hunk_buffer; - int chd_current_hunk = -1; + mutable vector chd_hunk_buffer; + mutable int chd_current_hunk = -1; }; inline CHD::~CHD() { @@ -123,11 +123,10 @@ inline auto 
CHD::load(const string& location) -> bool { return false; } - // Ensure two second pregap is present const bool pregap_in_file = (pregap_frames > 0 && pgtype[0] == 'V'); - if (pregap_frames <= 0 && typeStr != "AUDIO") { - pregap_frames = 2 * 75; - } + + // First track should have 2 second pregap as standard + if(track_no == 1 && !pregap_in_file) pregap_frames = 2 * 75; // Add the new track Track track; @@ -136,7 +135,7 @@ inline auto CHD::load(const string& location) -> bool { track.pregap = pregap_frames; track.postgap = postgap_frames; - // Pregap + // index0 = Pregap if (pregap_frames > 0) { Index index; index.number = 0; @@ -190,7 +189,7 @@ inline auto CHD::load(const string& location) -> bool { #endif } -inline auto CHD::read(u32 sector) -> vector { +inline auto CHD::read(u32 sector) const -> vector { // Convert LBA in CD-ROM to LBA in CHD #if false for(auto& track : tracks) { diff --git a/waterbox/ares64/ares/nall/decode/cue.hpp b/waterbox/ares64/ares/nall/decode/cue.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/gzip.hpp b/waterbox/ares64/ares/nall/decode/gzip.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/html.hpp b/waterbox/ares64/ares/nall/decode/html.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/huffman.hpp b/waterbox/ares64/ares/nall/decode/huffman.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/inflate.hpp b/waterbox/ares64/ares/nall/decode/inflate.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/lzsa.hpp b/waterbox/ares64/ares/nall/decode/lzsa.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/mtf.hpp b/waterbox/ares64/ares/nall/decode/mtf.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/png.hpp b/waterbox/ares64/ares/nall/decode/png.hpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/nall/decode/rle.hpp b/waterbox/ares64/ares/nall/decode/rle.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/url.hpp b/waterbox/ares64/ares/nall/decode/url.hpp old mode 100644 new mode 100755 index 18382d1c96..62a99e2488 --- a/waterbox/ares64/ares/nall/decode/url.hpp +++ b/waterbox/ares64/ares/nall/decode/url.hpp @@ -1,5 +1,7 @@ #pragma once +#include + namespace nall::Decode { //returns empty string on malformed content diff --git a/waterbox/ares64/ares/nall/decode/wav.hpp b/waterbox/ares64/ares/nall/decode/wav.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/decode/zip.hpp b/waterbox/ares64/ares/nall/decode/zip.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/directory.cpp b/waterbox/ares64/ares/nall/directory.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/directory.hpp b/waterbox/ares64/ares/nall/directory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/dl.cpp b/waterbox/ares64/ares/nall/dl.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/dl.hpp b/waterbox/ares64/ares/nall/dl.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/dsp/iir/biquad.hpp b/waterbox/ares64/ares/nall/dsp/iir/biquad.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/dsp/iir/dc-removal.hpp b/waterbox/ares64/ares/nall/dsp/iir/dc-removal.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/dsp/iir/one-pole.hpp b/waterbox/ares64/ares/nall/dsp/iir/one-pole.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/dsp/resampler/cubic.hpp b/waterbox/ares64/ares/nall/dsp/resampler/cubic.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/elliptic-curve/curve25519.hpp b/waterbox/ares64/ares/nall/elliptic-curve/curve25519.hpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/nall/elliptic-curve/ed25519.hpp b/waterbox/ares64/ares/nall/elliptic-curve/ed25519.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-optimized.hpp b/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-optimized.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-reference.hpp b/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-reference.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/emulation/21fx.hpp b/waterbox/ares64/ares/nall/emulation/21fx.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/base.hpp b/waterbox/ares64/ares/nall/encode/base.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/base64.hpp b/waterbox/ares64/ares/nall/encode/base64.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/bmp.hpp b/waterbox/ares64/ares/nall/encode/bmp.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/bwt.hpp b/waterbox/ares64/ares/nall/encode/bwt.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/html.hpp b/waterbox/ares64/ares/nall/encode/html.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/huffman.hpp b/waterbox/ares64/ares/nall/encode/huffman.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/lzsa.hpp b/waterbox/ares64/ares/nall/encode/lzsa.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/mtf.hpp b/waterbox/ares64/ares/nall/encode/mtf.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/png.hpp b/waterbox/ares64/ares/nall/encode/png.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/rle.hpp b/waterbox/ares64/ares/nall/encode/rle.hpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/nall/encode/url.hpp b/waterbox/ares64/ares/nall/encode/url.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/wav.hpp b/waterbox/ares64/ares/nall/encode/wav.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/encode/zip.hpp b/waterbox/ares64/ares/nall/encode/zip.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/endian.hpp b/waterbox/ares64/ares/nall/endian.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/file-buffer.hpp b/waterbox/ares64/ares/nall/file-buffer.hpp old mode 100644 new mode 100755 index 41bd2e298d..eac9570701 --- a/waterbox/ares64/ares/nall/file-buffer.hpp +++ b/waterbox/ares64/ares/nall/file-buffer.hpp @@ -229,10 +229,10 @@ private: auto bufferSynchronize() -> void { if(!fileHandle) return; - if(bufferOffset == (fileOffset & ~(buffer.size() - 1))) return; + if(bufferOffset == (fileOffset & ~u64(buffer.size() - 1))) return; bufferFlush(); - bufferOffset = fileOffset & ~(buffer.size() - 1); + bufferOffset = fileOffset & ~u64(buffer.size() - 1); fseek(fileHandle, bufferOffset, SEEK_SET); u64 length = bufferOffset + buffer.size() <= fileSize ? 
buffer.size() : fileSize & buffer.size() - 1; if(length) (void)fread(buffer.data(), 1, length, fileHandle); diff --git a/waterbox/ares64/ares/nall/file-map.cpp b/waterbox/ares64/ares/nall/file-map.cpp old mode 100644 new mode 100755 index c579f0c607..3172378ab0 --- a/waterbox/ares64/ares/nall/file-map.cpp +++ b/waterbox/ares64/ares/nall/file-map.cpp @@ -41,14 +41,17 @@ NALL_HEADER_INLINE auto file_map::open(const string& filename, u32 mode_) -> boo _file = CreateFileW(utf16_t(filename), desiredAccess, FILE_SHARE_READ, nullptr, creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); - if(_file == INVALID_HANDLE_VALUE) return false; + if(_file == INVALID_HANDLE_VALUE) { + _file = nullptr; + return false; + } _size = GetFileSize(_file, nullptr); _map = CreateFileMapping(_file, nullptr, protection, 0, _size, nullptr); - if(_map == INVALID_HANDLE_VALUE) { + if(_map == nullptr) { CloseHandle(_file); - _file = INVALID_HANDLE_VALUE; + _file = nullptr; return false; } @@ -62,14 +65,14 @@ NALL_HEADER_INLINE auto file_map::close() -> void { _data = nullptr; } - if(_map != INVALID_HANDLE_VALUE) { + if(_map != nullptr) { CloseHandle(_map); - _map = INVALID_HANDLE_VALUE; + _map = nullptr; } - if(_file != INVALID_HANDLE_VALUE) { + if(_file != nullptr) { CloseHandle(_file); - _file = INVALID_HANDLE_VALUE; + _file = nullptr; } _open = false; diff --git a/waterbox/ares64/ares/nall/file-map.hpp b/waterbox/ares64/ares/nall/file-map.hpp old mode 100644 new mode 100755 index 0a7ec70094..66e2cc2e1c --- a/waterbox/ares64/ares/nall/file-map.hpp +++ b/waterbox/ares64/ares/nall/file-map.hpp @@ -50,8 +50,8 @@ private: #if defined(API_WINDOWS) - HANDLE _file = INVALID_HANDLE_VALUE; - HANDLE _map = INVALID_HANDLE_VALUE; + HANDLE _file = nullptr; + HANDLE _map = nullptr; public: auto operator=(file_map&& source) -> file_map& { @@ -67,8 +67,8 @@ public: source._open = false; source._data = nullptr; source._size = 0; - source._file = INVALID_HANDLE_VALUE; - source._map = INVALID_HANDLE_VALUE; + 
source._file = nullptr; + source._map = nullptr; return *this; } diff --git a/waterbox/ares64/ares/nall/file.hpp b/waterbox/ares64/ares/nall/file.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/float-env.hpp b/waterbox/ares64/ares/nall/float-env.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/function.hpp b/waterbox/ares64/ares/nall/function.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/galois-field.hpp b/waterbox/ares64/ares/nall/galois-field.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/gdb/Readme.md b/waterbox/ares64/ares/nall/gdb/Readme.md new file mode 100755 index 0000000000..dd81ff811c --- /dev/null +++ b/waterbox/ares64/ares/nall/gdb/Readme.md @@ -0,0 +1,227 @@ +# Debug Server + +The file `server.cpp` adds a gdb-server compatible with several GDB versions and IDEs like VScode and CLion.
+It is implemented as a standalone server independent of any specific system, and even ares itself.
+This allows for easy integration with systems without having to worry about the details of GDB itself.
+ +Managing the server itself, including the underlying TCP connection, is done by ares.
+System-specific logic is handled via (optional) callbacks that can be registered,
+as well as methods to report events to GDB. + +The overall design of this server is to be as neutral as possible.
+This means that things like stopping, stepping and reading memory should not affect the game.
+This is done to make sure that games behave the same as if they were running without a debugger, down to the cycle.
+ +## Integration Guide +This section describes how to implement the debugger for a system in ares.
+It should not be necessary to modify the server itself, or to know much about the GDB protocol.
+Simply registering callbacks and reporting events are enough to get the full set of features working.
+ +For a minimal working debugging session, register/memory reads and a way to report the PC are required.
+However, implementing as much as possible is recommended to make GDB more stable. + +Interactions with the server can be split into three categories: +- **Hooks:** let GDB call functions in your ares system (e.g.: memory read) +- **Report-functions:** notify GDB about events (e.g.: exceptions) +- **Status-functions:** helpers to check the GDB status (e.g.: are breakpoints set or not) + +Hooks can be set via the callbacks in `GDB::server.hooks.XXX`.
+Report functions are prefixed `GDB::server.reportXXX()`, and status functions are documented here separately.
+All hooks/report/status functions can be safely set or called even if the server is not running.
+ +As an example of a fictional system, this is what a register read could look like: +```cpp +GDB::server.hooks.regRead = [](u32 regIdx) { + return hex(cpu.readRegister(regIdx), 16, '0'); +}; +``` +Or the main execution loop: +```cpp +while(!endOfFrame && GDB::server.reportPC(cpu.getPC())) { + cpu.step(); +} +``` + +For a real reference implementation, you can take a look at the N64 system.
+ +## Hooks + +### Memory Read - `read = (u64 address, u32 byteCount) -> string` +Reads `byteCount` bytes from `address` and returns them as a hex-string.
+Both the hex-encoding and the single-byte reads are dictated by the GDB protocol.
+ +It is important to implement this in a neutral way: no exceptions and no status changes.
+The GDB-client may issue reads from any address at any point while halted.
+If not handled properly, this can cause game crashes or different emulation behavior.
+ +If your system emulates cache, make sure to also handle this here.
+A read must be able to see the cache, but never cause a flush.
+ +Example response (reading 3 bytes): `A1B200` + +### Memory Write - `write = (u64 address, u32 unitSize, u64 value) -> void` +Writes `value` of byte-size `unitSize` to `address`.
+For example, writing a 32-bit value would issue a call like this: `write(0x80001230, 4, 0x0000000012345678)`.
+Contrary to read, this is not required to be neutral, and is allowed to cause exceptions.
+ +If your system emulates cache, make sure to also handle this here.
+The write should behave the same as if it was done via a CPU instruction, incl. flushing the cache if needed.
+ +### Normalize Address - `normalizeAddress = (u64 address) -> u64` +Normalizes an address into something that makes it comparable.
+This is only used for memory-watchpoints, which need to compare what GDB sends to what ares has internally.
+If your system has virtual addresses or masks, this should de-virtualize it.
+ +It's OK to not set this function, or to simply return the input untouched.
+If memory-watchpoints are not working, this is probably the place to fix it.
+ +Example implementation: +```cpp +GDB::server.hooks.normalizeAddress = [](u64 address) { + return address & 0x0FFF'FFFF; +}; +``` + +### Register Read - `regRead = (u32 regIdx) -> string` +Reads a single register at `regIdx` and returns it as a hex-string.
+The size of the hex-string is dictated by the specific architecture.
+ +Same as for memory-read, this must be implemented in a neutral way.
+Any invalid register can be returned as zero.
+ +Example response: `00000000000123AB` + +### Register Write - `regWrite = (u32 regIdx, u64 regValue) -> bool` + +Writes the value `regValue` to the register at `regIdx`.
+This write is allowed to have side effects.
+ +If the specific register is not writable or doesn't exist, `false` must be returned.
+On success, `true` must be returned.
+ +### Register Read (General) - `regReadGeneral = () -> string` +Most common way for GDB to read registers, this fetches all registers at once.
+The number and order of registers are dictated by the specific architecture and GDB.
+When implementing this, GDB will usually complain if the order/size is incorrect.
+ +Same as for single reads, this must be implemented in a neutral way.
+ +Due to some issues regarding exception handling, you are given the option to return a different PC.
+This PC-override can be accessed via `GDB::server.getPcOverride() -> maybe<u64>`.
+The reasons for that are explained later in `reportSignal()`. + +Other than that, this can be implemented by looping over `hooks.regRead` and returning a concatenated string.
+Example response: `0000000000000000ffffffff8001000000000000000000420000000000000000000000000000000100000`... + +### Register Write (General) - `regWriteGeneral = (const string &regData) -> void` +Writes all registers at once, this happens very rarely.  
+The format of `regData` is the same as the response of `hooks.regReadGeneral`.
+Any register that is not writable or doesn't exist can be ignored.
+ +### Emulator Cache - `emuCacheInvalidate = (u64 address) -> void` +Should invalidate the emulator's cache at `address`.
+This is only necessary if you have a re-compiler or some form of instruction cache.
+ +### Target XML - `targetXML = () -> string` +Provides an XML description of the target system.
+The XML must not contain any newlines, and should be as short as possible.
+If the client has access to an `.elf` file, this will be mostly ignored. + +Example implementation: +```cpp +GDB::server.hooks.targetXML = []() -> string { + return "<target version=\"1.0\">" + "<architecture>mips:4000</architecture>" + "</target>"; +}; +``` +Documentation: https://sourceware.org/gdb/onlinedocs/gdb/Target-Description-Format.html#Target-Description-Format +
+ +## Report-Functions + +### Signal `reportSignal(Signal sig, u64 originPC) -> bool` +Reports a signal/exception `sig` that occurred at `originPC`.
+The architecture specific exception must be mapped to the enum in `Signal`.
+As a default, `Signal::TRAP` can be used.
+ +It will return `false` if the exception occurred while the game was already paused.
+This can be safely ignored.
+ +Since you may not be able to stop the execution before an exception occurs,
+the `originPC` value will be saved until the next time the game is resumed.  
+A `hooks.regReadGeneral` implementation may use this to temporarily return a different PC.  
+This is done to allow GDB to halt on the causing instruction instead of the exception handler.
+If you can halt before an exception occurs, you can ignore this.
+ +### PC `reportPC(u64 pc) -> bool` +Sets a new PC, this will internally check for break- and watch-points.
+For convenience, it will return `false` if you should halt execution.
+If no debugger is running, it will always return `true`.
+ +You must only call this once per step, before the instruction at the given address gets executed.
+This also means a return value of `false` should make it halt before the instruction too.
+Once halted, it's safe to call this with the same PC each iteration.
+ +If a re-compiler is used, you may not want to call this for every single instruction.
+In that case take a look at `hasBreakpoints()` on how to optimize this.
+ +If you need to check more than once whether a halt is required, use `GDB::server.isHalted()` instead.  
+ +### Memory Read `reportMemRead(u64 address, u32 size) -> void` +Reports that a memory read occurred at `address` with `size` bytes.
+The passed address must be the raw un-normalized address.
+ +This is exclusively used for memory-watchpoints.
+No PC override mechanism is provided here, since it breaks GDB.  
+ +### Memory Write `reportMemWrite(u64 address, u32 size) -> void` +Exactly the same as `reportMemRead`, but for writes instead.
+The new value of that location will be automatically fetched by the client via a memory read,
+and is therefore not needed here. + +## Status-Functions + +### Halted `isHalted() -> bool` +Returns if the game should be currently halted or not.
+For convenience, the same value gets directly returned from `reportPC`.
+ +### Breakpoints `hasBreakpoints() -> bool` +Return `true` if at least one break- or watch-point is set.
+ +If you use a block-based re-compiler, stopping at every instruction may not be possible.
+You may use this information to force single-instruction execution in that case.
+If it returns false, you can safely resume using the block-based execution again.
+ +### PC Override `getPcOverride() -> maybe<u64>` +Returns a value if a PC override is active.  
+As mentioned in `reportSignal()`, this can be used to return a different PC letting GDB halt at the causing instruction.
+You can safely call this function multiple times.
+Once a single step is taken, or the game is resumed, the override is cleared.
+ +## API Usage + +This API can also be used without GDB, which allows for more use cases.
+For example, you can write automated tooling or custom debugging UIs.
+To make access easier, no strict checks are performed.
+This means that the handshake protocol is optional, and checksums are not verified. + +### TCP +TCP connections behave the same way as a GDB session.
+The connection is kept open the entire time, and commands are sent sequentially, each waiting for a response before sending the next command. + +However, it is possible to send commands even if the game is still running, +this allows for real-time data access. + +Keep in mind that the server uses the RSP (Remote Serial Protocol) commands, which are different from what you would type into a GDB client.  
+For a list of all commands, see: https://sourceware.org/gdb/onlinedocs/gdb/Packets.html#Packets + +As an example, reading from memory would look like this: +``` +$m8020a504,100#00 +``` +This reads 100 bytes from address `0x8020a504`, the `$` and `#` define the message start/end, and the `00` is the checksum (which is not checked). + +One detail, and security check, is that new connections must send `+` as the first byte in the first payload.
+It's also a good idea to send a proper disconnect-command before closing the socket.
+Otherwise, the debugger will not accept new connections until a reset or restart occurs. \ No newline at end of file diff --git a/waterbox/ares64/ares/nall/gdb/server.cpp b/waterbox/ares64/ares/nall/gdb/server.cpp new file mode 100755 index 0000000000..eeb9ffb575 --- /dev/null +++ b/waterbox/ares64/ares/nall/gdb/server.cpp @@ -0,0 +1,552 @@ +#include + +#include + +using string = ::nall::string; +using string_view = ::nall::string_view; + +namespace { + constexpr bool GDB_LOG_MESSAGES = false; + + constexpr u32 MAX_REQUESTS_PER_UPDATE = 10; + constexpr u32 MAX_PACKET_SIZE = 0x4096; + constexpr u32 DEF_BREAKPOINT_SIZE = 64; + constexpr bool NON_STOP_MODE = false; // broken for now, mainly useful for multi-thread debugging, which we can't really support + + auto gdbCalcChecksum(const string &payload) -> u8 { + u8 checksum = 0; + for(char c : payload)checksum += c; + return checksum; + } + + template + inline auto addOrRemoveEntry(vector &data, T value, bool shouldAdd) { + if(shouldAdd) { + data.append(value); + } else { + data.removeByValue(value); + } + } +} + +namespace nall::GDB { + Server server{}; + + auto Server::reportSignal(Signal sig, u64 originPC) -> bool { + if(!hasActiveClient || !handshakeDone)return true; // no client -> no error + if(forceHalt)return false; // Signals can only happen while the game is running, ignore others + + pcOverride = originPC; + + forceHalt = true; + haltSignalSent = true; + sendSignal(sig); + + return true; + } + + auto Server::reportWatchpoint(const Watchpoint &wp, u64 address) -> void { + auto orgAddress = wp.addressStartOrg + (address - wp.addressStart); + forceHalt = true; + haltSignalSent = true; + sendSignal(Signal::TRAP, {wp.getTypePrefix(), hex(orgAddress), ";"}); + } + + auto Server::reportMemRead(u64 address, u32 size) -> void { + if(!watchpointRead)return; + + if(hooks.normalizeAddress) { + address = hooks.normalizeAddress(address); + } + + u64 addressEnd = address + size - 1; + for(const auto& wp : watchpointRead) 
{ + if(wp.hasOverlap(address, addressEnd)) { + return reportWatchpoint(wp, address); + } + } + } + + auto Server::reportMemWrite(u64 address, u32 size) -> void { + if(!watchpointWrite)return; + + if(hooks.normalizeAddress) { + address = hooks.normalizeAddress(address); + } + + u64 addressEnd = address + size - 1; + for(const auto& wp : watchpointWrite) { + if(wp.hasOverlap(address, addressEnd)) { + return reportWatchpoint(wp, address); + } + } + } + + auto Server::reportPC(u64 pc) -> bool { + if(!hasActiveClient)return true; + + currentPC = pc; + bool needHalts = forceHalt || breakpoints.contains(pc); + + if(needHalts) { + forceHalt = true; // breakpoints may get deleted after a signal, but we have to stay stopped + + if(!haltSignalSent) { + haltSignalSent = true; + sendSignal(Signal::TRAP); + } + } + + if(singleStepActive) { + singleStepActive = false; + forceHalt = true; + } + + return !needHalts; + } + + /** + * NOTE: please read the comment in the header server.hpp file before making any changes here! + */ + auto Server::processCommand(const string& cmd, bool &shouldReply) -> string + { + auto cmdParts = cmd.split(":"); + auto cmdName = cmdParts[0]; + char cmdPrefix = cmdName.size() > 0 ? cmdName(0) : ' '; + + if constexpr(GDB_LOG_MESSAGES) { + print("GDB <: %s\n", cmdBuffer.data()); + } + + switch(cmdPrefix) + { + case '!': return "OK"; // informs us that "extended remote-debugging" is used + + case '?': // handshake: why did we halt? 
+ haltProgram(); + haltSignalSent = true; + return "T05"; // needs to be faked, otherwise the GDB-client hangs up and eats 100% CPU + + case 'c': // continue + case 'C': // continue (with signal, signal itself can be ignored) + // normal stop-mode is only allowed to respond once a signal was raised, non-stop must return OK immediately + handshakeDone = true; // good indicator that GDB is done, also enables exception sending + shouldReply = NON_STOP_MODE; + resumeProgram(); + return "OK"; + + case 'D': // client wants to detach (Note: VScode doesn't seem to use this, uses vKill instead) + requestDisconnect = true; + return "OK"; + break; + + case 'g': // dump all general registers + if(hooks.regReadGeneral) { + return hooks.regReadGeneral(); + } else { + return "0000000000000000000000000000000000000000"; + } + break; + + case 'G': // set all general registers + if(hooks.regWriteGeneral) { + hooks.regWriteGeneral(cmd.slice(1)); + return "OK"; + } + break; + + case 'H': // set which thread a 'c' command that may follow belongs to (can be ignored in stop-mode) + if(cmdName == "Hc0")currentThreadC = 0; + if(cmdName == "Hc-1")currentThreadC = -1; + return "OK"; + + case 'k': // old version of vKill + if(handshakeDone) { // sometimes this gets send during handshake (to reset the program?) -> ignore + requestDisconnect = true; + } + return "OK"; + break; + + case 'm': // read memory (e.g.: "m80005A00,4") + { + if(!hooks.read) { + return ""; + } + + auto sepIdxMaybe = cmdName.find(","); + u32 sepIdx = sepIdxMaybe ? sepIdxMaybe.get() : 1; + + u64 address = cmdName.slice(1, sepIdx-1).hex(); + u64 count = cmdName.slice(sepIdx+1, cmdName.size()-sepIdx).hex(); + return hooks.read(address, count); + } + break; + + case 'M': // write memory (e.g.: "M801ef90a,4:01000000") + { + if(!hooks.write) { + return ""; + } + + auto sepIdxMaybe = cmdName.find(","); + u32 sepIdx = sepIdxMaybe ? 
sepIdxMaybe.get() : 1; + + u64 address = cmdName.slice(1, sepIdx-1).hex(); + u64 unitSize = cmdName.slice(sepIdx+1, 1).hex(); + u64 value = cmdParts.size() > 1 ? cmdParts[1].hex() : 0; + + hooks.write(address, unitSize, value); + return "OK"; + } + + break; + + case 'p': // read specific register (e.g.: "p15") + if(hooks.regRead) { + u32 regIdx = cmdName.slice(1).integer(); + return hooks.regRead(regIdx); + } else { + return "00000000"; + } + break; + + case 'P': // write specific register (e.g.: "P15=FFFFFFFF80001234") + if(hooks.regWrite) { + auto sepIdxMaybe = cmdName.find("="); + u32 sepIdx = sepIdxMaybe ? sepIdxMaybe.get() : 1; + + u32 regIdx = static_cast(cmdName.slice(1, sepIdx-1).hex()); + u64 regValue = cmdName.slice(sepIdx+1).hex(); + + return hooks.regWrite(regIdx, regValue) ? "OK" : "E00"; + } + break; + + case 'q': + // This tells the client what we can and can't do + if(cmdName == "qSupported"){ return { + "PacketSize=", hex(MAX_PACKET_SIZE), + ";fork-events-;swbreak+;hwbreak-", + ";vContSupported-", // prevent vCont commands (reduces potential GDB variations: some prefer using it, others don't) + NON_STOP_MODE ? ";QNonStop+" : "", + "QStartNoAckMode+", + hooks.targetXML ? ";xmlRegisters+;qXfer:features:read+" : "" // (see: https://marc.info/?l=gdb&m=149901965961257&w=2) + };} + + // handshake-command, most return dummy values to convince gdb to connect + if(cmdName == "qTStatus")return forceHalt ? 
"T1" : ""; + if(cmdName == "qAttached")return "1"; // we are always attached, since a game is running + if(cmdName == "qOffsets")return "Text=0;Data=0;Bss=0"; + + if(cmdName == "qSymbol")return "OK"; // client offers us symbol-names -> we don't care + + // client asks us about existing breakpoints (may happen after a re-connect) -> ignore since we clear them on connect + if(cmdName == "qTfP")return ""; + if(cmdName == "qTsP")return ""; + + // extended target features (gdb extension), most return XML data + if(cmdName == "qXfer" && cmdParts.size() > 4) + { + if(cmdParts[1] == "features" && cmdParts[2] == "read") { + // informs the client about arch/registers (https://sourceware.org/gdb/onlinedocs/gdb/Target-Description-Format.html#Target-Description-Format) + if(cmdParts[3] == "target.xml") { + return hooks.targetXML ? string{"l", hooks.targetXML()} : string{""}; + } + } + } + + // Thread-related queries + if(cmdName == "qfThreadInfo")return {"m1"}; + if(cmdName == "qsThreadInfo")return {"l"}; + if(cmdName == "qThreadExtraInfo,1")return ""; // ignoring this command fixes support for CLion (and VSCode?), otherwise gdb hangs + if(cmdName == "qC")return {"QC1"}; + // there will also be a "qP0000001f0000000000000001" command depending on the IDE, this is ignored to prevent GDB from hanging up + break; + + case 'Q': + if(cmdName == "QNonStop") { // 0=stop, 1=non-stop-mode (this allows for async GDB-communication) + if(cmdParts.size() <= 1)return "E00"; + nonStopMode = cmdParts[1] == "1"; + + if(nonStopMode) { + haltProgram(); + } else { + resumeProgram(); + } + return "OK"; + } + + if(cmdName == "QStartNoAckMode") { + if (noAckMode) { + return "OK"; + } + // The final OK has to be sent in ack mode. 
+ sendPayload("OK"); + shouldReply = false; + noAckMode = true; + return ""; + } + break; + + case 's': { + if(cmdName.size() > 1) { + u64 address = cmdName.slice(1).integer(); + printf("stepping at address unsupported, ignore (%016" PRIX64 ")\n", address); + } + + shouldReply = false; + singleStepActive = true; + resumeProgram(); + return ""; + } break; + + case 'v': { + // normalize (e.g. "vAttach;1" -> "vAttach") + auto sepIdxMaybe = cmdName.find(";"); + auto vName = sepIdxMaybe ? cmdName.slice(0, sepIdxMaybe.get()) : cmdName; + + if(vName == "vMustReplyEmpty")return ""; // handshake-command / keep-alive (must return the same as an unknown command would) + if(vName == "vAttach")return NON_STOP_MODE ? "OK" : "S05"; // attaches to the process, we must return a fake trap-exception to make gdb happy + if(vName == "vCont?")return ""; // even though "vContSupported-" is set, gdb may still ask for it -> ignore to force e.g. `s` instead of `vCont;s:1;c` + if(vName == "vStopped")return ""; + if(vName == "vCtrlC") { + haltProgram(); + return "OK"; + } + + if(vName == "vKill") { + if(handshakeDone) { // sometimes this gets send during handshake (to reset the program?) -> ignore + requestDisconnect = true; + } + return "OK"; + } + + if(vName == "vCont") return "E00"; // if GDB completely ignores both "vCont is unsupported" responses, throw an error here + + } break; + + case 'Z': // insert breakpoint (e.g. "Z0,801a0ef4,4") + case 'z': // remove breakpoint (e.g. "z0,801a0ef4,4") + { + bool isInsert = cmdPrefix == 'Z'; + bool isHardware = cmdName(1) == '1'; // 0=software, 1=hardware + auto sepIdxMaybe = cmdName.findFrom(3, ","); + u32 sepIdx = sepIdxMaybe ? 
(sepIdxMaybe.get()+3) : 0; + + u64 address = cmdName.slice(3, sepIdx-1).hex(); + u64 addressStart = address; + u64 addressEnd = address + cmdName.slice(sepIdx+1).hex() - 1; + + if(hooks.normalizeAddress) { + addressStart = hooks.normalizeAddress(addressStart); + addressEnd = hooks.normalizeAddress(addressEnd); + } + Watchpoint wp{addressStart, addressEnd, address}; + + switch(cmdName(1)) { + case '0': // (hardware/software breakpoints are the same for us) + case '1': addOrRemoveEntry(breakpoints, address, isInsert); break; + + case '2': + wp.type = WatchpointType::WRITE; + addOrRemoveEntry(watchpointWrite, wp, isInsert); + break; + + case '3': + wp.type = WatchpointType::READ; + addOrRemoveEntry(watchpointRead, wp, isInsert); + break; + + case '4': + wp.type = WatchpointType::ACCESS; + addOrRemoveEntry(watchpointRead, wp, isInsert); + addOrRemoveEntry(watchpointWrite, wp, isInsert); + break; + default: return "E00"; + } + + if(hooks.emuCacheInvalidate) { // for re-compiler, otherwise breaks might be skipped + hooks.emuCacheInvalidate(address); + } + return "OK"; + } + } + + printf("Unknown-Command: %s (data: %s)\n", cmdName.data(), cmdBuffer.data()); + return ""; + } + + auto Server::onText(string_view text) -> void { + + if(cmdBuffer.size() == 0) { + cmdBuffer.reserve(text.size()); + } + + for(char c : text) + { + switch(c) + { + case '$': + insideCommand = true; + break; + + case '#': { // end of message + 2-char checksum after that + insideCommand = false; + + ++messageCount; + bool shouldReply = true; + auto cmdRes = processCommand(cmdBuffer, shouldReply); + if(shouldReply) { + sendPayload(cmdRes); + } else if(!noAckMode) { + sendText("+"); + } + + cmdBuffer = ""; + } break; + + case '+': break; // "OK" response -> ignore + + case '\x03': // CTRL+C (same as "vCtrlC" packet) -> force halt + if constexpr(GDB_LOG_MESSAGES) { + printf("GDB <: CTRL+C [0x03]\n"); + } + haltProgram(); + break; + + default: + if(insideCommand) { + cmdBuffer.append(c); + } + } + } + } + 
+ auto Server::updateLoop() -> void { + if(!isStarted())return; + + if(requestDisconnect) { + requestDisconnect = false; + if(!noAckMode) { + sendText("+"); + } + disconnectClient(); + resumeProgram(); + return; + } + + // The following code manages the message processing which gets exchanged from the server thread. + // It was carefully build to balance latency, throughput and CPU usage to let the game still run at full speed + // while allowing for fast processing once the debugger is halted. + + u32 loopFrames = isHalted() ? 20 : 1; // "frames" to check (loops with sleep in-between) + u32 loopCount = isHalted() ? 500 : 100; // loops inside a frame, the more the less latency, but CPU usage goes up + u32 maxLoopResets = 10000; // how many times can a new message reset the counter (prevents infinite loops with misbehaving clients) + bool wasHalted = isHalted(); + + for(u32 frame=0; frame 0 && maxLoopResets > 0) { + i = loopCount; // reset loop here to keep a fast chain of messages going (reduces latency) + --maxLoopResets; + } + } + + if(wasHalted)usleep(1); + } + } + + auto Server::getStatusText(u32 port, bool useIPv4) -> string { + auto url = getURL(port, useIPv4); + string prefix = isHalted() ? "⬛" : "▶"; + + if(hasClient())return {prefix, " GDB connected ", url}; + if(isStarted())return {"GDB listening ", url}; + return {"GDB pending (", url, ")"}; + } + + auto Server::sendSignal(Signal code) -> void { + sendPayload({"S", hex(static_cast(code), 2)}); + } + + auto Server::sendSignal(Signal code, const string& reason) -> void { + sendPayload({"T", hex(static_cast(code), 2), reason}); + } + + auto Server::sendPayload(const string& payload) -> void { + string msg{noAckMode ? "$" : "+$", payload, '#', hex(gdbCalcChecksum(payload), 2, '0')}; + if constexpr(GDB_LOG_MESSAGES) { + printf("GDB >: %.*s\n", msg.size() > 100 ? 
100 : msg.size(), msg.data()); + } + sendText(msg); + } + + auto Server::haltProgram() -> void { + forceHalt = true; + haltSignalSent = false; + } + + auto Server::resumeProgram() -> void { + pcOverride.reset(); + forceHalt = false; + haltSignalSent = false; + } + + auto Server::onConnect() -> void { + printf("GDB client connected\n"); + resetClientData(); + hasActiveClient = true; + } + + auto Server::onDisconnect() -> void { + printf("GDB client disconnected\n"); + hadHandshake = false; + resetClientData(); + } + + auto Server::reset() -> void { + hooks.read.reset(); + hooks.write.reset(); + hooks.normalizeAddress.reset(); + hooks.regReadGeneral.reset(); + hooks.regWriteGeneral.reset(); + hooks.regRead.reset(); + hooks.regWrite.reset(); + hooks.emuCacheInvalidate.reset(); + hooks.targetXML.reset(); + + resetClientData(); + } + + auto Server::resetClientData() -> void { + breakpoints.reset(); + breakpoints.reserve(DEF_BREAKPOINT_SIZE); + + watchpointRead.reset(); + watchpointRead.reserve(DEF_BREAKPOINT_SIZE); + + watchpointWrite.reset(); + watchpointWrite.reserve(DEF_BREAKPOINT_SIZE); + + pcOverride.reset(); + insideCommand = false; + cmdBuffer = ""; + haltSignalSent = false; + forceHalt = false; + singleStepActive = false; + nonStopMode = false; + noAckMode = false; + + currentThreadC = -1; + hasActiveClient = false; + handshakeDone = false; + requestDisconnect = false; + } + +}; diff --git a/waterbox/ares64/ares/nall/gdb/server.hpp b/waterbox/ares64/ares/nall/gdb/server.hpp new file mode 100755 index 0000000000..a767429356 --- /dev/null +++ b/waterbox/ares64/ares/nall/gdb/server.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include +#include + +namespace nall::GDB { + +enum class Signal : u8 { + HANGUP = 1, + INT = 2, + QUIT = 3, + ILLEGAL = 4, + TRAP = 5, + ABORT = 6, + SEGV = 11, +}; + +/** + * This implements a GDB server to handle remote debugging via a GDB client. + * It is both independent of ares itself and any specific system. 
+ * Functionality is added by providing system-specific callbacks, as well as using the API inside a system. + * (See the Readme.md file for more information.) + * + * NOTE: + * Command handling and the overall logic were carefully designed to support as many IDEs and GDB versions as possible. + * Things can break very easily (and the official documentation may lie), so be very sure of any changes made here. + * If changes are necessary, please verify that the following gdb-versions / IDEs still work properly: + * + * GDB: + * - gdb-multiarch (the plain vanilla version exists in most package managers, supports a lot of arches) + * - mips64-ultra-elf-gdb (special MIPS build of gdb-multiarch, I do NOT recommend it, behaves strangely) + * - mingw-w64-x86_64-gdb (vanilla build for Windows/MSYS) + * + * IDEs/Tools: + * - GDB's CLI + * - VSCode + * - CLion (with bundled gdb-multiarch) + * + * For testing, please also check both linux and windows (WSL2). + * With WSL2, windows-ares is started from within WSL, while the debugger runs in linux. + * This can be easily tested with VSCode and its debugger.
+ */ +class Server : public nall::TCPText::Server { + public: + + auto reset() -> void; + + struct { + // Memory + function read{}; + function write{}; + function normalizeAddress{}; + + // Registers + function regReadGeneral{}; + function regWriteGeneral{}; + function regRead{}; + function regWrite{}; + + // Emulator + function emuCacheInvalidate{}; + function targetXML{}; + + + } hooks{}; + + // Exception + auto reportSignal(Signal sig, u64 originPC) -> bool; + + // PC / Memory State Updates + auto reportPC(u64 pc) -> bool; + auto reportMemRead(u64 address, u32 size) -> void; + auto reportMemWrite(u64 address, u32 size) -> void; + + // Breakpoints / Watchpoints + auto isHalted() const { return forceHalt && haltSignalSent; } + auto hasBreakpoints() const { + return breakpoints || singleStepActive || watchpointRead || watchpointWrite; + } + + auto getPcOverride() const { return pcOverride; }; + + auto updateLoop() -> void; + auto getStatusText(u32 port, bool useIPv4) -> string; + + protected: + auto onText(string_view text) -> void override; + auto onConnect() -> void override; + auto onDisconnect() -> void override; + + private: + bool insideCommand{false}; + string cmdBuffer{""}; + + bool haltSignalSent{false}; // marks if a signal as been sent for new halts (force-halt and breakpoints) + bool forceHalt{false}; // forces a halt despite no breakpoints being hit + bool singleStepActive{false}; + + bool noAckMode{false}; // gets set if lldb prefers no acknowledgements + bool nonStopMode{false}; // (NOTE: Not working for now), gets set if gdb wants to switch over to async-messaging + bool handshakeDone{false}; // set to true after a few handshake commands, used to prevent exception-reporting until client is ready + bool requestDisconnect{false}; // set to true if the client decides it wants to disconnect + + bool hasActiveClient{false}; + u32 messageCount{0}; // message count per update loop + s32 currentThreadC{-1}; // selected thread for the next 'c' command + + 
u64 currentPC{0}; + maybe pcOverride{0}; // temporary override to handle edge-cases for exceptions/watchpoints + + // client-state: + vector breakpoints{}; + vector watchpointRead{}; + vector watchpointWrite{}; + + auto processCommand(const string& cmd, bool &shouldReply) -> string; + auto resetClientData() -> void; + + auto reportWatchpoint(const Watchpoint &wp, u64 address) -> void; + + auto sendPayload(const string& payload) -> void; + auto sendSignal(Signal code) -> void; + auto sendSignal(Signal code, const string& reason) -> void; + + auto haltProgram() -> void; + auto resumeProgram() -> void; +}; + +extern Server server; + +} diff --git a/waterbox/ares64/ares/nall/gdb/watchpoint.hpp b/waterbox/ares64/ares/nall/gdb/watchpoint.hpp new file mode 100755 index 0000000000..b60d32657d --- /dev/null +++ b/waterbox/ares64/ares/nall/gdb/watchpoint.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include + +namespace nall::GDB { + + enum class WatchpointType : u32 { + WRITE, READ, ACCESS + }; + + struct Watchpoint { + u64 addressStart{0}; + u64 addressEnd{0}; + u64 addressStartOrg{0}; // un-normalized address, GDB needs this + WatchpointType type{}; + + auto operator==(const Watchpoint& w) const { + return addressStart == w.addressStart && addressEnd == w.addressEnd + && addressStartOrg == w.addressStartOrg && type == w.type; + } + + auto hasOverlap(u64 start, u64 end) const { + return (end >= addressStart) && (start <= addressEnd); + } + + auto getTypePrefix() const -> string { + if(type == WatchpointType::WRITE)return "watch:"; + if(type == WatchpointType::READ)return "rwatch:"; + return "awatch:"; + } + }; +} \ No newline at end of file diff --git a/waterbox/ares64/ares/nall/hash/crc16.hpp b/waterbox/ares64/ares/nall/hash/crc16.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hash/crc32.hpp b/waterbox/ares64/ares/nall/hash/crc32.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hash/crc64.hpp 
b/waterbox/ares64/ares/nall/hash/crc64.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hash/hash.hpp b/waterbox/ares64/ares/nall/hash/hash.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hash/sha224.hpp b/waterbox/ares64/ares/nall/hash/sha224.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hash/sha256.hpp b/waterbox/ares64/ares/nall/hash/sha256.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hash/sha384.hpp b/waterbox/ares64/ares/nall/hash/sha384.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hash/sha512.hpp b/waterbox/ares64/ares/nall/hash/sha512.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hashset.hpp b/waterbox/ares64/ares/nall/hashset.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/hid.hpp b/waterbox/ares64/ares/nall/hid.hpp old mode 100644 new mode 100755 index cc8e7f120d..c2ce520ecd --- a/waterbox/ares64/ares/nall/hid.hpp +++ b/waterbox/ares64/ares/nall/hid.hpp @@ -17,7 +17,7 @@ struct Input { private: string _name; s16 _value = 0; - friend class Group; + friend struct Group; }; struct Group : vector { @@ -36,7 +36,7 @@ struct Group : vector { private: string _name; - friend class Device; + friend struct Device; }; struct Device : vector { diff --git a/waterbox/ares64/ares/nall/http/client.cpp b/waterbox/ares64/ares/nall/http/client.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/http/client.hpp b/waterbox/ares64/ares/nall/http/client.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/http/message.hpp b/waterbox/ares64/ares/nall/http/message.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/http/request.hpp b/waterbox/ares64/ares/nall/http/request.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/http/response.hpp b/waterbox/ares64/ares/nall/http/response.hpp old mode 
100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/http/role.hpp b/waterbox/ares64/ares/nall/http/role.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/http/server.cpp b/waterbox/ares64/ares/nall/http/server.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/http/server.hpp b/waterbox/ares64/ares/nall/http/server.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image.hpp b/waterbox/ares64/ares/nall/image.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/blend.hpp b/waterbox/ares64/ares/nall/image/blend.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/core.hpp b/waterbox/ares64/ares/nall/image/core.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/fill.hpp b/waterbox/ares64/ares/nall/image/fill.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/interpolation.hpp b/waterbox/ares64/ares/nall/image/interpolation.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/load.hpp b/waterbox/ares64/ares/nall/image/load.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/multifactor.hpp b/waterbox/ares64/ares/nall/image/multifactor.hpp old mode 100644 new mode 100755 index be7722aa5e..4c1b1d4e74 --- a/waterbox/ares64/ares/nall/image/multifactor.hpp +++ b/waterbox/ares64/ares/nall/image/multifactor.hpp @@ -3,24 +3,24 @@ namespace nall { inline multiFactorImage::multiFactorImage(const multiFactorImage& source) { - (*this) = source; + operator=(source); } inline multiFactorImage::multiFactorImage(multiFactorImage&& source) { - operator=(std::forward(source)); + operator=(std::move(source)); } inline multiFactorImage::multiFactorImage(const image& lowDPI, const image& highDPI) { - (*(image*)this) = lowDPI; + image::operator=(lowDPI); _highDPI = highDPI; } inline multiFactorImage::multiFactorImage(const image& source) { 
- (*(image*)this) = source; + image::operator=(source); } inline multiFactorImage::multiFactorImage(image&& source) { - operator=(std::forward(source)); + image::operator=(std::move(source)); } inline multiFactorImage::multiFactorImage() { @@ -32,7 +32,7 @@ inline multiFactorImage::~multiFactorImage() { inline auto multiFactorImage::operator=(const multiFactorImage& source) -> multiFactorImage& { if(this == &source) return *this; - (*(image*)this) = source; + image::operator=(source); _highDPI = source._highDPI; return *this; @@ -41,15 +41,15 @@ inline auto multiFactorImage::operator=(const multiFactorImage& source) -> multi inline auto multiFactorImage::operator=(multiFactorImage&& source) -> multiFactorImage& { if(this == &source) return *this; - (*(image*)this) = source; - _highDPI = source._highDPI; + image::operator=(std::move(source)); + _highDPI = std::move(source._highDPI); return *this; } inline auto multiFactorImage::operator==(const multiFactorImage& source) const -> bool { - if((const image&)*this != (const image&)source) return false; - return _highDPI != source._highDPI; + if(image::operator!=(source)) return false; + return _highDPI == source._highDPI; } inline auto multiFactorImage::operator!=(const multiFactorImage& source) const -> bool { diff --git a/waterbox/ares64/ares/nall/image/scale.hpp b/waterbox/ares64/ares/nall/image/scale.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/static.hpp b/waterbox/ares64/ares/nall/image/static.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/image/utility.hpp b/waterbox/ares64/ares/nall/image/utility.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/induced-sort.hpp b/waterbox/ares64/ares/nall/induced-sort.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/inline-if.hpp b/waterbox/ares64/ares/nall/inline-if.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/inode.cpp 
b/waterbox/ares64/ares/nall/inode.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/inode.hpp b/waterbox/ares64/ares/nall/inode.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/instance.hpp b/waterbox/ares64/ares/nall/instance.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/interpolation.hpp b/waterbox/ares64/ares/nall/interpolation.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/intrinsics.hpp b/waterbox/ares64/ares/nall/intrinsics.hpp old mode 100644 new mode 100755 index c3aa3d4871..d00b54bc01 --- a/waterbox/ares64/ares/nall/intrinsics.hpp +++ b/waterbox/ares64/ares/nall/intrinsics.hpp @@ -25,7 +25,6 @@ namespace nall { static constexpr bool GCC = 0; static constexpr bool Microsoft = 0; }; - #pragma clang diagnostic error "-Wc++20-extensions" #pragma clang diagnostic error "-Wgnu-case-range" #pragma clang diagnostic error "-Wgnu-statement-expression" #pragma clang diagnostic error "-Wvla" @@ -64,6 +63,10 @@ namespace nall { static constexpr bool GCC = 0; static constexpr bool Microsoft = 1; }; + #pragma warning(disable:4146) //unary minus operator applied to unsigned type, result still unsigned + #pragma warning(disable:4244) //conversion from 'type1' to 'type2', possible loss of data + #pragma warning(disable:4804) //unsafe use of type 'bool' in operation + #pragma warning(disable:4805) //unsafe mix of type 'bool' and type 'type' in operation #pragma warning(disable:4996) //libc "deprecation" warnings #else #error "unable to detect compiler" @@ -188,6 +191,8 @@ namespace nall { static constexpr bool arm32 = 0; static constexpr bool ppc64 = 0; static constexpr bool ppc32 = 0; + static constexpr bool rv64 = 0; + static constexpr bool rv32 = 0; }; #elif defined(__amd64__) || defined(_M_AMD64) #define ARCHITECTURE_AMD64 @@ -201,6 +206,8 @@ namespace nall { static constexpr bool arm32 = 0; static constexpr bool ppc64 = 0; static constexpr bool ppc32 = 0; + static 
constexpr bool rv64 = 0; + static constexpr bool rv32 = 0; }; #elif defined(__aarch64__) || defined(_M_ARM64) #define ARCHITECTURE_ARM64 @@ -214,6 +221,8 @@ namespace nall { static constexpr bool arm32 = 0; static constexpr bool ppc64 = 0; static constexpr bool ppc32 = 0; + static constexpr bool rv64 = 0; + static constexpr bool rv32 = 0; }; #elif defined(__arm__) #define ARCHITECTURE_ARM32 @@ -224,6 +233,8 @@ namespace nall { static constexpr bool arm32 = 1; static constexpr bool ppc64 = 0; static constexpr bool ppc32 = 0; + static constexpr bool rv64 = 0; + static constexpr bool rv32 = 0; }; #elif defined(__ppc64__) || defined(_ARCH_PPC64) #define ARCHITECTURE_PPC64 @@ -234,6 +245,8 @@ namespace nall { static constexpr bool arm32 = 0; static constexpr bool ppc64 = 1; static constexpr bool ppc32 = 0; + static constexpr bool rv64 = 0; + static constexpr bool rv32 = 0; }; #elif defined(__ppc__) || defined(_ARCH_PPC) || defined(_M_PPC) #define ARCHITECTURE_PPC32 @@ -244,6 +257,32 @@ namespace nall { static constexpr bool arm32 = 0; static constexpr bool ppc64 = 0; static constexpr bool ppc32 = 1; + static constexpr bool rv64 = 0; + static constexpr bool rv32 = 0; + }; +#elif defined(__riscv) && __riscv_xlen == 64 + #define ARCHITECTURE_RV64 + struct Architecture { + static constexpr bool x86 = 0; + static constexpr bool amd64 = 0; + static constexpr bool arm64 = 0; + static constexpr bool arm32 = 0; + static constexpr bool ppc64 = 0; + static constexpr bool ppc32 = 0; + static constexpr bool rv64 = 1; + static constexpr bool rv32 = 0; + }; +#elif defined(__riscv) && __riscv_xlen == 32 + #define ARCHITECTURE_RV32 + struct Architecture { + static constexpr bool x86 = 0; + static constexpr bool amd64 = 0; + static constexpr bool arm64 = 0; + static constexpr bool arm32 = 0; + static constexpr bool ppc64 = 0; + static constexpr bool ppc32 = 0; + static constexpr bool rv64 = 0; + static constexpr bool rv32 = 1; }; #else #error "unable to detect architecture" diff --git 
a/waterbox/ares64/ares/nall/ips.hpp b/waterbox/ares64/ares/nall/ips.hpp new file mode 100755 index 0000000000..3bc480025f --- /dev/null +++ b/waterbox/ares64/ares/nall/ips.hpp @@ -0,0 +1,63 @@ +#pragma once + +namespace nall::IPS { + +inline auto apply(array_view source, array_view patch, maybe result = {}) -> maybe> { + #define error(text) { if(result) *result = {"error: ", text}; return {}; } + #define success() { if(result) *result = ""; return target; } + + vector target; + for (u32 i : range(source.size())) { + target.append(source[i]); + } + + u32 patchOffset = 0; + auto read = [&]() -> u8 { + return patch[patchOffset++]; + }; + auto readOffset = [&]() -> u32 { + u32 result = read() << 16; + result |= read() << 8; + result |= read(); + return result; + }; + auto readLength = [&]() -> u32 { + u32 result = read() << 8; + result |= read(); + return result; + }; + auto write = [&](u32 index, u8 data) { + target[index] = data; + }; + + if(read() != 'P') error("IPS header invalid"); + if(read() != 'A') error("IPS header invalid"); + if(read() != 'T') error("IPS header invalid"); + if(read() != 'C') error("IPS header invalid"); + if(read() != 'H') error("IPS header invalid"); + + u32 patchSize = patch.size(); + while (patchOffset < patchSize - 3) { + u32 offset = readOffset(); + u32 length = readLength(); + + if(target.size() < offset + length) error("Invalid IPS patch file"); + + if (length == 0) { + length = readLength(); + u8 data = read(); + for(u32 i : range(length)) write(offset + i, data); + } else { + for (u32 i : range(length)) write(offset + i, read()); + } + } + + if(read() != 'E') error("IPS footer invalid"); + if(read() != 'O') error("IPS footer invalid"); + if(read() != 'F') error("IPS footer invalid"); + + success(); + #undef error + #undef success +} +} diff --git a/waterbox/ares64/ares/nall/iterator.hpp b/waterbox/ares64/ares/nall/iterator.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/literals.hpp 
b/waterbox/ares64/ares/nall/literals.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/locale.hpp b/waterbox/ares64/ares/nall/locale.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/location.hpp b/waterbox/ares64/ares/nall/location.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/mac/poly1305.hpp b/waterbox/ares64/ares/nall/mac/poly1305.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/macos/guard.hpp b/waterbox/ares64/ares/nall/macos/guard.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/main.cpp b/waterbox/ares64/ares/nall/main.cpp old mode 100644 new mode 100755 index 37b7a5ee30..c11cf2ad08 --- a/waterbox/ares64/ares/nall/main.cpp +++ b/waterbox/ares64/ares/nall/main.cpp @@ -1,6 +1,7 @@ #include #if defined(PLATFORM_WINDOWS) + #include #include #include #include diff --git a/waterbox/ares64/ares/nall/main.hpp b/waterbox/ares64/ares/nall/main.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/map.hpp b/waterbox/ares64/ares/nall/map.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/matrix-multiply.hpp b/waterbox/ares64/ares/nall/matrix-multiply.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/matrix.hpp b/waterbox/ares64/ares/nall/matrix.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/maybe.hpp b/waterbox/ares64/ares/nall/maybe.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/memory.cpp b/waterbox/ares64/ares/nall/memory.cpp old mode 100644 new mode 100755 index 8479cff96b..831eb25568 --- a/waterbox/ares64/ares/nall/memory.cpp +++ b/waterbox/ares64/ares/nall/memory.cpp @@ -29,18 +29,17 @@ NALL_HEADER_INLINE auto unmap(void* target, u32 size) -> void { #endif } -NALL_HEADER_INLINE auto protect(void* target, u32 size, bool executable) -> void { +NALL_HEADER_INLINE auto protect(void* target, u32 size, bool 
executable) -> bool { #if defined(API_WINDOWS) DWORD protect = executable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; DWORD oldProtect; - VirtualProtect(target, size, protect, &oldProtect); + return VirtualProtect(target, size, protect, &oldProtect); #elif defined(API_POSIX) int prot = PROT_READ | PROT_WRITE; if(executable) { prot |= PROT_EXEC; } - int ret = mprotect(target, size, prot); - assert(ret == 0); + return !mprotect(target, size, prot); #endif } diff --git a/waterbox/ares64/ares/nall/memory.hpp b/waterbox/ares64/ares/nall/memory.hpp old mode 100644 new mode 100755 index c932702bae..23c08b1c43 --- a/waterbox/ares64/ares/nall/memory.hpp +++ b/waterbox/ares64/ares/nall/memory.hpp @@ -34,7 +34,7 @@ namespace nall::memory { auto map(u32 size, bool executable) -> void*; auto unmap(void* target, u32 size) -> void; - auto protect(void* target, u32 size, bool executable) -> void; + auto protect(void* target, u32 size, bool executable) -> bool; auto jitprotect(bool executable) -> void; } @@ -195,16 +195,18 @@ template auto writem(void* target, T data) -> void { for(s32 n = size - 1; n >= 0; n--) *p++ = data >> n * 8; } -auto map(u32 size, bool executable) -> void*; - -auto unmap(void* target, u32 size) -> void; - -auto protect(void* target, u32 size, bool executable) -> void; - inline auto jitprotect(bool executable) -> void { #if defined(PLATFORM_MACOS) if(__builtin_available(macOS 11.0, *)) { - pthread_jit_write_protect_np(executable); + static thread_local s32 depth = 0; + if(!executable && depth++ == 0 + || executable && --depth == 0) { + pthread_jit_write_protect_np(executable); + } + #if defined(DEBUG) + struct unmatched_jitprotect {}; + if(depth < 0 || depth > 10) throw unmatched_jitprotect{}; + #endif } #endif } diff --git a/waterbox/ares64/ares/nall/merge-sort.hpp b/waterbox/ares64/ares/nall/merge-sort.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/nall.cpp b/waterbox/ares64/ares/nall/nall.cpp old mode 100644 new mode 100755 
index 223dde853c..890c9882e6 --- a/waterbox/ares64/ares/nall/nall.cpp +++ b/waterbox/ares64/ares/nall/nall.cpp @@ -10,7 +10,6 @@ #include #include #include -//#include #include #include #include @@ -18,6 +17,8 @@ #include #include #include +#include +#include //currently unused by ares //#include //#include diff --git a/waterbox/ares64/ares/nall/nall.hpp b/waterbox/ares64/ares/nall/nall.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/path.cpp b/waterbox/ares64/ares/nall/path.cpp old mode 100644 new mode 100755 index 02a12b9f30..265cf9fe3a --- a/waterbox/ares64/ares/nall/path.cpp +++ b/waterbox/ares64/ares/nall/path.cpp @@ -2,6 +2,8 @@ #if defined(PLATFORM_WINDOWS) #include +#elif defined(PLATFORM_MACOS) + #include #endif namespace nall::Path { @@ -14,12 +16,34 @@ NALL_HEADER_INLINE auto program() -> string { result.transform("\\", "/"); return Path::real(result); #else + #if defined(PLATFORM_MACOS) + if (CFBundleRef bundle = CFBundleGetMainBundle()) { + char path[PATH_MAX] = ""; + CFURLRef url = CFBundleCopyBundleURL(bundle); + CFURLGetFileSystemRepresentation(url, true, reinterpret_cast(path), sizeof(path)); + CFRelease(url); + return Path::real(path); + } + #endif Dl_info info; dladdr((void*)&program, &info); return Path::real(info.dli_fname); #endif } +NALL_HEADER_INLINE auto resources() -> string { + #if defined(PLATFORM_MACOS) + if (CFBundleRef bundle = CFBundleGetMainBundle()) { + char path[PATH_MAX] = ""; + CFURLRef url = CFBundleCopyBundleURL(bundle); + CFURLGetFileSystemRepresentation(url, true, reinterpret_cast(path), sizeof(path)); + CFRelease(url); + return string(path).append("/Contents/Resources/"); + } + #endif + return program(); +} + NALL_HEADER_INLINE auto root() -> string { #if defined(PLATFORM_WINDOWS) wchar_t path[PATH_MAX] = L""; diff --git a/waterbox/ares64/ares/nall/path.hpp b/waterbox/ares64/ares/nall/path.hpp old mode 100644 new mode 100755 index b9b602a68d..6218110050 --- a/waterbox/ares64/ares/nall/path.hpp +++ 
b/waterbox/ares64/ares/nall/path.hpp @@ -26,6 +26,10 @@ inline auto real(string_view name) -> string { auto program() -> string; +// program() +// ./ares.app/Contents/Resources/ +auto resources() -> string; + // / // c:/ auto root() -> string; diff --git a/waterbox/ares64/ares/nall/platform.cpp b/waterbox/ares64/ares/nall/platform.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/platform.hpp b/waterbox/ares64/ares/nall/platform.hpp old mode 100644 new mode 100755 index 5fabcb2aed..a87eab5ced --- a/waterbox/ares64/ares/nall/platform.hpp +++ b/waterbox/ares64/ares/nall/platform.hpp @@ -22,6 +22,7 @@ namespace Math { #include #include #include +#include #include #include diff --git a/waterbox/ares64/ares/nall/pointer.hpp b/waterbox/ares64/ares/nall/pointer.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/posix/service.hpp b/waterbox/ares64/ares/nall/posix/service.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/posix/shared-memory.hpp b/waterbox/ares64/ares/nall/posix/shared-memory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives.hpp b/waterbox/ares64/ares/nall/primitives.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives/bit-field.hpp b/waterbox/ares64/ares/nall/primitives/bit-field.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives/bit-range.hpp b/waterbox/ares64/ares/nall/primitives/bit-range.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives/boolean.hpp b/waterbox/ares64/ares/nall/primitives/boolean.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives/integer.hpp b/waterbox/ares64/ares/nall/primitives/integer.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives/literals.hpp b/waterbox/ares64/ares/nall/primitives/literals.hpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/nall/primitives/natural.hpp b/waterbox/ares64/ares/nall/primitives/natural.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives/real.hpp b/waterbox/ares64/ares/nall/primitives/real.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/primitives/types.hpp b/waterbox/ares64/ares/nall/primitives/types.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/priority-queue.hpp b/waterbox/ares64/ares/nall/priority-queue.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/property.hpp b/waterbox/ares64/ares/nall/property.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/queue.hpp b/waterbox/ares64/ares/nall/queue.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/queue/spsc.hpp b/waterbox/ares64/ares/nall/queue/spsc.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/queue/st.hpp b/waterbox/ares64/ares/nall/queue/st.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/random.cpp b/waterbox/ares64/ares/nall/random.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/random.hpp b/waterbox/ares64/ares/nall/random.hpp old mode 100644 new mode 100755 index c8114ba285..182707e91e --- a/waterbox/ares64/ares/nall/random.hpp +++ b/waterbox/ares64/ares/nall/random.hpp @@ -57,7 +57,7 @@ private: static const u64 crc64 = 0xc96c'5795'd787'0f42; u64 lfsr = crc64; - friend class RNG; + friend struct RNG; }; struct PCG : RNG { @@ -91,7 +91,7 @@ private: u64 state = 0; u64 increment = 0; - friend class RNG; + friend struct RNG; }; } @@ -121,7 +121,7 @@ private: Cipher::XChaCha20 context{0, 0}; u32 counter = 0; - friend class RNG; + friend struct RNG; }; } diff --git a/waterbox/ares64/ares/nall/range.hpp b/waterbox/ares64/ares/nall/range.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/amd64.hpp 
b/waterbox/ares64/ares/nall/recompiler/amd64/amd64.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/constants.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/constants.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/emitter.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/emitter.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-systemv.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-systemv.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-windows.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-windows.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/encoder-instructions.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-instructions.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/generic/constants.hpp b/waterbox/ares64/ares/nall/recompiler/generic/constants.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/generic/encoder-calls.hpp b/waterbox/ares64/ares/nall/recompiler/generic/encoder-calls.hpp old mode 100644 new mode 100755 index cb7e935d08..b4a1b0174b --- a/waterbox/ares64/ares/nall/recompiler/generic/encoder-calls.hpp +++ b/waterbox/ares64/ares/nall/recompiler/generic/encoder-calls.hpp @@ -44,7 +44,7 @@ template alwaysinline auto call(auto (C::*function)(P...) 
-> R, C* object, P0 p0) { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64(p0).data); sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2); if constexpr(!std::is_void_v) type |= SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W); @@ -54,8 +54,8 @@ template alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1) { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64{p1}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64(p0).data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64(p1).data); sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 3); @@ -66,9 +66,9 @@ template alwaysinline auto call(auto (C::*function)(P...) 
-> R, C* object, P0 p0, P1 p1, P2 p2) { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64{p1}.data); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, imm64{p2}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64(p0).data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64(p1).data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, imm64(p2).data); sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 3) diff --git a/waterbox/ares64/ares/nall/recompiler/generic/encoder-instructions.hpp b/waterbox/ares64/ares/nall/recompiler/generic/encoder-instructions.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/recompiler/generic/generic.hpp b/waterbox/ares64/ares/nall/recompiler/generic/generic.hpp old mode 100644 new mode 100755 index 8872c460d2..dd102be37e --- a/waterbox/ares64/ares/nall/recompiler/generic/generic.hpp +++ b/waterbox/ares64/ares/nall/recompiler/generic/generic.hpp @@ -3,7 +3,7 @@ #if defined(SLJIT) namespace nall::recompiler { struct generic { - static constexpr bool supported = Architecture::amd64 | Architecture::arm64 | Architecture::ppc64; + static constexpr bool supported = Architecture::amd64 | Architecture::arm64 | Architecture::ppc64 | Architecture::rv64; bump_allocator& allocator; sljit_compiler* compiler = nullptr; @@ -31,6 +31,7 @@ namespace nall::recompiler { auto endFunction() -> u8* { u8* code = (u8*)sljit_generate_code(compiler); + allocator.reserve(sljit_get_generated_code_size(compiler)); resetCompiler(); return code; } diff --git a/waterbox/ares64/ares/nall/reed-solomon.hpp b/waterbox/ares64/ares/nall/reed-solomon.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/run.cpp 
b/waterbox/ares64/ares/nall/run.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/run.hpp b/waterbox/ares64/ares/nall/run.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/serial.hpp b/waterbox/ares64/ares/nall/serial.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/serializer.hpp b/waterbox/ares64/ares/nall/serializer.hpp old mode 100644 new mode 100755 index 741753db1c..229edf23d6 --- a/waterbox/ares64/ares/nall/serializer.hpp +++ b/waterbox/ares64/ares/nall/serializer.hpp @@ -76,11 +76,10 @@ struct serializer { } template auto operator()(T& value) -> serializer& { - constexpr bool integral = is_integral_v || is_same_v; - static_assert(has_serialize_v || integral || is_floating_point_v); + static_assert(has_serialize_v || is_integral_v || is_floating_point_v); if constexpr(has_serialize_v) { value.serialize(*this); - } else if constexpr(integral) { + } else if constexpr(is_integral_v) { integer(value); } else if constexpr(is_floating_point_v) { real(value); diff --git a/waterbox/ares64/ares/nall/service.hpp b/waterbox/ares64/ares/nall/service.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/set.hpp b/waterbox/ares64/ares/nall/set.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/shared-memory.hpp b/waterbox/ares64/ares/nall/shared-memory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/shared-pointer.hpp b/waterbox/ares64/ares/nall/shared-pointer.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/smtp.cpp b/waterbox/ares64/ares/nall/smtp.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/smtp.hpp b/waterbox/ares64/ares/nall/smtp.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/stdint.hpp b/waterbox/ares64/ares/nall/stdint.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string.hpp 
b/waterbox/ares64/ares/nall/string.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/allocator/adaptive.hpp b/waterbox/ares64/ares/nall/string/allocator/adaptive.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/allocator/copy-on-write.hpp b/waterbox/ares64/ares/nall/string/allocator/copy-on-write.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/allocator/small-string-optimization.hpp b/waterbox/ares64/ares/nall/string/allocator/small-string-optimization.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/allocator/vector.hpp b/waterbox/ares64/ares/nall/string/allocator/vector.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/atoi.hpp b/waterbox/ares64/ares/nall/string/atoi.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/cast.hpp b/waterbox/ares64/ares/nall/string/cast.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/compare.hpp b/waterbox/ares64/ares/nall/string/compare.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/convert.hpp b/waterbox/ares64/ares/nall/string/convert.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/core.hpp b/waterbox/ares64/ares/nall/string/core.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/eval/evaluator.hpp b/waterbox/ares64/ares/nall/string/eval/evaluator.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/eval/literal.hpp b/waterbox/ares64/ares/nall/string/eval/literal.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/eval/node.hpp b/waterbox/ares64/ares/nall/string/eval/node.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/eval/parser.hpp b/waterbox/ares64/ares/nall/string/eval/parser.hpp old mode 100644 new mode 100755 
diff --git a/waterbox/ares64/ares/nall/string/find.hpp b/waterbox/ares64/ares/nall/string/find.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/format.hpp b/waterbox/ares64/ares/nall/string/format.hpp old mode 100644 new mode 100755 index 8400811951..5aeca7e1ee --- a/waterbox/ares64/ares/nall/string/format.hpp +++ b/waterbox/ares64/ares/nall/string/format.hpp @@ -108,6 +108,12 @@ template inline auto hex(T value, long precision, char padchar) -> s return buffer; } +// ~3x faster than method above with larger payloads of single individual bytes +inline auto hexByte(char* out, u8 value) -> void { + out[0] = "0123456789ABCDEF"[value >> 4]; + out[1] = "0123456789ABCDEF"[value & 0xF]; +} + template inline auto octal(T value, long precision, char padchar) -> string { string buffer; buffer.resize(sizeof(T) * 3); diff --git a/waterbox/ares64/ares/nall/string/markup/bml.hpp b/waterbox/ares64/ares/nall/string/markup/bml.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/markup/find.hpp b/waterbox/ares64/ares/nall/string/markup/find.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/markup/node.hpp b/waterbox/ares64/ares/nall/string/markup/node.hpp old mode 100644 new mode 100755 index 2f1ad7bfed..243b2a53b0 --- a/waterbox/ares64/ares/nall/string/markup/node.hpp +++ b/waterbox/ares64/ares/nall/string/markup/node.hpp @@ -40,7 +40,7 @@ protected: auto _lookup(const string& path) const -> Node; auto _create(const string& path) -> Node; - friend class Node; + friend struct Node; }; struct Node { diff --git a/waterbox/ares64/ares/nall/string/markup/xml.hpp b/waterbox/ares64/ares/nall/string/markup/xml.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/match.hpp b/waterbox/ares64/ares/nall/string/match.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/pascal.hpp b/waterbox/ares64/ares/nall/string/pascal.hpp old mode 100644 new 
mode 100755 index e2aca838ee..59bdb0b082 --- a/waterbox/ares64/ares/nall/string/pascal.hpp +++ b/waterbox/ares64/ares/nall/string/pascal.hpp @@ -33,7 +33,7 @@ struct string_pascal { explicit operator bool() const { return _data; } operator const char*() const { return _data ? _data + sizeof(u32) : nullptr; } - operator string() const { return _data ? string{_data + sizeof(u32)} : ""; } + operator string() const { return _data ? string{_data + sizeof(u32)} : ""_s; } auto operator=(const string_pascal& source) -> type& { if(this == &source) return *this; diff --git a/waterbox/ares64/ares/nall/string/replace.hpp b/waterbox/ares64/ares/nall/string/replace.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/split.hpp b/waterbox/ares64/ares/nall/string/split.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/transform/cml.hpp b/waterbox/ares64/ares/nall/string/transform/cml.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/transform/dml.hpp b/waterbox/ares64/ares/nall/string/transform/dml.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/trim.hpp b/waterbox/ares64/ares/nall/string/trim.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/utf8.hpp b/waterbox/ares64/ares/nall/string/utf8.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/utility.hpp b/waterbox/ares64/ares/nall/string/utility.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/vector.hpp b/waterbox/ares64/ares/nall/string/vector.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/string/view.hpp b/waterbox/ares64/ares/nall/string/view.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/suffix-array.hpp b/waterbox/ares64/ares/nall/suffix-array.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/tcptext/tcp-socket.cpp 
b/waterbox/ares64/ares/nall/tcptext/tcp-socket.cpp new file mode 100755 index 0000000000..c13fe0f106 --- /dev/null +++ b/waterbox/ares64/ares/nall/tcptext/tcp-socket.cpp @@ -0,0 +1,312 @@ +#include + +#include +#include +#include + +#if defined(PLATFORM_WINDOWS) + #include +#else + #include +#endif + +struct sockaddr_in; +struct sockaddr_in6; + +namespace { + constexpr bool TCP_LOG_MESSAGES = false; + + constexpr u32 TCP_BUFFER_SIZE = 1024 * 16; + constexpr u32 CLIENT_SLEEP_MS = 10; // ms to sleep while checking for new clients + constexpr u32 CYCLES_BEFORE_SLEEP = 100; // how often to do a send/receive check before a sleep + constexpr u32 RECEIVE_TIMEOUT_SEC = 1; // only important for latency of disconnecting clients, reads are blocming anyways + + // A few platform specific socket functions: + // (In general, windows+linux share the same names, yet they behave differenly) + auto socketSetBlockingMode(s32 socket, bool isBlocking) -> bool + { + if(socket < 0)return false; + #if defined(O_NONBLOCK) // Linux + auto oldFlags = fcntl(socket, F_GETFL, 0); + auto newFlags = isBlocking ? (oldFlags ^ O_NONBLOCK) : (oldFlags | O_NONBLOCK); + return fcntl(socket, F_SETFL, newFlags) == 0; + #elif defined(FIONBIO) // Windows + u_long state = isBlocking ? 0 : 1; + return ioctlsocket(socket, FIONBIO, &state) == NO_ERROR; + #endif + } + + auto socketShutdown(s32 socket) { + if(socket < 0)return; + #if defined(SD_BOTH) // Windows + ::shutdown(socket, SD_BOTH); + #elif defined(SHUT_RDWR) // Linux, Mac + ::shutdown(socket, SHUT_RDWR); + #endif + } + + auto socketClose(s32 socket) { + if(socket < 0)return; + #if defined(PLATFORM_WINDOWS) + ::closesocket(socket); + #else + ::close(socket); + #endif + } +} + +namespace nall::TCP { + +NALL_HEADER_INLINE auto Socket::getURL(u32 port, bool useIPv4) const -> string { + return {useIPv4 ? 
"127.0.0.1:" : "[::1]:", port}; +} + +NALL_HEADER_INLINE auto Socket::open(u32 port, bool useIPv4) -> bool { + stopServer = false; + + auto url = getURL(port, useIPv4); + printf("Opening TCP-server on %s\n", url.data()); + + auto threadServer = std::thread([this, port, useIPv4]() { + serverRunning = true; + + while (!stopServer) { + fdServer = socket(useIPv4 ? AF_INET : AF_INET6, SOCK_STREAM, 0); + if(fdServer < 0) + break; + + { + s32 valueOn = 1; + #if defined(SO_NOSIGPIPE) //BSD, OSX + setsockopt(fdServer, SOL_SOCKET, SO_NOSIGPIPE, &valueOn, sizeof(s32)); + #endif + + #if defined(SO_REUSEADDR) //BSD, Linux, OSX + setsockopt(fdServer, SOL_SOCKET, SO_REUSEADDR, &valueOn, sizeof(s32)); + #endif + + #if defined(SO_REUSEPORT) //BSD, OSX + setsockopt(fdServer, SOL_SOCKET, SO_REUSEPORT, &valueOn, sizeof(s32)); + #endif + + #if defined(TCP_NODELAY) + setsockopt(fdServer, IPPROTO_TCP, TCP_NODELAY, &valueOn, sizeof(s32)); + #endif + + if(!socketSetBlockingMode(fdServer, true)) { + print("TCP: failed to set to blocking mode!\n"); + } + + #if defined(SO_RCVTIMEO) + #if defined(PLATFORM_WINDOWS) + DWORD rcvTimeMs = 1000 * RECEIVE_TIMEOUT_SEC; + setsockopt(fdServer, SOL_SOCKET, SO_RCVTIMEO, &rcvTimeMs, sizeof(rcvTimeMs)); + #else + struct timeval rcvtimeo; + rcvtimeo.tv_sec = RECEIVE_TIMEOUT_SEC; + rcvtimeo.tv_usec = 0; + setsockopt(fdServer, SOL_SOCKET, SO_RCVTIMEO, &rcvtimeo, sizeof(rcvtimeo)); + #endif + #endif + } + + s32 bindRes; + if(useIPv4) { + sockaddr_in serverAddrV4{}; + serverAddrV4.sin_family = AF_INET; + serverAddrV4.sin_addr.s_addr = htonl(INADDR_ANY); + serverAddrV4.sin_port = htons(port); + + bindRes = ::bind(fdServer, (sockaddr*)&serverAddrV4, sizeof(serverAddrV4)) < 0; + } else { + sockaddr_in6 serverAddrV6{}; + serverAddrV6.sin6_family = AF_INET6; + serverAddrV6.sin6_addr = in6addr_loopback; + serverAddrV6.sin6_port = htons(port); + + bindRes = ::bind(fdServer, (sockaddr*)&serverAddrV6, sizeof(serverAddrV6)) < 0; + } + + if(bindRes < 0 || listen(fdServer, 
1) < 0) { + printf("error binding socket on port %d! (%s)\n", port, strerror(errno)); + break; + } + + // scan for new connections + while(fdClient < 0) { + fdClient = ::accept(fdServer, nullptr, nullptr); + if(fdClient < 0) { + if(errno != EAGAIN) { + if(!stopServer) + printf("error accepting connection! (%s)\n", strerror(errno)); + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(CLIENT_SLEEP_MS)); + } + } + if (fdClient < 0) { + break; + } + + // close the server socket, we only want one client + socketClose(fdServer); + fdServer = -1; + + while (!stopServer && fdClient >= 0) { + // Kick client if we need to + if(wantKickClient) { + socketClose(fdClient); + fdClient = -1; + wantKickClient = false; + onDisconnect(); + break; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(CLIENT_SLEEP_MS)); + } + } + + printf("Stopping TCP-server...\n"); + + socketClose(fdClient); + fdClient = -1; + + wantKickClient = false; + + printf("TCP-server stopped\n"); + serverRunning = false; + }); + + auto threadSend = std::thread([this]() + { + vector localSendBuffer{}; + u32 cycles = 0; + + while(!stopServer) + { + if(fdClient < 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(CLIENT_SLEEP_MS)); + continue; + } + + { // copy send-data to minimize lock time + std::lock_guard guard{sendBufferMutex}; + if(sendBuffer.size() > 0) { + localSendBuffer = sendBuffer; + sendBuffer.resize(0); + } + } + + // send data + if(localSendBuffer.size() > 0) { + auto bytesWritten = send(fdClient, localSendBuffer.data(), localSendBuffer.size(), 0); + if(bytesWritten < localSendBuffer.size()) { + printf("Error sending data! (%s)\n", strerror(errno)); + } + + if constexpr(TCP_LOG_MESSAGES) { + printf("%.4f | TCP >: [%" PRIu64 "]: %.*s\n", (f64)chrono::millisecond() / 1000.0, localSendBuffer.size(), localSendBuffer.size() > 100 ? 
100 : (int)localSendBuffer.size(), (char*)localSendBuffer.data()); + } + + localSendBuffer.resize(0); + cycles = 0; // sending once has a good chance of sending more -> reset sleep timer + } + + if(cycles++ >= CYCLES_BEFORE_SLEEP) { + std::this_thread::sleep_for(std::chrono::microseconds(1)); + cycles = 0; + } + } + }); + + auto threadReceive = std::thread([this]() + { + u8 packet[TCP_BUFFER_SIZE]{0}; + + while(!stopServer) + { + if(fdClient < 0 || wantKickClient) { + std::this_thread::sleep_for(std::chrono::milliseconds(CLIENT_SLEEP_MS)); + continue; + } + + // receive data from connected clients + s32 length = recv(fdClient, packet, TCP_BUFFER_SIZE, MSG_NOSIGNAL); + if(length > 0) { + std::lock_guard guard{receiveBufferMutex}; + auto oldSize = receiveBuffer.size(); + receiveBuffer.resize(oldSize + length); + memcpy(receiveBuffer.data() + oldSize, packet, length); + + if constexpr(TCP_LOG_MESSAGES) { + printf("%.4f | TCP <: [%d]: %.*s ([%d]: %.*s)\n", (f64)chrono::millisecond() / 1000.0, length, length, (char*)receiveBuffer.data(), length, length, (char*)packet); + } + } else if(length == 0) { + disconnectClient(); + } else { + #if defined(PLATFORM_WINDOWS) + if (WSAGetLastError() != WSAETIMEDOUT) { + #else + if (errno != EAGAIN) { + #endif + printf("TCP server: error receiving data from client: %s\n", strerror(errno)); + disconnectClient(); + } + } + } + }); + + threadServer.detach(); + threadSend.detach(); + threadReceive.detach(); + + return true; +} + +NALL_HEADER_INLINE auto Socket::close(bool notifyHandler) -> void { + stopServer = true; + + // we have to forcefully shut it down here, since otherwise accept() would hang causing a UI crash + socketShutdown(fdServer); + socketClose(fdClient); + socketClose(fdServer); + fdServer = -1; + fdClient = -1; + + while(serverRunning) { + std::this_thread::sleep_for(std::chrono::milliseconds(250)); // wait for other threads to stop + } + + if(notifyHandler) { + onDisconnect(); // don't call this in destructor, it's 
virtual + } +} + +NALL_HEADER_INLINE auto Socket::update() -> void { + vector data{}; + + { // local copy, minimize lock time + std::lock_guard guard{receiveBufferMutex}; + if(receiveBuffer.size() > 0) { + data = receiveBuffer; + receiveBuffer.resize(0); + } + } + + if(data.size() > 0) { + onData(data); + } +} + +NALL_HEADER_INLINE auto Socket::disconnectClient() -> void { + wantKickClient = true; +} + +NALL_HEADER_INLINE auto Socket::sendData(const u8* data, u32 size) -> void { + std::lock_guard guard{sendBufferMutex}; + u32 oldSize = sendBuffer.size(); + sendBuffer.resize(oldSize + size); + memcpy(sendBuffer.data() + oldSize, data, size); +} + +} diff --git a/waterbox/ares64/ares/nall/tcptext/tcp-socket.hpp b/waterbox/ares64/ares/nall/tcptext/tcp-socket.hpp new file mode 100755 index 0000000000..7cabb2317b --- /dev/null +++ b/waterbox/ares64/ares/nall/tcptext/tcp-socket.hpp @@ -0,0 +1,65 @@ +#pragma once + +/** + * Opens a TCP server with callbacks to send and receive data. + * + * This spawns 3 new threads: + * threadServer: listens for new connections, kicks connections + * threadSend: sends data to the client + * threadReceive: receives data from the client + * + * Each contains its own loop including sleeps to not use too much CPU. + * The exception is threadReceive which relies on the blocking recv() call (kernel wakes it up again). + * + * Incoming and outgoing data is synchronized using mutexes, + * and put into buffers that are shared with the main thread. + * Meaning, the thread that calls 'update()' will also be the one that gets 'onData()' calls. + * No additional synchronization is needed. + * + * NOTE: if you work on the loop/sleeps, make sure to test CPU usage and package-latency.
+ */ +namespace nall::TCP { + +class Socket { + public: + auto open(u32 port, bool useIPv4) -> bool; + auto close(bool notifyHandler = true) -> void; + + auto disconnectClient() -> void; + + auto isStarted() const -> bool { return serverRunning; } + auto hasClient() const -> bool { return fdClient >= 0; } + + auto getURL(u32 port, bool useIPv4) const -> string; + + ~Socket() { close(false); } + + protected: + auto update() -> void; + + auto sendData(const u8* data, u32 size) -> void; + virtual auto onData(const vector &data) -> void = 0; + + virtual auto onConnect() -> void = 0; + virtual auto onDisconnect() -> void = 0; + + private: + std::atomic stopServer{false}; // set to true to let the server-thread know to stop. + std::atomic serverRunning{false}; // signals the current state of the server-thread + std::atomic wantKickClient{false}; // set to true to let server know to disconnect the current client (if conn.) + + std::atomic fdServer{-1}; + std::atomic fdClient{-1}; + + vector receiveBuffer{}; + std::mutex receiveBufferMutex{}; + + vector sendBuffer{}; + std::mutex sendBufferMutex{}; +}; + +} + +#if defined(NALL_HEADER_ONLY) + #include +#endif diff --git a/waterbox/ares64/ares/nall/tcptext/tcptext-server.cpp b/waterbox/ares64/ares/nall/tcptext/tcptext-server.cpp new file mode 100755 index 0000000000..4d0581f9d6 --- /dev/null +++ b/waterbox/ares64/ares/nall/tcptext/tcptext-server.cpp @@ -0,0 +1,29 @@ +#include + +namespace nall::TCPText { + NALL_HEADER_INLINE auto Server::sendText(const string &text) -> void { + sendData((const u8*)text.data(), text.size()); + } + + NALL_HEADER_INLINE auto Server::onData(const vector &data) -> void { + string_view dataStr((const char*)data.data(), (u32)data.size()); + + if(!hadHandshake) { + hadHandshake = true; + + // This is a security check for browsers. 
+ // Any website can request localhost via JS or HTML, while it can't see the result, + // GDB will receive the data and commands could be injected (true for all GDB-servers). + // Since all HTTP requests start with headers, we can simply block anything that doesn't start like a GDB client. + if(dataStr[0] != '+') { + printf("Non-GDB client detected (message: %s), disconnect client\n", dataStr.data()); + disconnectClient(); + return; + } + + onConnect(); + } + + onText(dataStr); + } +} diff --git a/waterbox/ares64/ares/nall/tcptext/tcptext-server.hpp b/waterbox/ares64/ares/nall/tcptext/tcptext-server.hpp new file mode 100755 index 0000000000..8299bcdea6 --- /dev/null +++ b/waterbox/ares64/ares/nall/tcptext/tcptext-server.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include + +/** + * Provides text-based TCP server on top of the Socket. + * This handles incoming messages and can send data back to the client. + */ +namespace nall::TCPText { + +class Server : public TCP::Socket { + public: + bool hadHandshake{false}; + + protected: + auto onData(const vector &data) -> void override; + + auto sendText(const string &text) -> void; + virtual auto onText(string_view text) -> void = 0; +}; + +} + +#if defined(NALL_HEADER_ONLY) + #include +#endif diff --git a/waterbox/ares64/ares/nall/terminal.cpp b/waterbox/ares64/ares/nall/terminal.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/terminal.hpp b/waterbox/ares64/ares/nall/terminal.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/thread.cpp b/waterbox/ares64/ares/nall/thread.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/thread.hpp b/waterbox/ares64/ares/nall/thread.hpp old mode 100644 new mode 100755 index a76896f9ac..17714a1a66 --- a/waterbox/ares64/ares/nall/thread.hpp +++ b/waterbox/ares64/ares/nall/thread.hpp @@ -13,8 +13,10 @@ namespace nall { using mutex = std::mutex; using recursive_mutex = std::recursive_mutex; + using condition_variable = 
std::condition_variable; template using lock_guard = std::lock_guard; template using atomic = std::atomic; + template using unique_lock = std::unique_lock; } #if defined(API_POSIX) diff --git a/waterbox/ares64/ares/nall/traits.hpp b/waterbox/ares64/ares/nall/traits.hpp old mode 100644 new mode 100755 index 5862b36bb7..523ee5d22b --- a/waterbox/ares64/ares/nall/traits.hpp +++ b/waterbox/ares64/ares/nall/traits.hpp @@ -24,16 +24,10 @@ namespace nall { using std::is_base_of; using std::is_base_of_v; using std::is_function; - using std::is_integral; - using std::is_integral_v; using std::is_pointer; using std::is_pointer_v; using std::is_same; using std::is_same_v; - using std::is_signed; - using std::is_signed_v; - using std::is_unsigned; - using std::is_unsigned_v; using std::nullptr_t; using std::remove_extent; using std::remove_extent_t; @@ -41,13 +35,22 @@ namespace nall { using std::remove_reference_t; using std::swap; using std::true_type; -} -namespace std { + //directly specializing std traits would result in undefined behavior + template struct is_integral : std::is_integral {}; + template struct is_signed : std::is_signed {}; + template struct is_unsigned : std::is_unsigned {}; + + template inline constexpr bool is_integral_v = is_integral::value; + template inline constexpr bool is_signed_v = is_signed ::value; + template inline constexpr bool is_unsigned_v = is_unsigned::value; + + //defined in arithmetic.hpp when unavailable as a builtin + template<> struct is_integral : true_type {}; + template<> struct is_unsigned : true_type {}; + #if defined(__SIZEOF_INT128__) template<> struct is_integral : true_type {}; - template<> struct is_integral : true_type {}; - template<> struct is_signed : true_type {}; - template<> struct is_unsigned : true_type {}; + template<> struct is_signed : true_type {}; #endif } diff --git a/waterbox/ares64/ares/nall/unique-pointer.hpp b/waterbox/ares64/ares/nall/unique-pointer.hpp old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/nall/utility.hpp b/waterbox/ares64/ares/nall/utility.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/variant.hpp b/waterbox/ares64/ares/nall/variant.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/varint.hpp b/waterbox/ares64/ares/nall/varint.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector.hpp b/waterbox/ares64/ares/nall/vector.hpp old mode 100644 new mode 100755 index ed52da7a5d..a8c129554e --- a/waterbox/ares64/ares/nall/vector.hpp +++ b/waterbox/ares64/ares/nall/vector.hpp @@ -124,6 +124,7 @@ struct vector_base { auto find(const function& comparator) -> maybe; auto find(const T& value) const -> maybe; auto findSorted(const T& value) const -> maybe; + auto contains(const T& value) const -> bool; auto foreach(const function& callback) -> void; auto foreach(const function& callback) -> void; diff --git a/waterbox/ares64/ares/nall/vector/access.hpp b/waterbox/ares64/ares/nall/vector/access.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/assign.hpp b/waterbox/ares64/ares/nall/vector/assign.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/compare.hpp b/waterbox/ares64/ares/nall/vector/compare.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/core.hpp b/waterbox/ares64/ares/nall/vector/core.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/iterator.hpp b/waterbox/ares64/ares/nall/vector/iterator.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/memory.hpp b/waterbox/ares64/ares/nall/vector/memory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/modify.hpp b/waterbox/ares64/ares/nall/vector/modify.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/specialization/u8.hpp b/waterbox/ares64/ares/nall/vector/specialization/u8.hpp old mode 
100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vector/utility.hpp b/waterbox/ares64/ares/nall/vector/utility.hpp old mode 100644 new mode 100755 index 14f70f9599..670ff97392 --- a/waterbox/ares64/ares/nall/vector/utility.hpp +++ b/waterbox/ares64/ares/nall/vector/utility.hpp @@ -36,6 +36,13 @@ template auto vector::findSorted(const T& value) const -> maybe auto vector::contains(const T& value) const -> bool { + for(const auto &v : *this) { + if(v == value) return true; + } + return false; +} + template auto vector::foreach(const function& callback) -> void { for(u64 n : range(size())) callback(_pool[n]); } diff --git a/waterbox/ares64/ares/nall/vfs.hpp b/waterbox/ares64/ares/nall/vfs.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vfs/attribute.hpp b/waterbox/ares64/ares/nall/vfs/attribute.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vfs/cdrom.hpp b/waterbox/ares64/ares/nall/vfs/cdrom.hpp old mode 100644 new mode 100755 index f410a0b66b..d572891eb4 --- a/waterbox/ares64/ares/nall/vfs/cdrom.hpp +++ b/waterbox/ares64/ares/nall/vfs/cdrom.hpp @@ -11,6 +11,10 @@ namespace nall::vfs { struct cdrom : file { + ~cdrom() { + _thread.join(); + } + static auto open(const string& location) -> shared_pointer { auto instance = shared_pointer{new cdrom}; if(location.iendsWith(".cue") && instance->loadCue(location)) return instance; @@ -19,8 +23,8 @@ struct cdrom : file { } auto writable() const -> bool override { return false; } - auto data() const -> const u8* override { return _image.data(); } - auto data() -> u8* override { return _image.data(); } + auto data() const -> const u8* override { wait(size()); return _image.data(); } + auto data() -> u8* override { wait(size()); return _image.data(); } auto size() const -> u64 override { return _image.size(); } auto offset() const -> u64 override { return _offset; } @@ -36,19 +40,33 @@ struct cdrom : file { auto read() -> u8 override { if(_offset >= 
_image.size()) return 0x00; + wait(_offset); return _image[_offset++]; } auto write(u8 data) -> void override { //CD-ROMs are read-only; but allow writing anyway if needed, since the image is in memory if(_offset >= _image.size()) return; + wait(_offset); _image[_offset++] = data; } + auto wait(u64 offset) const -> void { + bool force = false; + if(offset >= _image.size()) { + offset = _image.size() - 1; + force = true; + } + //subchannel data is always loaded + if(offset % 2448 < 2352 || force) { + while(offset + 1 > _loadOffset) usleep(1); + } + } + private: auto loadCue(const string& cueLocation) -> bool { - Decode::CUE cuesheet; - if(!cuesheet.load(cueLocation)) return false; + auto cuesheet = shared_pointer::create(); + if(!cuesheet->load(cueLocation)) return false; CD::Session session; session.leadIn.lba = -LeadInSectors; @@ -56,19 +74,19 @@ private: s32 lbaFileBase = 0; // add 2 sec pregap to 1st track - if(!cuesheet.files[0].tracks[0].pregap) - cuesheet.files[0].tracks[0].pregap = Track1Pregap; + if(!cuesheet->files[0].tracks[0].pregap) + cuesheet->files[0].tracks[0].pregap = Track1Pregap; else - cuesheet.files[0].tracks[0].pregap = Track1Pregap + cuesheet.files[0].tracks[0].pregap(); + cuesheet->files[0].tracks[0].pregap = Track1Pregap + cuesheet->files[0].tracks[0].pregap(); - if(cuesheet.files[0].tracks[0].indices[0].number == 1) { + if(cuesheet->files[0].tracks[0].indices[0].number == 1) { session.tracks[1].indices[0].lba = 0; session.tracks[1].indices[0].end = - cuesheet.files[0].tracks[0].pregap() + cuesheet.files[0].tracks[0].indices[0].lba - 1; + cuesheet->files[0].tracks[0].pregap() + cuesheet->files[0].tracks[0].indices[0].lba - 1; } s32 lbaIndex = 0; - for(auto& file : cuesheet.files) { + for(auto& file : cuesheet->files) { for(auto& track : file.tracks) { session.tracks[track.number].control = track.type == "audio" ? 
0b0000 : 0b0100; if(track.pregap) lbaFileBase += track.pregap(); @@ -117,8 +135,15 @@ private: _image.resize(2448 * (LeadInSectors + lbaFileBase + LeadOutSectors)); - lbaFileBase = 0; - for(auto& file : cuesheet.files) { + //preload subchannel data + loadSub({Location::notsuffix(cueLocation), ".sub"}, session); + + //load user data on separate thread + _thread = thread::create( + [this, cueLocation, cuesheet = std::move(cuesheet)](uintptr) -> void { + + s32 lbaFileBase = 0; + for(auto& file : cuesheet->files) { auto location = string{Location::path(cueLocation), file.name}; auto filedata = nall::file::open(location, nall::file::mode::read); if(file.type == "wave") filedata.seek(44); //skip RIFF header @@ -127,7 +152,8 @@ private: for(auto& index : track.indices) { if(index.lba < 0) continue; // ignore gaps (not in file) for(s32 sector : range(index.sectorCount())) { - auto target = _image.data() + 2448ull * (LeadInSectors + lbaFileBase + index.lba + sector); + auto offset = 2448ull * (LeadInSectors + lbaFileBase + index.lba + sector); + auto target = _image.data() + offset; auto length = track.sectorSize(); if(length == 2048) { //ISO: generate header + parity data @@ -145,39 +171,30 @@ private: //BIN + WAV: direct copy filedata.read({target, length}); } + _loadOffset = offset + 2448; } } if(track.postgap) lbaFileBase += track.postgap(); } lbaFileBase += file.tracks.last().indices.last().end + 1; } + _loadOffset = _image.size(); - auto subchannel = session.encode(LeadInSectors + session.leadOut.end + 1); - if(auto overlay = nall::file::read({Location::notsuffix(cueLocation), ".sub"})) { - auto target = subchannel.data() + 96 * (LeadInSectors + Track1Pregap); - auto length = (s64)subchannel.size() - 96 * (LeadInSectors + Track1Pregap); - memory::copy(target, length, overlay.data(), overlay.size()); - } - - for(u64 sector : range(size() / 2448)) { - auto source = subchannel.data() + sector * 96; - auto target = _image.data() + sector * 2448 + 2352; - 
memory::copy(target, source, 96); - } + }); return true; } auto loadChd(const string& location) -> bool { - Decode::CHD chd; - if(!chd.load(location)) return false; + auto chd = shared_pointer::create(); + if(!chd->load(location)) return false; CD::Session session; session.leadIn.lba = -LeadInSectors; session.leadIn.end = -1; s32 lbaIndex = 0; - for(auto& track : chd.tracks) { + for(auto& track : chd->tracks) { session.tracks[track.number].control = track.type == "AUDIO" ? 0b0000 : 0b0100; for(auto& index : track.indices) { session.tracks[track.number].indices[index.number].lba = index.lba; @@ -209,12 +226,20 @@ private: _image.resize(2448 * (LeadInSectors + lbaIndex + LeadOutSectors)); + //preload subchannel data + loadSub({Location::notsuffix(location), ".sub"}, session); + + //load user data on separate thread + _thread = thread::create( + [this, chd = std::move(chd)](uintptr) -> void { + s32 lba = 0; - for(auto& track : chd.tracks) { + for(auto& track : chd->tracks) { for(auto& index : track.indices) { for(s32 sector : range(index.sectorCount())) { - auto target = _image.data() + 2448ull * (LeadInSectors + index.lba + sector); - auto sectorData = chd.read(lba); + auto offset = 2448ull * (LeadInSectors + index.lba + sector); + auto target = _image.data() + offset; + auto sectorData = chd->read(lba); if(sectorData.size() == 2048) { //ISO: generate header + parity data memory::assign(target + 0, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff); //sync @@ -230,12 +255,22 @@ private: memory::copy(target, 2352, sectorData.data(), sectorData.size()); } lba++; + _loadOffset = offset + 2448; } } } + _loadOffset = _image.size(); + }); + + return true; + } + +private: + void loadSub(const string& location, const CD::Session& session) { auto subchannel = session.encode(LeadInSectors + session.leadOut.end + 1); - if(auto overlay = nall::file::read({Location::notsuffix(location), ".sub"})) { + + if(auto overlay = nall::file::read(location)) { auto target = subchannel.data() + 96 * 
(LeadInSectors + Track1Pregap); auto length = (s64)subchannel.size() - 96 * (LeadInSectors + Track1Pregap); memory::copy(target, length, overlay.data(), overlay.size()); @@ -246,12 +281,12 @@ private: auto target = _image.data() + sector * 2448 + 2352; memory::copy(target, source, 96); } - - return true; } vector _image; u64 _offset = 0; + atomic _loadOffset = 0; + thread _thread; static constexpr s32 LeadInSectors = 7500; static constexpr s32 Track1Pregap = 150; diff --git a/waterbox/ares64/ares/nall/vfs/directory.hpp b/waterbox/ares64/ares/nall/vfs/directory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vfs/disk.hpp b/waterbox/ares64/ares/nall/vfs/disk.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vfs/file.hpp b/waterbox/ares64/ares/nall/vfs/file.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vfs/memory.hpp b/waterbox/ares64/ares/nall/vfs/memory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vfs/node.hpp b/waterbox/ares64/ares/nall/vfs/node.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/vfs/vfs.hpp b/waterbox/ares64/ares/nall/vfs/vfs.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/view.hpp b/waterbox/ares64/ares/nall/view.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/detour.cpp b/waterbox/ares64/ares/nall/windows/detour.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/detour.hpp b/waterbox/ares64/ares/nall/windows/detour.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/guid.cpp b/waterbox/ares64/ares/nall/windows/guid.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/guid.hpp b/waterbox/ares64/ares/nall/windows/guid.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/launcher.cpp b/waterbox/ares64/ares/nall/windows/launcher.cpp old 
mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/launcher.hpp b/waterbox/ares64/ares/nall/windows/launcher.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/registry.cpp b/waterbox/ares64/ares/nall/windows/registry.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/registry.hpp b/waterbox/ares64/ares/nall/windows/registry.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/service.hpp b/waterbox/ares64/ares/nall/windows/service.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/shared-memory.hpp b/waterbox/ares64/ares/nall/windows/shared-memory.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/utf8.cpp b/waterbox/ares64/ares/nall/windows/utf8.cpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/utf8.hpp b/waterbox/ares64/ares/nall/windows/utf8.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/windows/windows.hpp b/waterbox/ares64/ares/nall/windows/windows.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/xorg/clipboard.hpp b/waterbox/ares64/ares/nall/xorg/clipboard.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/xorg/guard.hpp b/waterbox/ares64/ares/nall/xorg/guard.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/nall/xorg/xorg.hpp b/waterbox/ares64/ares/nall/xorg/xorg.hpp old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit.h b/waterbox/ares64/ares/thirdparty/sljit.h old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/.gitignore b/waterbox/ares64/ares/thirdparty/sljit/.gitignore old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/API_CHANGES b/waterbox/ares64/ares/thirdparty/sljit/API_CHANGES old mode 100644 new mode 100755 index 8ec367e302..6afa9750a6 --- 
a/waterbox/ares64/ares/thirdparty/sljit/API_CHANGES +++ b/waterbox/ares64/ares/thirdparty/sljit/API_CHANGES @@ -1,5 +1,38 @@ This file is the short summary of the API changes: +21.02.2024 - Non-backward compatible + The sljit_set_put_label() function is renamed + to sljit_emit_mov_addr() and sljit_put_label + is merged into sljit_jump and removed. + +01.11.2023 - Non-backward compatible + The SLJIT_ARG_TYPE_VOID definition is changed + to SLJIT_ARG_TYPE_RET_VOID to improve Windows + compatibility. + +05.09.2023 - Non-backward compatible + Turn SLJIT_IMM from a flag to a single value. + +10.08.2023 - Non-backward compatible + Rename SLJIT_INT_REGISTER to SLJIT_GP_REGISTER. + +01.08.2023 - Non-backward compatible + A type argument is added to sljit_get_register_index + and sljit_get_float_register_index is removed. + +19.07.2023 - Non-backward compatible + SLJIT_MEM_UNALIGNED_16/32 options are renamed + to SLJIT_MEM_ALIGNED_16/32 and a type argument + is added to sljit_get_float_register_index. + +16.02.2022 - Non-backward compatible + The sljit_emit_cmov operation is replaced + by sljit_emit_select. + +11.02.2022 - Non-backward compatible + All floating point comparisons are supported, + sljit_cmp_info return value is changed. + 02.02.2022 - Backward compatible All SLJIT_SET_* constants are even numbers. 
diff --git a/waterbox/ares64/ares/thirdparty/sljit/INTERNAL_CHANGES b/waterbox/ares64/ares/thirdparty/sljit/INTERNAL_CHANGES old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/LICENSE b/waterbox/ares64/ares/thirdparty/sljit/LICENSE old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/Makefile b/waterbox/ares64/ares/thirdparty/sljit/Makefile old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/README b/waterbox/ares64/ares/thirdparty/sljit/README old mode 100644 new mode 100755 index 1fc2337e41..e45495cdc0 --- a/waterbox/ares64/ares/thirdparty/sljit/README +++ b/waterbox/ares64/ares/thirdparty/sljit/README @@ -2,18 +2,45 @@ SLJIT - Stack Less JIT Compiler Purpose: - A simple, machine independent JIT compiler, which suitable for - translating interpreted byte code to machine code. The sljitLir.h + A low-level, machine independent JIT compiler, which is suitable for + translating interpreted byte code into machine code. The sljitLir.h describes the LIR (low-level intermediate representation) of SLJIT. 
+Key features: + - Supports several target architectures: + x86 32/64, ARM 32/64, RiscV 32/64, s390x 64, + PowerPC 32/64, LoongArch 64, MIPS 32/64 + - Supports a large number of operations + - Supports self-modifying code + - Supports tail calls + - Supports fast calls (non-ABI compatible) + - Supports byte order reverse (endianness switching) + - Supports unaligned memory accesses + - Supports SIMD / atomic operations on certain CPUs + - Direct register access, both integer and floating point + - Stack space allocated for function local variables can be + accessed as a linear memory area + - All-in-one compilation is supported + - When sljitLir.c is directly included by a C source file, + the jit compiler API can be completely hidden from + external use (see SLJIT_CONFIG_STATIC macro) + - Code can be generated for multiple target cpus + by including sljitLir.c in different C files, where + each compiler instance is configured to target a + different architecture + - The compiler can be serialized into a byte buffer + - Useful for ahead-of-time compiling + - Code generation can be resumed after deserialization + (partial ahead-of-time compiling) + Compatible: - Any C (C++) compiler. At least I hope so. + C99 (C++) compilers. Using sljit: Copy the content of sljit_src directory into your project source directory. Add sljitLir.c source file to your build environment. All other files are included by sljitLir.c (if required). Define the machine by SLJIT_CONFIG_* - selector. See sljitConfig.h for all possible values. For C++ compilers, + selector. See sljitConfigCPU.h for all possible values. For C++ compilers, rename sljitLir.c to sljitLir.cpp.
More info: @@ -35,6 +62,7 @@ Special thanks: Marc Mutz Martin Storsjö Michael McConville + Mingtao Zhou (LoongArch support) Walter Lee Wen Xichang YunQiang Su diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/overview.txt b/waterbox/ares64/ares/thirdparty/sljit/doc/overview.txt old mode 100644 new mode 100755 index af74947954..8c21f84351 --- a/waterbox/ares64/ares/thirdparty/sljit/doc/overview.txt +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/overview.txt @@ -9,7 +9,7 @@ with SLJIT. Further details can be found in sljitLir.h. SLJIT is a platform independent assembler which - provides access to common CPU features - can be easily ported to wide-spread CPU - architectures (e.g. x86, ARM, POWER, MIPS, SPARC, s390x) + architectures (e.g. x86, ARM, POWER, MIPS, s390x, LoongArch) The key challenge of this project is finding a common subset of CPU features which @@ -49,9 +49,9 @@ is a valid instruction sequence: sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0); // An int32_t value is loaded into SLJIT_R0 - sljit_emit_op1(compiler, SLJIT_NOT32, + sljit_emit_op1(compiler, SLJIT_REV32, SLJIT_R0, 0, SLJIT_R0, 0); - // the int32_t value in SLJIT_R0 is bit inverted + // the int32_t value in SLJIT_R0 is byte swapped // and the type of the result is still int32_t The next code snippet is not allowed: @@ -59,14 +59,14 @@ The next code snippet is not allowed: sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0); // An intptr_t value is loaded into SLJIT_R0 - sljit_emit_op1(compiler, SLJIT_NOT32, + sljit_emit_op1(compiler, SLJIT_REV32, SLJIT_R0, 0, SLJIT_R0, 0); - // The result of SLJIT_NOT instruction - // is undefined. Even crash is possible + // The result of the instruction is undefined. + // Even crash is possible for some instructions // (e.g. on MIPS-64). 
However, it is always allowed to overwrite a -register regardless its previous value: +register regardless of its previous value: sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0); @@ -127,7 +127,7 @@ following rules were introduced: - Status flags cannot be controlled directly (there are no set/clear/invert operations) -The last two rules allows efficent mapping of status flags. +The last two rules allow efficient mapping of status flags. For example the arithmetic and multiply overflow flag is mapped to the same overflow flag bit on x86. This is allowed, since no instruction can set both of these flags. When diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/99bottles.bf b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/99bottles.bf old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/README b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/README old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/array_access.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/array_access.c old mode 100644 new mode 100755 index 1404cf936c..cee485670d --- a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/array_access.c +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/array_access.c @@ -5,10 +5,9 @@ typedef long (SLJIT_FUNC *func_arr_t)(long *arr, long narr); -static long SLJIT_FUNC print_num(long a) +static void SLJIT_FUNC print_num(long a) { printf("num = %ld\n", a); - return a + 1; } /* @@ -27,26 +26,46 @@ long func(long *array, long narray) static int array_access(long *arr, long narr) { void *code; - unsigned long len; + size_t len; func_arr_t func; + struct sljit_label *loopstart; + struct sljit_jump *out; /* Create a SLJIT compiler */ struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); - sljit_emit_enter(C, 0, SLJIT_ARGS2(W, P, W), 1, 3, 0, 0, 0); - /* opt arg R S FR FS local_size */ - sljit_emit_op2(C, SLJIT_XOR,
SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_S2, 0); // S2 = 0 - sljit_emit_op1(C, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, narr); // S1 = narr - struct sljit_label *loopstart = sljit_emit_label(C); // loopstart: - struct sljit_jump *out = sljit_emit_cmp(C, SLJIT_GREATER_EQUAL, SLJIT_S2, 0, SLJIT_S1, 0); // S2 >= a --> jump out + sljit_emit_enter(C, 0, SLJIT_ARGS2(W, P, W), 1, 3, 0, 0, 0); + /* opt arg R S FR FS local_size */ - sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_S2), SLJIT_WORD_SHIFT);// R0 = (long *)S0[S2]; - sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0); + /* S2 = 0 */ + sljit_emit_op2(C, SLJIT_XOR, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_S2, 0); - sljit_emit_op2(C, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_IMM, 1); // S2 += 1 - sljit_set_label(sljit_emit_jump(C, SLJIT_JUMP), loopstart); // jump loopstart - sljit_set_label(out, sljit_emit_label(C)); // out: - sljit_emit_return(C, SLJIT_MOV, SLJIT_S1, 0); // return RET + /* S1 = narr */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, narr); + + /* loopstart: */ + loopstart = sljit_emit_label(C); + + /* S2 >= narr --> jump out */ + out = sljit_emit_cmp(C, SLJIT_GREATER_EQUAL, SLJIT_S2, 0, SLJIT_S1, 0); + + /* R0 = (long *)S0[S2]; */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_S2), SLJIT_WORD_SHIFT); + + /* print_num(R0) */ + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1V(W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); + + /* S2 += 1 */ + sljit_emit_op2(C, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_IMM, 1); + + /* jump loopstart */ + sljit_set_label(sljit_emit_jump(C, SLJIT_JUMP), loopstart); + + /* out: */ + sljit_set_label(out, sljit_emit_label(C)); + + /* return S1 */ + sljit_emit_return(C, SLJIT_MOV, SLJIT_S1, 0); /* Generate machine code */ code = sljit_generate_code(C); @@ -64,7 +83,7 @@ static int array_access(long *arr, long narr) return 0; } -int main() +int main(void) { long arr[8] = { 3, -10, 4, 6, 8, 12, 
2000, 0 }; return array_access(arr, 8); diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/brainfuck.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/brainfuck.c old mode 100644 new mode 100755 index 53a491dd0e..508646834d --- a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/brainfuck.c +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/brainfuck.c @@ -111,7 +111,7 @@ static int loop_pop(struct sljit_label **loop_start, struct sljit_jump **loop_en return 0; } -static void *SLJIT_FUNC my_alloc(long size, long n) +static void *SLJIT_FUNC my_alloc(size_t size, size_t n) { return calloc(size, n); } @@ -148,9 +148,11 @@ static void *compile(FILE *src, unsigned long *lcode) int SP = SLJIT_S0; /* bf SP */ int CELLS = SLJIT_S1; /* bf array */ - sljit_emit_enter(C, 0, SLJIT_ARGS2(VOID, W, W), 2, 2, 0, 0, 0); /* opt arg R S FR FS local_size */ + sljit_emit_enter(C, 0, SLJIT_ARGS2V(W, W), 2, 2, 0, 0, 0); + /* opt arg R S FR FS local_size */ - sljit_emit_op2(C, SLJIT_XOR, SP, 0, SP, 0, SP, 0); /* SP = 0 */ + /* SP = 0 */ + sljit_emit_op2(C, SLJIT_XOR, SP, 0, SP, 0, SP, 0); sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, BF_CELL_SIZE); sljit_emit_op1(C, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/branch.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/branch.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/first_program.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/first_program.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/func_call.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/func_call.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/hello.bf b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/hello.bf old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/loop.c 
b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/loop.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/sljit_tutorial.html b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/sljit_tutorial.html old mode 100644 new mode 100755 index fba7738bfb..d83ab6d60a --- a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/sljit_tutorial.html +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/sljit_tutorial.html @@ -90,7 +90,7 @@ static int add3(sljit_sw a, sljit_sw b, sljit_sw c)
func3_t func;

/* Create a SLJIT compiler */
- struct sljit_compiler *C = sljit_create_compiler();
+ struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);

/* Start a context(function entry), has 3 arguments, discuss later */
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 1, 3, 0, 0, 0);
@@ -121,7 +121,7 @@ static int add3(sljit_sw a, sljit_sw b, sljit_sw c)

/* Clean up */
sljit_free_compiler(C);
- sljit_free_code(code);
+ sljit_free_code(code, NULL);
return 0;
}
@@ -212,7 +212,7 @@ static int branch(sljit_sw a, sljit_sw b, sljit_sw c)
struct sljit_jump *out;

/* Create a SLJIT compiler */
- struct sljit_compiler *C = sljit_create_compiler();
+ struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);

/* 3 arg, 1 temp reg, 3 save reg */
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 1, 3, 0, 0, 0);
@@ -253,7 +253,7 @@ static int branch(sljit_sw a, sljit_sw b, sljit_sw c)

/* Clean up */
sljit_free_compiler(C);
- sljit_free_code(code);
+ sljit_free_code(code, NULL);
return 0;
}
diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/struct_access.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/struct_access.c old mode 100644 new mode 100755 index 577a627d71..561b2eef6d --- a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/struct_access.c +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/struct_access.c @@ -10,7 +10,7 @@ struct point_st { char d; }; -typedef long (SLJIT_FUNC *point_func_t)(struct point_st *point);; +typedef long (SLJIT_FUNC *point_func_t)(struct point_st *point); static long SLJIT_FUNC print_num(long a) { @@ -45,21 +45,26 @@ static int struct_access() struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); sljit_emit_enter(C, 0, SLJIT_ARGS1(W, W), 1, 1, 0, 0, 0); - /* opt arg R S FR FS local_size */ + /* opt arg R S FR FS local_size */ - sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); // S0->x --> R0 - sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0); + /* S0->x --> R0; print_num(R0) */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); - sljit_emit_op1(C, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, y)); // S0->y --> R0 - sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0); + /* S0->y --> R0; print_num(R0) */ + sljit_emit_op1(C, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, y)); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); - sljit_emit_op1(C, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, z)); // S0->z --> R0 - sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0); + /* S0->z 
--> R0; print_num(R0) */ + sljit_emit_op1(C, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, z)); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); - sljit_emit_op1(C, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, d)); // S0->d --> R0 - sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0); + /* S0->d --> R0; print_num(R0) */ + sljit_emit_op1(C, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, d)); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); - sljit_emit_return(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); // return S0->x + /* return S0->x */ + sljit_emit_return(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); /* Generate machine code */ code = sljit_generate_code(C); diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/temp_var.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/temp_var.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.c b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.c old mode 100644 new mode 100755 index 2eaecd96e1..babdd8ffee --- a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.c +++ b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.c @@ -980,7 +980,7 @@ static int generate_transitions(struct compiler_common *compiler_common) struct stack_item *item; stack_init(depth); - compiler_common->dfa_transitions = SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); + compiler_common->dfa_transitions = (struct stack_item *)SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); if (!compiler_common->dfa_transitions) return REGEX_MEMORY_ERROR; @@ -1174,7 +1174,7 @@ static int generate_search_states(struct compiler_common 
*compiler_common) compiler_common->terms_size = !(compiler_common->flags & REGEX_FAKE_MATCH_END) ? 1 : 2; compiler_common->longest_range_size = 0; - compiler_common->search_states = SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); + compiler_common->search_states = (struct stack_item *)SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); if (!compiler_common->search_states) return REGEX_MEMORY_ERROR; @@ -1966,7 +1966,7 @@ struct regex_machine* regex_compile(const regex_char_t *regex_string, int length } /* Step 4.1: Generate entry. */ - CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARGS3(VOID, P, P, 32), 5, 5, 0, 0, 0)); + CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARGS3V(P, P, 32), 5, 5, 0, 0, 0)); /* Copy arguments to their place. */ EMIT_OP1(SLJIT_MOV, R_REGEX_MATCH, 0, SLJIT_S0, 0); diff --git a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.h b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.h old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexMain.c b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexMain.c old mode 100644 new mode 100755 index 8951a0a7b5..edc13ff0cc --- a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexMain.c +++ b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexMain.c @@ -211,9 +211,9 @@ static void run_tests(struct test_case* test, int verbose, int silent) printf("REGEX tests: "); if (fail == 0) - printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); + printf("all tests " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); else - printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail)); + printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests failed ", fail, fail * 100 / (success + fail)); printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name()); } diff --git 
a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c old mode 100644 new mode 100755 index 6352377c4d..95b9842fa9 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c @@ -33,15 +33,18 @@ On non-macOS systems, returns MAP_JIT if it is defined. */ #include -#if TARGET_OS_OSX -#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86 + +#if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (TARGET_OS_MAC && !TARGET_OS_IPHONE) + +#if defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86 + #include #include #define SLJIT_MAP_JIT (get_map_jit_flag()) #define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) -static SLJIT_INLINE int get_map_jit_flag() +static SLJIT_INLINE int get_map_jit_flag(void) { size_t page_size; void *ptr; @@ -67,10 +70,8 @@ static SLJIT_INLINE int get_map_jit_flag() } return map_jit_flag; } -#else /* !SLJIT_CONFIG_X86 */ -#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) -#error "Unsupported architecture" -#endif /* SLJIT_CONFIG_ARM */ + +#elif defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM #include #include @@ -81,15 +82,29 @@ static SLJIT_INLINE int get_map_jit_flag() static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) { -#if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000 - pthread_jit_write_protect_np(enable_exec); -#else -#error "Must target Big Sur or newer" +#if MAC_OS_X_VERSION_MIN_REQUIRED < 110000 + if (__builtin_available(macos 11, *)) #endif /* BigSur */ + pthread_jit_write_protect_np(enable_exec); } -#endif /* SLJIT_CONFIG_X86 */ + +#elif defined(SLJIT_CONFIG_PPC) && SLJIT_CONFIG_PPC + +#define SLJIT_MAP_JIT (0) +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) + +#else +#error "Unsupported architecture" +#endif /* SLJIT_CONFIG */ + #else /* !TARGET_OS_OSX */ + +#ifdef MAP_JIT #define 
SLJIT_MAP_JIT (MAP_JIT) +#else +#define SLJIT_MAP_JIT (0) +#endif + #endif /* TARGET_OS_OSX */ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c old mode 100644 new mode 100755 index 6cd391104c..85f3a9d1ea --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c @@ -308,7 +308,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) free_block = free_blocks; while (free_block) { next_free_block = free_block->next; - if (!free_block->header.prev_size && + if (!free_block->header.prev_size && AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { total_size -= free_block->size; sljit_remove_free_block(free_block); @@ -317,7 +317,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) free_block = next_free_block; } - SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_ASSERT(total_size || (!total_size && !free_blocks)); SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c old mode 100644 new mode 100755 diff --git 
a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfig.h b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfig.h old mode 100644 new mode 100755 index e11d4a2e1a..364c8bb788 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfig.h +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfig.h @@ -38,28 +38,6 @@ extern "C" { non-zero value. */ -/* --------------------------------------------------------------------- */ -/* Architecture */ -/* --------------------------------------------------------------------- */ - -/* Architecture selection. 
*/ -/* #define SLJIT_CONFIG_X86_32 1 */ -/* #define SLJIT_CONFIG_X86_64 1 */ -/* #define SLJIT_CONFIG_ARM_V5 1 */ -/* #define SLJIT_CONFIG_ARM_V7 1 */ -/* #define SLJIT_CONFIG_ARM_THUMB2 1 */ -/* #define SLJIT_CONFIG_ARM_64 1 */ -/* #define SLJIT_CONFIG_PPC_32 1 */ -/* #define SLJIT_CONFIG_PPC_64 1 */ -/* #define SLJIT_CONFIG_MIPS_32 1 */ -/* #define SLJIT_CONFIG_MIPS_64 1 */ -/* #define SLJIT_CONFIG_RISCV_32 1 */ -/* #define SLJIT_CONFIG_RISCV_64 1 */ -/* #define SLJIT_CONFIG_S390X 1 */ - -/* #define SLJIT_CONFIG_AUTO 1 */ -/* #define SLJIT_CONFIG_UNSUPPORTED 1 */ - /* --------------------------------------------------------------------- */ /* Utilities */ /* --------------------------------------------------------------------- */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigCPU.h b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigCPU.h new file mode 100755 index 0000000000..2720bdab0b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigCPU.h @@ -0,0 +1,188 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_CPU_H_ +#define SLJIT_CONFIG_CPU_H_ + +/* --------------------------------------------------------------------- */ +/* Architecture */ +/* --------------------------------------------------------------------- */ + +/* Architecture selection. */ +/* #define SLJIT_CONFIG_X86_32 1 */ +/* #define SLJIT_CONFIG_X86_64 1 */ +/* #define SLJIT_CONFIG_ARM_V6 1 */ +/* #define SLJIT_CONFIG_ARM_V7 1 */ +/* #define SLJIT_CONFIG_ARM_THUMB2 1 */ +/* #define SLJIT_CONFIG_ARM_64 1 */ +/* #define SLJIT_CONFIG_PPC_32 1 */ +/* #define SLJIT_CONFIG_PPC_64 1 */ +/* #define SLJIT_CONFIG_MIPS_32 1 */ +/* #define SLJIT_CONFIG_MIPS_64 1 */ +/* #define SLJIT_CONFIG_RISCV_32 1 */ +/* #define SLJIT_CONFIG_RISCV_64 1 */ +/* #define SLJIT_CONFIG_S390X 1 */ +/* #define SLJIT_CONFIG_LOONGARCH_64 1 */ + +/* #define SLJIT_CONFIG_AUTO 1 */ +/* #define SLJIT_CONFIG_UNSUPPORTED 1 */ + +/*****************/ +/* Sanity check. 
*/ +/*****************/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + + (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) \ + + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + + (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + + (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + + (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) \ + + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 +#error "Multiple architectures are selected" +#endif + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) \ + && !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + && !(defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) \ + && !(defined SLJIT_CONFIG_UNSUPPORTED && 
SLJIT_CONFIG_UNSUPPORTED) \ + && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) +#if defined SLJIT_CONFIG_AUTO && !SLJIT_CONFIG_AUTO +#error "An architecture must be selected" +#else /* SLJIT_CONFIG_AUTO */ +#define SLJIT_CONFIG_AUTO 1 +#endif /* !SLJIT_CONFIG_AUTO */ +#endif /* !SLJIT_CONFIG */ + +/********************************************************/ +/* Automatic CPU detection (requires compiler support). */ +/********************************************************/ + +#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) +#ifndef _WIN32 + +#if defined(__i386__) || defined(__i386) +#define SLJIT_CONFIG_X86_32 1 +#elif defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#elif defined(__thumb2__) +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \ + ((defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7S__)) \ + || (defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8R__)) \ + || (defined(__ARM_ARCH_9A__))) +#define SLJIT_CONFIG_ARM_V7 1 +#elif defined(__arm__) || defined (__ARM__) +#define SLJIT_CONFIG_ARM_V6 1 +#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) +#define SLJIT_CONFIG_PPC_64 1 +#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) +#define SLJIT_CONFIG_PPC_32 1 +#elif defined(__mips__) && !defined(_LP64) +#define SLJIT_CONFIG_MIPS_32 1 +#elif defined(__mips64) +#define SLJIT_CONFIG_MIPS_64 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 32) +#define SLJIT_CONFIG_RISCV_32 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 64) +#define SLJIT_CONFIG_RISCV_64 1 +#elif defined (__loongarch_lp64) +#define SLJIT_CONFIG_LOONGARCH_64 1 +#elif defined(__s390x__) +#define SLJIT_CONFIG_S390X 1 +#else +/* Unsupported architecture */ +#define 
SLJIT_CONFIG_UNSUPPORTED 1 +#endif + +#else /* _WIN32 */ + +#if defined(_M_X64) || defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__) +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif (defined(_M_ARM) && _M_ARM >= 7) +#define SLJIT_CONFIG_ARM_V7 1 +#elif defined(_ARM_) +#define SLJIT_CONFIG_ARM_V6 1 +#elif defined(_M_ARM64) || defined(__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#else +#define SLJIT_CONFIG_X86_32 1 +#endif + +#endif /* !_WIN32 */ +#endif /* SLJIT_CONFIG_AUTO */ + +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#undef SLJIT_EXECUTABLE_ALLOCATOR +#endif /* SLJIT_CONFIG_UNSUPPORTED */ + +/******************************/ +/* CPU family type detection. */ +/******************************/ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CONFIG_ARM_32 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#define SLJIT_CONFIG_X86 1 +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_CONFIG_ARM 1 +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_CONFIG_PPC 1 +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SLJIT_CONFIG_MIPS 1 +#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +#define SLJIT_CONFIG_RISCV 1 +#elif (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) +#define SLJIT_CONFIG_LOONGARCH 1 +#endif + +#endif /* SLJIT_CONFIG_CPU_H_ */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigInternal.h 
b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigInternal.h old mode 100644 new mode 100755 index e9bd4d9322..f12505e9c8 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigInternal.h +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigInternal.h @@ -72,6 +72,8 @@ extern "C" { SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS : number of available temporary registers + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS : number of available temporary floating point registers SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index SLJIT_F32_SHIFT : the shift required to apply when accessing a single precision floating point array by index @@ -81,141 +83,27 @@ extern "C" { the scratch register index of ecx is stored in this variable SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + SLJIT_CONV_MAX_FLOAT : result when a floating point value is converted to integer + and the floating point value is higher than the maximum integer value + (possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT) + SLJIT_CONV_MIN_FLOAT : result when a floating point value is converted to integer + and the floating point value is lower than the minimum integer value + (possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT) + SLJIT_CONV_NAN_FLOAT : result when a NaN floating point value is converted to integer + (possible values: SLJIT_CONV_RESULT_MAX_INT, SLJIT_CONV_RESULT_MIN_INT, + or SLJIT_CONV_RESULT_ZERO) Other macros: + SLJIT_TMP_R0 .. 
R9 : accessing temporary registers + SLJIT_TMP_R(i) : accessing temporary registers + SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers + SLJIT_TMP_FR(i) : accessing temporary floating point registers SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper) + SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit + floating point register when SLJIT_HAS_F64_AS_F32_PAIR returns non-zero */ -/*****************/ -/* Sanity check. */ -/*****************/ - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ - + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ - + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ - + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ - + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ - + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ - + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ - + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ - + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ - + (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ - + (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ - + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ - + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ - + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 -#error "Multiple architectures are selected" -#endif - -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ - && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ - && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ - && !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ - && !(defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ - && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ - && 
!(defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ - && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ - && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ - && !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ - && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ - && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ - && !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \ - && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) -#if defined SLJIT_CONFIG_AUTO && !SLJIT_CONFIG_AUTO -#error "An architecture must be selected" -#else /* SLJIT_CONFIG_AUTO */ -#define SLJIT_CONFIG_AUTO 1 -#endif /* !SLJIT_CONFIG_AUTO */ -#endif /* !SLJIT_CONFIG */ - -/********************************************************/ -/* Automatic CPU detection (requires compiler support). */ -/********************************************************/ - -#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) -#ifndef _WIN32 - -#if defined(__i386__) || defined(__i386) -#define SLJIT_CONFIG_X86_32 1 -#elif defined(__x86_64__) -#define SLJIT_CONFIG_X86_64 1 -#elif defined(__aarch64__) -#define SLJIT_CONFIG_ARM_64 1 -#elif defined(__thumb2__) -#define SLJIT_CONFIG_ARM_THUMB2 1 -#elif (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \ - ((defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7S__)) \ - || (defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8R__)) \ - || (defined(__ARM_ARCH_9A__))) -#define SLJIT_CONFIG_ARM_V7 1 -#elif defined(__arm__) || defined (__ARM__) -#define SLJIT_CONFIG_ARM_V5 1 -#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) -#define SLJIT_CONFIG_PPC_64 1 -#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) -#define SLJIT_CONFIG_PPC_32 1 -#elif 
defined(__mips__) && !defined(_LP64) -#define SLJIT_CONFIG_MIPS_32 1 -#elif defined(__mips64) -#define SLJIT_CONFIG_MIPS_64 1 -#elif defined (__riscv_xlen) && (__riscv_xlen == 32) -#define SLJIT_CONFIG_RISCV_32 1 -#elif defined (__riscv_xlen) && (__riscv_xlen == 64) -#define SLJIT_CONFIG_RISCV_64 1 -#elif defined(__s390x__) -#define SLJIT_CONFIG_S390X 1 -#else -/* Unsupported architecture */ -#define SLJIT_CONFIG_UNSUPPORTED 1 -#endif - -#else /* _WIN32 */ - -#if defined(_M_X64) || defined(__x86_64__) -#define SLJIT_CONFIG_X86_64 1 -#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__) -#define SLJIT_CONFIG_ARM_THUMB2 1 -#elif (defined(_M_ARM) && _M_ARM >= 7) -#define SLJIT_CONFIG_ARM_V7 1 -#elif defined(_ARM_) -#define SLJIT_CONFIG_ARM_V5 1 -#elif defined(_M_ARM64) || defined(__aarch64__) -#define SLJIT_CONFIG_ARM_64 1 -#else -#define SLJIT_CONFIG_X86_32 1 -#endif - -#endif /* !_WIN32 */ -#endif /* SLJIT_CONFIG_AUTO */ - -#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) -#undef SLJIT_EXECUTABLE_ALLOCATOR -#endif - -/******************************/ -/* CPU family type detection. 
*/ -/******************************/ - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ - || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) -#define SLJIT_CONFIG_ARM_32 1 -#endif - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#define SLJIT_CONFIG_X86 1 -#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) -#define SLJIT_CONFIG_ARM 1 -#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define SLJIT_CONFIG_PPC 1 -#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) -#define SLJIT_CONFIG_MIPS 1 -#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) -#define SLJIT_CONFIG_RISCV 1 -#endif - /***********************************************************/ /* Intel Control-flow Enforcement Technology (CET) spport. 
*/ /***********************************************************/ @@ -244,23 +132,23 @@ extern "C" { */ #ifndef SLJIT_MALLOC -#define SLJIT_MALLOC(size, allocator_data) malloc(size) +#define SLJIT_MALLOC(size, allocator_data) (malloc(size)) #endif #ifndef SLJIT_FREE -#define SLJIT_FREE(ptr, allocator_data) free(ptr) +#define SLJIT_FREE(ptr, allocator_data) (free(ptr)) #endif #ifndef SLJIT_MEMCPY -#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) +#define SLJIT_MEMCPY(dest, src, len) (memcpy(dest, src, len)) #endif #ifndef SLJIT_MEMMOVE -#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) +#define SLJIT_MEMMOVE(dest, src, len) (memmove(dest, src, len)) #endif #ifndef SLJIT_ZEROMEM -#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) +#define SLJIT_ZEROMEM(dest, len) (memset(dest, 0, len)) #endif /***************************/ @@ -310,7 +198,7 @@ extern "C" { /* Type of public API functions. */ /*********************************/ -#ifndef SLJIT_API_FUNC_ATTRIBUTE +#ifndef SLJIT_API_FUNC_ATTRIBUTE #if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) /* Static ABI functions. For all-in-one programs. */ @@ -330,6 +218,10 @@ extern "C" { /* Instruction cache flush. */ /****************************/ +#ifdef __APPLE__ +#include +#endif + /* * TODO: * @@ -370,7 +262,7 @@ extern "C" { /* Not required to implement on archs with unified caches. */ #define SLJIT_CACHE_FLUSH(from, to) -#elif defined __APPLE__ +#elif defined(__APPLE__) && MAC_OS_X_VERSION_MIN_REQUIRED >= 1050 /* Supported by all macs since Mac OS 10.5. However, it does not work on non-jailbroken iOS devices, @@ -435,14 +327,15 @@ typedef signed int sljit_s32; #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) /* Just to have something. 
*/ #define SLJIT_WORD_SHIFT 0 -typedef unsigned long int sljit_uw; -typedef long int sljit_sw; +typedef unsigned int sljit_uw; +typedef int sljit_sw; #elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ - && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) #define SLJIT_32BIT_ARCHITECTURE 1 #define SLJIT_WORD_SHIFT 2 typedef unsigned int sljit_uw; @@ -478,12 +371,46 @@ typedef double sljit_f64; #define SLJIT_F32_SHIFT 2 #define SLJIT_F64_SHIFT 3 +#define SLJIT_CONV_RESULT_MAX_INT 0 +#define SLJIT_CONV_RESULT_MIN_INT 1 +#define SLJIT_CONV_RESULT_ZERO 2 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#elif (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT 
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO +#else +#error "Result for float to integer conversion is not defined" +#endif + #ifndef SLJIT_W /* Defining long constants. */ -#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) -#define SLJIT_W(w) (w##l) -#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #ifdef _WIN64 #define SLJIT_W(w) (w##ll) #else /* !windows */ @@ -523,9 +450,10 @@ typedef double sljit_f64; /* Auto detecting mips revision. */ #if (defined __mips_isa_rev) && (__mips_isa_rev >= 6) #define SLJIT_MIPS_REV 6 -#elif (defined __mips_isa_rev && __mips_isa_rev >= 1) \ - || (defined __clang__ && defined _MIPS_ARCH_OCTEON) \ - || (defined __clang__ && defined _MIPS_ARCH_P5600) +#elif defined(__mips_isa_rev) && __mips_isa_rev >= 1 +#define SLJIT_MIPS_REV __mips_isa_rev +#elif defined(__clang__) \ + && (defined(_MIPS_ARCH_OCTEON) || defined(_MIPS_ARCH_P5600)) /* clang either forgets to define (clang-7) __mips_isa_rev at all * or sets it to zero (clang-8,-9) for -march=octeon (MIPS64 R2+) * and -march=p5600 (MIPS32 R5). 
@@ -564,7 +492,8 @@ typedef double sljit_f64; || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ - || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) #define SLJIT_UNALIGNED 1 #endif @@ -576,7 +505,8 @@ typedef double sljit_f64; || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ - || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) #define SLJIT_FPU_UNALIGNED 1 #endif @@ -649,8 +579,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 1 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 #define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw)) #define SLJIT_PREF_SHIFT_REG SLJIT_R2 #define SLJIT_MASKED_SHIFT 1 @@ -659,7 +591,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_NUMBER_OF_REGISTERS 13 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 #ifndef _WIN64 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 @@ -673,28 +607,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 -#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && 
SLJIT_CONFIG_ARM_V7) - -#define SLJIT_NUMBER_OF_REGISTERS 12 -#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 -#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 -#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 -#define SLJIT_LOCALS_OFFSET_BASE 0 - -#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) #define SLJIT_NUMBER_OF_REGISTERS 26 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 #define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -703,8 +633,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 23 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) #define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) @@ -727,6 +659,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #endif +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3 #define SLJIT_MASKED_SHIFT 1 
#define SLJIT_MASKED_SHIFT32 1 @@ -734,9 +668,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 23 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 12 -#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_LOCALS_OFFSET_BASE 0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 @@ -765,17 +701,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE #define SLJIT_MASKED_SHIFT 1 +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_MASKED_SHIFT 1 +#define SLJIT_MASKED_SHIFT32 1 + #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +/* Just to have something. 
*/ #define SLJIT_NUMBER_OF_REGISTERS 0 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 0 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #endif @@ -788,6 +741,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) +/**********************************/ +/* Temporary register management. */ +/**********************************/ + +#define SLJIT_TMP_REGISTER_BASE (SLJIT_NUMBER_OF_REGISTERS + 2) +#define SLJIT_TMP_FREGISTER_BASE (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) + +/* WARNING: Accessing temporary registers is not recommended, because they + are also used by the JIT compiler for various computations. Using them + might have any side effects including incorrect operations and crashes, + so use them at your own risk. The machine registers themselves might have + limitations, e.g. the r0 register on s390x / ppc cannot be used as + base address for memory operations. 
*/ + +/* Temporary registers */ +#define SLJIT_TMP_R0 (SLJIT_TMP_REGISTER_BASE + 0) +#define SLJIT_TMP_R1 (SLJIT_TMP_REGISTER_BASE + 1) +#define SLJIT_TMP_R2 (SLJIT_TMP_REGISTER_BASE + 2) +#define SLJIT_TMP_R3 (SLJIT_TMP_REGISTER_BASE + 3) +#define SLJIT_TMP_R4 (SLJIT_TMP_REGISTER_BASE + 4) +#define SLJIT_TMP_R5 (SLJIT_TMP_REGISTER_BASE + 5) +#define SLJIT_TMP_R6 (SLJIT_TMP_REGISTER_BASE + 6) +#define SLJIT_TMP_R7 (SLJIT_TMP_REGISTER_BASE + 7) +#define SLJIT_TMP_R8 (SLJIT_TMP_REGISTER_BASE + 8) +#define SLJIT_TMP_R9 (SLJIT_TMP_REGISTER_BASE + 9) +#define SLJIT_TMP_R(i) (SLJIT_TMP_REGISTER_BASE + (i)) + +#define SLJIT_TMP_FR0 (SLJIT_TMP_FREGISTER_BASE + 0) +#define SLJIT_TMP_FR1 (SLJIT_TMP_FREGISTER_BASE + 1) +#define SLJIT_TMP_FR2 (SLJIT_TMP_FREGISTER_BASE + 2) +#define SLJIT_TMP_FR3 (SLJIT_TMP_FREGISTER_BASE + 3) +#define SLJIT_TMP_FR4 (SLJIT_TMP_FREGISTER_BASE + 4) +#define SLJIT_TMP_FR5 (SLJIT_TMP_FREGISTER_BASE + 5) +#define SLJIT_TMP_FR6 (SLJIT_TMP_FREGISTER_BASE + 6) +#define SLJIT_TMP_FR7 (SLJIT_TMP_FREGISTER_BASE + 7) +#define SLJIT_TMP_FR8 (SLJIT_TMP_FREGISTER_BASE + 8) +#define SLJIT_TMP_FR9 (SLJIT_TMP_FREGISTER_BASE + 9) +#define SLJIT_TMP_FR(i) (SLJIT_TMP_FREGISTER_BASE + (i)) + /********************************/ /* CPU status flags management. */ /********************************/ @@ -796,10 +788,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ - || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) #define SLJIT_HAS_STATUS_FLAGS_STATE 1 #endif +/***************************************/ +/* Floating point register management. 
*/ +/***************************************/ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_F64_SECOND(reg) \ + ((reg) + SLJIT_FS0 + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS) +#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */ +#define SLJIT_F64_SECOND(reg) \ + (reg) +#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */ + /*************************************/ /* Debug and verbose related macros. */ /*************************************/ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.c old mode 100644 new mode 100755 index 4a73e8e495..bfaee050ea --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.c @@ -123,47 +123,71 @@ #endif /* Parameter parsing. */ -#define REG_MASK 0x3f +#define REG_MASK 0x7f #define OFFS_REG(reg) (((reg) >> 8) & REG_MASK) #define OFFS_REG_MASK (REG_MASK << 8) #define TO_OFFS_REG(reg) ((reg) << 8) -/* When reg cannot be unused. */ -#define FAST_IS_REG(reg) ((reg) <= REG_MASK) +#define FAST_IS_REG(reg) ((reg) < REG_MASK) /* Mask for argument types. */ #define SLJIT_ARG_MASK 0x7 #define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) -/* Mask for sljit_emit_mem. */ -#define REG_PAIR_MASK 0xff00 -#define REG_PAIR_FIRST(reg) ((reg) & 0xff) +/* Mask for register pairs. */ +#define REG_PAIR_MASK 0x7f00 +#define REG_PAIR_FIRST(reg) ((reg) & 0x7f) #define REG_PAIR_SECOND(reg) ((reg) >> 8) /* Mask for sljit_emit_enter. */ #define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3) +/* Getters for simd operations, which returns with log2(size). 
*/ +#define SLJIT_SIMD_GET_OPCODE(type) ((type) & 0xff) +#define SLJIT_SIMD_GET_REG_SIZE(type) (((type) >> 12) & 0x3f) +#define SLJIT_SIMD_GET_ELEM_SIZE(type) (((type) >> 18) & 0x3f) +#define SLJIT_SIMD_GET_ELEM2_SIZE(type) (((type) >> 24) & 0x3f) + +#define SLJIT_SIMD_CHECK_REG(type) (((type) & 0x3f000) >= SLJIT_SIMD_REG_64 && ((type) & 0x3f000) <= SLJIT_SIMD_REG_512) +#define SLJIT_SIMD_TYPE_MASK(m) ((sljit_s32)0xff000fff & ~(SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | (m))) +#define SLJIT_SIMD_TYPE_MASK2(m) ((sljit_s32)0xc0000fff & ~(SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | (m))) + /* Jump flags. */ -#define JUMP_LABEL 0x1 -#define JUMP_ADDR 0x2 +#define JUMP_ADDR 0x1 +#define JUMP_MOV_ADDR 0x2 /* SLJIT_REWRITABLE_JUMP is 0x1000. */ #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) -# define PATCH_MB 0x4 -# define PATCH_MW 0x8 +# define PATCH_MB 0x04 +# define PATCH_MW 0x08 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) # define PATCH_MD 0x10 -#endif +# define MOV_ADDR_HI 0x20 +# define JUMP_MAX_SIZE ((sljit_uw)(10 + 3)) +# define CJUMP_MAX_SIZE ((sljit_uw)(2 + 10 + 3)) +#else /* !SLJIT_CONFIG_X86_64 */ +# define JUMP_MAX_SIZE ((sljit_uw)5) +# define CJUMP_MAX_SIZE ((sljit_uw)6) +#endif /* SLJIT_CONFIG_X86_64 */ # define TYPE_SHIFT 13 +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +/* Bits 7..12 is for debug jump size, SLJIT_REWRITABLE_JUMP is 0x1000 */ +# define JUMP_SIZE_SHIFT 7 +#endif /* SLJIT_DEBUG */ #endif /* SLJIT_CONFIG_X86 */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) -# define IS_BL 0x4 -# define PATCH_B 0x8 -#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define IS_BL 0x04 +# define PATCH_B 0x08 +#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && 
SLJIT_CONFIG_ARM_V6) # define CPOOL_SIZE 512 -#endif /* SLJIT_CONFIG_ARM_V5 */ +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)3) +#endif /* SLJIT_CONFIG_ARM_V7 */ #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) # define IS_COND 0x04 @@ -172,25 +196,30 @@ # define PATCH_TYPE1 0x10 /* conditional + imm20 */ # define PATCH_TYPE2 0x20 - /* IT + imm24 */ -# define PATCH_TYPE3 0x30 /* imm11 */ -# define PATCH_TYPE4 0x40 +# define PATCH_TYPE3 0x30 /* imm24 */ -# define PATCH_TYPE5 0x50 +# define PATCH_TYPE4 0x40 /* BL + imm24 */ -# define PATCH_BL 0x60 +# define PATCH_TYPE5 0x50 + /* addwi/subwi */ +# define PATCH_TYPE6 0x60 /* 0xf00 cc code for branches */ +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)5) #endif /* SLJIT_CONFIG_ARM_THUMB2 */ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) # define IS_COND 0x004 # define IS_CBZ 0x008 # define IS_BL 0x010 -# define PATCH_B 0x020 -# define PATCH_COND 0x040 -# define PATCH_ABS48 0x080 -# define PATCH_ABS64 0x100 +# define PATCH_COND 0x020 +# define PATCH_B 0x040 +# define PATCH_B32 0x080 +# define PATCH_ABS48 0x100 +# define PATCH_ABS64 0x200 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)5) #endif /* SLJIT_CONFIG_ARM_64 */ #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) @@ -201,8 +230,12 @@ #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) # define PATCH_ABS32 0x040 # define PATCH_ABS48 0x080 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)7) +#else /* !SLJIT_CONFIG_PPC_64 */ +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)4) #endif /* SLJIT_CONFIG_PPC_64 */ -# define REMOVE_COND 0x100 #endif /* SLJIT_CONFIG_PPC */ #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) @@ -244,11 +277,26 @@ # define PATCH_ABS32 0x080 # define PATCH_ABS44 0x100 # define PATCH_ABS52 0x200 +# define JUMP_SIZE_SHIFT 58 +# define 
JUMP_MAX_SIZE ((sljit_uw)6) #else /* !SLJIT_CONFIG_RISCV_64 */ -# define PATCH_REL32 0x0 +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)2) #endif /* SLJIT_CONFIG_RISCV_64 */ #endif /* SLJIT_CONFIG_RISCV */ +#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +# define IS_COND 0x004 +# define IS_CALL 0x008 + +# define PATCH_B 0x010 +# define PATCH_J 0x020 + +# define PATCH_REL32 0x040 +# define PATCH_ABS32 0x080 +# define PATCH_ABS52 0x100 + +#endif /* SLJIT_CONFIG_LOONGARCH */ /* Stack management. */ #define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ @@ -447,9 +495,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) compiler->args_size = -1; -#endif +#endif /* SLJIT_CONFIG_X86_32 */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + CPOOL_SIZE * sizeof(sljit_u8), allocator_data); if (!compiler->cpool) { @@ -460,18 +508,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo } compiler->cpool_unique = (sljit_u8*)(compiler->cpool + CPOOL_SIZE); compiler->cpool_diff = 0xffffffff; -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) compiler->delay_slot = UNMOVABLE_INS; -#endif +#endif /* SLJIT_CONFIG_MIPS */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->last_flags = 0; compiler->last_return = -1; compiler->logical_local_size = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ #if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) if (!compiler_initialized) { @@ -504,7 +552,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile SLJIT_FREE(curr, allocator_data); } -#if (defined SLJIT_CONFIG_ARM_V5 && 
SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) SLJIT_FREE(compiler->cpool, allocator_data); #endif SLJIT_FREE(compiler, allocator_data); @@ -546,7 +594,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl { if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { jump->flags &= (sljit_uw)~JUMP_ADDR; - jump->flags |= JUMP_LABEL; jump->u.label = label; } } @@ -554,18 +601,11 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) { if (SLJIT_LIKELY(!!jump)) { - jump->flags &= (sljit_uw)~JUMP_LABEL; jump->flags |= JUMP_ADDR; jump->u.target = target; } } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label) -{ - if (SLJIT_LIKELY(!!put_label)) - put_label->label = label; -} - #define SLJIT_CURRENT_FLAGS_ALL \ (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) @@ -660,31 +700,44 @@ static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) compiler->buf = prev; } -/* Only used in RISC architectures where the instruction size is constant */ -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ - && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_MAX_ADDRESS ~(sljit_uw)0 -static SLJIT_INLINE sljit_uw compute_next_addr(struct sljit_label *label, struct sljit_jump *jump, - struct sljit_const *const_, struct sljit_put_label *put_label) +#define SLJIT_GET_NEXT_SIZE(ptr) (ptr != NULL) ? ((ptr)->size) : SLJIT_MAX_ADDRESS +#define SLJIT_GET_NEXT_ADDRESS(ptr) (ptr != NULL) ? 
((ptr)->addr) : SLJIT_MAX_ADDRESS + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +#define SLJIT_NEXT_DEFINE_TYPES \ + sljit_uw next_label_size; \ + sljit_uw next_jump_addr; \ + sljit_uw next_const_addr; \ + sljit_uw next_min_addr + +#define SLJIT_NEXT_INIT_TYPES() \ + next_label_size = SLJIT_GET_NEXT_SIZE(label); \ + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); \ + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + +#define SLJIT_GET_NEXT_MIN() \ + next_min_addr = sljit_get_next_min(next_label_size, next_jump_addr, next_const_addr); + +static SLJIT_INLINE sljit_uw sljit_get_next_min(sljit_uw next_label_size, + sljit_uw next_jump_addr, sljit_uw next_const_addr) { - sljit_uw result = ~(sljit_uw)0; + sljit_uw result = next_jump_addr; - if (label) - result = label->size; + SLJIT_ASSERT(result == SLJIT_MAX_ADDRESS || result != next_const_addr); - if (jump && jump->addr < result) - result = jump->addr; + if (next_const_addr < result) + result = next_const_addr; - if (const_ && const_->addr < result) - result = const_->addr; - - if (put_label && put_label->addr < result) - result = put_label->addr; + if (next_label_size < result) + result = next_label_size; return result; } -#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X */ +#endif /* !SLJIT_CONFIG_X86 */ static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, @@ -725,8 +778,9 @@ static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) { label->next = NULL; + label->u.index = compiler->label_count++; label->size = compiler->size; - if (compiler->last_label) + if (compiler->last_label != NULL) compiler->last_label->next = label; else compiler->labels = label; @@ -737,7 +791,21 @@ static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler { jump->next = NULL; jump->flags = flags; - if 
(compiler->last_jump) + jump->u.label = NULL; + if (compiler->last_jump != NULL) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_mov_addr(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_uw offset) +{ + jump->next = NULL; + jump->addr = compiler->size - offset; + jump->flags = JUMP_MOV_ADDR; + jump->u.label = NULL; + if (compiler->last_jump != NULL) compiler->last_jump->next = jump; else compiler->jumps = jump; @@ -748,26 +816,13 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp { const_->next = NULL; const_->addr = compiler->size; - if (compiler->last_const) + if (compiler->last_const != NULL) compiler->last_const->next = const_; else compiler->consts = const_; compiler->last_const = const_; } -static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset) -{ - put_label->next = NULL; - put_label->label = NULL; - put_label->addr = compiler->size - offset; - put_label->flags = 0; - if (compiler->last_put_label) - compiler->last_put_label->next = put_label; - else - compiler->put_labels = put_label; - compiler->last_put_label = put_label; -} - #define ADDRESSING_DEPENDS_ON(exp, reg) \ (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) @@ -827,11 +882,8 @@ static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratch #define FUNCTION_CHECK_IS_REG(r) \ (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \ - || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) - -#define FUNCTION_CHECK_IS_FREG(fr) \ - (((fr) >= SLJIT_FR0 && (fr) < (SLJIT_FR0 + compiler->fscratches)) \ - || ((fr) > (SLJIT_FS0 - compiler->fsaveds) && (fr) <= SLJIT_FS0)) + || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0) \ + || ((r) >= SLJIT_TMP_REGISTER_BASE && (r) < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS))) #if 
(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8) @@ -841,7 +893,7 @@ static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratch static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { - if (compiler->scratches == -1 || compiler->saveds == -1) + if (compiler->scratches == -1) return 0; if (!(p & SLJIT_MEM)) @@ -878,7 +930,7 @@ static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { - if (compiler->scratches == -1 || compiler->saveds == -1) + if (compiler->scratches == -1) return 0; if (FUNCTION_CHECK_IS_REG(p)) @@ -895,7 +947,7 @@ static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { - if (compiler->scratches == -1 || compiler->saveds == -1) + if (compiler->scratches == -1) return 0; if (FUNCTION_CHECK_IS_REG(p)) @@ -907,19 +959,59 @@ static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p #define FUNCTION_CHECK_DST(p, i) \ CHECK_ARGUMENT(function_check_dst(compiler, p, i)); -static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +#define FUNCTION_CHECK_IS_FREG(fr, is_32) \ + function_check_is_freg(compiler, (fr), (is_32)) + +static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32); + +#define FUNCTION_FCHECK(p, i, is_32) \ + CHECK_ARGUMENT(function_fcheck(compiler, (p), (i), (is_32))); + +static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 is_32) { - if (compiler->scratches == -1 || compiler->saveds == -1) + if 
(compiler->scratches == -1) return 0; - if (FUNCTION_CHECK_IS_FREG(p)) + if (FUNCTION_CHECK_IS_FREG(p, is_32)) return (i == 0); return function_check_src_mem(compiler, p, i); } -#define FUNCTION_FCHECK(p, i) \ - CHECK_ARGUMENT(function_fcheck(compiler, p, i)); +#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */ +#define FUNCTION_CHECK_IS_FREG(fr, is_32) \ + function_check_is_freg(compiler, (fr)) + +static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr) +{ + if (compiler->scratches == -1) + return 0; + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) + || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); +} + +#define FUNCTION_FCHECK(p, i, is_32) \ + CHECK_ARGUMENT(function_fcheck(compiler, (p), (i))); + +static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1) + return 0; + + if ((p >= SLJIT_FR0 && p < (SLJIT_FR0 + compiler->fscratches)) + || (p > (SLJIT_FS0 - compiler->fsaveds) && p <= SLJIT_FS0) + || (p >= SLJIT_TMP_FREGISTER_BASE && p < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))) + return (i == 0); + + return function_check_src_mem(compiler, p, i); +} + +#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */ #endif /* SLJIT_ARGUMENT_CHECKS */ @@ -948,23 +1040,35 @@ static void sljit_verbose_reg(struct sljit_compiler *compiler, sljit_s32 r) { if (r < (SLJIT_R0 + compiler->scratches)) fprintf(compiler->verbose, "r%d", r - SLJIT_R0); - else if (r != SLJIT_SP) + else if (r < SLJIT_SP) fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - r); - else + else if (r == SLJIT_SP) fprintf(compiler->verbose, "sp"); + else + fprintf(compiler->verbose, "t%d", r - SLJIT_TMP_REGISTER_BASE); } static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r) { +#if (defined 
SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (r >= SLJIT_F64_SECOND(SLJIT_FR0)) { + fprintf(compiler->verbose, "^"); + r -= SLJIT_F64_SECOND(0); + } +#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */ + if (r < (SLJIT_FR0 + compiler->fscratches)) fprintf(compiler->verbose, "fr%d", r - SLJIT_FR0); - else + else if (r < SLJIT_TMP_FREGISTER_BASE) fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - r); + else + fprintf(compiler->verbose, "ft%d", r - SLJIT_TMP_FREGISTER_BASE); } static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { - if ((p) & SLJIT_IMM) + if ((p) == SLJIT_IMM) fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); else if ((p) & SLJIT_MEM) { if ((p) & REG_MASK) { @@ -1016,9 +1120,17 @@ static const char* op0_names[] = { }; static const char* op1_names[] = { + "mov", "mov", "mov", "mov", + "mov", "mov", "mov", "mov", + "mov", "clz", "ctz", "rev", + "rev", "rev", "rev", "rev" +}; + +static const char* op1_types[] = { "", ".u8", ".s8", ".u16", ".s16", ".u32", ".s32", "32", - ".p", "clz", "ctz", "rev" + ".p", "", "", "", + ".u16", ".s16", ".u32", ".s32" }; static const char* op2_names[] = { @@ -1037,14 +1149,27 @@ static const char* op_src_dst_names[] = { static const char* fop1_names[] = { "mov", "conv", "conv", "conv", - "conv", "conv", "cmp", "neg", - "abs", + "conv", "conv", "conv", "conv", + "cmp", "neg", "abs", +}; + +static const char* fop1_conv_types[] = { + "sw", "s32", "sw", "s32", + "uw", "u32" }; static const char* fop2_names[] = { "add", "sub", "mul", "div" }; +static const char* fop2r_names[] = { + "copysign" +}; + +static const char* simd_op2_names[] = { + "and", "or", "xor" +}; + static const char* jump_names[] = { "equal", "not_equal", "less", "greater_equal", @@ -1052,7 +1177,8 @@ static const char* jump_names[] = { "sig_less", "sig_greater_equal", "sig_greater", "sig_less_equal", "overflow", "not_overflow", - 
"carry", "", + "carry", "not_carry", + "atomic_stored", "atomic_not_stored", "f_equal", "f_not_equal", "f_less", "f_greater_equal", "f_greater", "f_less_equal", @@ -1095,7 +1221,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_com jump = compiler->jumps; while (jump) { /* All jumps have target. */ - CHECK_ARGUMENT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); + CHECK_ARGUMENT((jump->flags & JUMP_ADDR) || jump->u.label != NULL); jump = jump->next; } #endif @@ -1224,7 +1350,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_ } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_VOID); + CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_RET_VOID); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1267,7 +1393,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi if (GET_OPCODE(op) < SLJIT_MOV_F64) { FUNCTION_CHECK_SRC(src, srcw); } else { - FUNCTION_FCHECK(src, srcw); + FUNCTION_FCHECK(src, srcw, op & SLJIT_32); } compiler->last_flags = 0; #endif @@ -1275,7 +1401,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (GET_OPCODE(op) < SLJIT_MOV_F64) { fprintf(compiler->verbose, " return%s%s ", !(op & SLJIT_32) ? "" : "32", - op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); + op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]); sljit_verbose_param(compiler, src, srcw); } else { fprintf(compiler->verbose, " return%s ", !(op & SLJIT_32) ? 
".f64" : ".f32"); @@ -1336,7 +1462,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV_S32); switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1344,6 +1470,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler case SLJIT_MOV_S32: case SLJIT_MOV32: case SLJIT_MOV_P: + case SLJIT_REV_U32: + case SLJIT_REV_S32: /* Nothing allowed */ CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); break; @@ -1358,17 +1486,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - if (GET_OPCODE(op) <= SLJIT_MOV_P) - { - fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_32) ? "" : "32", - op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); - } - else - { - fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_32) ? "" : "32", - !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", - !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); - } + fprintf(compiler->verbose, " %s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], + !(op & SLJIT_32) ? 
"" : "32", op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); @@ -1379,6 +1498,94 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P); + CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32); + + /* All arguments must be valid registers. */ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg)); + + if (op == SLJIT_MOV32_U8 || op == SLJIT_MOV32_U16) { + /* Only SLJIT_32 is allowed. */ + CHECK_ARGUMENT(!(op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z))); + } else { + /* Nothing allowed. */ + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + } + + compiler->last_flags = 0; +#endif /* SLJIT_ARGUMENT_CHECKS */ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " atomic_load%s%s ", !(op & SLJIT_32) ? 
"" : "32", + op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]); + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", ["); + sljit_verbose_reg(compiler, mem_reg); + fprintf(compiler->verbose, "]\n"); + } +#endif /* SLJIT_VERBOSE */ + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P); + CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32); + + /* All arguments must be valid registers. */ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(temp_reg) && src_reg != temp_reg); + + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE(op) == SLJIT_ATOMIC_STORED); + + if (GET_OPCODE(op) == SLJIT_MOV_U8 || GET_OPCODE(op) == SLJIT_MOV_U16) { + /* Only SLJIT_32, SLJIT_ATOMIC_STORED are allowed. */ + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + } else { + /* Only SLJIT_ATOMIC_STORED is allowed. */ + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z))); + } + + compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32); +#endif /* SLJIT_ARGUMENT_CHECKS */ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " atomic_store%s%s%s ", !(op & SLJIT_32) ? "" : "32", + op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & VARIABLE_FLAG_MASK) ? 
"" : ".stored"); + sljit_verbose_reg(compiler, src_reg); + fprintf(compiler->verbose, ", ["); + sljit_verbose_reg(compiler, mem_reg); + fprintf(compiler->verbose, "], "); + sljit_verbose_reg(compiler, temp_reg); + fprintf(compiler->verbose, "\n"); + } +#endif /* SLJIT_VERBOSE */ + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 unset, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, @@ -1541,20 +1748,19 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_dst(struct sljit_compi CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 reg) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { + SLJIT_UNUSED_ARG(type); SLJIT_UNUSED_ARG(reg); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS); -#endif - CHECK_RETURN_OK; -} - -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_s32 reg) -{ - SLJIT_UNUSED_ARG(reg); -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + if (type == SLJIT_GP_REGISTER) { + CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS) + || (reg >= SLJIT_TMP_REGISTER_BASE && reg < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS))); + } else { + CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))); + CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS) + || (reg >= SLJIT_TMP_FREGISTER_BASE && reg < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))); + } #endif CHECK_RETURN_OK; } @@ -1608,8 +1814,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(GET_OPCODE(op) 
>= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); - FUNCTION_FCHECK(src, srcw); - FUNCTION_FCHECK(dst, dstw); + FUNCTION_FCHECK(src, srcw, op & SLJIT_32); + FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1648,8 +1854,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK) || (GET_FLAG_TYPE(op) >= SLJIT_F_EQUAL && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_LESS_EQUAL)); - FUNCTION_FCHECK(src1, src1w); - FUNCTION_FCHECK(src2, src2w); + FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1678,15 +1884,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); - FUNCTION_FCHECK(src, srcw); + FUNCTION_FCHECK(src, srcw, op & SLJIT_32); FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], - (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? ".s32" : ".sw", + fop1_conv_types[GET_OPCODE(op) - SLJIT_CONV_SW_FROM_F64], (op & SLJIT_32) ? 
".f32" : ".f64"); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); @@ -1697,7 +1902,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { @@ -1708,16 +1913,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(str #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_CHECK_SRC(src, srcw); - FUNCTION_FCHECK(dst, dstw); + FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + fprintf(compiler->verbose, " %s%s.from.%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], (op & SLJIT_32) ? ".f32" : ".f64", - (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? 
".s32" : ".sw"); + fop1_conv_types[GET_OPCODE(op) - SLJIT_CONV_SW_FROM_F64]); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src, srcw); @@ -1732,13 +1936,18 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); - FUNCTION_FCHECK(src1, src1w); - FUNCTION_FCHECK(src2, src2w); - FUNCTION_FCHECK(dst, dstw); + FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); + FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1754,6 +1963,80 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_COPYSIGN_F64); + FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, op & SLJIT_32)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", fop2r_names[GET_OPCODE(op) - SLJIT_FOP2R_BASE], (op & SLJIT_32) ? 
".f32" : ".f64"); + sljit_verbose_freg(compiler, dst_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + SLJIT_UNUSED_ARG(value); + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 1)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fset32 "); + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", %f\n", value); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + SLJIT_UNUSED_ARG(value); + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fset64 "); + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", %f\n", value); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { @@ -1761,7 +2044,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcopy(struct sljit_compil CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); 
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_COPY_TO_F64 && GET_OPCODE(op) <= SLJIT_COPY_FROM_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, op & SLJIT_32)); #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); @@ -1836,7 +2119,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ || (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) #define CHECK_UNORDERED(type, last_flags) \ - ((((type) & 0xff) == SLJIT_UNORDERED || ((type) & 0xff) == SLJIT_ORDERED) && \ + ((((type) & 0xfe) == SLJIT_ORDERED) && \ ((last_flags) & 0xff) >= SLJIT_UNORDERED && ((last_flags) & 0xff) <= SLJIT_ORDERED_LESS_EQUAL) #else #define CHECK_UNORDERED(type, last_flags) 0 @@ -1945,10 +2228,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL - && ((type & 0xff) <= SLJIT_ORDERED || sljit_cmp_info(type & 0xff))); - FUNCTION_FCHECK(src1, src1w); - FUNCTION_FCHECK(src2, src2w); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL); + FUNCTION_FCHECK(src1, src1w, type & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, type & SLJIT_32); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -2055,7 +2337,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " flags.%s%s%s ", GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], - GET_OPCODE(op) < SLJIT_OP2_BASE ? 
op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : ""), + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : ""), !(op & SLJIT_SET_Z) ? "" : ".z"); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", %s\n", jump_names[type]); @@ -2064,9 +2346,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) sljit_s32 cond = type & ~SLJIT_32; @@ -2075,25 +2358,68 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); - if (src != SLJIT_IMM) { - CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src)); - CHECK_ARGUMENT(srcw == 0); - } + FUNCTION_CHECK_SRC(src1, src1w); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src2_reg)); if (cond <= SLJIT_NOT_ZERO) CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); - else + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY); + compiler->last_flags = 0; + } else CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff) || CHECK_UNORDERED(cond, compiler->last_flags)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " cmov%s %s, ", + fprintf(compiler->verbose, " select%s %s, ", !(type & SLJIT_32) ? 
"" : "32", jump_names[type & ~SLJIT_32]); sljit_verbose_reg(compiler, dst_reg); fprintf(compiler->verbose, ", "); - sljit_verbose_param(compiler, src, srcw); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, src2_reg); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 cond = type & ~SLJIT_32; + + CHECK_ARGUMENT(cond >= SLJIT_EQUAL && cond <= SLJIT_ORDERED_LESS_EQUAL); + + CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, type & SLJIT_32)); + FUNCTION_FCHECK(src1, src1w, type & SLJIT_32); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, type & SLJIT_32)); + + if (cond <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY); + compiler->last_flags = 0; + } else + CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff) + || CHECK_UNORDERED(cond, compiler->last_flags)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fselect%s %s, ", + !(type & SLJIT_32) ? 
"" : "32", + jump_names[type & ~SLJIT_32]); + sljit_verbose_freg(compiler, dst_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src2_freg); fprintf(compiler->verbose, "\n"); } #endif @@ -2104,33 +2430,35 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler sljit_s32 reg, sljit_s32 mem, sljit_sw memw) { +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 allowed_flags; +#endif /* SLJIT_ARGUMENT_CHECKS */ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; CHECK_RETURN_OK; } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 allowed_flags; - if (type & SLJIT_MEM_UNALIGNED) { - CHECK_ARGUMENT(!(type & (SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32))); - } else if (type & SLJIT_MEM_UNALIGNED_16) { - CHECK_ARGUMENT(!(type & SLJIT_MEM_UNALIGNED_32)); + CHECK_ARGUMENT(!(type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))); + } else if (type & SLJIT_MEM_ALIGNED_16) { + CHECK_ARGUMENT(!(type & SLJIT_MEM_ALIGNED_32)); } else { - CHECK_ARGUMENT((reg & REG_PAIR_MASK) || (type & SLJIT_MEM_UNALIGNED_32)); + CHECK_ARGUMENT((reg & REG_PAIR_MASK) || (type & SLJIT_MEM_ALIGNED_32)); } allowed_flags = SLJIT_MEM_UNALIGNED; switch (type & 0xff) { + case SLJIT_MOV_P: + case SLJIT_MOV: + allowed_flags |= SLJIT_MEM_ALIGNED_32; + /* fallthrough */ case SLJIT_MOV_U32: case SLJIT_MOV_S32: case SLJIT_MOV32: - allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16; - break; - case SLJIT_MOV: - case SLJIT_MOV_P: - allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32; + allowed_flags |= SLJIT_MEM_ALIGNED_16; break; } @@ -2157,15 +2485,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler else fprintf(compiler->verbose, " %s%s%s", (type & SLJIT_MEM_STORE) ? "store" : "load", - !(type & SLJIT_32) ? 
"" : "32", - op1_names[(type & 0xff) - SLJIT_OP1_BASE]); + !(type & SLJIT_32) ? "" : "32", op1_types[(type & 0xff) - SLJIT_OP1_BASE]); if (type & SLJIT_MEM_UNALIGNED) - printf(".un"); - else if (type & SLJIT_MEM_UNALIGNED_16) - printf(".un16"); - else if (type & SLJIT_MEM_UNALIGNED_32) - printf(".un32"); + printf(".unal"); + else if (type & SLJIT_MEM_ALIGNED_16) + printf(".al16"); + else if (type & SLJIT_MEM_ALIGNED_32) + printf(".al32"); if (reg & REG_PAIR_MASK) { fprintf(compiler->verbose, " {"); @@ -2218,7 +2545,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem_update(struct sljit_c fprintf(compiler->verbose, " %s%s%s.%s ", (type & SLJIT_MEM_STORE) ? "store" : "load", !(type & SLJIT_32) ? "" : "32", - op1_names[(type & 0xff) - SLJIT_OP1_BASE], + op1_types[(type & 0xff) - SLJIT_OP1_BASE], (type & SLJIT_MEM_POST) ? "post" : "pre"); sljit_verbose_reg(compiler, reg); @@ -2235,19 +2562,20 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile sljit_s32 mem, sljit_sw memw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); if (type & SLJIT_MEM_UNALIGNED) { - CHECK_ARGUMENT(!(type & (SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32))); - } else if (type & SLJIT_MEM_UNALIGNED_16) { - CHECK_ARGUMENT(!(type & SLJIT_MEM_UNALIGNED_32)); + CHECK_ARGUMENT(!(type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))); + } else if (type & SLJIT_MEM_ALIGNED_16) { + CHECK_ARGUMENT(!(type & SLJIT_MEM_ALIGNED_32)); } else { - CHECK_ARGUMENT(type & SLJIT_MEM_UNALIGNED_32); + CHECK_ARGUMENT(type & SLJIT_MEM_ALIGNED_32); CHECK_ARGUMENT(!(type & SLJIT_32)); } - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32))); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | 
SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32)); FUNCTION_CHECK_SRC_MEM(mem, memw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -2257,11 +2585,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile !(type & SLJIT_32) ? "f64" : "f32"); if (type & SLJIT_MEM_UNALIGNED) - printf(".un"); - else if (type & SLJIT_MEM_UNALIGNED_16) - printf(".un16"); - else if (type & SLJIT_MEM_UNALIGNED_32) - printf(".un32"); + printf(".unal"); + else if (type & SLJIT_MEM_ALIGNED_16) + printf(".al16"); + else if (type & SLJIT_MEM_ALIGNED_32) + printf(".al32"); fprintf(compiler->verbose, " "); sljit_verbose_freg(compiler, freg); @@ -2278,10 +2606,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_ sljit_s32 mem, sljit_sw memw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0); FUNCTION_CHECK_SRC_MEM(mem, memw); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -2306,6 +2635,297 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_ CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK2(SLJIT_SIMD_STORE)) == 0); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); + 
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= (srcdst & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + FUNCTION_FCHECK(srcdst, srcdstw, 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_mov(compiler, type | SLJIT_SIMD_TEST, freg, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_mem: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_%s.%d.%s%d", + (type & SLJIT_SIMD_STORE) ? "store" : "load", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + if ((type & 0x3f000000) == SLJIT_SIMD_MEM_UNALIGNED) + fprintf(compiler->verbose, ".unal "); + else + fprintf(compiler->verbose, ".al%d ", (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, srcdst, srcdstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) { + CHECK_ARGUMENT(srcw == 0); + } else { + FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); + } + } else if (src != SLJIT_IMM) { + FUNCTION_CHECK_DST(src, srcw); + } +#endif +#if (defined SLJIT_VERBOSE && 
SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_dup: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_replicate.%d.%s%d ", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + if (type & SLJIT_SIMD_FLOAT) + sljit_verbose_fparam(compiler, src, srcw); + else + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO | SLJIT_SIMD_LANE_SIGNED | SLJIT_32)) == 0); + CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO)) != (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO)); + CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_SIGNED)) != SLJIT_SIMD_LANE_SIGNED); + CHECK_ARGUMENT(!(type & SLJIT_SIMD_FLOAT) || !(type & (SLJIT_SIMD_LANE_SIGNED | SLJIT_32))); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(!(type & SLJIT_32) || SLJIT_SIMD_GET_ELEM_SIZE(type) <= 2); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + CHECK_ARGUMENT(lane_index >= 0 && lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); + + if (type & SLJIT_SIMD_FLOAT) { + FUNCTION_FCHECK(srcdst, srcdstw, 
SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); + } else if ((type & SLJIT_SIMD_STORE) || srcdst != SLJIT_IMM) { + FUNCTION_CHECK_DST(srcdst, srcdstw); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_TEST, freg, lane_index, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_move_lane: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_%s_lane%s%s%s.%d.%s%d ", + (type & SLJIT_SIMD_STORE) ? "store" : "load", + (type & SLJIT_32) ? "32" : "", + (type & SLJIT_SIMD_LANE_ZERO) ? "_z" : "", + (type & SLJIT_SIMD_LANE_SIGNED) ? "_s" : "", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, "[%d], ", lane_index); + if (type & SLJIT_SIMD_FLOAT) + sljit_verbose_fparam(compiler, srcdst, srcdstw); + else + sljit_verbose_param(compiler, srcdst, srcdstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src, 0)); + CHECK_ARGUMENT(src_lane_index >= 0 && src_lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if 
(SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_lane_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, src_lane_index) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_lane_replicate: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_lane_replicate.%d.%s%d ", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src); + fprintf(compiler->verbose, "[%d]\n", src_lane_index); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK2(SLJIT_SIMD_EXTEND_SIGNED)) == 0); + CHECK_ARGUMENT((type & (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT)) != (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT)); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_ELEM2_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_extend: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_load_extend%s.%d.%s%d.%s%d 
", + (type & SLJIT_SIMD_EXTEND_SIGNED) ? "_s" : "", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_32)) == SLJIT_SIMD_STORE); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_sign(compiler, type | SLJIT_SIMD_TEST, freg, dst, dstw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_sign: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_store_sign%s.%d.%s%d ", + (type & SLJIT_32) ? "32" : "", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) >= SLJIT_SIMD_OP2_AND && (type & SLJIT_SIMD_TYPE_MASK(0)) <= SLJIT_SIMD_OP2_XOR); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src1_freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, 0)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_op2(compiler, type | SLJIT_SIMD_TEST, dst_freg, src1_freg, src2_freg) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_op2: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_%s.%d.%s%d ", + simd_op2_names[SLJIT_SIMD_GET_OPCODE(type) - 1], + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, dst_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src1_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src2_freg); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { /* Any offset is allowed. */ @@ -2341,14 +2961,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " put_label "); + fprintf(compiler->verbose, " mov_addr "); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } @@ -2363,7 +2983,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_co #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ #define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ - SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1), \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1) && !(SLJIT_CONV_F64_FROM_UW & 0x1), \ invalid_float_opcodes); \ if (GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CMP_F64) { \ if (GET_OPCODE(op) == SLJIT_CMP_F64) { \ @@ -2378,48 +2998,22 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_co ADJUST_LOCAL_OFFSET(src, srcw); \ return 
sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw); \ } \ - CHECK(check_sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw)); \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONV_F64_FROM_S32) { \ + CHECK(check_sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw)); \ ADJUST_LOCAL_OFFSET(dst, dstw); \ ADJUST_LOCAL_OFFSET(src, srcw); \ - return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \ + return sljit_emit_fop1_conv_f64_from_uw(compiler, op, dst, dstw, src, srcw); \ } \ CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \ ADJUST_LOCAL_OFFSET(dst, dstw); \ ADJUST_LOCAL_OFFSET(src, srcw); -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ - || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ - || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)) \ - || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ - || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) - -static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) -{ - struct sljit_label *label; - struct sljit_jump *jump; - sljit_s32 op = (type & SLJIT_32) ? 
SLJIT_MOV32 : SLJIT_MOV; - - SLJIT_SKIP_CHECKS(compiler); - jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); - FAIL_IF(!jump); - - SLJIT_SKIP_CHECKS(compiler); - FAIL_IF(sljit_emit_op1(compiler, op, dst_reg, 0, src, srcw)); - - SLJIT_SKIP_CHECKS(compiler); - label = sljit_emit_label(compiler); - FAIL_IF(!label); - - sljit_set_label(jump, label); - return SLJIT_SUCCESS; -} - -#endif - -#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \ - && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, @@ -2432,7 +3026,7 @@ static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit return sljit_emit_op1(compiler, type & (0xff | SLJIT_32), reg, 0, mem, memw); } -#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) && !SLJIT_CONFIG_ARM_V5 */ +#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) */ #if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \ && !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) @@ -2478,7 +3072,7 @@ static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, slji #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) # include "sljitNativeX86_common.c" -#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) # include "sljitNativeARM_32.c" #elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) # include "sljitNativeARM_32.c" @@ -2494,8 +3088,12 @@ static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, slji # include "sljitNativeRISCV_common.c" #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) # include "sljitNativeS390X.c" +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) 
+# include "sljitNativeLOONGARCH_64.c" #endif +#include "sljitSerialize.c" + static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) @@ -2540,8 +3138,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp return sljit_emit_return_void(compiler); } +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined(SLJIT_CONFIG_LOONGARCH_64) && SLJIT_CONFIG_LOONGARCH_64) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop2(compiler, op, dst_freg, 0, src1, src1w, src2, src2w); +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH_64 */ + #if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ - && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + && !(defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, @@ -2557,18 +3176,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler condition = type & 0xff; #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) { - if ((src1 & SLJIT_IMM) && !src1w) { + if (src1 == SLJIT_IMM && !src1w) { src1 = src2; src1w = src2w; src2 = SLJIT_IMM; src2w = 0; } - if ((src2 & SLJIT_IMM) && !src2w) + if (src2 == 
SLJIT_IMM && !src2w) return emit_cmp_to0(compiler, type, src1, src1w); } #endif - if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) { + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM && src2 != SLJIT_IMM)) { /* Immediate is preferred as second argument by most architectures. */ switch (condition) { case SLJIT_LESS: @@ -2621,20 +3240,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler #endif /* !SLJIT_CONFIG_MIPS */ -#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) { - if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL) - return 0; - switch (type) { case SLJIT_UNORDERED_OR_EQUAL: case SLJIT_ORDERED_NOT_EQUAL: - return 0; + return 1; } - return 1; + return 0; } #endif /* SLJIT_CONFIG_ARM */ @@ -2706,6 +3322,158 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler #endif /* !SLJIT_CONFIG_ARM_64 && !SLJIT_CONFIG_PPC */ +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(srcdst); + SLJIT_UNUSED_ARG(srcdstw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(src); + 
SLJIT_UNUSED_ARG(srcw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(lane_index); + SLJIT_UNUSED_ARG(srcdst); + SLJIT_UNUSED_ARG(srcdstw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(src_lane_index); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 
dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(dst_freg); + SLJIT_UNUSED_ARG(src1_freg); + SLJIT_UNUSED_ARG(src2_freg); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM */ + +#if !(defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86) \ + && !(defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \ + && !(defined(SLJIT_CONFIG_S390X) && SLJIT_CONFIG_S390X) \ + && !(defined(SLJIT_CONFIG_LOONGARCH) && SLJIT_CONFIG_LOONGARCH) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst_reg); + SLJIT_UNUSED_ARG(mem_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src_reg); + SLJIT_UNUSED_ARG(mem_reg); + SLJIT_UNUSED_ARG(temp_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH */ + #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) @@ -2723,503 +3491,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); } -#endif - -#else /* SLJIT_CONFIG_UNSUPPORTED */ - -/* Empty function bodies for those machines, which are not (yet) 
supported. */ - -SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) -{ - return "unsupported"; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data) -{ - SLJIT_UNUSED_ARG(allocator_data); - SLJIT_UNUSED_ARG(exec_allocator_data); - SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNREACHABLE(); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNREACHABLE(); -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(size); - SLJIT_UNREACHABLE(); - return NULL; -} - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) -SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(verbose); - SLJIT_UNREACHABLE(); -} -#endif - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) -{ - SLJIT_UNUSED_ARG(feature_type); - SLJIT_UNREACHABLE(); - return 0; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) -{ - SLJIT_UNUSED_ARG(type); - SLJIT_UNREACHABLE(); - return 0; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) -{ - SLJIT_UNUSED_ARG(code); - SLJIT_UNUSED_ARG(exec_allocator_data); - SLJIT_UNREACHABLE(); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - 
SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(options); - SLJIT_UNUSED_ARG(arg_types); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(fscratches); - SLJIT_UNUSED_ARG(fsaveds); - SLJIT_UNUSED_ARG(local_size); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(options); - SLJIT_UNUSED_ARG(arg_types); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(fscratches); - SLJIT_UNUSED_ARG(fsaveds); - SLJIT_UNUSED_ARG(local_size); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) -{ - SLJIT_UNUSED_ARG(compiler); - 
SLJIT_UNUSED_ARG(op); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst_reg, - sljit_s32 src1_reg, - sljit_s32 src2_reg, - sljit_s32 src3, sljit_sw src3w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst_reg); - SLJIT_UNUSED_ARG(src1_reg); - SLJIT_UNUSED_ARG(src2_reg); - SLJIT_UNUSED_ARG(src3); - SLJIT_UNUSED_ARG(src3w); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - 
SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) -{ - SLJIT_UNREACHABLE(); - return reg; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_u32 size) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(instruction); - SLJIT_UNUSED_ARG(size); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(current_flags); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 freg, sljit_s32 reg) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(freg); - SLJIT_UNUSED_ARG(reg); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - SLJIT_UNUSED_ARG(compiler); - 
SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 arg_types) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(arg_types); - SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) -{ - SLJIT_UNUSED_ARG(jump); - SLJIT_UNUSED_ARG(label); - SLJIT_UNREACHABLE(); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) -{ - SLJIT_UNUSED_ARG(jump); - SLJIT_UNUSED_ARG(target); - SLJIT_UNREACHABLE(); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label) -{ - SLJIT_UNUSED_ARG(put_label); - SLJIT_UNUSED_ARG(label); - SLJIT_UNREACHABLE(); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) -{ - 
SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 arg_types, - sljit_s32 src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(arg_types); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 type) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(type); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(dst_reg); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(reg); - SLJIT_UNUSED_ARG(mem); - SLJIT_UNUSED_ARG(memw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(reg); - SLJIT_UNUSED_ARG(mem); - SLJIT_UNUSED_ARG(memw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, 
sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); - SLJIT_UNUSED_ARG(mem); - SLJIT_UNUSED_ARG(memw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); - SLJIT_UNUSED_ARG(mem); - SLJIT_UNUSED_ARG(memw); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(offset); - SLJIT_UNREACHABLE(); - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw initval) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(initval); - SLJIT_UNREACHABLE(); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - return NULL; -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) -{ - SLJIT_UNUSED_ARG(addr); - SLJIT_UNUSED_ARG(new_target); - SLJIT_UNUSED_ARG(executable_offset); - SLJIT_UNREACHABLE(); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) -{ - SLJIT_UNUSED_ARG(addr); - SLJIT_UNUSED_ARG(new_constant); - SLJIT_UNUSED_ARG(executable_offset); - SLJIT_UNREACHABLE(); -} +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM_64 */ 
#endif /* !SLJIT_CONFIG_UNSUPPORTED */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.h b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.h old mode 100644 new mode 100755 index 87805dd7fd..0974056d6c --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.h +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.h @@ -72,6 +72,7 @@ #include "sljitConfigPre.h" #endif /* SLJIT_HAVE_CONFIG_PRE */ +#include "sljitConfigCPU.h" #include "sljitConfig.h" /* The following header file defines useful macros for fine tuning @@ -107,9 +108,9 @@ extern "C" { /* Cannot allocate executable memory. Only sljit_generate_code() returns with this error code. */ #define SLJIT_ERR_EX_ALLOC_FAILED 3 -/* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */ +/* Unsupported instruction form. */ #define SLJIT_ERR_UNSUPPORTED 4 -/* An ivalid argument is passed to any SLJIT function. */ +/* An invalid argument is passed to any SLJIT function. */ #define SLJIT_ERR_BAD_ARGUMENT 5 /* --------------------------------------------------------------------- */ @@ -127,40 +128,40 @@ extern "C" { is the first saved register, the one before the last is the second saved register, and so on. 
- If an architecture provides two scratch and three saved registers, - its scratch and saved register sets are the following: + For example, in an architecture with only five registers (A-E), if two + are scratch and three saved registers, they will be defined as follows: - R0 | | R0 is always a scratch register - R1 | | R1 is always a scratch register - [R2] | S2 | R2 and S2 represent the same physical register - [R3] | S1 | R3 and S1 represent the same physical register - [R4] | S0 | R4 and S0 represent the same physical register + A | R0 | | R0 always represent scratch register A + B | R1 | | R1 always represent scratch register B + C | [R2] | S2 | R2 and S2 represent the same physical register C + D | [R3] | S1 | R3 and S1 represent the same physical register D + E | [R4] | S0 | R4 and S0 represent the same physical register E - Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and - SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS will be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS will be 3. - Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 + Note: For all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers are virtual on x86-32. See below. The purpose of this definition is convenience: saved registers can - be used as extra scratch registers. For example four registers can - be specified as scratch registers and the fifth one as saved register - on the CPU above and any user code which requires four scratch - registers can run unmodified. The SLJIT compiler automatically saves - the content of the two extra scratch register on the stack. Scratch - registers can also be preserved by saving their value on the stack - but this needs to be done manually. + be used as extra scratch registers. 
For example, building on the + previous example, four registers can be specified as scratch registers + and the fifth one as saved register, allowing any user code which requires + four scratch registers to run unmodified. The SLJIT compiler automatically + saves the content of the two extra scratch register on the stack. Scratch + registers can also be preserved by saving their value on the stack but + that needs to be done manually. Note: To emphasize that registers assigned to R2-R4 are saved registers, they are enclosed by square brackets. - Note: sljit_emit_enter and sljit_set_context defines whether a register - is S or R register. E.g: when 3 scratches and 1 saved is mapped - by sljit_emit_enter, the allowed register set will be: R0-R2 and - S0. Although S2 is mapped to the same position as R2, it does not - available in the current configuration. Furthermore the S1 register - is not available at all. + Note: sljit_emit_enter and sljit_set_context define whether a register + is S or R register. E.g: if in the previous example 3 scratches and + 1 saved are mapped by sljit_emit_enter, the allowed register set + will be: R0-R2 and S0. Although S2 is mapped to the same register + as R2, it is not available in that configuration. Furthermore + the S1 register cannot be used at all. */ /* Scratch registers. */ @@ -209,7 +210,7 @@ extern "C" { /* The SLJIT_SP provides direct access to the linear stack space allocated by sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). The immediate offset is extended by the relative stack offset automatically. - The sljit_get_local_base can be used to obtain the real address of a value. */ + sljit_get_local_base can be used to obtain the real address of a value. */ #define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) /* Return with machine word. 
*/ @@ -221,7 +222,7 @@ extern "C" { /* --------------------------------------------------------------------- */ /* Each floating point register can store a 32 or a 64 bit precision - value. The FR and FS register sets are overlap in the same way as R + value. The FR and FS register sets overlap in the same way as R and S register sets. See above. */ /* Floating point scratch registers. */ @@ -231,6 +232,10 @@ extern "C" { #define SLJIT_FR3 4 #define SLJIT_FR4 5 #define SLJIT_FR5 6 +#define SLJIT_FR6 7 +#define SLJIT_FR7 8 +#define SLJIT_FR8 9 +#define SLJIT_FR9 10 /* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ #define SLJIT_FR(i) (1 + (i)) @@ -242,6 +247,10 @@ extern "C" { #define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) #define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) #define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +#define SLJIT_FS6 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 6) +#define SLJIT_FS7 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 7) +#define SLJIT_FS8 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 8) +#define SLJIT_FS9 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 9) /* All S registers provided by the architecture can be accessed by SLJIT_FS(i) The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ #define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) @@ -260,23 +269,39 @@ extern "C" { /* The following argument type definitions are used by sljit_emit_enter, sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. - As for sljit_emit_call and sljit_emit_icall, the first integer argument + For sljit_emit_call and sljit_emit_icall, the first integer argument must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. Similarly the first floating point argument must be placed into SLJIT_FR0, the second one into SLJIT_FR1, and so on. 
- As for sljit_emit_enter, the integer arguments can be stored in scratch - or saved registers. The first integer argument without _R postfix is - stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer - arguments with _R postfix are placed into scratch registers. The index - of the scratch register is the count of the previous integer arguments - starting from SLJIT_R0. The floating point arguments are always placed - into SLJIT_FR0, SLJIT_FR1, and so on. + For sljit_emit_enter, the integer arguments can be stored in scratch + or saved registers. Scratch registers are identified by a _R suffix. - Note: if a function is called by sljit_emit_call/sljit_emit_icall and - an argument is stored in a scratch register by sljit_emit_enter, - that argument uses the same scratch register index for both - integer and floating point arguments. + If only saved registers are used, then the allocation mirrors what is + done for the "call" functions but using saved registers, meaning that + the first integer argument goes to SLJIT_S0, the second one goes into + SLJIT_S1, and so on. + + If scratch registers are used, then the way the integer registers are + allocated changes so that SLJIT_S0, SLJIT_S1, etc; will be assigned + only for the arguments not using scratch registers, while SLJIT_Rn + will be used for the ones using scratch registers. + + Furthermore, the index (shown as "n" above) that will be used for the + scratch register depends on how many previous integer registers + (scratch or saved) were used already, starting with SLJIT_R0. + Even though some indexes will be likely skipped, they still need to be + accounted for in the scratches parameter of sljit_emit_enter. See below + for some examples. + + The floating point arguments always use scratch registers (but not the + _R suffix like the integer arguments) and must use SLJIT_FR0, SLJIT_FR1, + just like in the "call" functions. 
+ Note: the mapping for scratch registers is part of the compiler context + and therefore a new context after sljit_emit_call/sljit_emit_icall + could remove access to some scratch registers that were used as + arguments. Example function definition: sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, @@ -292,25 +317,29 @@ extern "C" { Argument passing: arg_a must be placed in SLJIT_R0 - arg_c must be placed in SLJIT_R1 arg_b must be placed in SLJIT_FR0 + arg_c must be placed in SLJIT_R1 arg_d must be placed in SLJIT_FR1 Examples for argument processing by sljit_emit_enter: - SLJIT_ARGS4(VOID, P, 32_R, F32, W) + SLJIT_ARGS4V(P, 32_R, F32, W) Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 + The type of the result is void. - SLJIT_ARGS4(VOID, W, W_R, W, W_R) + SLJIT_ARGS4(F32, W, W_R, W, W_R) Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 + The type of the result is sljit_f32. - SLJIT_ARGS4(VOID, F64, W, F32, W_R) + SLJIT_ARGS4(P, F64, W, F32, P_R) Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 + The type of the result is pointer. Note: it is recommended to pass the scratch arguments first followed by the saved arguments: - SLJIT_ARGS4(VOID, W_R, W_R, W, W) + SLJIT_ARGS4(W, W_R, W_R, W, W) Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 + The type of the result is sljit_sw / sljit_uw. */ /* The following flag is only allowed for the integer arguments of @@ -318,21 +347,21 @@ extern "C" { stored in a scratch register instead of a saved register. */ #define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 -/* Void result, can only be used by SLJIT_ARG_RETURN. */ -#define SLJIT_ARG_TYPE_VOID 0 +/* No return value, only supported by SLJIT_ARG_RETURN. */ +#define SLJIT_ARG_TYPE_RET_VOID 0 /* Machine word sized integer argument or result. */ -#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W 1 #define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) /* 32 bit integer argument or result. 
*/ -#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32 2 #define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) /* Pointer sized integer argument or result. */ -#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P 3 #define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) /* 64 bit floating point argument or result. */ -#define SLJIT_ARG_TYPE_F64 4 +#define SLJIT_ARG_TYPE_F64 4 /* 32 bit floating point argument or result. */ -#define SLJIT_ARG_TYPE_F32 5 +#define SLJIT_ARG_TYPE_F32 5 #define SLJIT_ARG_SHIFT 4 #define SLJIT_ARG_RETURN(type) (type) @@ -345,24 +374,40 @@ extern "C" { can be shortened to: SLJIT_ARGS1(W, F32) + + Another example where no value is returned: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W_R, 1) + + can be shortened to: + SLJIT_ARGS1V(W_R) */ #define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type #define SLJIT_ARGS0(ret) \ SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) +#define SLJIT_ARGS0V() \ + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) #define SLJIT_ARGS1(ret, arg1) \ (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) +#define SLJIT_ARGS1V(arg1) \ + (SLJIT_ARGS0V() | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) #define SLJIT_ARGS2(ret, arg1, arg2) \ (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) +#define SLJIT_ARGS2V(arg1, arg2) \ + (SLJIT_ARGS1V(arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) #define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) +#define SLJIT_ARGS3V(arg1, arg2, arg3) \ + (SLJIT_ARGS2V(arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) #define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) +#define SLJIT_ARGS4V(arg1, arg2, arg3, arg4) \ + (SLJIT_ARGS3V(arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) /* 
--------------------------------------------------------------------- */ /* Main structures and functions */ @@ -382,7 +427,10 @@ struct sljit_memory_fragment { struct sljit_label { struct sljit_label *next; - sljit_uw addr; + union { + sljit_uw index; + sljit_uw addr; + } u; /* The maximum size difference. */ sljit_uw size; }; @@ -398,13 +446,6 @@ struct sljit_jump { } u; }; -struct sljit_put_label { - struct sljit_put_label *next; - struct sljit_label *label; - sljit_uw addr; - sljit_uw flags; -}; - struct sljit_const { struct sljit_const *next; sljit_uw addr; @@ -416,18 +457,18 @@ struct sljit_compiler { struct sljit_label *labels; struct sljit_jump *jumps; - struct sljit_put_label *put_labels; struct sljit_const *consts; struct sljit_label *last_label; struct sljit_jump *last_jump; struct sljit_const *last_const; - struct sljit_put_label *last_put_label; void *allocator_data; void *exec_allocator_data; struct sljit_memory_fragment *buf; struct sljit_memory_fragment *abuf; + /* Number of labels created by the compiler. */ + sljit_uw label_count; /* Available scratch registers. */ sljit_s32 scratches; /* Available saved registers. */ @@ -447,17 +488,18 @@ struct sljit_compiler { #if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) sljit_s32 status_flags_state; -#endif +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 args_size; -#endif +#endif /* SLJIT_CONFIG_X86_32 */ #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* Temporary fields. */ sljit_s32 mode32; -#endif +#endif /* SLJIT_CONFIG_X86_64 */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) /* Constant pool handling. */ sljit_uw *cpool; sljit_u8 *cpool_unique; @@ -466,44 +508,54 @@ struct sljit_compiler { /* Other members. */ /* Contains pointer, "ldr pc, [...]" pairs. 
*/ sljit_uw patches; -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Temporary fields. */ sljit_uw shift_imm; -#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ +#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */ #if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) sljit_uw args_size; -#endif +#endif /* SLJIT_CONFIG_ARM_32 && __SOFTFP__ */ #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + /* Temporary fields. */ sljit_u32 imm; -#endif +#endif /* SLJIT_CONFIG_PPC */ #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) sljit_s32 delay_slot; + /* Temporary fields. */ sljit_s32 cache_arg; sljit_sw cache_argw; -#endif +#endif /* SLJIT_CONFIG_MIPS */ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) sljit_uw args_size; -#endif +#endif /* SLJIT_CONFIG_MIPS_32 */ #if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + /* Temporary fields. */ sljit_s32 cache_arg; sljit_sw cache_argw; -#endif +#endif /* SLJIT_CONFIG_RISCV */ #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Need to allocate register save area to make calls. */ + /* Temporary fields. */ sljit_s32 mode; -#endif +#endif /* SLJIT_CONFIG_S390X */ + +#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + /* Temporary fields. */ + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif /* SLJIT_CONFIG_LOONGARCH */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) FILE* verbose; -#endif +#endif /* SLJIT_VERBOSE */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) @@ -514,7 +566,7 @@ struct sljit_compiler { sljit_s32 last_return; /* Local size passed to entry functions. 
*/ sljit_s32 logical_local_size; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ @@ -522,7 +574,7 @@ struct sljit_compiler { /* Trust arguments when an API function is called. Used internally for calling API functions. */ sljit_s32 skip_checks; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */ }; /* --------------------------------------------------------------------- */ @@ -637,24 +689,37 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler #define SLJIT_HAS_COPY_F32 9 /* [Emulated] Copy from/to f64 operation is available (see sljit_emit_fcopy). */ #define SLJIT_HAS_COPY_F64 10 +/* [Not emulated] The 64 bit floating point registers can be used as + two separate 32 bit floating point registers (e.g. ARM32). The + second 32 bit part can be accessed by SLJIT_F64_SECOND. */ +#define SLJIT_HAS_F64_AS_F32_PAIR 11 +/* [Not emulated] Some SIMD operations are supported by the compiler. */ +#define SLJIT_HAS_SIMD 12 +/* [Not emulated] SIMD registers are mapped to a pair of double precision + floating point registers. E.g. passing either SLJIT_FR0 or SLJIT_FR1 to + a simd operation represents the same 128 bit register, and both SLJIT_FR0 + and SLJIT_FR1 are overwritten. */ +#define SLJIT_SIMD_REGS_ARE_PAIRS 13 +/* [Not emulated] Atomic support is available (fine-grained). */ +#define SLJIT_HAS_ATOMIC 14 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) -/* [Not emulated] SSE2 support is available on x86. */ -#define SLJIT_HAS_SSE2 100 +/* [Not emulated] AVX support is available on x86. */ +#define SLJIT_HAS_AVX 100 +/* [Not emulated] AVX2 support is available on x86. 
*/ +#define SLJIT_HAS_AVX2 101 #endif SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); /* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL, - sljit_cmp_info returns one, if the cpu supports the passed floating - point comparison type. + sljit_cmp_info returns with: + zero - if the cpu supports the floating point comparison type + one - if the comparison requires two machine instructions + two - if the comparison requires more than two machine instructions - If type is SLJIT_UNORDERED or SLJIT_ORDERED, sljit_cmp_info returns - one, if the cpu supports checking the unordered comparison result - regardless of the comparison type passed to the comparison instruction. - The returned value is always one, if there is at least one type between - SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL where sljit_cmp_info - returns with a zero value. + When the result is non-zero, it is recommended to avoid + using the specified comparison type if it is easy to do so. Otherwise it returns zero. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); @@ -665,7 +730,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); /* The executable code is a function from the viewpoint of the C - language. The function calls must obey to the ABI (Application + language. The function calls must conform to the ABI (Application Binary Interface) of the platform, which specify the purpose of machine registers and stack handling among other things. The sljit_emit_enter function emits the necessary instructions for @@ -724,7 +789,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); #define SLJIT_ENTER_REG_ARG 0x00000004 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. 
*/ -#define SLJIT_MAX_LOCAL_SIZE 65536 +#define SLJIT_MAX_LOCAL_SIZE 1048576 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, @@ -735,9 +800,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi by sljit_emit_enter. Several functions (such as sljit_emit_return) requires this context to be able to generate the appropriate code. However, some code fragments (compiled separately) may have no - normal entry point so their context is unknown for the compiler. + normal entry point so their context is unknown to the compiler. - The sljit_set_context and sljit_emit_enter have the same arguments, + sljit_set_context and sljit_emit_enter have the same arguments, but sljit_set_context does not generate any machine code. Note: every call of sljit_emit_enter and sljit_set_context overwrites @@ -797,7 +862,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c Note: Different architectures have different addressing limitations. A single instruction is enough for the following addressing - modes. Other adrressing modes are emulated by instruction + modes. Other addressing modes are emulated by instruction sequences. This information could help to improve those code generators which focuses only a few architectures. @@ -828,6 +893,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c s390x: [reg+imm], -2^19 <= imm < 2^19 [reg+reg] is supported Write-back is not supported + loongarch: [reg+imm], -2048 <= imm <= 2047 + [reg+reg] is supported + Write-back is not supported */ /* Macros for specifying operand types. 
*/ @@ -835,9 +903,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c #define SLJIT_MEM0() (SLJIT_MEM) #define SLJIT_MEM1(r1) (SLJIT_MEM | (r1)) #define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) -#define SLJIT_IMM 0x40 +#define SLJIT_IMM 0x7f #define SLJIT_REG_PAIR(r1, r2) ((r1) | ((r2) << 8)) +/* Macros for checking operand types (only for valid arguments). */ +#define SLJIT_IS_REG(arg) ((arg) > 0 && (arg) < SLJIT_IMM) +#define SLJIT_IS_MEM(arg) ((arg) & SLJIT_MEM) +#define SLJIT_IS_MEM0(arg) ((arg) == SLJIT_MEM) +#define SLJIT_IS_MEM1(arg) ((arg) > SLJIT_MEM && (arg) < (SLJIT_MEM << 1)) +#define SLJIT_IS_MEM2(arg) (((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1)) +#define SLJIT_IS_IMM(arg) ((arg) == SLJIT_IMM) +#define SLJIT_IS_REG_PAIR(arg) (!((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1)) + +/* Macros for extracting registers from operands. */ +/* Support operands which contains a single register or + constructed using SLJIT_MEM1, SLJIT_MEM2, or SLJIT_REG_PAIR. */ +#define SLJIT_EXTRACT_REG(arg) ((arg) & 0x7f) +/* Support operands which constructed using SLJIT_MEM2, or SLJIT_REG_PAIR. */ +#define SLJIT_EXTRACT_SECOND_REG(arg) ((arg) >> 8) + /* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on 32 bit CPUs. 
When this option is set for an arithmetic operation, only the lower 32 bits of the input registers are used, and the CPU status @@ -1054,6 +1138,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile Note: immediate source argument is not supported */ #define SLJIT_REV (SLJIT_OP1_BASE + 11) #define SLJIT_REV32 (SLJIT_REV | SLJIT_32) +/* Reverse the order of bytes in the lower 16 bit and extend as unsigned + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV_U16 (SLJIT_OP1_BASE + 12) +#define SLJIT_REV32_U16 (SLJIT_REV_U16 | SLJIT_32) +/* Reverse the order of bytes in the lower 16 bit and extend as signed + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV_S16 (SLJIT_OP1_BASE + 13) +#define SLJIT_REV32_S16 (SLJIT_REV_S16 | SLJIT_32) +/* Reverse the order of bytes in the lower 32 bit and extend as unsigned + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV_U32 (SLJIT_OP1_BASE + 14) +/* Reverse the order of bytes in the lower 32 bit and extend as signed + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV_S32 (SLJIT_OP1_BASE + 15) /* The following unary operations are supported by using sljit_emit_op2: - binary not: SLJIT_XOR with immedate -1 as src1 or src2 @@ -1276,15 +1382,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp /* Flags: - (may destroy flags) */ #define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) #define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_UW (SLJIT_FOP1_BASE + 6) +#define 
SLJIT_CONV_F32_FROM_UW (SLJIT_CONV_F64_FROM_UW | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_U32 (SLJIT_FOP1_BASE + 7) +#define SLJIT_CONV_F32_FROM_U32 (SLJIT_CONV_F64_FROM_U32 | SLJIT_32) /* Note: dst is the left and src is the right operand for SLJIT_CMP_F32/64. Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */ -#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6) +#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 8) #define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32) /* Flags: - (may destroy flags) */ -#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7) +#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 9) #define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32) /* Flags: - (may destroy flags) */ -#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8) +#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 10) #define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -1312,6 +1424,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); +/* Starting index of opcodes for sljit_emit_fop2r. */ +#define SLJIT_FOP2R_BASE 168 + +/* Flags: - (may destroy flags) */ +#define SLJIT_COPYSIGN_F64 (SLJIT_FOP2R_BASE + 0) +#define SLJIT_COPYSIGN_F32 (SLJIT_COPYSIGN_F64 | SLJIT_32) + +/* Similar to sljit_emit_fop2, except the destination is always a register. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Sets a floating point register to an immediate value. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value); + /* The following opcodes are used by sljit_emit_fcopy(). 
*/ /* 64 bit: copy a 64 bit value from an integer register into a @@ -1410,28 +1542,32 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) #define SLJIT_NOT_CARRY 13 +#define SLJIT_ATOMIC_STORED 14 +#define SLJIT_SET_ATOMIC_STORED SLJIT_SET(SLJIT_ATOMIC_STORED) +#define SLJIT_ATOMIC_NOT_STORED 15 + /* Basic floating point comparison types. Note: when the comparison result is unordered, their behaviour is unspecified. */ -#define SLJIT_F_EQUAL 14 +#define SLJIT_F_EQUAL 16 #define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL) -#define SLJIT_F_NOT_EQUAL 15 +#define SLJIT_F_NOT_EQUAL 17 #define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_EQUAL) -#define SLJIT_F_LESS 16 +#define SLJIT_F_LESS 18 #define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS) -#define SLJIT_F_GREATER_EQUAL 17 +#define SLJIT_F_GREATER_EQUAL 19 #define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_LESS) -#define SLJIT_F_GREATER 18 +#define SLJIT_F_GREATER 20 #define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER) -#define SLJIT_F_LESS_EQUAL 19 +#define SLJIT_F_LESS_EQUAL 21 #define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_GREATER) /* Jumps when either argument contains a NaN value. */ -#define SLJIT_UNORDERED 20 +#define SLJIT_UNORDERED 22 #define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED) /* Jumps when neither argument contains a NaN value. */ -#define SLJIT_ORDERED 21 +#define SLJIT_ORDERED 23 #define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_UNORDERED) /* Ordered / unordered floating point comparison types. @@ -1439,41 +1575,41 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi Note: each comparison type has an ordered and unordered form. Some architectures supports only either of them (see: sljit_cmp_info). 
*/ -#define SLJIT_ORDERED_EQUAL 22 +#define SLJIT_ORDERED_EQUAL 24 #define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) -#define SLJIT_UNORDERED_OR_NOT_EQUAL 23 +#define SLJIT_UNORDERED_OR_NOT_EQUAL 25 #define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) -#define SLJIT_ORDERED_LESS 24 +#define SLJIT_ORDERED_LESS 26 #define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS) -#define SLJIT_UNORDERED_OR_GREATER_EQUAL 25 +#define SLJIT_UNORDERED_OR_GREATER_EQUAL 27 #define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS) -#define SLJIT_ORDERED_GREATER 26 +#define SLJIT_ORDERED_GREATER 28 #define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER) -#define SLJIT_UNORDERED_OR_LESS_EQUAL 27 +#define SLJIT_UNORDERED_OR_LESS_EQUAL 29 #define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER) -#define SLJIT_UNORDERED_OR_EQUAL 28 +#define SLJIT_UNORDERED_OR_EQUAL 30 #define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) -#define SLJIT_ORDERED_NOT_EQUAL 29 +#define SLJIT_ORDERED_NOT_EQUAL 31 #define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) -#define SLJIT_UNORDERED_OR_LESS 30 +#define SLJIT_UNORDERED_OR_LESS 32 #define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS) -#define SLJIT_ORDERED_GREATER_EQUAL 31 +#define SLJIT_ORDERED_GREATER_EQUAL 33 #define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS) -#define SLJIT_UNORDERED_OR_GREATER 32 +#define SLJIT_UNORDERED_OR_GREATER 34 #define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) -#define SLJIT_ORDERED_LESS_EQUAL 33 +#define SLJIT_ORDERED_LESS_EQUAL 35 #define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) /* Unconditional jump types. */ -#define SLJIT_JUMP 34 +#define SLJIT_JUMP 36 /* Fast calling method. See the description above. */ -#define SLJIT_FAST_CALL 35 +#define SLJIT_FAST_CALL 37 /* Default C calling convention. 
*/ -#define SLJIT_CALL 36 +#define SLJIT_CALL 38 /* Called function must be compiled by SLJIT. See SLJIT_ENTER_REG_ARG option. */ -#define SLJIT_CALL_REG_ARG 37 +#define SLJIT_CALL_REG_ARG 39 /* The target can be changed during runtime (see: sljit_set_jump_addr). */ #define SLJIT_REWRITABLE_JUMP 0x1000 @@ -1563,19 +1699,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type); -/* Emit a conditional mov instruction which moves source to destination, - if the condition is satisfied. Unlike other arithmetic operations this - instruction does not support memory access. +/* Emit a conditional select instruction which moves src1 to dst_reg, + if the condition is satisfied, or src2_reg to dst_reg otherwise. type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL - type can be combined (or'ed) with SLJIT_32 - dst_reg must be a valid register - src must be a valid register or immediate (SLJIT_IMM) + type can be combined (or'ed) with SLJIT_32 to move 32 bit + register values instead of word sized ones + dst_reg and src2_reg must be valid registers + src1 must be valid operand + + Note: if src1 is a memory operand, its value + might be loaded even if the condition is false. Flags: - (does not modify flags) */ -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw); + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg); + +/* Emit a conditional floating point select instruction which moves + src1 to dst_reg, if the condition is satisfied, or src2_reg to + dst_reg otherwise. 
+ + type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL + type can be combined (or'ed) with SLJIT_32 to move 32 bit + floating point values instead of 64 bit ones + dst_freg and src2_freg must be valid floating point registers + src1 must be valid operand + + Note: if src1 is a memory operand, its value + might be loaded even if the condition is false. + + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg); /* The following flags are used by sljit_emit_mem(), sljit_emit_mem_update(), sljit_emit_fmem(), and sljit_emit_fmem_update(). */ @@ -1590,9 +1749,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil /* Load or stora data from an unaligned (byte aligned) address. */ #define SLJIT_MEM_UNALIGNED 0x000400 /* Load or stora data from a 16 bit aligned address. */ -#define SLJIT_MEM_UNALIGNED_16 0x000800 +#define SLJIT_MEM_ALIGNED_16 0x000800 /* Load or stora data from a 32 bit aligned address. */ -#define SLJIT_MEM_UNALIGNED_32 0x001000 +#define SLJIT_MEM_ALIGNED_32 0x001000 /* The following flags are used by sljit_emit_mem_update(), and sljit_emit_fmem_update(). */ @@ -1610,8 +1769,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil /* The sljit_emit_mem emits instructions for various memory operations: - When SLJIT_MEM_UNALIGNED / SLJIT_MEM_UNALIGNED_16 / - SLJIT_MEM_UNALIGNED_32 is set in type argument: + When SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_16 / + SLJIT_MEM_ALIGNED_32 is set in type argument: Emit instructions for unaligned memory loads or stores. When SLJIT_UNALIGNED is not defined, the only way to access unaligned memory data is using sljit_emit_mem. Otherwise all operations (e.g. 
@@ -1626,8 +1785,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil location specified by the mem/memw arguments, and the end address of this operation is the starting address of the data transfer between the second register and memory. The type argument must - be SLJIT_MOV. The SLJIT_MEM_UNALIGNED* options are allowed for - this operation. + be SLJIT_MOV. The SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_* + options are allowed for this operation. type must be between SLJIT_MOV and SLJIT_MOV_P and can be combined (or'ed) with SLJIT_MEM_* flags @@ -1691,6 +1850,286 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler sljit_s32 freg, sljit_s32 mem, sljit_sw memw); +/* The following options are used by several simd operations. */ + +/* Load data into a simd register, this is the default */ +#define SLJIT_SIMD_LOAD 0x000000 +/* Store data from a simd register */ +#define SLJIT_SIMD_STORE 0x000001 +/* The simd register contains floating point values */ +#define SLJIT_SIMD_FLOAT 0x000400 +/* Tests whether the operation is available */ +#define SLJIT_SIMD_TEST 0x000800 +/* Move data to/from a 64 bit (8 byte) long SIMD register */ +#define SLJIT_SIMD_REG_64 (3 << 12) +/* Move data to/from a 128 bit (16 byte) long SIMD register */ +#define SLJIT_SIMD_REG_128 (4 << 12) +/* Move data to/from a 256 bit (32 byte) long SIMD register */ +#define SLJIT_SIMD_REG_256 (5 << 12) +/* Move data to/from a 512 bit (64 byte) long SIMD register */ +#define SLJIT_SIMD_REG_512 (6 << 12) +/* Element size is 8 bit long (this is the default), usually cannot be combined with SLJIT_SIMD_FLOAT */ +#define SLJIT_SIMD_ELEM_8 (0 << 18) +/* Element size is 16 bit long, usually cannot be combined with SLJIT_SIMD_FLOAT */ +#define SLJIT_SIMD_ELEM_16 (1 << 18) +/* Element size is 32 bit long */ +#define SLJIT_SIMD_ELEM_32 (2 << 18) +/* Element size is 64 bit long */ +#define SLJIT_SIMD_ELEM_64 (3 << 18) +/* Element size is 128 bit long */ 
+#define SLJIT_SIMD_ELEM_128 (4 << 18) +/* Element size is 256 bit long */ +#define SLJIT_SIMD_ELEM_256 (5 << 18) + +/* The following options are used by sljit_emit_simd_mov(). */ + +/* Memory address is unaligned (this is the default) */ +#define SLJIT_SIMD_MEM_UNALIGNED (0 << 24) +/* Memory address is 16 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_16 (1 << 24) +/* Memory address is 32 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_32 (2 << 24) +/* Memory address is 64 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_64 (3 << 24) +/* Memory address is 128 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_128 (4 << 24) +/* Memory address is 256 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_256 (5 << 24) +/* Memory address is 512 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_512 (6 << 24) + +/* Moves data between a simd register and memory. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* and + SLJIT_SIMD_MEM_* options + freg is the source or destination simd register + of the operation + srcdst must be a memory operand or a simd register + + Note: + The alignment and element size must be + less or equal than simd register size. + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw); + +/* Replicates a scalar value to all lanes of a simd + register. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* options + except SLJIT_SIMD_STORE. 
+ freg is the destination simd register of the operation + src is the value which is replicated + + Note: + The src == SLJIT_IMM and srcw == 0 can be used to + clear a register even when SLJIT_SIMD_FLOAT is set. + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw); + +/* The following options are used by sljit_emit_simd_lane_mov(). */ + +/* Clear all bits of the simd register before loading the lane. */ +#define SLJIT_SIMD_LANE_ZERO 0x000002 +/* Sign extend the integer value stored from the lane. */ +#define SLJIT_SIMD_LANE_SIGNED 0x000004 + +/* Moves data between a simd register lane and a register or + memory. If the srcdst argument is a register, it must be + a floating point register when SLJIT_SIMD_FLOAT is specified, + or a general purpose register otherwise. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* options + Further options: + SLJIT_32 - when SLJIT_SIMD_FLOAT is not set + SLJIT_SIMD_LANE_SIGNED - when SLJIT_SIMD_STORE + is set and SLJIT_SIMD_FLOAT is not set + SLJIT_SIMD_LANE_ZERO - when SLJIT_SIMD_LOAD + is specified + freg is the source or destination simd register + of the operation + lane_index is the index of the lane + srcdst is the destination operand for loads, and + source operand for stores + + Note: + The elem size must be lower than register size. + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw); + +/* Replicates a scalar value from a lane to all lanes + of a simd register. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. 
If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* options + except SLJIT_SIMD_STORE. + freg is the destination simd register of the operation + src is the simd register which lane is replicated + src_lane_index is the lane index of the src register + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index); + +/* The following options are used by sljit_emit_simd_load_extend(). */ + +/* Sign extend the integer elements */ +#define SLJIT_SIMD_EXTEND_SIGNED 0x000002 +/* Extend data to 16 bit */ +#define SLJIT_SIMD_EXTEND_16 (1 << 24) +/* Extend data to 32 bit */ +#define SLJIT_SIMD_EXTEND_32 (2 << 24) +/* Extend data to 64 bit */ +#define SLJIT_SIMD_EXTEND_64 (3 << 24) + +/* Extend elements and stores them in a simd register. + The extension operation increases the size of the + elements (e.g. from 16 bit to 64 bit). For integer + values, the extension can be signed or unsigned. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_*, and + SLJIT_SIMD_EXTEND_* options except SLJIT_SIMD_STORE + freg is the destination simd register of the operation + src must be a memory operand or a simd register. + In the latter case, the source elements are stored + in the lower half of the register. + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw); + +/* Extract the highest bit (usually the sign bit) from + each elements of a vector. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. 
+ + type must be a combination of SLJIT_SIMD_* and SLJIT_32 + options except SLJIT_SIMD_LOAD + freg is the source simd register of the operation + dst is the destination operand + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw); + +/* The following options are used by sljit_emit_simd_op2(). */ + +/* Binary 'and' operation */ +#define SLJIT_SIMD_OP2_AND 0x000001 +/* Binary 'or' operation */ +#define SLJIT_SIMD_OP2_OR 0x000002 +/* Binary 'xor' operation */ +#define SLJIT_SIMD_OP2_XOR 0x000003 + +/* Perform simd operations using simd registers. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* and SLJIT_SIMD_OP2_ + options except SLJIT_SIMD_LOAD and SLJIT_SIMD_STORE + dst_freg is the destination register of the operation + src1_freg is the first source register of the operation + src2_freg is the second source register of the operation + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg); + +/* The sljit_emit_atomic_load and sljit_emit_atomic_store operation pair + can perform an atomic read-modify-write operation. First, an unsigned + value must be loaded from memory using sljit_emit_atomic_load. Then, + the updated value must be written back to the same memory location by + sljit_emit_atomic_store. A thread can only perform a single atomic + operation at a time. + + Note: atomic operations are experimental, and not implemented + for all cpus.
+ + The following conditions must be satisfied, or the operation + is undefined: + - the address provided in mem_reg must be divisible by the size of + the value (only naturally aligned updates are supported) + - no memory writes are allowed between the load and store operations + regardless of its target address (currently read operations are + allowed, but this might change in the future) + - the memory operation (op) and the base address (stored in mem_reg) + passed to the load/store operations must be the same (the mem_reg + can be a different register, only its value must be the same) + - a store must always follow a load for the same transaction. + + op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all + signed loads such as SLJIT_MOV32_S16 + dst_reg is the register where the data will be loaded into + mem_reg is the base address of the memory load (it cannot be + SLJIT_SP or a virtual register on x86-32) + + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg); + +/* The sljit_emit_atomic_load and sljit_emit_atomic_store operations + allow performing an atomic read-modify-write operation. See the + description of sljit_emit_atomic_load. + + op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all signed + loads such as SLJIT_MOV32_S16 + src_reg is the register whose value is stored into the memory + mem_reg is the base address of the memory store (it cannot be + SLJIT_SP or a virtual register on x86-32) + temp_reg is a not preserved scratch register, which must be + initialized with the value loaded into the dst_reg during the + corresponding sljit_emit_atomic_load operation, or the operation + is undefined + + Flags: ATOMIC_STORED is set if the operation is successful, + otherwise the memory remains unchanged.
*/ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg); + /* Copies the base address of SLJIT_SP + offset to dst. The offset can represent the starting address of a value in the local data (stack). The offset is not limited by the local data limits, it can be any value. @@ -1708,17 +2147,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value); -/* Store the value of a label (see: sljit_set_put_label) +/* Store the value of a label (see: sljit_set_label / sljit_set_target) Flags: - (does not modify flags) */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); -/* Set the value stored by put_label to this label. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label); - -/* After the code generation the address for label, jump and const instructions - are computed. Since these structures are freed by sljit_free_compiler, the - addresses must be preserved by the user program elsewere. */ -static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->addr; } +/* Provides the address of label, jump and const instructions after sljit_generate_code + is called. The returned value is unspecified before the sljit_generate_code call. + Since these structures are freed by sljit_free_compiler, the addresses must be + preserved by the user program elsewere. 
*/ +static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->u.addr; } static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } @@ -1731,30 +2168,39 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta /* CPU specific functions */ /* --------------------------------------------------------------------- */ -/* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index ( >=0 ) of any SLJIT_R, - SLJIT_S and SLJIT_SP registers. +/* Types for sljit_get_register_index */ - Note: it returns with -1 for virtual registers (only on x86-32). */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg); +/* General purpose (integer) registers. */ +#define SLJIT_GP_REGISTER 0 +/* Floating point registers. */ +#define SLJIT_FLOAT_REGISTER 1 /* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register ( >= 0 ) index of any SLJIT_FR, - and SLJIT_FS register. + It returns with the real machine register index ( >=0 ) of any registers. - Note: the index is always an even number on ARM-32, MIPS. */ + When type is SLJIT_GP_REGISTER: + reg must be an SLJIT_R(i), SLJIT_S(i), or SLJIT_SP register -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg); + When type is SLJIT_FLOAT_REGISTER: + reg must be an SLJIT_FR(i) or SLJIT_FS(i) register + + When type is SLJIT_SIMD_REG_64 / 128 / 256 / 512 : + reg must be an SLJIT_FR(i) or SLJIT_FS(i) register + + Note: it returns with -1 for unknown registers, such as virtual + registers on x86-32 or unsupported simd registers. 
*/ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg); /* Any instruction can be inserted into the instruction stream by sljit_emit_op_custom. It has a similar purpose as inline assembly. The size parameter must match to the instruction size of the target architecture: - x86: 0 < size <= 15. The instruction argument can be byte aligned. + x86: 0 < size <= 15, the instruction argument can be byte aligned. Thumb2: if size == 2, the instruction argument must be 2 byte aligned. if size == 4, the instruction argument must be 4 byte aligned. + s390x: size can be 2, 4, or 6, the instruction argument can be byte aligned. Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1782,6 +2228,98 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags); +/* --------------------------------------------------------------------- */ +/* Serialization functions */ +/* --------------------------------------------------------------------- */ + +/* Label/jump/const enumeration functions. The items in each group + are enumerated in creation order. Serialization / deserialization + preserves this order for each group. For example the fifth label + after deserialization refers to the same machine code location as + the fifth label before the serialization. 
*/ +static SLJIT_INLINE struct sljit_label *sljit_get_first_label(struct sljit_compiler *compiler) { return compiler->labels; } +static SLJIT_INLINE struct sljit_jump *sljit_get_first_jump(struct sljit_compiler *compiler) { return compiler->jumps; } +static SLJIT_INLINE struct sljit_const *sljit_get_first_const(struct sljit_compiler *compiler) { return compiler->consts; } + +static SLJIT_INLINE struct sljit_label *sljit_get_next_label(struct sljit_label *label) { return label->next; } +static SLJIT_INLINE struct sljit_jump *sljit_get_next_jump(struct sljit_jump *jump) { return jump->next; } +static SLJIT_INLINE struct sljit_const *sljit_get_next_const(struct sljit_const *const_) { return const_->next; } + +/* A number starting from 0 is assigned to each label, which +represents its creation index. The first label created by the +compiler has index 0, the second has index 1, the third has +index 2, and so on. The returned value is unspecified after +sljit_generate_code() is called. */ +static SLJIT_INLINE sljit_uw sljit_get_label_index(struct sljit_label *label) { return label->u.index; } + +/* The sljit_jump_has_label() and sljit_jump_has_target() functions +returns non-zero value if a label or target is set for the jump +respectively. Both may return with a zero value. The other two +functions return the value assigned to the jump. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump); +static SLJIT_INLINE struct sljit_label *sljit_jump_get_label(struct sljit_jump *jump) { return jump->u.label; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump); +static SLJIT_INLINE sljit_uw sljit_jump_get_target(struct sljit_jump *jump) { return jump->u.target; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump); + +/* Option bits for sljit_serialize_compiler. */ + +/* When debugging is enabled, the serialized buffer contains +debugging information unless this option is specified. 
*/ +#define SLJIT_SERIALIZE_IGNORE_DEBUG 0x1 + +/* Serialize the internal structure of the compiler into a buffer. +If the serialization is successful, the returned value is a newly +allocated buffer which is allocated by the memory allocator assigned +to the compiler. Otherwise the returned value is NULL. Unlike +sljit_generate_code(), serialization does not modify the internal +state of the compiler, so the code generation can be continued. + + options must be the combination of SLJIT_SERIALIZE_* option bits + size is an output argument, which is set to the byte size of + the result buffer if the operation is successful + +Notes: + - This function is useful for ahead-of-time compilation (AOT). + - The returned buffer must be freed later by the caller. + The SLJIT_FREE() macro is suitable for this purpose: + SLJIT_FREE(returned_buffer, sljit_get_allocator_data(compiler)) + - Memory allocated by sljit_alloc_memory() is not serialized. + - The type of the returned buffer is sljit_uw* to emphasize that + the buffer is word aligned. However, the 'size' output argument + contains the byte size, so this value is always divisible by + sizeof(sljit_uw). +*/ +SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, + sljit_s32 options, sljit_uw *size); + +/* Construct a new compiler instance from a buffer produced by +sljit_serialize_compiler(). If the operation is successful, the new +compiler instance is returned. Otherwise the returned value is NULL. + + buffer points to a word aligned memory data which was + created by sljit_serialize_compiler() + size is the byte size of the buffer + options must be 0 + allocator_data and exec_allocator_data specify an allocator + specific data similar to sljit_create_compiler() + +Notes: + - Labels assigned to jumps are restored with their + corresponding label in the label set created by + the deserializer. Target addresses assigned to + jumps are also restored. 
Uninitialized jumps + remain uninitialized. + - After the deserialization, sljit_generate_code() does + not need to be the next operation on the returned + compiler, the code generation can be continued. + Even sljit_serialize_compiler() can be called again. + - When debugging is enabled, a buffer without debug + information cannot be deserialized. +*/ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size, + sljit_s32 options, void *allocator_data, void *exec_allocator_data); + /* --------------------------------------------------------------------- */ /* Miscellaneous utility functions */ /* --------------------------------------------------------------------- */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_32.c old mode 100644 new mode 100755 index 8175293d2b..0d2fa55968 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_32.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_32.c @@ -34,13 +34,16 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO; -#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO; +#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO; #else #error "Internal error: Unknown ARM architecture" #endif } +/* Length of an instruction word. */ +typedef sljit_u32 sljit_ins; + /* Last register + 1.
*/ #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) @@ -55,27 +58,39 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) #define CONST_POOL_EMPTY 0xffffffff #define ALIGN_INSTRUCTION(ptr) \ - (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1)) + (sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1)) #define MAX_DIFFERENCE(max_diff) \ - (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) + (((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1)) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 }; -static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 +static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = { + 0, + 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, + 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6 }; -#define RM(rm) ((sljit_uw)reg_map[rm]) -#define RM8(rm) ((sljit_uw)reg_map[rm] << 8) -#define RD(rd) ((sljit_uw)reg_map[rd] << 12) -#define RN(rn) ((sljit_uw)reg_map[rn] << 16) +static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = { + 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1 +}; -#define VM(rm) ((sljit_uw)freg_map[rm]) -#define VD(rd) ((sljit_uw)freg_map[rd] << 12) -#define VN(rn) ((sljit_uw)freg_map[rn] << 16) +#define RM(rm) ((sljit_ins)reg_map[rm]) +#define RM8(rm) ((sljit_ins)reg_map[rm] << 8) +#define RD(rd) ((sljit_ins)reg_map[rd] << 12) +#define RN(rn) ((sljit_ins)reg_map[rn] << 16) + +#define VM(vm) (((sljit_ins)freg_map[vm]) | 
((sljit_ins)freg_ebit_map[vm] << 5)) +#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22)) +#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7)) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -92,16 +107,19 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define AND 0xe0000000 #define B 0xea000000 #define BIC 0xe1c00000 +#define BKPT 0xe1200070 #define BL 0xeb000000 #define BLX 0xe12fff30 #define BX 0xe12fff10 #define CLZ 0xe16f0f10 #define CMN 0xe1600000 #define CMP 0xe1400000 -#define BKPT 0xe1200070 #define EOR 0xe0200000 #define LDR 0xe5100000 #define LDR_POST 0xe4100000 +#define LDREX 0xe1900f9f +#define LDREXB 0xe1d00f9f +#define LDREXH 0xe1f00f9f #define MOV 0xe1a00000 #define MUL 0xe0000090 #define MVN 0xe1e00000 @@ -109,51 +127,89 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define ORR 0xe1800000 #define PUSH 0xe92d0000 #define POP 0xe8bd0000 -#define RBIT 0xe6ff0f30 #define REV 0xe6bf0f30 +#define REV16 0xe6bf0fb0 #define RSB 0xe0600000 #define RSC 0xe0e00000 #define SBC 0xe0c00000 #define SMULL 0xe0c00090 #define STR 0xe5000000 +#define STREX 0xe1800f90 +#define STREXB 0xe1c00f90 +#define STREXH 0xe1e00f90 #define SUB 0xe0400000 +#define SXTB 0xe6af0070 +#define SXTH 0xe6bf0070 #define TST 0xe1000000 #define UMULL 0xe0800090 +#define UXTB 0xe6ef0070 +#define UXTH 0xe6ff0070 #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 +#define VAND 0xf2000110 #define VCMP_F32 0xeeb40a40 #define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F32_U32 0xeeb80a40 #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VDUP 0xee800b10 +#define VDUP_s 0xf3b00c00 +#define VEOR 0xf3000110 +#define VLD1 0xf4200000 +#define VLD1_r 0xf4a00c00 +#define VLD1_s 0xf4a00000 #define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define 
VMOV2 0xec400a10 +#define VMOV_i 0xf2800010 +#define VMOV_s 0xee000b10 +#define VMOVN 0xf3b20200 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VORR 0xf2200110 #define VPOP 0xecbd0b00 #define VPUSH 0xed2d0b00 +#define VSHLL 0xf2800a10 +#define VSHR 0xf2800010 +#define VSRA 0xf2800110 +#define VST1 0xf4000000 +#define VST1_s 0xf4800000 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Arm v7 specific instructions. */ -#define MOVW 0xe3000000 #define MOVT 0xe3400000 -#define SXTB 0xe6af0070 -#define SXTH 0xe6bf0070 -#define UXTB 0xe6ef0070 -#define UXTH 0xe6ff0070 +#define MOVW 0xe3000000 +#define RBIT 0xe6ff0f30 #endif -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32) +{ + if (compiler->scratches == -1) + return 0; + + if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) + fr -= SLJIT_F64_SECOND(0); + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) + || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); +} + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) static sljit_s32 push_cpool(struct sljit_compiler *compiler) { /* Pushing the constant pool into the instruction stream. 
*/ - sljit_uw* inst; + sljit_ins* inst; sljit_uw* cpool_ptr; sljit_uw* cpool_end; sljit_s32 i; @@ -163,13 +219,13 @@ static sljit_s32 push_cpool(struct sljit_compiler *compiler) compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1; SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE); - inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!inst); compiler->size++; *inst = 0xff000000 | compiler->cpool_fill; for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) { - inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!inst); compiler->size++; *inst = 0; @@ -178,7 +234,7 @@ static sljit_s32 push_cpool(struct sljit_compiler *compiler) cpool_ptr = compiler->cpool; cpool_end = cpool_ptr + compiler->cpool_fill; while (cpool_ptr < cpool_end) { - inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!inst); compiler->size++; *inst = *cpool_ptr++; @@ -188,23 +244,23 @@ static sljit_s32 push_cpool(struct sljit_compiler *compiler) return SLJIT_SUCCESS; } -static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst) { - sljit_uw* ptr; + sljit_ins* ptr; if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) FAIL_IF(push_cpool(compiler)); - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); compiler->size++; *ptr = inst; return SLJIT_SUCCESS; } -static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal) { - sljit_uw* 
ptr; + sljit_ins* ptr; sljit_uw cpool_index = CPOOL_SIZE; sljit_uw* cpool_ptr; sljit_uw* cpool_end; @@ -240,7 +296,7 @@ static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_u } SLJIT_ASSERT((inst & 0xfff) == 0); - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); compiler->size++; *ptr = inst | cpool_index; @@ -252,14 +308,15 @@ static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_u return SLJIT_SUCCESS; } -static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal) { - sljit_uw* ptr; + sljit_ins* ptr; + if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE)) FAIL_IF(push_cpool(compiler)); SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0); - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); compiler->size++; *ptr = inst | compiler->cpool_fill; @@ -306,7 +363,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ while (last_pc_patch < code_ptr) { /* Data transfer instruction with Rn == r15. 
*/ - if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { + if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) { diff = (sljit_uw)(const_pool - last_pc_patch); ind = (*last_pc_patch) & 0xfff; @@ -396,11 +453,11 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc #else -static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst) { - sljit_uw* ptr; + sljit_ins* ptr; - ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); compiler->size++; *ptr = inst; @@ -422,14 +479,15 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw if (jump->flags & SLJIT_REWRITABLE_JUMP) return 0; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (jump->flags & IS_BL) code_ptr--; +#endif /* SLJIT_CONFIG_ARM_V6 */ if (jump->flags & JUMP_ADDR) diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); } @@ -437,6 +495,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw if (diff & 0x3) return 0; +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (jump->flags & IS_BL) { if (diff <= 0x01ffffff && diff >= -0x02000000) { *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); @@ -450,34 +509,22 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw jump->flags |= PATCH_B; } } -#else - if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset); - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); - } - - /* Branch to Thumb code 
has not been optimized yet. */ - if (diff & 0x3) - return 0; - +#else /* !SLJIT_CONFIG_ARM_V6 */ if (diff <= 0x01ffffff && diff >= -0x02000000) { - code_ptr -= 2; - *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK); + *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (*code_ptr & COND_MASK); jump->flags |= PATCH_B; return 1; } -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ return 0; } -static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) +static void set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_uw *ptr = (sljit_uw *)jump_ptr; - sljit_uw *inst = (sljit_uw *)ptr[0]; - sljit_uw mov_pc = ptr[1]; +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_ins *ptr = (sljit_ins*)jump_ptr; + sljit_ins *inst = (sljit_ins*)ptr[0]; + sljit_ins mov_pc = ptr[1]; sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC); sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2); @@ -492,7 +539,7 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } else { @@ -503,7 +550,7 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut inst[1] = NOP; if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } } @@ -522,14 +569,14 @@ 
static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut if (!bl) { if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } else { inst[1] = BLX | RM(TMP_REG1); if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } } @@ -545,8 +592,8 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); } } -#else - sljit_uw *inst = (sljit_uw*)jump_ptr; +#else /* !SLJIT_CONFIG_ARM_V6 */ + sljit_ins *inst = (sljit_ins*)jump_ptr; SLJIT_UNUSED_ARG(executable_offset); @@ -561,21 +608,21 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ } static sljit_uw get_imm(sljit_uw imm); static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) +static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_uw *ptr = (sljit_uw*)addr; - sljit_uw *inst = (sljit_uw*)ptr[0]; +#if (defined SLJIT_CONFIG_ARM_V6 && 
SLJIT_CONFIG_ARM_V6) + sljit_ins *ptr = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins*)ptr[0]; sljit_uw ldr_literal = ptr[1]; sljit_uw src2; @@ -591,7 +638,7 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } return; @@ -607,7 +654,7 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } return; @@ -627,7 +674,7 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } @@ -641,8 +688,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); } -#else - sljit_uw *inst = (sljit_uw*)addr; +#else /* !SLJIT_CONFIG_ARM_V6 */ + sljit_ins *inst = (sljit_ins*)addr; SLJIT_UNUSED_ARG(executable_offset); @@ -657,90 +704,185 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_off if (flush_cache) { SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); - inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ } +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw 
executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + /* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. */ + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) { + jump->flags |= PATCH_B; + return 0; + } + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + return 0; +#else /* !SLJIT_CONFIG_ARM_V6 */ + return 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ +} + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE - 1; + + if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) { + /* Unit size: instruction. 
*/ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2; + + if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 1 - 1; + } + + size_reduce += JUMP_MAX_SIZE - 1 - total_size; + } else { + /* Real size minus 1. Unit size: instruction. */ + total_size = 1; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (diff <= 0xff + 2 && diff >= -0xff + 2) + total_size = 0; + } + + size_reduce += 1 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +#endif /* SLJIT_CONFIG_ARM_V7 */ + SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { struct sljit_memory_fragment *buf; - sljit_uw *code; - sljit_uw *code_ptr; - sljit_uw *buf_ptr; - sljit_uw *buf_end; - sljit_uw size; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_uw addr; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_sw diff; +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; sljit_uw cpool_current_index; - sljit_uw *cpool_start_address; - sljit_uw *last_pc_patch; + sljit_ins *cpool_start_address; + sljit_ins *last_pc_patch; struct future_patch *first_patch; #endif struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); /* Second code generation pass. 
*/ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - size = compiler->size + (compiler->patches << 1); +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + compiler->size += (compiler->patches << 1); if (compiler->cpool_fill > 0) - size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; -#else - size = compiler->size; -#endif - code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw), compiler->exec_allocator_data); + compiler->size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; +#else /* !SLJIT_CONFIG_ARM_V6 */ + reduce_code_size(compiler); +#endif /* SLJIT_CONFIG_ARM_V6 */ + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) cpool_size = 0; cpool_skip_alignment = 0; cpool_current_index = 0; cpool_start_address = NULL; first_patch = NULL; last_pc_patch = code; -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ code_ptr = code; word_count = 0; - next_addr = 1; executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; - - if (label && label->size == 0) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); - label = label->next; - } + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { - buf_ptr = (sljit_uw*)buf->memory; + buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { - word_count++; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (cpool_size > 0) { if (cpool_skip_alignment > 0) { buf_ptr++; cpool_skip_alignment--; - } - else { + } else { if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { SLJIT_FREE_EXEC(code, 
compiler->exec_allocator_data); compiler->error = SLJIT_ERR_ALLOC_FAILED; @@ -750,64 +892,63 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (++cpool_current_index >= cpool_size) { SLJIT_ASSERT(!first_patch); cpool_size = 0; - if (label && label->size == word_count) { - /* Points after the current instruction. */ - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = (sljit_uw)(code_ptr - code); - label = label->next; - - next_addr = compute_next_addr(label, jump, const_, put_label); - } } } - } - else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { -#endif + } else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { +#endif /* SLJIT_CONFIG_ARM_V6 */ *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); - /* These structures are ordered by their address. */ - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (detect_jump_type(jump, code_ptr, code, executable_offset)) - code_ptr--; - jump->addr = (sljit_uw)code_ptr; -#else - jump->addr = (sljit_uw)(code_ptr - 2); - if (detect_jump_type(jump, code_ptr, code, executable_offset)) - code_ptr -= 2; -#endif - jump = jump->next; - } - if (label && label->size == word_count) { - /* code_ptr can be affected above. 
*/ - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); - label->size = (sljit_uw)((code_ptr + 1) - code); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (const_ && const_->addr == word_count) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + + /* These structures are ordered by their address. */ + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) + code_ptr--; + jump->addr = (sljit_uw)code_ptr; +#else /* !SLJIT_CONFIG_ARM_V6 */ + word_count += jump->flags >> JUMP_SIZE_SHIFT; + jump->addr = (sljit_uw)code_ptr; + if (!detect_jump_type(jump, code_ptr, code, executable_offset)) { + code_ptr[2] = code_ptr[0]; + addr = ((code_ptr[0] & 0xf) << 12); + code_ptr[0] = MOVW | addr; + code_ptr[1] = MOVT | addr; + code_ptr += 2; + } + SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + } else { +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + word_count += jump->flags >> JUMP_SIZE_SHIFT; +#endif /* SLJIT_CONFIG_ARM_V7 */ + addr = (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; + } + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; -#else - const_->addr = (sljit_uw)(code_ptr - 1); -#endif const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; - put_label = put_label->next; - } - 
next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - } - else { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + } else { /* Fortunately, no need to shift. */ cpool_size = *buf_ptr++ & ~PUSH_POOL; SLJIT_ASSERT(cpool_size > 0); @@ -815,30 +956,36 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); if (cpool_current_index > 0) { /* Unconditional branch. */ - *code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); - code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); + *code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index); } cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; cpool_current_index = 0; last_pc_patch = code_ptr; } -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ + word_count++; } while (buf_ptr < buf_end); buf = buf->next; } while (buf); + if (label && label->size == word_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) SLJIT_ASSERT(cpool_size == 0); if (compiler->cpool_fill > 0) { cpool_start_address = ALIGN_INSTRUCTION(code_ptr); cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); if (cpool_current_index > 0) - code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); + code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index); buf_ptr = 
compiler->cpool; buf_end = buf_ptr + compiler->cpool_fill; @@ -858,91 +1005,95 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - buf_ptr = (sljit_uw *)jump->addr; + addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + buf_ptr = (sljit_ins*)jump->addr; - if (jump->flags & PATCH_B) { - addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); - if (!(jump->flags & JUMP_ADDR)) { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000); - *buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff; + if (jump->flags & JUMP_MOV_ADDR) { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + SLJIT_ASSERT((buf_ptr[0] & (sljit_ins)0xffff0000) == 0xe59f0000); +#else /* !SLJIT_CONFIG_ARM_V6 */ + SLJIT_ASSERT((buf_ptr[0] & ~(sljit_ins)0xf000) == 0); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + if (jump->flags & PATCH_B) { + SLJIT_ASSERT((((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) & 0x3) == 0); + diff = ((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) >> 2; + + SLJIT_ASSERT(diff <= 0xff && diff >= -0xff); + + addr = ADD; + if (diff < 0) { + diff = -diff; + addr = SUB; + } + + buf_ptr[0] = addr | (buf_ptr[0] & 0xf000) | RN(TMP_PC) | (1 << 25) | (0xf << 8) | (sljit_ins)(diff & 0xff); + } else { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr; +#else /* !SLJIT_CONFIG_ARM_V6 */ + buf_ptr[1] = MOVT | buf_ptr[0] | ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff); + buf_ptr[0] = MOVW | buf_ptr[0] | ((addr << 4) & 0xf0000) | (addr & 0xfff); +#endif /* SLJIT_CONFIG_ARM_V6 */ } - else { - SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000); - *buf_ptr |= ((jump->u.target - 
addr) >> 2) & 0x00ffffff; - } - } - else if (jump->flags & SLJIT_REWRITABLE_JUMP) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - jump->addr = (sljit_uw)code_ptr; - code_ptr[0] = (sljit_uw)buf_ptr; - code_ptr[1] = *buf_ptr; - inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); - code_ptr += 2; -#else - inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); -#endif - } - else { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + } else if (jump->flags & PATCH_B) { + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + SLJIT_ASSERT(diff <= 0x01ffffff && diff >= -0x02000000); + *buf_ptr |= (diff >> 2) & 0x00ffffff; + } else { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (jump->flags & IS_BL) buf_ptr--; - if (*buf_ptr & (1 << 23)) - buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; - else - buf_ptr += 1; - *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; -#else - inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target, 0); -#endif + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->addr = (sljit_uw)code_ptr; + code_ptr[0] = (sljit_ins)buf_ptr; + code_ptr[1] = *buf_ptr; + set_jump_addr((sljit_uw)code_ptr, executable_offset, addr, 0); + code_ptr += 2; + } else { + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + *buf_ptr = addr; + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + set_jump_addr((sljit_uw)buf_ptr, executable_offset, addr, 0); +#endif /* SLJIT_CONFIG_ARM_V6 */ } + jump = jump->next; } -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) const_ = compiler->consts; while (const_) { - buf_ptr = (sljit_uw*)const_->addr; + buf_ptr = (sljit_ins*)const_->addr; const_->addr = (sljit_uw)code_ptr; - code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[0] = (sljit_ins)buf_ptr; code_ptr[1] = *buf_ptr; if (*buf_ptr & (1 << 23)) buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; else buf_ptr += 1; /* Set the value again (can be a simple constant). 
*/ - inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); + set_const_value((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); code_ptr += 2; const_ = const_->next; } -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ - put_label = compiler->put_labels; - while (put_label) { - addr = put_label->label->addr; - buf_ptr = (sljit_uw*)put_label->addr; - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000); - buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr; -#else - SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT); - buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff); - buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff); -#endif - put_label = put_label->next; - } - - SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size); + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size); compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw); - code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); - code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); SLJIT_CACHE_FLUSH(code, code_ptr); SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); @@ -953,29 +1104,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) { switch (feature_type) { case SLJIT_HAS_FPU: + case SLJIT_HAS_F64_AS_F32_PAIR: #ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; + return (SLJIT_IS_FPU_AVAILABLE) != 0; #else /* Available by default. 
*/ return 1; -#endif +#endif /* SLJIT_IS_FPU_AVAILABLE */ + case SLJIT_HAS_SIMD: +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + return 0; +#else +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + /* Available by default. */ + return 1; +#endif /* SLJIT_IS_FPU_AVAILABLE */ +#endif /* SLJIT_CONFIG_ARM_V6 */ + case SLJIT_SIMD_REGS_ARE_PAIRS: case SLJIT_HAS_CLZ: case SLJIT_HAS_ROT: case SLJIT_HAS_CMOV: -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - case SLJIT_HAS_CTZ: case SLJIT_HAS_REV: case SLJIT_HAS_PREFETCH: -#endif case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: return 1; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) case SLJIT_HAS_CTZ: +#if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6 return 2; -#endif +#else + return 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ default: return 0; @@ -995,17 +1159,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define LOAD_DATA 0x08 /* Flag bits for emit_op. */ -#define ALLOW_IMM 0x10 -#define ALLOW_INV_IMM 0x20 -#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) -#define ALLOW_NEG_IMM 0x40 +#define ALLOW_IMM 0x10 +#define ALLOW_INV_IMM 0x20 +#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ALLOW_NEG_IMM 0x40 +#define ALLOW_DOUBLE_IMM 0x80 /* s/l - store/load (1 bit) u/s - signed/unsigned (1 bit) w/b/h/N - word/byte/half/NOT allowed (2 bit) Storing signed and unsigned values are the same operations. 
*/ -static const sljit_uw data_transfer_insts[16] = { +static const sljit_ins data_transfer_insts[16] = { /* s u w */ 0xe5000000 /* str */, /* s u b */ 0xe5400000 /* strb */, /* s u h */ 0xe10000b0 /* strh */, @@ -1026,7 +1191,7 @@ static const sljit_uw data_transfer_insts[16] = { }; #define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \ - (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_uw)(arg)) + (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg)) /* Normal ldr/str instruction. Type2: ldrsb, ldrh, ldrsh */ @@ -1036,7 +1201,7 @@ static const sljit_uw data_transfer_insts[16] = { (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) #define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ - ((sljit_uw)(opcode) | (sljit_uw)(mode) | VD(dst) | VM(src1) | VN(src2)) + ((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2)) /* Flags for emit_op: */ /* Arguments are swapped. 
*/ @@ -1108,12 +1273,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { - FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); } else { if (fsaveds > 0) - FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1))); if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) - FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); } } @@ -1142,7 +1307,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); else FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) - | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + | (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2))); float_arg_count++; offset += sizeof(sljit_f64) - sizeof(sljit_sw); break; @@ -1151,7 +1316,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); else FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) - | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + | (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2))); float_arg_count++; break; default: @@ -1168,7 +1333,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if 
(offset < 4 * sizeof(sljit_sw)) FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); else - FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw)))); break; } @@ -1221,7 +1386,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); return SLJIT_SUCCESS; } @@ -1250,13 +1415,8 @@ static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) { sljit_uw imm2 = get_imm(imm); - if (imm2 == 0) { - imm2 = (imm & ~(sljit_uw)0x3ff) >> 10; - imm = (imm & 0x3ff) >> 2; - - FAIL_IF(push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xb00 | imm2)); - return push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xf00 | (imm & 0xff)); - } + if (imm2 == 0) + return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm); return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2); } @@ -1279,12 +1439,12 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { - FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); } else { if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) - FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + 
FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); if (fsaveds > 0) - FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1))); } local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; @@ -1335,10 +1495,10 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (frame_size == 0) return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008); if (frame_size > 2 * SSIZE_OF(sw)) - return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)(frame_size - (2 * SSIZE_OF(sw)))); + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw)))); } - FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)local_size)); + FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size)); tmp = 1; } else if (frame_size == 0) { frame_size = (restored_reg == TMP_REG2) ? 
SSIZE_OF(sw) : 2 * SSIZE_OF(sw); @@ -1354,7 +1514,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit local_size += SSIZE_OF(sw); if (frame_size > local_size) - FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_uw)(frame_size - local_size))); + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size))); else if (frame_size < local_size) FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); @@ -1366,11 +1526,11 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (restored_reg != TMP_REG2) frame_size -= SSIZE_OF(sw); - return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)frame_size); + return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size); } tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008; - return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)tmp); + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp); } if (local_size > 0) @@ -1389,7 +1549,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit FAIL_IF(push_inst(compiler, POP | reg_list)); if (frame_size > 0) - return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_uw)frame_size - sizeof(sljit_sw))); + return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw))); if (lr_dst != 0) return SLJIT_SUCCESS; @@ -1437,7 +1597,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl sljit_s32 is_masked; sljit_uw shift_type; - switch (GET_OPCODE(op)) { + switch (op) { case SLJIT_MOV: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if (dst != src2) { @@ -1451,17 +1611,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, 
sl case SLJIT_MOV_U8: case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (flags & MOVE_REG_CONV) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (op == SLJIT_MOV_U8) - return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff); - FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2))); - return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)); -#else + if (flags & MOVE_REG_CONV) return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); -#endif - } - else if (dst != src2) { + + if (dst != src2) { SLJIT_ASSERT(src2 & SRC2_IMM); return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); } @@ -1470,15 +1623,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MOV_U16: case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (flags & MOVE_REG_CONV) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2))); - return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)); -#else + if (flags & MOVE_REG_CONV) return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); -#endif - } - else if (dst != src2) { + + if (dst != src2) { SLJIT_ASSERT(src2 & SRC2_IMM); return push_inst(compiler, ((flags & INV_IMM) ? 
MVN : MOV) | RD(dst) | src2); } @@ -1492,30 +1640,30 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_CTZ: SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0)); FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1))); FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2))); FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32)); return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f); -#else /* !SLJIT_CONFIG_ARM_V5 */ +#else /* !SLJIT_CONFIG_ARM_V6 */ FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2))); return push_inst(compiler, CLZ | RD(dst) | RM(dst)); -#endif /* SLJIT_CONFIG_ARM_V5 */ +#endif /* SLJIT_CONFIG_ARM_V6 */ case SLJIT_REV: -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (8 << 7) | (0 << 5) | RM(src2))); - FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | (1 << 5) | RM(src2))); - FAIL_IF(push_inst(compiler, ORR | RD(dst) | RN(dst) | (16 << 7) | (0 << 5) | RM(TMP_REG1))); - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (16 << 7) | (1 << 5) | RM(TMP_REG1))); - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (8 << 7) | (3 << 5) | RM(TMP_REG1))); - FAIL_IF(push_inst(compiler, ORR | RD(dst) | RN(dst) | (8 << 7) | (0 << 5) | RM(TMP_REG1))); - return push_inst(compiler, ORR | RD(dst) | RN(dst) | (8 << 7) | (1 << 5) | RM(TMP_REG1)); -#else /* !SLJIT_CONFIG_ARM_V5 */ + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); return push_inst(compiler, REV | RD(dst) | RM(src2)); -#endif /* SLJIT_CONFIG_ARM_V5 */ + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == 
TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1); + FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2))); + if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16)) + return SLJIT_SUCCESS; + return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst)); case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); @@ -1546,7 +1694,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(!(src2 & SRC2_IMM)); compiler->status_flags_state = 0; - if (!HAS_FLAGS(op)) + if (!(flags & SET_FLAGS)) return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1))); @@ -1574,19 +1722,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SHL: case SLJIT_MSHL: shift_type = 0; - is_masked = GET_OPCODE(op) == SLJIT_MSHL; + is_masked = op == SLJIT_MSHL; break; case SLJIT_LSHR: case SLJIT_MLSHR: shift_type = 1; - is_masked = GET_OPCODE(op) == SLJIT_MLSHR; + is_masked = op == SLJIT_MLSHR; break; case SLJIT_ASHR: case SLJIT_MASHR: shift_type = 2; - is_masked = GET_OPCODE(op) == SLJIT_MASHR; + is_masked = op == SLJIT_MASHR; break; case SLJIT_ROTL: @@ -1626,7 +1774,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) - | RM8(src2) | (sljit_uw)(shift_type << 5) | 0x10 | RM(src1)); + | RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1)); } #undef EMIT_SHIFT_INS_AND_RETURN @@ -1643,8 +1791,7 @@ static sljit_uw get_imm(sljit_uw imm) if (!(imm & 0xff000000)) { imm <<= 8; rol = 8; - } - else { + } else { imm = (imm << 24) | (imm >> 8); rol = 0; } @@ -1666,22 +1813,19 @@ static sljit_uw get_imm(sljit_uw imm) if (!(imm & 0x00ffffff)) return SRC2_IMM | (imm >> 24) | (rol << 8); - else - return 0; + return 0; } -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) -static 
sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive) +static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2) { sljit_uw mask; sljit_uw imm1; - sljit_uw imm2; sljit_uw rol; /* Step1: Search a zero byte (8 continous zero bit). */ mask = 0xff000000; rol = 8; - while(1) { + while (1) { if (!(imm & mask)) { /* Rol imm by rol. */ imm = (imm << rol) | (imm >> (32 - rol)); @@ -1689,6 +1833,7 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl rol = 4 + (rol >> 1); break; } + rol += 2; mask >>= 2; if (mask & 0x3) { @@ -1718,9 +1863,8 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl if (!(imm & 0xff000000)) { imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); - imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); - } - else if (imm & 0xc0000000) { + *imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); + } else if (imm & 0xc0000000) { imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); imm <<= 8; rol += 4; @@ -1741,11 +1885,10 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl } if (!(imm & 0x00ffffff)) - imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + *imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); else return 0; - } - else { + } else { if (!(imm & 0xf0000000)) { imm <<= 4; rol += 2; @@ -1771,25 +1914,23 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl } if (!(imm & 0x00ffffff)) - imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + *imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); else return 0; } - FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1)); - FAIL_IF(push_inst(compiler, (positive ? 
ORR : BIC) | RD(reg) | RN(reg) | imm2)); - return 1; + return imm1; } -#endif static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm) { sljit_uw tmp; - -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_uw imm1, imm2; +#else /* !SLJIT_CONFIG_ARM_V6 */ if (!(imm & ~(sljit_uw)0xffff)) return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ /* Create imm by 1 inst. */ tmp = get_imm(imm); @@ -1800,19 +1941,28 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, if (tmp) return push_inst(compiler, MVN | RD(reg) | tmp); -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) /* Create imm by 2 inst. */ - FAIL_IF(generate_int(compiler, reg, imm, 1)); - FAIL_IF(generate_int(compiler, reg, ~imm, 0)); + imm1 = compute_imm(imm, &imm2); + if (imm1 != 0) { + FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1)); + return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2); + } + + imm1 = compute_imm(~imm, &imm2); + if (imm1 != 0) { + FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1)); + return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2); + } /* Load integer. 
*/ return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm); -#else +#else /* !SLJIT_CONFIG_ARM_V6 */ FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); if (imm <= 0xffff) return SLJIT_SUCCESS; return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ } static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, @@ -1849,13 +1999,13 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s argw &= 0x3; if (argw != 0 && (mask == 0xff)) { - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7))); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); } /* Bit 25: RM is offset. */ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, - RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_uw)argw << 7))); + RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7))); } arg &= REG_MASK; @@ -1917,10 +2067,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 /* We prefers register and simple consts. */ sljit_s32 dst_reg; - sljit_s32 src1_reg; + sljit_s32 src1_reg = 0; sljit_s32 src2_reg = 0; sljit_s32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; sljit_s32 neg_op = 0; + sljit_u32 imm2; + + op = GET_OPCODE(op); + + if (flags & SET_FLAGS) + inp_flags &= ~ALLOW_DOUBLE_IMM; if (dst == TMP_REG2) flags |= UNUSED_RETURN; @@ -1928,7 +2084,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); if (inp_flags & ALLOW_NEG_IMM) { - switch (GET_OPCODE(op)) { + switch (op) { case SLJIT_ADD: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; neg_op = SLJIT_SUB; @@ -1952,10 +2108,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (!(inp_flags & ALLOW_IMM)) break; - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { src2_reg = (sljit_s32)get_imm((sljit_uw)src2w); if (src2_reg) break; + if (inp_flags & ALLOW_INV_IMM) { src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w); if (src2_reg) { @@ -1963,8 +2120,9 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } } + if (neg_op != 0) { - src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); + src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w); if (src2_reg) { op = neg_op | GET_ALL_FLAGS(op); break; @@ -1972,7 +2130,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } - if (src1 & SLJIT_IMM) { + if (src1 == SLJIT_IMM) { src2_reg = (sljit_s32)get_imm((sljit_uw)src1w); if (src2_reg) { flags |= ARGS_SWAPPED; @@ -1980,6 +2138,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src1w = src2w; break; } + if (inp_flags & ALLOW_INV_IMM) { src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w); if (src2_reg) { @@ -1989,8 +2148,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } } + if (neg_op >= SLJIT_SUB) { /* Note: additive operation (commutative). 
*/ + SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC); + src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); if (src2_reg) { src1 = src2; @@ -2008,8 +2170,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (src1 & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); src1_reg = TMP_REG1; - } - else { + } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) { FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); src1_reg = TMP_REG1; } @@ -2038,8 +2199,62 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src2_reg = src2; else if (src2 & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); - else + else if (!(inp_flags & ALLOW_DOUBLE_IMM)) FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); + else { + SLJIT_ASSERT(!(flags & SET_FLAGS)); + + if (src1_reg == 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; + } + + src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2); + + if (src2_reg == 0 && neg_op != 0) { + src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2); + if (src2_reg != 0) + op = neg_op; + } + + if (src2_reg == 0) { + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)src2w)); + src2_reg = TMP_REG2; + } else { + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); + src1_reg = dst_reg; + src2_reg = (sljit_s32)imm2; + + if (op == SLJIT_ADDC) + op = SLJIT_ADD; + else if (op == SLJIT_SUBC) + op = SLJIT_SUB; + } + } + } + + if (src1_reg == 0) { + SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS)); + + src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2); + + if (src1_reg == 0 && neg_op != 0) { + src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2); + if (src1_reg != 0) + op = neg_op; + } + + if (src1_reg 
== 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; + } else { + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg)); + src1_reg = dst_reg; + src2_reg = (sljit_s32)imm2; + + if (op == SLJIT_ADDC) + op = SLJIT_ADD; + } } FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); @@ -2129,7 +2344,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile SLJIT_ASSERT(saved_reg_list[1] < 8); FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); } - return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8) + return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8) | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; @@ -2159,21 +2374,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_U8: - return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_u16)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: + case SLJIT_REV_U32: + case SLJIT_REV_S32: return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw); } return SLJIT_SUCCESS; @@ -2197,14 +2418,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_ADDC: case SLJIT_SUB: case SLJIT_SUBC: - return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w); case SLJIT_OR: - return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w); case SLJIT_XOR: - inp_flags = ALLOW_IMM; - if (((src1 & SLJIT_IMM) && src1w == -1) || ((src2 & SLJIT_IMM) && src2w == -1)) { + inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM; + if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) { inp_flags |= ALLOW_INV_IMM; } return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w); @@ -2223,7 +2444,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_MASHR: case SLJIT_ROTL: case SLJIT_ROTR: - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { compiler->shift_imm = src2w & 0x1f; return emit_op(compiler, op, 0, 
dst, dstw, TMP_REG1, 0, src1, src1w); } else { @@ -2268,15 +2489,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * ADJUST_LOCAL_OFFSET(src3, src3w); /* Shift type of ROR is 3. */ - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { src3w &= 0x1f; if (src3w == 0) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_uw)(is_left ? 0 : 1) << 5) | ((sljit_uw)src3w << 7))); + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7))); src3w = (src3w ^ 0x1f) + 1; - return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_uw)(is_left ? 1 : 0) << 5) | ((sljit_uw)src3w << 7)); + return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7)); } if (src3 & SLJIT_MEM) { @@ -2289,10 +2510,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * src3 = TMP_REG2; } - FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_uw)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg))); - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_uw)(is_left ? 1 : 0) << 5) | (1 << 7))); + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7))); FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f)); - return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_uw)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1)); + return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 
1 : 0) << 5) | 0x10 | RM(TMP_REG1)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, @@ -2318,12 +2539,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp case SLJIT_PREFETCH_L2: case SLJIT_PREFETCH_L3: case SLJIT_PREFETCH_ONCE: -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) SLJIT_ASSERT(src & SLJIT_MEM); return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1); -#else /* !SLJIT_CONFIG_ARM_V7 */ - return SLJIT_SUCCESS; -#endif /* SLJIT_CONFIG_ARM_V7 */ } return SLJIT_SUCCESS; @@ -2369,16 +2586,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; -} + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return (freg_map[reg] << 1); + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64) + return freg_map[reg]; + + if (type != SLJIT_SIMD_REG_128) + return freg_map[reg] & ~0x1; + + return -1; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -2388,7 +2609,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); - return push_inst(compiler, *(sljit_uw*)instruction); + return push_inst(compiler, *(sljit_ins*)instruction); } /* --------------------------------------------------------------------- */ @@ -2397,18 +2618,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op_custom(struct sljit_compiler *c #define FPU_LOAD (1 << 20) #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ - ((inst) | (sljit_uw)((add) << 23) | RN(base) | VD(freg) | (sljit_uw)(offs)) + ((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs)) static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_uw imm; - sljit_uw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); + sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7))); arg = TMP_REG2; argw = 0; } @@ -2463,14 +2684,12 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); } -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; - op ^= SLJIT_32; - if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1))); else if (src & SLJIT_MEM) { @@ -2482,13 +2701,27 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1))); } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_32, dst_r, TMP_FREG1, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -2506,7 +2739,12 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile } FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0))); - return push_inst(compiler, VMRS); + FAIL_IF(push_inst(compiler, VMRS)); + + if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL) + return SLJIT_SUCCESS; + + return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler 
*compiler, sljit_s32 op, @@ -2587,18 +2825,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil case SLJIT_ADD_F64: FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1))); break; - case SLJIT_SUB_F64: FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1))); break; - case SLJIT_MUL_F64: FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1))); break; - case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1))); break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0))); + FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0)); + return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0)); } if (dst_r == TMP_FREG1) @@ -2609,11 +2849,79 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil #undef EMIT_FPU_DATA_TRANSFER +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if ((u.imm << (32 - 19)) == 0) { + exp = (u.imm >> (23 + 2)) & 0x3f; + + if (exp == 0x20 || exp == 0x1f) { + ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f); + return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst(compiler, 
VMOV | VN(freg) | RD(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) { + exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff; + + if (exp == 0x100 || exp == 0xff) { + ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f); + return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0])); + if (u.imm[0] == u.imm[1]) + return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1])); + return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { sljit_s32 reg2; - sljit_uw inst; + sljit_ins inst; CHECK_ERROR(); CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); @@ -2640,19 +2948,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: case SLJIT_F_EQUAL: case SLJIT_ORDERED_EQUAL: - case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. 
*/ + case SLJIT_UNORDERED_OR_EQUAL: return 0x00000000; case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: case SLJIT_F_NOT_EQUAL: case SLJIT_UNORDERED_OR_NOT_EQUAL: - case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */ + case SLJIT_ORDERED_NOT_EQUAL: return 0x10000000; case SLJIT_CARRY: @@ -2757,31 +3067,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile SLJIT_ASSERT(reg_map[TMP_REG1] != 14); -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (type >= SLJIT_FAST_CALL) PTR_FAIL_IF(prepare_blx(compiler)); + + jump->addr = compiler->size; PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); - if (jump->flags & SLJIT_REWRITABLE_JUMP) { - jump->addr = compiler->size; + if (jump->flags & SLJIT_REWRITABLE_JUMP) compiler->patches++; - } if (type >= SLJIT_FAST_CALL) { jump->flags |= IS_BL; + jump->addr = compiler->size; PTR_FAIL_IF(emit_blx(compiler)); } - - if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - jump->addr = compiler->size; -#else +#else /* !SLJIT_CONFIG_ARM_V6 */ + jump->addr = compiler->size; if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; - PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? 
BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type))); - jump->addr = compiler->size; -#endif + compiler->size += JUMP_MAX_SIZE - 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ return jump; } @@ -2799,7 +3107,7 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit sljit_u8 *offset_ptr = offsets; if (src && FAST_IS_REG(*src)) - src_offset = (sljit_uw)reg_map[*src] * sizeof(sljit_sw); + src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw); arg_types >>= SLJIT_ARG_SHIFT; @@ -2834,7 +3142,7 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit if (is_tail_call) offset += sizeof(sljit_sw); - offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7; *extra_space = offset; @@ -2964,8 +3272,6 @@ static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit #endif /* __SOFTFP__ */ -#undef EMIT_FPU_OPERATION - SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { @@ -3032,7 +3338,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_ASSERT(reg_map[TMP_REG1] != 14); - if (!(src & SLJIT_IMM)) { + if (src != SLJIT_IMM) { if (FAST_IS_REG(src)) { SLJIT_ASSERT(reg_map[src] != 14); return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); @@ -3049,17 +3355,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); jump->u.target = (sljit_uw)srcw; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) if (type >= SLJIT_FAST_CALL) FAIL_IF(prepare_blx(compiler)); - FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG1, TMP_PC, 0), 0)); - if (type >= SLJIT_FAST_CALL) - FAIL_IF(emit_blx(compiler)); -#else - FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); - FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); -#endif jump->addr = compiler->size; + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); + if (type >= SLJIT_FAST_CALL) { + jump->addr = compiler->size; + FAIL_IF(emit_blx(compiler)); + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); + compiler->size += JUMP_MAX_SIZE - 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ return SLJIT_SUCCESS; } @@ -3157,7 +3466,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 type) { sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op); - sljit_uw cc, ins; + sljit_ins cc, ins; CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); @@ -3193,61 +3502,114 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { - sljit_uw cc, tmp; + sljit_ins cc, tmp; CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src2_reg != dst_reg && src1 == dst_reg) { + src1 = src2_reg; + src1w = 0; + src2_reg = dst_reg; + type ^= 0x1; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? 
dst_reg : TMP_REG1, src1, src1w, TMP_REG2)); + + if (src2_reg != dst_reg) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + src1 = TMP_REG1; + src1w = 0; + } + } else if (dst_reg != src2_reg) + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg))); cc = get_cc(compiler, type & ~SLJIT_32); - if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - tmp = get_imm((sljit_uw)srcw); + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + tmp = get_imm((sljit_uw)src1w); if (tmp) return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc); - tmp = get_imm(~(sljit_uw)srcw); + tmp = get_imm(~(sljit_uw)src1w); if (tmp) return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - tmp = (sljit_uw)srcw; + tmp = (sljit_ins)src1w; FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); if (tmp <= 0xffff) return SLJIT_SUCCESS; return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); -#else - FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); - src = TMP_REG1; -#endif +#else /* !SLJIT_CONFIG_ARM_V7 */ + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1 = TMP_REG1; +#endif /* SLJIT_CONFIG_ARM_V7 */ } - return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc); + return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + type ^= SLJIT_32; + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + 
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0))); + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + cc = get_cc(compiler, type & ~SLJIT_32); + return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0)); +} + +#undef EMIT_FPU_OPERATION + static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset) { sljit_s32 arg = *mem; sljit_sw argw = *memw; sljit_uw imm, tmp; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_sw mask = max_offset >= 0xf00 ? 0xfff : 0xff; - sljit_sw sign = max_offset >= 0xf00 ? 0x1000 : 0x100; -#else /* !SLJIT_CONFIG_ARM_V5 */ sljit_sw mask = 0xfff; sljit_sw sign = 0x1000; SLJIT_ASSERT(max_offset >= 0xf00); -#endif /* SLJIT_CONFIG_ARM_V5 */ *mem = TMP_REG1; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { *memw = 0; - return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 7)); + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7)); } arg &= REG_MASK; @@ -3295,158 +3657,6 @@ static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg)); } -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - -static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 reg, - sljit_s32 mem, sljit_sw memw) -{ - sljit_s32 flags, steps, tmp_reg; - sljit_uw add, shift; - - switch (type & 0xff) { - case SLJIT_MOV_U8: - case SLJIT_MOV_S8: - flags = BYTE_SIZE; - if (!(type & SLJIT_MEM_STORE)) - flags |= LOAD_DATA; - if ((type & 0xff) == SLJIT_MOV_S8) - flags |= SIGNED; - - return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1); - - 
case SLJIT_MOV_U16: - FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 1)); - flags = BYTE_SIZE; - steps = 1; - break; - - case SLJIT_MOV_S16: - FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 1)); - flags = BYTE_SIZE | SIGNED; - steps = 1; - break; - - default: - if (type & SLJIT_MEM_UNALIGNED_32) { - flags = WORD_SIZE; - if (!(type & SLJIT_MEM_STORE)) - flags |= LOAD_DATA; - - return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1); - } - - if (!(type & SLJIT_MEM_UNALIGNED_16)) { - FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 3)); - flags = BYTE_SIZE; - steps = 3; - break; - } - - FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 2)); - - add = 1; - if (memw < 0) { - add = 0; - memw = -memw; - } - - tmp_reg = reg; - - if (type & SLJIT_MEM_STORE) { - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, reg, mem, TYPE2_TRANSFER_IMM(memw)))); - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (16 << 7) | (2 << 4))); - } else { - if (reg == mem) { - SLJIT_ASSERT(reg != TMP_REG1); - tmp_reg = TMP_REG1; - } - - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, tmp_reg, mem, TYPE2_TRANSFER_IMM(memw)))); - } - - if (!add) { - memw -= 2; - if (memw <= 0) { - memw = -memw; - add = 1; - } - } else - memw += 2; - - if (type & SLJIT_MEM_STORE) - return push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw))); - - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)))); - return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (16 << 7)); - } - - SLJIT_ASSERT(steps > 0); - - add = 1; - if (memw < 0) { - add = 0; - memw = -memw; - } - - if (type & SLJIT_MEM_STORE) { - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, reg, mem, memw))); - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (8 << 7) | (2 << 4))); - - while (1) { - if (!add) { - memw -= 1; - if (memw == 
0) - add = 1; - } else - memw += 1; - - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, TMP_REG2, mem, memw))); - - if (--steps == 0) - return SLJIT_SUCCESS; - - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(TMP_REG2) | (8 << 7) | (2 << 4))); - } - } - - tmp_reg = reg; - - if (reg == mem) { - SLJIT_ASSERT(reg != TMP_REG1); - tmp_reg = TMP_REG1; - } - - shift = 8; - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, tmp_reg, mem, memw))); - - do { - if (!add) { - memw -= 1; - if (memw == 0) - add = 1; - } else - memw += 1; - - if (steps > 1) { - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, TMP_REG2, mem, memw))); - FAIL_IF(push_inst(compiler, ORR | RD(tmp_reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7))); - shift += 8; - } - } while (--steps != 0); - - flags |= LOAD_DATA; - - if (flags & SIGNED) - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)))); - else - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, memw))); - - return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7)); -} - -#endif /* SLJIT_CONFIG_ARM_V5 */ - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) @@ -3456,30 +3666,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); - if (!(reg & REG_PAIR_MASK)) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - ADJUST_LOCAL_OFFSET(mem, memw); -#endif /* SLJIT_CONFIG_ARM_V5 */ - + if (!(reg & REG_PAIR_MASK)) return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); - } ADJUST_LOCAL_OFFSET(mem, memw); -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16)) { - FAIL_IF(update_mem_addr(compiler, &mem, &memw, 
(type & SLJIT_MEM_UNALIGNED_16) ? 0xfff - 6 : 0xfff - 7)); - - if (!(type & SLJIT_MEM_STORE) && REG_PAIR_FIRST(reg) == (mem & REG_MASK)) { - FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw))); - return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw); - } - - FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw)); - return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw)); - } -#endif /* SLJIT_CONFIG_ARM_V5 */ - FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); flags = WORD_SIZE; @@ -3502,7 +3693,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler * sljit_s32 mem, sljit_sw memw) { sljit_s32 flags; - sljit_uw is_type1_transfer, inst; + sljit_ins is_type1_transfer, inst; CHECK_ERROR(); CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); @@ -3561,7 +3752,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler * if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { memw &= 0x3; - inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_uw)memw << 7)); + inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7)); if (is_type1_transfer) inst |= (1 << 25); @@ -3587,7 +3778,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler * else memw = -memw; - return push_inst(compiler, inst | (sljit_uw)memw); + return push_inst(compiler, inst | (sljit_ins)memw); } if (memw >= 0) @@ -3595,76 +3786,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler * else memw = -memw; - return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw)); + return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, 
sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_s32 max_offset; - sljit_s32 dst; -#endif /* SLJIT_CONFIG_ARM_V5 */ - CHECK_ERROR(); CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); - if (type & SLJIT_MEM_UNALIGNED_32) + if (type & SLJIT_MEM_ALIGNED_32) return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw); -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (type & SLJIT_MEM_STORE) { - FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2))); - - if (type & SLJIT_32) - return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw); - - max_offset = 0xfff - 7; - if (type & SLJIT_MEM_UNALIGNED_16) - max_offset++; - - FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset)); - mem |= SLJIT_MEM; - - FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw)); - - FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2))); - return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw + 4); - } - - max_offset = (type & SLJIT_32) ? 0xfff - 3 : 0xfff - 7; - if (type & SLJIT_MEM_UNALIGNED_16) - max_offset++; - - FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset)); - - dst = TMP_REG1; - - /* Stack offset adjustment is not needed because dst - is not stored on the stack when mem is SLJIT_SP. 
*/ - - if (mem == TMP_REG1) { - dst = SLJIT_R3; - - if (compiler->scratches >= 4) - FAIL_IF(push_inst(compiler, STR | (1 << 21) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8)); - } - - mem |= SLJIT_MEM; - - FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw)); - FAIL_IF(push_inst(compiler, VMOV | VN(freg) | RD(dst))); - - if (!(type & SLJIT_32)) { - FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw + 4)); - FAIL_IF(push_inst(compiler, VMOV | VN(freg) | 0x80 | RD(dst))); - } - - if (dst == SLJIT_R3 && compiler->scratches >= 4) - FAIL_IF(push_inst(compiler, (LDR ^ (0x1 << 24)) | (0x1 << 23) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8)); - return SLJIT_SUCCESS; -#else /* !SLJIT_CONFIG_ARM_V5 */ if (type & SLJIT_MEM_STORE) { FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2))); @@ -3690,11 +3824,714 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1)); FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1)); return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1)); -#endif /* SLJIT_CONFIG_ARM_V5 */ +} + +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_s32 mem = *mem_ptr; + sljit_uw imm; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG1; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 7)); + } + + if (SLJIT_UNLIKELY(!(mem & REG_MASK))) { + *mem_ptr = TMP_REG1; + return load_immediate(compiler, TMP_REG1, (sljit_uw)memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG1; + imm = get_imm((sljit_uw)(memw < 0 ? 
-memw : memw)); + + if (imm != 0) + return push_inst(compiler, ((memw < 0) ? SUB : ADD) | RD(TMP_REG1) | RN(mem) | imm); + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem)); +} + +static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg) +{ + freg += freg & 0x1; + + SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)); + + if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS) + freg--; + + return freg; +} + +#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (!(srcdst & SLJIT_MEM)) { + if (reg_size == 4) + srcdst = simd_get_quad_reg_index(srcdst); + + if (type & SLJIT_SIMD_STORE) + ins = VD(srcdst) | VN(freg) | VM(freg); + else + ins = VD(freg) | VN(srcdst) | VM(srcdst); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst(compiler, VORR | ins); + } + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size > 3) + elem_size = 3; + + ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg) + | (sljit_ins)((reg_size == 3) ? 
(0x7 << 8) : (0xa << 8)); + + SLJIT_ASSERT(reg_size >= alignment); + + if (alignment == 3) + ins |= 0x10; + else if (alignment >= 3) + ins |= 0x20; + + return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf); +} + +static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) +{ + sljit_ins result; + + if (elem_size > 1 && (sljit_u16)value == (value >> 16)) { + elem_size = 1; + value = (sljit_u16)value; + } + + if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { + elem_size = 0; + value = (sljit_u8)value; + } + + switch (elem_size) { + case 0: + SLJIT_ASSERT(value <= 0xff); + result = 0xe00; + break; + case 1: + SLJIT_ASSERT(value <= 0xffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x800; + break; + } + + if ((value & 0xff) == 0) { + value >>= 8; + result |= 0xa00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffff; + result = (1 << 5); + } + break; + default: + SLJIT_ASSERT(value <= 0xffffffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x000; + break; + } + + if ((value & ~(sljit_uw)0xff00) == 0) { + value >>= 8; + result |= 0x200; + break; + } + + if ((value & ~(sljit_uw)0xff0000) == 0) { + value >>= 16; + result |= 0x400; + break; + } + + if ((value & ~(sljit_uw)0xff000000) == 0) { + value >>= 24; + result |= 0x600; + break; + } + + if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { + value >>= 8; + result |= 0xc00; + break; + } + + if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { + value >>= 16; + result |= 0xd00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value = ~value; + result = (1 << 5); + } + break; + } + + return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + 
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imm; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src == SLJIT_IMM && srcw == 0) + return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg)); + + if (SLJIT_UNLIKELY(elem_size == 3)) { + SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw)); + src = freg; + } else if (freg != src) + FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)); + return SLJIT_SUCCESS; + } + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + ins = (sljit_ins)(elem_size << 6); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 5; + + return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + SLJIT_ASSERT(elem_size == 2); + ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]); + } + + if (src == SLJIT_IMM) { + if (elem_size < 2) + srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + imm = simd_get_imm(elem_size, (sljit_uw)srcw); + + if (imm != ~(sljit_ins)0) { + if (reg_size == 4) + imm |= (sljit_ins)1 << 6; + + return push_inst(compiler, VMOV_i | imm | VD(freg)); + } + 
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + src = TMP_REG1; + } + + switch (elem_size) { + case 0: + ins = 1 << 22; + break; + case 1: + ins = 1 << 5; + break; + default: + ins = 0; + break; + } + + if (reg_size == 4) + ins |= (sljit_ins)1 << 21; + + return push_inst(compiler, VDUP | ins | VN(freg) | RD(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 3) ? 
0 : ((sljit_ins)1 << 6); + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3 && !(srcdst & SLJIT_MEM)) { + if (lane_index == 1) + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (srcdst != freg) + FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst(compiler, VMOV_i | VD(freg)); + } + + if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) { + FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg))); + srcdst = TMP_FREG2; + srcdstw = 0; + } + } + + FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg))); + } + + if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) { + lane_index -= (0x8 >> elem_size); + freg += SLJIT_QUAD_OTHER_HALF(freg); + } + + if (srcdst & SLJIT_MEM) { + if (elem_size == 3) + return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw); + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + lane_index = lane_index << elem_size; + ins = (sljit_ins)((elem_size << 10) | (lane_index << 5)); + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 
VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3) { + if (type & SLJIT_SIMD_STORE) + return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg)); + return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst)); + } + + if (type & SLJIT_SIMD_STORE) { + if (freg_ebit_map[freg] == 0) { + if (lane_index == 1) + freg = SLJIT_F64_SECOND(freg); + + return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg)); + } + + FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1))); + return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1)); + } + + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1))); + return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)); + } + + if (srcdst == SLJIT_IMM) { + if (elem_size < 2) + srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw)); + srcdst = TMP_REG1; + } + + if (elem_size == 0) + ins = 0x400000; + else if (elem_size == 1) + ins = 0x20; + else + ins = 0; + + lane_index = lane_index << elem_size; + ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5)); + + if (type & SLJIT_SIMD_STORE) { + ins |= (1 << 20); + + if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED)) + ins |= (1 << 23); + } + + return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if 
((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) { + freg = simd_get_quad_reg_index(freg); + src = simd_get_quad_reg_index(src); + + if (src_lane_index >= (0x8 >> elem_size)) { + src_lane_index -= (0x8 >> elem_size); + src += SLJIT_QUAD_OTHER_HALF(src); + } + } + + if (elem_size == 3) { + if (freg != src) + FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)); + return SLJIT_SUCCESS; + } + + ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_s32 dst_reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + if (reg_size == 4 && elem2_size - elem_size == 1) + FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf)); + else + FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf)); + src = freg; + } else 
if (reg_size == 4) + src = simd_get_quad_reg_index(src); + + if (!(type & SLJIT_SIMD_FLOAT)) { + dst_reg = (reg_size == 4) ? freg : TMP_FREG2; + + do { + FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24)) + | ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src))); + src = dst_reg; + } while (++elem_size < elem2_size); + + if (dst_reg == TMP_FREG2) + return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2)); + return SLJIT_SUCCESS; + } + + /* No SIMD variant, must use VFP instead. */ + SLJIT_ASSERT(reg_size == 4); + + if (freg == src) { + freg += SLJIT_QUAD_OTHER_HALF(freg); + FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20)); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)); + } + + FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src))); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imms; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + imms = 0x243219; + ins = VSHR | (1 << 24) | (0x9 << 16); + break; + case 1: + imms = (reg_size == 4) ? 0x243219 : 0x2231; + ins = VSHR | (1 << 24) | (0x11 << 16); + break; + case 2: + imms = (reg_size == 4) ? 
0x2231 : 0x21; + ins = VSHR | (1 << 24) | (0x21 << 16); + break; + default: + imms = 0x21; + ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7); + break; + } + + if (reg_size == 4) { + freg = simd_get_quad_reg_index(freg); + ins |= (sljit_ins)1 << 6; + } + + SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0); + FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg))); + + if (reg_size == 4 && elem_size > 0) + FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2))); + + ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0; + + while (imms >= 0x100) { + FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2))); + imms >>= 8; + } + + FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2))); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2))); + + if (reg_size == 4 && elem_size == 0) { + SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]); + FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1))); + FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7))); + } + + if (dst_r == TMP_REG1) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + 
return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = VAND; + break; + case SLJIT_SIMD_OP2_OR: + ins = VORR; + break; + case SLJIT_SIMD_OP2_XOR: + ins = VEOR; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) { + dst_freg = simd_get_quad_reg_index(dst_freg); + src1_freg = simd_get_quad_reg_index(src1_freg); + src2_freg = simd_get_quad_reg_index(src2_freg); + ins |= (sljit_ins)1 << 6; + } + + return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg)); } #undef FPU_LOAD +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_u32 ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = LDREXB; + break; + case SLJIT_MOV_U16: + ins = LDREXH; + break; + default: + ins = LDREX; + break; + } + + return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_u32 ins; + + /* temp_reg == mem_reg is undefined so use another temp register */ + SLJIT_UNUSED_ARG(temp_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = STREXB; + break; + case SLJIT_MOV_U16: + ins = STREXH; + break; + default: + ins = STREX; + break; + } + + FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg))); + if (op & SLJIT_SET_ATOMIC_STORED) + return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1)); + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw 
init_value) { struct sljit_const *const_; @@ -3704,58 +4541,62 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; - -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - PTR_FAIL_IF(push_inst_with_unique_literal(compiler, - EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_uw)init_value)); - compiler->patches++; -#else - PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); -#endif - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); PTR_FAIL_IF(!const_); set_const(const_, compiler); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, + EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value)); + compiler->patches++; +#else /* !SLJIT_CONFIG_ARM_V6 */ + PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); compiler->patches++; -#else - PTR_FAIL_IF(emit_imm(compiler, dst_r, 0)); -#endif +#else /* !SLJIT_CONFIG_ARM_V6 */ + PTR_FAIL_IF(push_inst(compiler, RD(dst_r))); +#endif /* SLJIT_CONFIG_ARM_V6 */ - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 1); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + compiler->size += 1; +#endif /* SLJIT_CONFIG_ARM_V7 */ if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - inline_set_jump_addr(addr, executable_offset, new_target, 1); + set_jump_addr(addr, executable_offset, new_target, 1); } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1); + set_const_value(addr, executable_offset, (sljit_uw)new_constant, 1); } diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_64.c old mode 100644 new mode 100755 index c3215742f4..50e4c14dd3 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_64.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_64.c @@ -67,81 +67,125 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { /* Instrucion forms */ /* 
--------------------------------------------------------------------- */ -#define ADC 0x9a000000 -#define ADD 0x8b000000 -#define ADDE 0x8b200000 -#define ADDI 0x91000000 -#define AND 0x8a000000 -#define ANDI 0x92000000 -#define ASRV 0x9ac02800 -#define B 0x14000000 -#define B_CC 0x54000000 -#define BL 0x94000000 -#define BLR 0xd63f0000 -#define BR 0xd61f0000 -#define BRK 0xd4200000 -#define CBZ 0xb4000000 -#define CLZ 0xdac01000 -#define CSEL 0x9a800000 -#define CSINC 0x9a800400 -#define EOR 0xca000000 -#define EORI 0xd2000000 -#define EXTR 0x93c00000 -#define FABS 0x1e60c000 -#define FADD 0x1e602800 -#define FCMP 0x1e602000 -#define FCVT 0x1e224000 -#define FCVTZS 0x9e780000 -#define FDIV 0x1e601800 -#define FMOV 0x1e604000 -#define FMOV_R 0x9e660000 -#define FMUL 0x1e600800 -#define FNEG 0x1e614000 -#define FSUB 0x1e603800 -#define LDRI 0xf9400000 -#define LDRI_F64 0xfd400000 -#define LDRI_POST 0xf8400400 -#define LDP 0xa9400000 -#define LDP_F64 0x6d400000 -#define LDP_POST 0xa8c00000 -#define LDR_PRE 0xf8400c00 -#define LSLV 0x9ac02000 -#define LSRV 0x9ac02400 -#define MADD 0x9b000000 -#define MOVK 0xf2800000 -#define MOVN 0x92800000 -#define MOVZ 0xd2800000 -#define NOP 0xd503201f -#define ORN 0xaa200000 -#define ORR 0xaa000000 -#define ORRI 0xb2000000 -#define RBIT 0xdac00000 -#define RET 0xd65f0000 -#define REV 0xdac00c00 -#define RORV 0x9ac02c00 -#define SBC 0xda000000 -#define SBFM 0x93000000 -#define SCVTF 0x9e620000 -#define SDIV 0x9ac00c00 -#define SMADDL 0x9b200000 -#define SMULH 0x9b403c00 -#define STP 0xa9000000 -#define STP_F64 0x6d000000 -#define STP_PRE 0xa9800000 -#define STRB 0x38206800 -#define STRBI 0x39000000 -#define STRI 0xf9000000 -#define STRI_F64 0xfd000000 -#define STR_FI 0x3d000000 -#define STR_FR 0x3c206800 -#define STUR_FI 0x3c000000 -#define STURBI 0x38000000 -#define SUB 0xcb000000 -#define SUBI 0xd1000000 -#define SUBS 0xeb000000 -#define UBFM 0xd3000000 -#define UDIV 0x9ac00800 -#define UMULH 0x9bc03c00 +#define ADC 0x9a000000 
+#define ADD 0x8b000000 +#define ADDE 0x8b200000 +#define ADDI 0x91000000 +#define ADR 0x10000000 +#define ADRP 0x90000000 +#define AND 0x8a000000 +#define ANDI 0x92000000 +#define AND_v 0x0e201c00 +#define ASRV 0x9ac02800 +#define B 0x14000000 +#define B_CC 0x54000000 +#define BL 0x94000000 +#define BLR 0xd63f0000 +#define BR 0xd61f0000 +#define BRK 0xd4200000 +#define CAS 0xc8a07c00 +#define CASB 0x08a07c00 +#define CASH 0x48a07c00 +#define CBZ 0xb4000000 +#define CCMPI 0xfa400800 +#define CLZ 0xdac01000 +#define CSEL 0x9a800000 +#define CSINC 0x9a800400 +#define DUP_e 0x0e000400 +#define DUP_g 0x0e000c00 +#define EOR 0xca000000 +#define EOR_v 0x2e201c00 +#define EORI 0xd2000000 +#define EXTR 0x93c00000 +#define FABS 0x1e60c000 +#define FADD 0x1e602800 +#define FCMP 0x1e602000 +#define FCSEL 0x1e600c00 +#define FCVT 0x1e224000 +#define FCVTL 0x0e217800 +#define FCVTZS 0x9e780000 +#define FDIV 0x1e601800 +#define FMOV 0x1e604000 +#define FMOV_R 0x9e660000 +#define FMOV_I 0x1e601000 +#define FMUL 0x1e600800 +#define FNEG 0x1e614000 +#define FSUB 0x1e603800 +#define INS 0x4e001c00 +#define INS_e 0x6e000400 +#define LD1 0x0c407000 +#define LD1_s 0x0d400000 +#define LD1R 0x0d40c000 +#define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 +#define LDRI_POST 0xf8400400 +#define LDP 0xa9400000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 +#define LDR_PRE 0xf8400c00 +#define LDXR 0xc85f7c00 +#define LDXRB 0x085f7c00 +#define LDXRH 0x485f7c00 +#define LSLV 0x9ac02000 +#define LSRV 0x9ac02400 +#define MADD 0x9b000000 +#define MOVI 0x0f000400 +#define MOVK 0xf2800000 +#define MOVN 0x92800000 +#define MOVZ 0xd2800000 +#define NOP 0xd503201f +#define ORN 0xaa200000 +#define ORR 0xaa000000 +#define ORR_v 0x0ea01c00 +#define ORRI 0xb2000000 +#define RBIT 0xdac00000 +#define RET 0xd65f0000 +#define REV 0xdac00c00 +#define REV16 0xdac00400 +#define RORV 0x9ac02c00 +#define SBC 0xda000000 +#define SBFM 0x93400000 +#define SCVTF 0x9e620000 +#define SDIV 0x9ac00c00 +#define 
SMADDL 0x9b200000 +#define SMOV 0x0e002c00 +#define SMULH 0x9b403c00 +#define SSHLL 0x0f00a400 +#define ST1 0x0c007000 +#define ST1_s 0x0d000000 +#define STP 0xa9000000 +#define STP_F64 0x6d000000 +#define STP_PRE 0xa9800000 +#define STRB 0x38206800 +#define STRBI 0x39000000 +#define STRI 0xf9000000 +#define STRI_F64 0xfd000000 +#define STR_FI 0x3d000000 +#define STR_FR 0x3c206800 +#define STUR_FI 0x3c000000 +#define STURBI 0x38000000 +#define STXR 0xc8007c00 +#define STXRB 0x8007c00 +#define STXRH 0x48007c00 +#define SUB 0xcb000000 +#define SUBI 0xd1000000 +#define SUBS 0xeb000000 +#define TBZ 0x36000000 +#define UBFM 0xd3400000 +#define UCVTF 0x9e630000 +#define UDIV 0x9ac00800 +#define UMOV 0x0e003c00 +#define UMULH 0x9bc03c00 +#define USHLL 0x2f00a400 +#define USHR 0x2f000400 +#define USRA 0x2f001400 +#define XTN 0x0e212800 + +#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO)) +#define LDR (STRI | (1 << 22)) +#define LDRB (STRBI | (1 << 22)) +#define LDRH (LDRB | (1 << 30)) +#define MOV (ORR | RN(TMP_ZERO)) static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { @@ -160,74 +204,260 @@ static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); } -static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; - if (jump->flags & SLJIT_REWRITABLE_JUMP) { - jump->flags |= PATCH_ABS64; - return 0; - } + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - 
diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; if (jump->flags & IS_COND) { diff += SSIZE_OF(ins); if (diff <= 0xfffff && diff >= -0x100000) { - code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; - jump->addr -= sizeof(sljit_ins); + *(--code_ptr) ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; jump->flags |= PATCH_COND; - return 5; + jump->addr -= sizeof(sljit_ins); + return code_ptr; } diff -= SSIZE_OF(ins); } if (diff <= 0x7ffffff && diff >= -0x8000000) { + if (jump->flags & IS_COND) + code_ptr[-1] -= (4 << 5); jump->flags |= PATCH_B; - return 4; + return code_ptr; } if (target_addr < 0x100000000l) { if (jump->flags & IS_COND) - code_ptr[-5] -= (2 << 5); - code_ptr[-2] = code_ptr[0]; - return 2; + code_ptr[-1] -= (2 << 5); + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; + } + + if (diff <= 0xfffff000l && diff >= -0x100000000l) { + if (jump->flags & IS_COND) + code_ptr[-1] -= (2 << 5); + jump->flags |= PATCH_B32; + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; } if (target_addr < 0x1000000000000l) { if (jump->flags & IS_COND) - code_ptr[-5] -= (1 << 5); + code_ptr[-1] -= (1 << 5); jump->flags |= PATCH_ABS48; - code_ptr[-1] = code_ptr[0]; + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; + } + +exit: + jump->flags |= PATCH_ABS64; + code_ptr[4] = code_ptr[0]; + return code_ptr + 4; +} + +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)4 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if (diff <= 0xfffff && diff >= -0x100000) 
{ + jump->flags |= PATCH_B; + return 0; + } + + if (diff <= 0xfffff000l && diff >= -0x100000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_B32; return 1; } - jump->flags |= PATCH_ABS64; - return 0; -} + if (addr < 0x100000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + return 1; + } -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) -{ - if (max_label < 0x100000000l) { - put_label->flags = 0; + if (addr < 0x1000000000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS48; return 2; } - if (max_label < 0x1000000000000l) { - put_label->flags = 1; - return 1; + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS64; + return 3; +} + +static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_sw addr = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); + sljit_ins* buf_ptr = (sljit_ins*)jump->addr; + sljit_u32 dst; + SLJIT_UNUSED_ARG(executable_offset); + + if (!(jump->flags & JUMP_MOV_ADDR)) { + if (jump->flags & PATCH_COND) { + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); + return; + } + + if (jump->flags & PATCH_B) { + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? 
BL : B) | (sljit_ins)(addr & 0x3ffffff); + return; + } + + dst = (buf_ptr[0] >> 5) & 0x1f; + + if (jump->flags & PATCH_B32) { + addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) & ~(sljit_sw)0xfff; + SLJIT_ASSERT(addr <= 0xfffff000l && addr >= -0x100000000l); + buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst; + buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10); + return; + } + } else { + dst = *buf_ptr; + + if (jump->flags & PATCH_B) { + addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + SLJIT_ASSERT(addr <= 0xfffff && addr >= -0x100000); + buf_ptr[0] = ADR | (((sljit_ins)addr & 0x3) << 29) | (((sljit_ins)(addr >> 2) & 0x7ffff) << 5) | dst; + return; + } + + if (jump->flags & PATCH_B32) { + addr -= ((sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) & ~(sljit_sw)0xfff; + SLJIT_ASSERT(addr <= 0xffffffffl && addr >= -0x100000000l); + buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst; + buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10); + return; + } } - put_label->flags = 2; - return 0; + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); + + buf_ptr[0] = MOVZ | (((sljit_ins)addr & 0xffff) << 5) | dst; + buf_ptr[1] = MOVK | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21) | dst; + if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) + buf_ptr[2] = MOVK | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21) | dst; + + if (jump->flags & PATCH_ABS64) + buf_ptr[3] = MOVK | ((sljit_ins)((sljit_uw)addr >> 48) << 5) | (3 << 21) | dst; +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + 
sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { + if (jump->u.target < 0x100000000l) + total_size = 3; + else if (jump->u.target < 0x1000000000000l) + total_size = 4; + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if ((jump->flags & IS_COND) && (diff + 1) <= (0xfffff / SSIZE_OF(ins)) && (diff + 1) >= (-0x100000 / SSIZE_OF(ins))) + total_size = 0; + else if (diff <= (0x7ffffff / SSIZE_OF(ins)) && diff >= (-0x8000000 / SSIZE_OF(ins))) + total_size = 1; + else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins))) + total_size = 3; + } + } + + size_reduce += JUMP_MAX_SIZE - total_size; + } else { + /* Real size minus 1. Unit size: instruction. 
*/ + total_size = 3; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff <= (0xfffff / SSIZE_OF(ins)) && diff >= (-0x100000 / SSIZE_OF(ins))) + total_size = 0; + else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < 0x100000000l) + total_size = 1; + else if (jump->u.target < 0x1000000000000l) + total_size = 2; + + size_reduce += 3 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; } SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) @@ -238,67 +468,75 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_sw addr; - sljit_u32 dst; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); + + reduce_code_size(compiler); code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); 
SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { - jump->addr = (sljit_uw)(code_ptr - 4); - code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); - jump = jump->next; - } - if (const_ && const_->addr == word_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_COND) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_sw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = (sljit_uw)addr; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)(code_ptr - 3); - code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ 
-308,7 +546,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -316,61 +554,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { - do { - addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); - buf_ptr = (sljit_ins *)jump->addr; - - if (jump->flags & PATCH_B) { - addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); - buf_ptr[0] = ((jump->flags & IS_BL) ? 
BL : B) | (sljit_ins)(addr & 0x3ffffff); - if (jump->flags & IS_COND) - buf_ptr[-1] -= (4 << 5); - break; - } - if (jump->flags & PATCH_COND) { - addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); - buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); - break; - } - - SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); - SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); - - dst = buf_ptr[0] & 0x1f; - buf_ptr[0] = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5); - buf_ptr[1] = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21); - if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) - buf_ptr[2] = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21); - if (jump->flags & PATCH_ABS64) - buf_ptr[3] = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21); - } while (0); + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { - addr = (sljit_sw)put_label->label->addr; - buf_ptr = (sljit_ins*)put_label->addr; - - buf_ptr[0] |= ((sljit_ins)addr & 0xffff) << 5; - buf_ptr[1] |= ((sljit_ins)(addr >> 16) & 0xffff) << 5; - - if (put_label->flags >= 1) - buf_ptr[2] |= ((sljit_ins)(addr >> 32) & 0xffff) << 5; - - if (put_label->flags >= 2) - buf_ptr[3] |= (sljit_ins)(addr >> 48) << 5; - - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); @@ -387,8 +578,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) { switch (feature_type) { case SLJIT_HAS_FPU: + case SLJIT_HAS_SIMD: #ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; + return (SLJIT_IS_FPU_AVAILABLE) != 0; #else /* Available by 
default. */ return 1; @@ -402,6 +594,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: return 1; default: @@ -409,6 +602,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) } } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 2; + } + + return 0; +} + /* --------------------------------------------------------------------- */ /* Core code generator functions. */ /* --------------------------------------------------------------------- */ @@ -642,6 +846,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: + case SLJIT_REV_U32: + case SLJIT_REV_S32: case SLJIT_ADDC: case SLJIT_SUBC: /* No form with immediate operand (except imm 0, which @@ -725,6 +933,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10); } + inv_bits |= inv_bits >> 9; FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits)); goto set_flags; case SLJIT_LSHR: @@ -734,6 +943,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (flags & ARG1_IMM) break; + inv_bits |= inv_bits >> 9; if (op >= SLJIT_ASHR) inv_bits |= 1 << 30; @@ -787,22 +997,22 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (dst == arg2) return SLJIT_SUCCESS; - return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + return push_inst(compiler, MOV | RD(dst) | RM(arg2)); case SLJIT_MOV_U8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == 
TMP_REG1); - return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10)); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); case SLJIT_MOV_S8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (!(flags & INT_OP)) - inv_bits |= 1 << 22; + inv_bits |= inv_bits >> 9; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); case SLJIT_MOV_U16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10)); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); case SLJIT_MOV_S16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (!(flags & INT_OP)) - inv_bits |= 1 << 22; + inv_bits |= inv_bits >> 9; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); case SLJIT_MOV32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); @@ -811,7 +1021,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s /* fallthrough */ case SLJIT_MOV_U32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2)); case SLJIT_MOV_S32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); @@ -826,6 +1036,21 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s SLJIT_ASSERT(arg1 == TMP_REG1); inv_bits |= inv_bits >> 21; return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2)); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2))); + if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16)) + return 
SLJIT_SUCCESS; + inv_bits |= inv_bits >> 9; + return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10)); + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2))); + if (op == SLJIT_REV_U32 || dst == TMP_REG1) + return SLJIT_SUCCESS; + return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10)); case SLJIT_ADD: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); @@ -955,14 +1180,20 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s if (argw <= 0xff && argw >= -0x100) return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); - if (argw >= 0) { - if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) { + if (((argw + 0x100) & 0xfff) <= 0x1ff && argw <= 0xfff0ff && argw >= -0xfff100) { + if (argw >= 0) { + if (argw & 0x100) + argw += 0x1000; + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); + } else { + if (!(argw & 0x100)) + argw -= 0x1000; + + FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10))); + return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); } - } else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) { - FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10))); - return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); } FAIL_IF(load_immediate(compiler, tmp_reg, argw)); @@ -1072,7 +1303,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi while (arg_types) { if 
((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - saved_arg_count) | RN(TMP_ZERO) | RM(tmp))); + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(tmp))); saved_arg_count++; } tmp++; @@ -1279,7 +1510,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c src = TMP_REG1; srcw = 0; } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); src = TMP_REG1; srcw = 0; } @@ -1309,12 +1540,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return push_inst(compiler, NOP); case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(SLJIT_R0))); FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: - FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RD(TMP_REG1) | RM(SLJIT_R0))); FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? 
UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); @@ -1356,33 +1587,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; case SLJIT_MOV_U8: mem_flags = BYTE_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u8)srcw; break; case SLJIT_MOV_S8: mem_flags = BYTE_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s8)srcw; break; case SLJIT_MOV_U16: mem_flags = HALF_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u16)srcw; break; case SLJIT_MOV_S16: mem_flags = HALF_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s16)srcw; break; case SLJIT_MOV_U32: mem_flags = INT_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u32)srcw; break; case SLJIT_MOV_S32: case SLJIT_MOV32: mem_flags = INT_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s32)srcw; break; default: @@ -1391,7 +1622,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; } - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); else if (!(src & SLJIT_MEM)) dst_r = src; @@ -1404,11 +1635,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } flags = HAS_FLAGS(op_flags) ? 
SET_FLAGS : 0; - mem_flags = WORD_SIZE; - if (op_flags & SLJIT_32) { - flags |= INT_OP; + switch (op) { + case SLJIT_REV_U16: + case SLJIT_REV_S16: + mem_flags = HALF_SIZE; + break; + case SLJIT_REV_U32: + case SLJIT_REV_S32: mem_flags = INT_SIZE; + break; + default: + mem_flags = WORD_SIZE; + + if (op_flags & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + break; } if (src & SLJIT_MEM) { @@ -1458,12 +1702,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile src2 = TMP_REG2; } - if (src1 & SLJIT_IMM) + if (src1 == SLJIT_IMM) flags |= ARG1_IMM; else src1w = src1; - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) flags |= ARG2_IMM; else src2w = src2; @@ -1510,7 +1754,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * inv_bits = (op & SLJIT_32) ? W_OP : 0; - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { mask = inv_bits ? 0x1f : 0x3f; src3w &= mask; @@ -1528,7 +1772,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2)); src3 = TMP_REG2; } else if (dst_reg == src3) { - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src3))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src3))); src3 = TMP_REG2; } @@ -1541,7 +1785,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * else imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22))); - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(TMP_REG1) | RN(src2_reg) | imm)); + FAIL_IF(push_inst(compiler, (UBFM ^ (inv_bits | (inv_bits >> 9))) | RD(TMP_REG1) | RN(src2_reg) | imm)); /* Set imm to mask. */ imm = (sljit_ins)(inv_bits ? 
(4 << 10) : ((5 << 10) | (1 << 22))); @@ -1565,7 +1809,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp switch (op) { case SLJIT_FAST_RETURN: if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_LR) | RM(src))); else FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1)); @@ -1607,7 +1851,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp switch (op) { case SLJIT_FAST_ENTER: if (FAST_IS_REG(dst)) - return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); + return push_inst(compiler, MOV | RD(dst) | RM(TMP_LR)); break; case SLJIT_GET_RETURN_ADDRESS: dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; @@ -1621,15 +1865,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; -} + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128) + return -1; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return freg_map[reg]; } @@ -1707,7 +1952,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp inv_bits |= W_OP; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) ? 
INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw)); src = TMP_FREG1; } @@ -1718,34 +1963,59 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; - - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - inv_bits |= W_OP; if (src & SLJIT_MEM) { - emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1); + emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1); src = TMP_REG1; - } else if (src & SLJIT_IMM) { - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - srcw = (sljit_s32)srcw; - + } else if (src == SLJIT_IMM) { FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; } - FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); + FAIL_IF(push_inst(compiler, ins | VD(dst_r) | RN(src))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, ((ins & (1 << 22)) ? WORD_SIZE : INT_SIZE) | STORE, TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? 
(1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { + inv_bits |= W_OP; + + if (src == SLJIT_IMM) + srcw = (sljit_s32)srcw; + } + + return sljit_emit_fop1_conv_f64_from_w(compiler, SCVTF ^ inv_bits, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + inv_bits |= W_OP; + + if (src == SLJIT_IMM) + srcw = (sljit_u32)srcw; + } + + return sljit_emit_fop1_conv_f64_from_w(compiler, UCVTF ^ inv_bits, dst, dstw, src, srcw); +} + static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -1754,16 +2024,22 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_ins inv_bits = (op & SLJIT_32) ? 
(1 << 22) : 0; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } - return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); + FAIL_IF(push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2))); + + if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, CSINC | (0x0 << 12) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(TMP_ZERO))); + return push_inst(compiler, CCMPI | (0x0 << 16) | (0x7 << 12) | RN(TMP_REG1) | 0x4); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -1782,7 +2058,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw); + FAIL_IF(emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw)); src = dst_r; } @@ -1827,11 +2103,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } @@ -1848,6 +2124,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst(compiler, (FMOV_R ^ ((op & SLJIT_32) ? (W_OP | (1 << 22)) : 0)) | VN(src2) | RD(TMP_REG1))); + FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src1))); + FAIL_IF(push_inst(compiler, TBZ | ((op & SLJIT_32) ? 0 : ((sljit_ins)1 << 31)) | (0x1f << 19) | (2 << 5) | RT(TMP_REG1))); + return push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(dst_r)); } if (!(dst & SLJIT_MEM)) @@ -1855,6 +2136,62 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + sljit_u32 exp; + union { + sljit_u32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_ZERO) | VD(freg) | (1 << 16)); + + if ((u.imm << (32 - 19)) == 0) { + exp = (u.imm >> (23 + 2)) & 0x3f; + + if (exp == 0x20 || exp == 0x1f) + return push_inst(compiler, (FMOV_I ^ (1 << 22)) | (sljit_ins)((((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f)) << 13) | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_s32)u.imm)); + return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_REG1) | 
VD(freg) | (1 << 16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + sljit_uw exp; + union { + sljit_uw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, FMOV_R | RN(TMP_ZERO) | VD(freg) | (sljit_ins)1 << 16); + + if ((u.imm << (64 - 48)) == 0) { + exp = (u.imm >> (52 + 2)) & 0x1ff; + + if (exp == 0x100 || exp == 0xff) + return push_inst(compiler, FMOV_I | (sljit_ins)((((u.imm >> 56) & 0x80) | ((u.imm >> 48) & 0x7f)) << 13) | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_sw)u.imm)); + return push_inst(compiler, FMOV_R | RN(TMP_REG1) | VD(freg) | (1 << 16)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { @@ -1864,12 +2201,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) - inst = FMOV_R | RN(reg) | VD(freg) | (sljit_ins)1 << 16; + inst = FMOV_R | RN(reg) | VD(freg) | (1 << 16); else inst = FMOV_R | VN(freg) | RD(reg); if (op & SLJIT_32) - inst ^= ((sljit_ins)1 << 31) | ((sljit_ins)1 << 22); + inst ^= W_OP | (1 << 22); return push_inst(compiler, inst); } @@ -1882,15 +2219,17 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: case SLJIT_F_EQUAL: case SLJIT_ORDERED_EQUAL: - case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_UNORDERED_OR_EQUAL: return 0x1; case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: case SLJIT_F_NOT_EQUAL: case SLJIT_UNORDERED_OR_NOT_EQUAL: - case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
*/ + case SLJIT_ORDERED_NOT_EQUAL: return 0x0; case SLJIT_CARRY: @@ -1996,14 +2335,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type < SLJIT_JUMP) { jump->flags |= IS_COND; PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type))); - } - else if (type >= SLJIT_FAST_CALL) + } else if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; - PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2041,7 +2380,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - else if (src & SLJIT_IMM) { + else if (src == SLJIT_IMM) { PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; } @@ -2052,9 +2391,11 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi inv_bits |= 1 << 24; PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); - PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2065,7 +2406,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - if (!(src & SLJIT_IMM)) { + if (src != SLJIT_IMM) { if (src & SLJIT_MEM) { ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); @@ -2080,8 +2421,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); jump->u.target = (sljit_uw)srcw; - FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); } @@ -2101,7 +2443,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi if (type & SLJIT_CALL_RETURN) { if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); src = TMP_REG1; } @@ -2161,27 +2503,53 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { sljit_ins inv_bits = (type & SLJIT_32) ? 
W_OP : 0; sljit_ins cc; CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); - if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src1 == SLJIT_IMM) { if (type & SLJIT_32) - srcw = (sljit_s32)srcw; - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; - srcw = 0; + src1w = (sljit_s32)src1w; + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG2)); + src1 = TMP_REG1; } cc = get_cc(compiler, type & ~SLJIT_32); + return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src2_reg) | RM(src1)); +} - return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins inv_bits = (type & SLJIT_32) ? (1 << 22) : 0; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? 
INT_SIZE : WORD_SIZE, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + cc = get_cc(compiler, type & ~SLJIT_32); + return push_inst(compiler, (FCSEL ^ inv_bits) | (cc << 12) | VD(dst_freg) | VN(src2_freg) | VM(src1)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, @@ -2338,6 +2706,661 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_ins ins; + sljit_s32 mem = *mem_ptr; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG1; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)); + } + + if (!(mem & REG_MASK)) { + *mem_ptr = TMP_REG1; + return load_immediate(compiler, TMP_REG1, memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG1; + + if (memw < -0xffffff || memw > 0xffffff) { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem)); + } + + ins = ADDI; + + if (memw < 0) { + memw = -memw; + ins = SUBI; + } + + if (memw > 0xfff) { + FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG1) | RN(mem) | ((sljit_ins)(memw >> 12) << 10))); + + memw &= 0xfff; + if (memw == 0) + return SLJIT_SUCCESS; + + mem = TMP_REG1; + } + + return push_inst(compiler, ins | RD(TMP_REG1) | RN(mem) | ((sljit_ins)memw << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + 
CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (!(srcdst & SLJIT_MEM)) { + if (type & SLJIT_SIMD_STORE) + ins = VD(srcdst) | VN(freg) | VM(freg); + else + ins = VD(freg) | VN(srcdst) | VM(srcdst); + + if (reg_size == 4) + ins |= (1 << 30); + + return push_inst(compiler, ORR_v | ins); + } + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size > 3) + elem_size = 3; + + ins = (type & SLJIT_SIMD_STORE) ? ST1 : LD1; + + if (reg_size == 4) + ins |= (1 << 30); + + return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg)); +} + +static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) +{ + sljit_ins result; + + if (elem_size > 2 && (sljit_u32)value == (value >> 32)) { + elem_size = 2; + value = (sljit_u32)value; + } + + if (elem_size == 2 && (sljit_u16)value == (value >> 16)) { + elem_size = 1; + value = (sljit_u16)value; + } + + if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { + elem_size = 0; + value = (sljit_u8)value; + } + + switch (elem_size) { + case 0: + SLJIT_ASSERT(value <= 0xff); + result = 0xe000; + break; + case 1: + SLJIT_ASSERT(value <= 0xffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x8000; + break; + } + + if ((value & 0xff) == 0) { + value >>= 8; + result |= 0xa000; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffff; + result = (1 << 29); + } + break; + case 2: + SLJIT_ASSERT(value <= 0xffffffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x0000; + break; + } + + if ((value & ~(sljit_uw)0xff00) == 0) { + value >>= 8; + result |= 0x2000; + break; + } + + if ((value & ~(sljit_uw)0xff0000) == 0) { 
+ value >>= 16; + result |= 0x4000; + break; + } + + if ((value & ~(sljit_uw)0xff000000) == 0) { + value >>= 24; + result |= 0x6000; + break; + } + + if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { + value >>= 8; + result |= 0xc000; + break; + } + + if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { + value >>= 16; + result |= 0xd000; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffffffff; + result = (1 << 29); + } + break; + default: + return ~(sljit_ins)0; + } + + return (((sljit_ins)value & 0x1f) << 5) | (((sljit_ins)value & 0xe0) << 11) | result; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imm; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + ins = (sljit_ins)elem_size << 10; + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, LD1R | ins | RN(src) | VT(freg)); + } + + ins = (sljit_ins)1 << (16 + elem_size); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg)); + + return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); + } + + if (src == SLJIT_IMM) { + if (elem_size < 3) + srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + imm = simd_get_imm(elem_size, 
(sljit_uw)srcw); + + if (imm != ~(sljit_ins)0) { + imm |= ins & ((sljit_ins)1 << 30); + + return push_inst(compiler, MOVI | imm | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30); + + if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg))); + srcdst = TMP_FREG1; + srcdstw = 0; + } + + FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg))); + } + + if (srcdst & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size == 3) + ins = 0x8400; + else if (elem_size == 0) + ins = 0; + else + ins = (sljit_ins)0x2000 << elem_size; + + lane_index = lane_index << elem_size; + ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10)); + + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 
ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (type & SLJIT_SIMD_STORE) + ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg); + else + ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst); + + return push_inst(compiler, ins); + } + + if (srcdst == SLJIT_IMM) { + if (elem_size < 3) + srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw)); + srcdst = TMP_REG1; + } + + if (type & SLJIT_SIMD_STORE) { + ins = RD(srcdst) | VN(freg); + + if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) { + ins |= SMOV; + + if (!(type & SLJIT_32)) + ins |= (sljit_ins)1 << 30; + } else + ins |= UMOV; + } else + ins = INS | VD(freg) | RN(srcdst); + + if (elem_size == 3) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, ins | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size))); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + ins = (((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 
type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + if (reg_size == 4 && elem2_size - elem_size == 1) + FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg))); + else + FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg))); + src = freg; + } + + if (type & SLJIT_SIMD_FLOAT) { + SLJIT_ASSERT(reg_size == 4); + return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src)); + } + + do { + FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 
SSHLL : USHLL) + | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src))); + src = freg; + } while (++elem_size < elem2_size); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imms; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + imms = 0x643219; + ins = USHR | (0x9 << 16); + break; + case 1: + imms = (reg_size == 4) ? 0x643219 : 0x6231; + ins = USHR | (0x11 << 16); + break; + case 2: + imms = (reg_size == 4) ? 0x6231 : 0x61; + ins = USHR | (0x21 << 16); + break; + default: + imms = 0x61; + ins = USHR | (0x41 << 16); + break; + } + + if (reg_size == 4) + ins |= (1 << 30); + + FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg))); + + if (reg_size == 4 && elem_size > 0) + FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1))); + + if (imms >= 0x100) { + ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0; + + do { + FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); + imms >>= 8; + } while (imms >= 0x100); + } + + FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + ins = (0x1 << 16); + + if (reg_size == 4 && elem_size == 0) { + FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1))); + ins = (0x2 << 16); + } + + FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1))); + + if (dst_r == TMP_REG1) + return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = AND_v; + break; + case SLJIT_SIMD_OP2_OR: + ins = ORR_v; + break; + case SLJIT_SIMD_OP2_XOR: + ins = EOR_v; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + +#ifdef __ARM_FEATURE_ATOMICS + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = LDR ^ (1 << 30); + break; + case SLJIT_MOV_U16: + ins = LDRH; + break; + case SLJIT_MOV_U8: + ins = LDRB; + break; + default: + ins = LDR; + break; + } +#else /* !__ARM_FEATURE_ATOMICS */ + switch 
(GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = LDXR ^ (1 << 30); + break; + case SLJIT_MOV_U8: + ins = LDXRB; + break; + case SLJIT_MOV_U16: + ins = LDXRH; + break; + default: + ins = LDXR; + break; + } +#endif /* ARM_FEATURE_ATOMICS */ + return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins; + sljit_s32 tmp = temp_reg; + sljit_ins cmp = 0; + sljit_ins inv_bits = W_OP; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + +#ifdef __ARM_FEATURE_ATOMICS + if (op & SLJIT_SET_ATOMIC_STORED) + cmp = (SUBS ^ W_OP) | RD(TMP_ZERO); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = CAS ^ (1 << 30); + break; + case SLJIT_MOV_U16: + ins = CASH; + break; + case SLJIT_MOV_U8: + ins = CASB; + break; + default: + ins = CAS; + inv_bits = 0; + if (cmp) + cmp ^= W_OP; + break; + } + + if (cmp) { + FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1))); + tmp = TMP_REG1; + } + FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg))); + if (!cmp) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg))); + FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp))); + return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO)); +#else /* !__ARM_FEATURE_ATOMICS */ + SLJIT_UNUSED_ARG(tmp); + SLJIT_UNUSED_ARG(inv_bits); + + if (op & SLJIT_SET_ATOMIC_STORED) + cmp = (SUBI ^ W_OP) | (1 << 29); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = STXR ^ (1 << 30); + break; + case SLJIT_MOV_U8: + ins = STXRB; + break; + case SLJIT_MOV_U16: + ins = STXRH; + break; + default: + ins = STXR; + break; + } + + FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg))); + return cmp ? 
push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS; +#endif /* __ARM_FEATURE_ATOMICS */ +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { sljit_s32 dst_reg; @@ -2399,26 +3422,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, 0)); + PTR_FAIL_IF(push_inst(compiler, RD(dst_r))); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 1); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 1); + + compiler->size += 3; if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_T2_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_T2_32.c old mode 100644 new mode 100755 index 73dd7f99d5..4d74f4803d --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_T2_32.c +++ 
b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_T2_32.c @@ -49,8 +49,20 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 }; -static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 +static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = { + 0, + 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, + 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6 +}; + +static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = { + 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1 }; #define COPY_BITS(src, from, to, bits) \ @@ -75,13 +87,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) /* Thumb32 encodings. */ -#define RD4(rd) ((sljit_ins)reg_map[rd] << 8) -#define RN4(rn) ((sljit_ins)reg_map[rn] << 16) #define RM4(rm) ((sljit_ins)reg_map[rm]) +#define RD4(rd) ((sljit_ins)reg_map[rd] << 8) #define RT4(rt) ((sljit_ins)reg_map[rt] << 12) -#define DD4(dd) ((sljit_ins)freg_map[dd] << 12) -#define DN4(dn) ((sljit_ins)freg_map[dn] << 16) -#define DM4(dm) ((sljit_ins)freg_map[dm]) +#define RN4(rn) ((sljit_ins)reg_map[rn] << 16) + +#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5)) +#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22)) +#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7)) + #define IMM5(imm) \ (COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6)) #define IMM12(imm) \ @@ -128,9 +142,12 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define EORS 0x4040 #define EOR_W 0xea800000 #define IT 0xbf00 -#define LDR_SP 0x9800 #define LDR 0xf8d00000 +#define LDR_SP 0x9800 
#define LDRD 0xe9500000 +#define LDREX 0xe8500f00 +#define LDREXB 0xe8d00f4f +#define LDREXH 0xe8d00f5f #define LDRI 0xf8500800 #define LSLS 0x4080 #define LSLSI 0x0000 @@ -162,6 +179,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define PUSH_W 0xe92d0000 #define REV 0xba00 #define REV_W 0xfa90f080 +#define REV16 0xba40 +#define REV16_W 0xfa90f090 #define RBIT 0xfa90f0a0 #define RORS 0x41c0 #define ROR_W 0xfa60f000 @@ -173,8 +192,11 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SBC_W 0xeb600000 #define SDIV 0xfb90f0f0 #define SMULL 0xfb800000 -#define STRD 0xe9400000 #define STR_SP 0x9000 +#define STRD 0xe9400000 +#define STREX 0xe8400000 +#define STREXB 0xe8c00f40 +#define STREXH 0xe8c00f50 #define SUBS 0x1a00 #define SUBSI3 0x1e00 #define SUBSI8 0x3800 @@ -197,23 +219,57 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define UXTH_W 0xfa1ff080 #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 +#define VAND 0xef000110 #define VCMP_F32 0xeeb40a40 #define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F32_U32 0xeeb80a40 #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VDUP 0xee800b10 +#define VDUP_s 0xffb00c00 +#define VEOR 0xff000110 +#define VLD1 0xf9200000 +#define VLD1_r 0xf9a00c00 +#define VLD1_s 0xf9a00000 #define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 0xec400a10 +#define VMOV_i 0xef800010 +#define VMOV_s 0xee000b10 +#define VMOVN 0xffb20200 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VORR 0xef200110 #define VPOP 0xecbd0b00 #define VPUSH 0xed2d0b00 +#define VSHLL 0xef800a10 +#define VSHR 0xef800010 +#define VSRA 0xef800110 +#define VST1 0xf9000000 +#define VST1_s 0xf9800000 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 
function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32) +{ + if (compiler->scratches == -1) + return 0; + + if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) + fr -= SLJIT_F64_SECOND(0); + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) + || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); +} + +#endif /* SLJIT_ARGUMENT_CHECKS */ + static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) { sljit_u16 *ptr; @@ -236,7 +292,7 @@ static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +static sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); @@ -244,134 +300,259 @@ static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); } -static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) +/* Dst must be in bits[11-8] */ +static void set_imm32_const(sljit_u16 *inst, sljit_ins dst, sljit_uw new_imm) { - sljit_ins dst = inst[1] & 0x0f00; - SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1)); inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff)); inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1)); inst[3] = (sljit_u16)(dst | 
COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16)); } -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) +static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) +{ + sljit_ins dst = inst[1] & 0x0f00; + SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); + set_imm32_const(inst, dst, new_imm); +} + +static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) { sljit_sw diff; if (jump->flags & SLJIT_REWRITABLE_JUMP) - return 0; + goto exit; if (jump->flags & JUMP_ADDR) { /* Branch to ARM code is not optimized yet. */ if (!(jump->u.target & 0x1)) - return 0; - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1; - } - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1; + goto exit; + diff = (sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset; + } else { + SLJIT_ASSERT(jump->u.label != NULL); + diff = (sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2); } if (jump->flags & IS_COND) { SLJIT_ASSERT(!(jump->flags & IS_BL)); - if (diff <= 127 && diff >= -128) { + /* Size of the prefix IT instruction. */ + diff += SSIZE_OF(u16); + if (diff <= 0xff && diff >= -0x100) { jump->flags |= PATCH_TYPE1; - return 5; + jump->addr = (sljit_uw)(code_ptr - 1); + return code_ptr - 1; } - if (diff <= 524287 && diff >= -524288) { + if (diff <= 0xfffff && diff >= -0x100000) { jump->flags |= PATCH_TYPE2; - return 4; + jump->addr = (sljit_uw)(code_ptr - 1); + return code_ptr; } - /* +1 comes from the prefix IT instruction. 
*/ - diff--; - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= PATCH_TYPE3; - return 3; - } - } - else if (jump->flags & IS_BL) { - if (diff <= 8388607 && diff >= -8388608) { - jump->flags |= PATCH_BL; - return 3; - } - } - else { - if (diff <= 1023 && diff >= -1024) { - jump->flags |= PATCH_TYPE4; - return 4; - } - if (diff <= 8388607 && diff >= -8388608) { + diff -= SSIZE_OF(u16); + } else if (jump->flags & IS_BL) { + /* Branch and link. */ + if (diff <= 0xffffff && diff >= -0x1000000) { jump->flags |= PATCH_TYPE5; - return 3; + return code_ptr + 1; } + goto exit; + } else if (diff <= 0x7ff && diff >= -0x800) { + jump->flags |= PATCH_TYPE3; + return code_ptr; } - return 0; + if (diff <= 0xffffff && diff >= -0x1000000) { + jump->flags |= PATCH_TYPE4; + return code_ptr + 1; + } + +exit: + code_ptr[4] = code_ptr[0]; + + if (jump->flags & IS_COND) { + code_ptr[3] = code_ptr[-1]; + jump->addr = (sljit_uw)(code_ptr - 1); + } + + return code_ptr + 4; } -static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + /* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */ + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + /* Note: ADR with imm8 does not set the last bit (Thumb2 flag). 
*/ + + if (diff <= 0xffd + 2 * SSIZE_OF(u16) && diff >= -0xfff + 2 * SSIZE_OF(u16)) { + jump->flags |= PATCH_TYPE6; + return 1; + } + + return 3; +} + +static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) { sljit_s32 type = (jump->flags >> 4) & 0xf; + sljit_u16 *jump_inst = (sljit_u16*)jump->addr; sljit_sw diff; - sljit_u16 *jump_inst; - sljit_s32 s, j1, j2; + sljit_ins ins; + + diff = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); if (SLJIT_UNLIKELY(type == 0)) { - modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); + ins = (jump->flags & JUMP_MOV_ADDR) ? *jump_inst : RDN3(TMP_REG1); + set_imm32_const((sljit_u16*)jump->addr, ins, (sljit_uw)diff); return; } - if (jump->flags & JUMP_ADDR) { - SLJIT_ASSERT(jump->u.target & 0x1); - diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + if (SLJIT_UNLIKELY(type == 6)) { + SLJIT_ASSERT(jump->flags & JUMP_MOV_ADDR); + diff -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_inst + 2, executable_offset) & ~(sljit_sw)0x3; + + SLJIT_ASSERT(diff <= 0xfff && diff >= -0xfff); + + ins = ADDWI >> 16; + if (diff <= 0) { + diff = -diff; + ins = SUBWI >> 16; + } + + jump_inst[1] = (sljit_u16)(jump_inst[0] | COPY_BITS(diff, 8, 12, 3) | (diff & 0xff)); + jump_inst[0] = (sljit_u16)(ins | 0xf | COPY_BITS(diff, 11, 10, 1)); + return; } - else { - SLJIT_ASSERT(jump->u.label->addr & 0x1); - diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; - } - jump_inst = (sljit_u16*)jump->addr; + + SLJIT_ASSERT((diff & 0x1) != 0 && !(jump->flags & JUMP_MOV_ADDR)); + diff = (diff - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; switch (type) { case 1: /* Encoding T1 of 'B' instruction */ - SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); + SLJIT_ASSERT(diff <= 0x7f && 
diff >= -0x80 && (jump->flags & IS_COND)); jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff)); return; case 2: /* Encoding T3 of 'B' instruction */ - SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); + SLJIT_ASSERT(diff <= 0x7ffff && diff >= -0x80000 && (jump->flags & IS_COND)); jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1)); jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff)); return; case 3: - SLJIT_ASSERT(jump->flags & IS_COND); - *jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8); - diff--; - type = 5; - break; - case 4: /* Encoding T2 of 'B' instruction */ - SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); + SLJIT_ASSERT(diff <= 0x3ff && diff >= -0x400 && !(jump->flags & IS_COND)); jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff)); return; } - SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608); + SLJIT_ASSERT(diff <= 0x7fffff && diff >= -0x800000); - /* Really complex instruction form for branches. */ - s = (diff >> 23) & 0x1; - j1 = (~(diff >> 22) ^ s) & 0x1; - j2 = (~(diff >> 21) ^ s) & 0x1; - jump_inst[0] = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10)); - jump_inst[1] = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff)); + /* Really complex instruction form for branches. Negate with sign bit. */ + diff ^= ((diff >> 2) & 0x600000) ^ 0x600000; + + jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(diff, 11, 0, 10) | COPY_BITS(diff, 23, 10, 1)); + jump_inst[1] = (sljit_u16)((diff & 0x7ff) | COPY_BITS(diff, 22, 13, 1) | COPY_BITS(diff, 21, 11, 1)); + + SLJIT_ASSERT(type == 4 || type == 5); /* The others have a common form. 
*/ - if (type == 5) /* Encoding T4 of 'B' instruction */ + if (type == 4) /* Encoding T4 of 'B' instruction */ jump_inst[1] |= 0x9000; - else if (type == 6) /* Encoding T1 of 'BL' instruction */ + else /* Encoding T1 of 'BL' instruction */ jump_inst[1] |= 0xd000; - else - SLJIT_UNREACHABLE(); +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2; + + if (jump->flags & IS_COND) { + diff++; + + if (diff <= (0xff / SSIZE_OF(u16)) && diff >= (-0x100 / SSIZE_OF(u16))) + total_size = 0; + else if (diff <= (0xfffff / SSIZE_OF(u16)) && diff >= (-0x100000 / SSIZE_OF(u16))) + total_size = 1; + diff--; + } else if (!(jump->flags & IS_BL) && diff <= (0x7ff / SSIZE_OF(u16)) && diff >= (-0x800 / SSIZE_OF(u16))) + total_size = 1; + + if (total_size == JUMP_MAX_SIZE && diff <= (0xffffff / SSIZE_OF(u16)) && diff >= (-0x1000000 / SSIZE_OF(u16))) + total_size = 2; + } + + size_reduce += JUMP_MAX_SIZE - total_size; + } else { + /* Real size minus 1. 
Unit size: instruction. */ + total_size = 3; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16))) + total_size = 1; + } + + size_reduce += 3 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; } SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) @@ -382,64 +563,76 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u16 *buf_ptr; sljit_u16 *buf_end; sljit_uw half_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw addr; sljit_sw executable_offset; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); + + reduce_code_size(compiler); code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16), compiler->exec_allocator_data); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; half_count = 0; - next_addr = 0; executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_u16*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 1); do { *code_ptr = *buf_ptr++; - if (next_addr == half_count) { + if (next_min_addr == half_count) { SLJIT_ASSERT(!label || label->size >= half_count); SLJIT_ASSERT(!jump || jump->addr >= half_count); SLJIT_ASSERT(!const_ || const_->addr >= half_count); - SLJIT_ASSERT(!put_label || put_label->addr >= half_count); /* These structures are ordered by their address. 
*/ - if (label && label->size == half_count) { - label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + if (next_min_addr == next_label_size) { + label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == half_count) { - jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8); - code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); - jump = jump->next; - } - if (const_ && const_->addr == half_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr < + ((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16)); + } else { + half_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_sw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = (sljit_uw)addr; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == half_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; half_count++; @@ -449,7 +642,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == half_count) { - label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + 
label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -457,21 +650,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { - set_jump_instruction(jump, executable_offset); + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { - modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr); - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16); @@ -490,13 +676,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) { switch (feature_type) { case SLJIT_HAS_FPU: + case SLJIT_HAS_F64_AS_F32_PAIR: + case SLJIT_HAS_SIMD: #ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; + return (SLJIT_IS_FPU_AVAILABLE) != 0; #else /* Available by default. */ return 1; #endif + case SLJIT_SIMD_REGS_ARE_PAIRS: case SLJIT_HAS_CLZ: case SLJIT_HAS_CTZ: case SLJIT_HAS_REV: @@ -505,6 +694,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: return 1; default: @@ -621,6 +811,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: + case SLJIT_REV_U32: + case SLJIT_REV_S32: case SLJIT_MUL: /* No form with immediate operand. 
*/ break; @@ -657,9 +851,14 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; case SLJIT_ADDC: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + if (flags & ARG2_IMM) { + imm = get_imm(~imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + } break; case SLJIT_SUB: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; @@ -712,9 +911,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; - imm = get_imm(imm); + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(~imm); if (imm != INVALID_IMM) - return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; case SLJIT_AND: imm2 = get_imm(imm); @@ -793,8 +995,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s imm = arg2; arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm)); - } - else { + } else { imm = arg1; arg1 = (arg2 == TMP_REG1) ? 
TMP_REG2 : TMP_REG1; FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm)); @@ -842,9 +1043,28 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2))); return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst)); case SLJIT_REV: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(arg1 == TMP_REG2); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, REV | RD3(dst) | RN3(arg2)); return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2)); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2); + + flags &= 0xffff; + if (IS_2_LO_REGS(dst, arg2)) + FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2))); + else + FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2))); + + if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16)) + return SLJIT_SUCCESS; + + if (reg_map[dst] <= 7) + return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst)); + return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst)); case SLJIT_ADD: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; if (IS_3_LO_REGS(dst, arg1, arg2)) @@ -895,32 +1115,36 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MSHL: - FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? 
TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_SHL: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MLSHR: - FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_LSHR: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MASHR: - FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_ASHR: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ROTL: - FAIL_IF(push_inst32(compiler, RSB_WI | RD4(TMP_REG2) | RN4(arg2) | 0)); - arg2 = TMP_REG2; + reg = (arg2 == TMP_REG1) ? 
TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, RSB_WI | RD4(reg) | RN4(arg2) | 0)); + arg2 = (sljit_uw)reg; /* fallthrough */ case SLJIT_ROTR: if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) @@ -1180,12 +1404,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { - FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); } else { if (fsaveds > 0) - FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) - FAIL_IF(push_inst32(compiler, VPUSH | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); } } @@ -1262,17 +1486,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: if (offset != old_offset) - *remap_ptr++ = VMOV_F32 | SLJIT_32 | DD4(offset) | DM4(old_offset); + *remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset); old_offset++; offset++; break; case SLJIT_ARG_TYPE_F32: if (f32_offset != 0) { - *remap_ptr++ = VMOV_F32 | 0x20 | DD4(offset) | DM4(f32_offset); + *remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset); f32_offset = 0; } else { if (offset != old_offset) - *remap_ptr++ = VMOV_F32 | DD4(offset) | DM4(old_offset); + *remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset); f32_offset = old_offset; old_offset++; } @@ -1406,12 +1630,12 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit FAIL_IF(emit_add_sp(compiler, 
(sljit_uw)local_size)); if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { - FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); } else { if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) - FAIL_IF(push_inst32(compiler, VPOP | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); if (fsaveds > 0) - FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); } local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; @@ -1710,22 +1934,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; case SLJIT_MOV_U8: flags = BYTE_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u8)srcw; break; case SLJIT_MOV_S8: flags = BYTE_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s8)srcw; break; case SLJIT_MOV_U16: flags = HALF_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u16)srcw; break; case SLJIT_MOV_S16: flags = HALF_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s16)srcw; break; default: @@ -1734,7 +1958,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; } - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw)); else if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1)); @@ -1750,10 +1974,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op_mem(compiler, flags | STORE, dst_r, 
dst, dstw, TMP_REG2); } + SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0); flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) + flags |= HALF_SIZE; + if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } @@ -1783,7 +2011,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (dst == TMP_REG1) flags |= UNUSED_RETURN; - if (src1 & SLJIT_IMM) + if (src1 == SLJIT_IMM) flags |= ARG1_IMM; else if (src1 & SLJIT_MEM) { emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1); @@ -1792,7 +2020,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile else src1w = src1; - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) flags |= ARG2_IMM; else if (src2 & SLJIT_MEM) { src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1; @@ -1841,7 +2069,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * ADJUST_LOCAL_OFFSET(src3, src3w); - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { src3w &= 0x1f; if (src3w == 0) @@ -1946,16 +2174,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; -} + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return (freg_map[reg] << 1); + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64) + return freg_map[reg]; + + if 
(type != SLJIT_SIMD_REG_128) + return freg_map[reg] & ~0x1; + + return -1; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1991,35 +2223,35 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, if ((arg & REG_MASK) && (argw & 0x3) == 0) { if (!(argw & ~0x3fc)) - return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)argw >> 2)); + return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2)); if (!(-argw & ~0x3fc)) - return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)-argw >> 2)); + return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2)); } if (arg & REG_MASK) { if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) { FAIL_IF(compiler->error); - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg)); } imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm != INVALID_IMM) { FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); } imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm != INVALID_IMM) { argw = -argw; FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); - return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); + return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); } } FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); if (arg & REG_MASK) FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK)))); - return 
push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg)); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -2033,41 +2265,53 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp src = TMP_FREG1; } - FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | DD4(TMP_FREG1) | DM4(src))); + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src))); if (FAST_IS_REG(dst)) - return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); + return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1)); /* Store the integer value from a VFP register. */ return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); } -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - op ^= SLJIT_32; - if (FAST_IS_REG(src)) - FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); + FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1))); else if (src & SLJIT_MEM) { /* Load the integer value into a VFP register. 
*/ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); } else { FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); - FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); + FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1))); } - FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_32) | DD4(dst_r) | DM4(TMP_FREG1))); + FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -2075,17 +2319,23 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile op ^= SLJIT_32; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } - FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | DD4(src1) | 
DM4(src2))); - return push_inst32(compiler, VMRS); + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2))); + FAIL_IF(push_inst32(compiler, VMRS)); + + if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8)); + return push_inst16(compiler, CMP /* Rm, Rn = r0 */); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -2105,7 +2355,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil op ^= SLJIT_32; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw)); src = dst_r; } @@ -2113,19 +2363,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); else dst_r = src; } break; case SLJIT_NEG_F64: - FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); break; case SLJIT_ABS_F64: - FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); op ^= SLJIT_32; break; } @@ -2152,27 +2402,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); break; case SLJIT_SUB_F64: - FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); break; case SLJIT_MUL_F64: - FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); break; case SLJIT_DIV_F64: - FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? 
(1 << 7) : 0))); + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1))); + FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0)); + FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8)); + return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r)); } if (!(dst & SLJIT_MEM)) @@ -2180,6 +2436,74 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if ((u.imm << (32 - 19)) == 0) { + exp = (u.imm >> (23 + 2)) & 0x3f; + + if (exp == 0x20 || exp == 0x1f) { + ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f); + return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) { + exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff; + + if (exp == 0x100 || exp == 0xff) { + ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f); + return 
push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0])); + if (u.imm[0] == u.imm[1]) + return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1])); + return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { @@ -2193,9 +2517,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi reg2 = REG_PAIR_SECOND(reg); reg = REG_PAIR_FIRST(reg); - inst = VMOV2 | RN4(reg) | RT4(reg2) | DM4(freg); + inst = VMOV2 | RN4(reg) | RT4(reg2) | VM4(freg); } else { - inst = VMOV | DN4(freg) | RT4(reg); + inst = VMOV | VN4(freg) | RT4(reg); if (!(op & SLJIT_32)) inst |= 1 << 7; @@ -2215,15 +2539,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: case SLJIT_F_EQUAL: case SLJIT_ORDERED_EQUAL: - case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_UNORDERED_OR_EQUAL: return 0x0; case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: case SLJIT_F_NOT_EQUAL: case SLJIT_UNORDERED_OR_NOT_EQUAL: - case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
*/ + case SLJIT_ORDERED_NOT_EQUAL: return 0x1; case SLJIT_CARRY: @@ -2327,7 +2653,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); if (type < SLJIT_JUMP) { jump->flags |= IS_COND; cc = get_cc(compiler, type); @@ -2343,6 +2668,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); } + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2498,18 +2825,18 @@ static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: if (offset != new_offset) - FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | DD4(new_offset) | DM4(offset))); + FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(new_offset) | VM4(offset))); new_offset++; offset++; break; case SLJIT_ARG_TYPE_F32: if (f32_offset != 0) { - FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(f32_offset) | DM4(offset))); + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_offset) | VM4(offset))); f32_offset = 0; } else { if (offset != new_offset) - FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(new_offset) | DM4(offset))); + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(new_offset) | VM4(offset))); f32_offset = new_offset; new_offset++; } @@ -2591,7 +2918,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_ASSERT(reg_map[TMP_REG1] != 14); - if (!(src & SLJIT_IMM)) { + if (src != SLJIT_IMM) { if (FAST_IS_REG(src)) { SLJIT_ASSERT(reg_map[src] != 14); return push_inst16(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RN3(src)); @@ -2608,8 +2935,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); jump->u.target = (sljit_uw)srcw; - FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1)); } @@ -2690,8 +3018,8 @@ static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *com if (FAST_IS_REG(src)) { if (op & SLJIT_32) - return push_inst32(compiler, VMOV | (1 << 20) | DN4(src) | RT4(SLJIT_R0)); - return push_inst32(compiler, VMOV2 | (1 << 20) | DM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1)); + return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0)); + return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1)); } SLJIT_SKIP_CHECKS(compiler); @@ -2756,23 +3084,47 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { sljit_uw cc, tmp; CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src2_reg != dst_reg && src1 == dst_reg) { + src1 = src2_reg; + src1w = 0; + src2_reg = dst_reg; + type ^= 0x1; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? 
dst_reg : TMP_REG1, src1, src1w, TMP_REG2)); + + if (src2_reg != dst_reg) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + src1 = TMP_REG1; + src1w = 0; + } + } else if (dst_reg != src2_reg) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg))); cc = get_cc(compiler, type & ~SLJIT_32); - if (!(src & SLJIT_IMM)) { + if (src1 != SLJIT_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); - return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src)); + return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1)); } - tmp = (sljit_uw) srcw; + tmp = (sljit_uw)src1w; if (tmp < 0x10000) { /* set low 16 bits, set hi 16 bits to 0. */ @@ -2781,13 +3133,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); } - tmp = get_imm((sljit_uw)srcw); + tmp = get_imm((sljit_uw)src1w); if (tmp != INVALID_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp); } - tmp = get_imm(~(sljit_uw)srcw); + tmp = get_imm(~(sljit_uw)src1w); if (tmp != INVALID_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp); @@ -2795,13 +3147,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4)); - tmp = (sljit_uw) srcw; + tmp = (sljit_uw)src1w; FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff))); return push_inst32(compiler, MOVT | RD4(dst_reg) | COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, 
sljit_sw src1w, + sljit_s32 src2_freg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + type ^= SLJIT_32; + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg))); + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8)); + return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) @@ -2815,7 +3197,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (!(reg & REG_PAIR_MASK)) return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); - if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)) { + if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) { if ((mem & REG_MASK) == 0) { if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) { imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff)); @@ -2826,7 +3208,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile imm = get_imm((sljit_uw)(memw & ~0xfff)); if (imm != INVALID_IMM) - memw &= 0xff; + memw &= 0xfff; } if (imm == INVALID_IMM) { @@ -3103,11 +3485,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); - if (type & SLJIT_MEM_UNALIGNED_32) + if (type & SLJIT_MEM_ALIGNED_32) return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 
0 : FPU_LOAD), freg, mem, memw); if (type & SLJIT_MEM_STORE) { - FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | RT4(TMP_REG2))); + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2))); if (type & SLJIT_32) return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1); @@ -3116,13 +3498,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil mem |= SLJIT_MEM; FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1)); - FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | 0x80 | RT4(TMP_REG2))); + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2))); return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1); } if (type & SLJIT_32) { FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); - return push_inst32(compiler, VMOV | DN4(freg) | RT4(TMP_REG2)); + return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2)); } FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); @@ -3130,11 +3512,715 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1)); - return push_inst32(compiler, VMOV2 | DM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1)); + return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1)); +} + +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_uw imm; + sljit_s32 mem = *mem_ptr; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG1; + return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)); + } + + if (SLJIT_UNLIKELY(!(mem & REG_MASK))) { + *mem_ptr = TMP_REG1; + return load_immediate(compiler, TMP_REG1, 
(sljit_uw)memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG1; + imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw)); + + if (imm != INVALID_IMM) + return push_inst32(compiler, ((memw < 0) ? SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(mem) | imm); + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem)); +} + +static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg) +{ + freg += freg & 0x1; + + SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)); + + if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS) + freg--; + + return freg; +} + +#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (!(srcdst & SLJIT_MEM)) { + if (reg_size == 4) + srcdst = simd_get_quad_reg_index(srcdst); + + if (type & SLJIT_SIMD_STORE) + ins = VD4(srcdst) | VN4(freg) | VM4(freg); + else + ins = VD4(freg) | VN4(srcdst) | VM4(srcdst); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VORR | ins); + } + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size > 3) + 
elem_size = 3; + + ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg) + | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8)); + + SLJIT_ASSERT(reg_size >= alignment); + + if (alignment == 3) + ins |= 0x10; + else if (alignment >= 4) + ins |= 0x20; + + return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf); +} + +static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) +{ + sljit_ins result; + + if (elem_size > 1 && (sljit_u16)value == (value >> 16)) { + elem_size = 1; + value = (sljit_u16)value; + } + + if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { + elem_size = 0; + value = (sljit_u8)value; + } + + switch (elem_size) { + case 0: + SLJIT_ASSERT(value <= 0xff); + result = 0xe00; + break; + case 1: + SLJIT_ASSERT(value <= 0xffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x800; + break; + } + + if ((value & 0xff) == 0) { + value >>= 8; + result |= 0xa00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffff; + result = (1 << 5); + } + break; + default: + SLJIT_ASSERT(value <= 0xffffffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x000; + break; + } + + if ((value & ~(sljit_uw)0xff00) == 0) { + value >>= 8; + result |= 0x200; + break; + } + + if ((value & ~(sljit_uw)0xff0000) == 0) { + value >>= 16; + result |= 0x400; + break; + } + + if ((value & ~(sljit_uw)0xff000000) == 0) { + value >>= 24; + result |= 0x600; + break; + } + + if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { + value >>= 8; + result |= 0xc00; + break; + } + + if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { + value >>= 16; + result |= 0xd00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value = ~value; + result = (1 << 5); + } + break; + } + + return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imm; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src == SLJIT_IMM && srcw == 0) + return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg)); + + if (SLJIT_UNLIKELY(elem_size == 3)) { + SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw)); + src = freg; + } else if (freg != src) + FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)); + return SLJIT_SUCCESS; + } + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + ins = (sljit_ins)(elem_size << 6); + + if (reg_size == 4) + ins |= 1 << 5; + + return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + SLJIT_ASSERT(elem_size == 2); + ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]); + } + + if (src == SLJIT_IMM) { + if (elem_size < 2) + srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + imm = simd_get_imm(elem_size, (sljit_uw)srcw); + + 
if (imm != ~(sljit_ins)0) { + if (reg_size == 4) + imm |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VMOV_i | imm | VD4(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + src = TMP_REG1; + } + + switch (elem_size) { + case 0: + ins = 1 << 22; + break; + case 1: + ins = 1 << 5; + break; + default: + ins = 0; + break; + } + + if (reg_size == 4) + ins |= (sljit_ins)1 << 21; + + return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 3) ? 
0 : ((sljit_ins)1 << 6); + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3 && !(srcdst & SLJIT_MEM)) { + if (lane_index == 1) + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (srcdst != freg) + FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst32(compiler, VMOV_i | VD4(freg)); + } + + if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) { + FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg))); + srcdst = TMP_FREG2; + srcdstw = 0; + } + } + + FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg))); + } + + if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) { + lane_index -= (0x8 >> elem_size); + freg += SLJIT_QUAD_OTHER_HALF(freg); + } + + if (srcdst & SLJIT_MEM) { + if (elem_size == 3) + return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw); + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + lane_index = lane_index << elem_size; + ins = (sljit_ins)((elem_size << 10) | (lane_index << 5)); + return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? 
VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3) { + if (type & SLJIT_SIMD_STORE) + return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg)); + return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst)); + } + + if (type & SLJIT_SIMD_STORE) { + if (freg_ebit_map[freg] == 0) { + if (lane_index == 1) + freg = SLJIT_F64_SECOND(freg); + + return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg)); + } + + FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1))); + return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1)); + } + + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1))); + return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)); + } + + if (srcdst == SLJIT_IMM) { + if (elem_size < 2) + srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw)); + srcdst = TMP_REG1; + } + + if (elem_size == 0) + ins = 0x400000; + else if (elem_size == 1) + ins = 0x20; + else + ins = 0; + + lane_index = lane_index << elem_size; + ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5)); + + if (type & SLJIT_SIMD_STORE) { + ins |= (1 << 20); + + if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED)) + ins |= (1 << 23); + } + + return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 3 && reg_size != 4) + 
return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) { + freg = simd_get_quad_reg_index(freg); + src = simd_get_quad_reg_index(src); + + if (src_lane_index >= (0x8 >> elem_size)) { + src_lane_index -= (0x8 >> elem_size); + src += SLJIT_QUAD_OTHER_HALF(src); + } + } + + if (elem_size == 3) { + if (freg != src) + FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)); + return SLJIT_SUCCESS; + } + + ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_s32 dst_reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + if (reg_size == 4 && elem2_size - elem_size == 1) + FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf)); + else + FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 
10) | VD4(freg) | RN4(src) | 0xf)); + src = freg; + } else if (reg_size == 4) + src = simd_get_quad_reg_index(src); + + if (!(type & SLJIT_SIMD_FLOAT)) { + dst_reg = (reg_size == 4) ? freg : TMP_FREG2; + + do { + FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28)) + | ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src))); + src = dst_reg; + } while (++elem_size < elem2_size); + + if (dst_reg == TMP_FREG2) + return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2)); + return SLJIT_SUCCESS; + } + + /* No SIMD variant, must use VFP instead. */ + SLJIT_ASSERT(reg_size == 4); + + if (freg == src) { + freg += SLJIT_QUAD_OTHER_HALF(freg); + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20)); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)); + } + + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src))); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imms; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + imms = 0x243219; + ins = VSHR | (1 << 28) | (0x9 << 16); + break; + case 1: + imms = (reg_size == 4) ? 
0x243219 : 0x2231; + ins = VSHR | (1 << 28) | (0x11 << 16); + break; + case 2: + imms = (reg_size == 4) ? 0x2231 : 0x21; + ins = VSHR | (1 << 28) | (0x21 << 16); + break; + default: + imms = 0x21; + ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7); + break; + } + + if (reg_size == 4) { + freg = simd_get_quad_reg_index(freg); + ins |= (sljit_ins)1 << 6; + } + + SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0); + FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg))); + + if (reg_size == 4 && elem_size > 0) + FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2))); + + ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0; + + while (imms >= 0x100) { + FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2))); + imms >>= 8; + } + + FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2))); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2))); + + if (reg_size == 4 && elem_size == 0) { + SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]); + FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1))); + FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12))); + } + + if (dst_r == TMP_REG1) + return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = VAND; + break; + case SLJIT_SIMD_OP2_OR: + ins = VORR; + break; + case SLJIT_SIMD_OP2_XOR: + ins = VEOR; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) { + dst_freg = simd_get_quad_reg_index(dst_freg); + src1_freg = simd_get_quad_reg_index(src1_freg); + src2_freg = simd_get_quad_reg_index(src2_freg); + ins |= (sljit_ins)1 << 6; + } + + return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg)); } #undef FPU_LOAD +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + switch 
(GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = LDREXB; + break; + case SLJIT_MOV_U16: + ins = LDREXH; + break; + default: + ins = LDREX; + break; + } + + return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins; + + /* temp_reg == mem_reg is undefined so use another temp register */ + SLJIT_UNUSED_ARG(temp_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = STREXB | RM4(TMP_REG1); + break; + case SLJIT_MOV_U16: + ins = STREXH | RM4(TMP_REG1); + break; + default: + ins = STREX | RD4(TMP_REG1); + break; + } + + FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg))); + if (op & SLJIT_SET_ATOMIC_STORED) + return push_inst32(compiler, CMPI_W | RN4(TMP_REG1)); + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; @@ -3156,25 +4242,26 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 
0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0)); + PTR_FAIL_IF(push_inst16(compiler, RDN3(dst_r))); + compiler->size += 3; if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeLOONGARCH_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeLOONGARCH_64.c new file mode 100755 index 0000000000..a6fd044855 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeLOONGARCH_64.c @@ -0,0 +1,3158 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
/* Returns the platform name string: "LOONGARCH" followed by SLJIT_CPUINFO. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "LOONGARCH" SLJIT_CPUINFO;
}

/* One machine instruction word. */
typedef sljit_u32 sljit_ins;

/* Backend-private temporaries, placed after the public sljit registers.
   These are indexes into reg_map, not hardware register numbers. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
/* Index 0 maps to hardware register 0 in reg_map. */
#define TMP_ZERO 0

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)
/* The return address shares its register with TMP_REG2. */
#define RETURN_ADDR_REG TMP_REG2
#define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* Maps an sljit register index (including the temporaries and flag registers
   above) to the register number placed in the instruction word. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
	0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
};

/* Same mapping for floating point registers, including TMP_FREG1/TMP_FREG2. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
	0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
};

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */
/*
LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):

| Format name  | Composition                 |
| 2R           | Opcode + Rj + Rd            |
| 3R           | Opcode + Rk + Rj + Rd       |
| 4R           | Opcode + Ra + Rk + Rj + Rd  |
| 2RI8         | Opcode + I8 + Rj + Rd       |
| 2RI12        | Opcode + I12 + Rj + Rd      |
| 2RI14        | Opcode + I14 + Rj + Rd      |
| 2RI16        | Opcode + I16 + Rj + Rd      |
| 1RI21        | Opcode + I21L + Rj + I21H   |
| I26          | Opcode + I26L + I26H        |

Rd is the destination register operand, while Rj, Rk and Ra ("a" stands for "additional") are the source register operands.
I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
lower parts in the instruction word, denoted by the "L" and "H" suffixes. */

/* Register fields: translate an sljit register index through reg_map /
   freg_map and shift it to its position in the instruction word. */
#define RD(rd) ((sljit_ins)reg_map[rd])
#define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
#define RK(rk) ((sljit_ins)reg_map[rk] << 10)
#define RA(ra) ((sljit_ins)reg_map[ra] << 15)

/* NOTE(review): FD indexes reg_map while FRD indexes freg_map - confirm this is intended. */
#define FD(fd) ((sljit_ins)reg_map[fd])
#define FRD(fd) ((sljit_ins)freg_map[fd])
#define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
#define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
#define FRA(fa) ((sljit_ins)freg_map[fa] << 15)

/* Immediate fields: mask the value to the field width and place it at bit 10.
   I21 and I26 keep their low 16 bits at bit 10 and the rest at bit 0. */
#define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
#define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
/* 14-bit field: the mask must be 0x3fff. The previous 0xfff3 dropped
   immediate bits 2-3 and let bits 14-15 spill into the opcode field. */
#define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
#define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
#define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
#define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))

/* Opcode placement for each instruction format. */
#define OPC_I26(opc) ((sljit_ins)(opc) << 26)
#define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
#define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
#define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
#define OPC_4R(opc) ((sljit_ins)(opc) << 20)
#define OPC_3R(opc) ((sljit_ins)(opc) << 15)
#define OPC_2R(opc) ((sljit_ins)(opc) << 10)
#define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)

/* Arithmetic operation instructions */
#define ADD_W OPC_3R(0x20)
#define ADD_D OPC_3R(0x21)
#define SUB_W OPC_3R(0x22)
#define SUB_D OPC_3R(0x23)
#define ADDI_W OPC_2RI12(0xa)
#define ADDI_D OPC_2RI12(0xb)
#define ANDI OPC_2RI12(0xd)
#define ORI OPC_2RI12(0xe)
#define XORI OPC_2RI12(0xf)
#define ADDU16I_D OPC_2RI16(0x4)
#define LU12I_W OPC_1RI20(0xa)
#define LU32I_D OPC_1RI20(0xb)
#define LU52I_D OPC_2RI12(0xc)
#define SLT OPC_3R(0x24)
#define SLTU OPC_3R(0x25)
#define SLTI OPC_2RI12(0x8)
#define SLTUI OPC_2RI12(0x9)
#define PCADDI OPC_1RI20(0xc)
#define PCALAU12I OPC_1RI20(0xd)
#define PCADDU12I OPC_1RI20(0xe)
#define PCADDU18I OPC_1RI20(0xf)
#define NOR OPC_3R(0x28)
#define AND OPC_3R(0x29)
#define OR OPC_3R(0x2a)
#define XOR OPC_3R(0x2b)
#define ORN OPC_3R(0x2c)
#define ANDN OPC_3R(0x2d)
#define MUL_W OPC_3R(0x38)
#define MULH_W OPC_3R(0x39)
#define MULH_WU OPC_3R(0x3a)
#define MUL_D OPC_3R(0x3b)
#define MULH_D OPC_3R(0x3c)
#define MULH_DU OPC_3R(0x3d)
#define MULW_D_W OPC_3R(0x3e)
#define MULW_D_WU OPC_3R(0x3f)
#define DIV_W OPC_3R(0x40)
#define MOD_W OPC_3R(0x41)
#define DIV_WU OPC_3R(0x42)
#define MOD_WU OPC_3R(0x43)
#define DIV_D OPC_3R(0x44)
#define MOD_D OPC_3R(0x45)
#define DIV_DU OPC_3R(0x46)
#define MOD_DU OPC_3R(0x47)

/* Bit-shift instructions */
#define SLL_W OPC_3R(0x2e)
#define SRL_W OPC_3R(0x2f)
#define SRA_W OPC_3R(0x30)
#define SLL_D OPC_3R(0x31)
#define SRL_D OPC_3R(0x32)
#define SRA_D OPC_3R(0x33)
#define ROTR_W OPC_3R(0x36)
#define ROTR_D OPC_3R(0x37)
#define SLLI_W OPC_3R(0x81)
#define SLLI_D ((sljit_ins)(0x41) << 16)
#define SRLI_W OPC_3R(0x89)
#define SRLI_D ((sljit_ins)(0x45) << 16)
#define SRAI_W OPC_3R(0x91)
#define SRAI_D ((sljit_ins)(0x49) << 16)
#define ROTRI_W OPC_3R(0x99)
#define ROTRI_D ((sljit_ins)(0x4d) << 16)

/* Bit-manipulation instructions */
#define CLO_W OPC_2R(0x4)
#define CLZ_W OPC_2R(0x5)
#define CTO_W OPC_2R(0x6)
#define CTZ_W OPC_2R(0x7)
#define CLO_D OPC_2R(0x8)
#define CLZ_D OPC_2R(0x9)
#define CTO_D OPC_2R(0xa)
#define CTZ_D OPC_2R(0xb)
#define REVB_2H OPC_2R(0xc)
#define REVB_4H OPC_2R(0xd)
#define REVB_2W OPC_2R(0xe)
#define REVB_D OPC_2R(0xf)
#define REVH_2W OPC_2R(0x10)
#define REVH_D OPC_2R(0x11)
#define BITREV_4B OPC_2R(0x12)
#define BITREV_8B OPC_2R(0x13)
#define BITREV_W OPC_2R(0x14)
#define BITREV_D OPC_2R(0x15)
#define EXT_W_H OPC_2R(0x16)
#define EXT_W_B OPC_2R(0x17)
#define BSTRINS_W (0x1 << 22 | 1 << 21)
#define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15)
#define BSTRINS_D (0x2 << 22)
#define BSTRPICK_D (0x3 << 22)

/* Branch instructions */
#define BEQZ OPC_1RI21(0x10)
#define BNEZ OPC_1RI21(0x11)
#define JIRL OPC_2RI16(0x13)
#define B OPC_I26(0x14)
#define BL OPC_I26(0x15)
#define BEQ OPC_2RI16(0x16)
#define BNE OPC_2RI16(0x17)
#define BLT OPC_2RI16(0x18)
#define BGE OPC_2RI16(0x19)
#define BLTU OPC_2RI16(0x1a)
#define BGEU OPC_2RI16(0x1b)

/* Memory access instructions */
#define LD_B OPC_2RI12(0xa0)
#define LD_H OPC_2RI12(0xa1)
#define LD_W OPC_2RI12(0xa2)
#define LD_D OPC_2RI12(0xa3)

#define ST_B OPC_2RI12(0xa4)
#define ST_H OPC_2RI12(0xa5)
#define ST_W OPC_2RI12(0xa6)
#define ST_D OPC_2RI12(0xa7)

#define LD_BU OPC_2RI12(0xa8)
#define LD_HU OPC_2RI12(0xa9)
#define LD_WU OPC_2RI12(0xaa)

#define LDX_B OPC_3R(0x7000)
#define LDX_H OPC_3R(0x7008)
#define LDX_W OPC_3R(0x7010)
#define LDX_D OPC_3R(0x7018)

#define STX_B OPC_3R(0x7020)
#define STX_H OPC_3R(0x7028)
#define STX_W OPC_3R(0x7030)
#define STX_D OPC_3R(0x7038)

#define LDX_BU OPC_3R(0x7040)
#define LDX_HU OPC_3R(0x7048)
#define LDX_WU OPC_3R(0x7050)

#define PRELD OPC_2RI12(0xab)

/* Atomic memory access instructions */
#define LL_W OPC_2RI14(0x20)
#define SC_W OPC_2RI14(0x21)
#define LL_D OPC_2RI14(0x22)
#define SC_D OPC_2RI14(0x23)

/* LoongArch V1.10 Instructions */
#define AMCAS_B OPC_3R(0x70B0)
#define AMCAS_H OPC_3R(0x70B1)
#define AMCAS_W OPC_3R(0x70B2)
#define AMCAS_D OPC_3R(0x70B3)

/* Other instructions */
#define BREAK OPC_3R(0x54)
#define DBGCALL OPC_3R(0x55)
#define SYSCALL OPC_3R(0x56)

/* Basic Floating-Point Instructions */
/* Floating-Point Arithmetic Operation Instructions */
#define FADD_S OPC_3R(0x201)
#define FADD_D OPC_3R(0x202)
#define FSUB_S OPC_3R(0x205)
#define FSUB_D OPC_3R(0x206)
#define FMUL_S OPC_3R(0x209)
#define FMUL_D OPC_3R(0x20a)
#define FDIV_S OPC_3R(0x20d)
#define FDIV_D OPC_3R(0x20e)
#define FCMP_COND_S OPC_4R(0xc1)
#define FCMP_COND_D OPC_4R(0xc2)
#define FCOPYSIGN_S OPC_3R(0x225)
#define FCOPYSIGN_D OPC_3R(0x226)
#define FSEL OPC_4R(0xd0)
#define FABS_S OPC_2R(0x4501)
#define FABS_D OPC_2R(0x4502)
#define FNEG_S OPC_2R(0x4505)
#define FNEG_D OPC_2R(0x4506)
#define FMOV_S OPC_2R(0x4525)
#define FMOV_D OPC_2R(0x4526)

/* Floating-Point Conversion Instructions */
#define FCVT_S_D OPC_2R(0x4646)
#define FCVT_D_S OPC_2R(0x4649)
#define FTINTRZ_W_S OPC_2R(0x46a1)
#define FTINTRZ_W_D OPC_2R(0x46a2)
#define FTINTRZ_L_S OPC_2R(0x46a9)
#define FTINTRZ_L_D OPC_2R(0x46aa)
#define FFINT_S_W OPC_2R(0x4744)
#define FFINT_S_L OPC_2R(0x4746)
#define FFINT_D_W OPC_2R(0x4748)
#define FFINT_D_L OPC_2R(0x474a)

/* Floating-Point Move Instructions */
/* FMOV_S / FMOV_D are defined with the arithmetic group above. */
#define MOVGR2FR_W OPC_2R(0x4529)
#define MOVGR2FR_D OPC_2R(0x452a)
#define MOVGR2FRH_W OPC_2R(0x452b)
#define MOVFR2GR_S OPC_2R(0x452d)
#define MOVFR2GR_D OPC_2R(0x452e)
#define MOVFRH2GR_S OPC_2R(0x452f)
#define MOVGR2FCSR OPC_2R(0x4530)
#define MOVFCSR2GR OPC_2R(0x4532)
#define MOVFR2CF OPC_2R(0x4534)
#define MOVCF2FR OPC_2R(0x4535)
#define MOVGR2CF OPC_2R(0x4536)
#define MOVCF2GR OPC_2R(0x4537)

/* Floating-Point Branch Instructions */
/* Both share major opcode 0x12; bit 8 distinguishes BCNEZ from BCEQZ.
   Previously the two macros were identical. */
#define BCEQZ OPC_I26(0x12)
#define BCNEZ (OPC_I26(0x12) | ((sljit_ins)1 << 8))

/* Floating-Point Common Memory Access Instructions */
#define FLD_S OPC_2RI12(0xac)
#define FLD_D OPC_2RI12(0xae)
#define FST_S OPC_2RI12(0xad)
#define FST_D OPC_2RI12(0xaf)

#define FLDX_S OPC_3R(0x7060)
#define FLDX_D OPC_3R(0x7068)
#define FSTX_S OPC_3R(0x7070)
#define FSTX_D OPC_3R(0x7078)
/* Signed 12-bit immediate range. */
#define I12_MAX (0x7ff)
#define I12_MIN (-0x800)
/* Byte-offset ranges used when choosing a branch/jump encoding
   (the instruction immediates are stored shifted right by 2). */
#define BRANCH16_MAX (0x7fff << 2)
#define BRANCH16_MIN (-(0x8000 << 2))
#define BRANCH21_MAX (0xfffff << 2)
#define BRANCH21_MIN (-(0x100000 << 2))
#define JUMP_MAX (0x1ffffff << 2)
#define JUMP_MIN (-(0x2000000 << 2))
#define JIRL_MAX (0x7fff << 2)
#define JIRL_MIN (-(0x8000 << 2))

#define S32_MAX (0x7fffffffl)
#define S32_MIN (-0x80000000l)
#define S52_MAX (0x7ffffffffffffl)

/* Selects the _W opcode when SLJIT_32 is set in `type`, otherwise the _D opcode. */
#define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))

/* LoongArch CPUCFG register for feature detection */
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_FEATURE_LAMCAS (1 << 28)

/* Cached CPUCFG word; 0 means "not read yet". */
static sljit_u32 cpu_feature_list = 0;

/* Returns the CPUCFG word selected by LOONGARCH_CFG2, reading it with the
   cpucfg instruction the first time and whenever the cached value is 0. */
static SLJIT_INLINE sljit_u32 get_cpu_features(void)
{
	if (cpu_feature_list == 0)
		__asm__ ("cpucfg %0, %1" : "+&r"(cpu_feature_list) : "r"(LOONGARCH_CFG2));
	return cpu_feature_list;
}

/* Appends one instruction word to the compiler buffer and increments
   compiler->size. Fails (via FAIL_IF) when ensure_buf() returns NULL. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
diff <= JUMP_MAX) { + if (jump->flags & IS_COND) { + inst[-1] |= (sljit_ins)IMM_I16(2); + } + + jump->flags |= PATCH_J; + return inst; + } + + if (diff >= S32_MIN && diff <= S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] |= (sljit_ins)IMM_I16(3); + + jump->flags |= PATCH_REL32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= (sljit_uw)S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] |= (sljit_ins)IMM_I16(3); + + jump->flags |= PATCH_ABS32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= S52_MAX) { + if (jump->flags & IS_COND) + inst[-1] |= (sljit_ins)IMM_I16(4); + + jump->flags |= PATCH_ABS52; + inst[2] = inst[0]; + return inst + 2; + } + +exit: + if (jump->flags & IS_COND) + inst[-1] |= (sljit_ins)IMM_I16(5); + inst[3] = inst[0]; + return inst + 3; +} + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label <= (sljit_uw)S32_MAX) { + put_label->flags = PATCH_ABS32; + return 1; + } + + if (max_label <= S52_MAX) { + put_label->flags = PATCH_ABS52; + return 2; + } + + put_label->flags = 0; + return 3; +} + +static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +{ + struct sljit_jump *jump = NULL; + struct sljit_put_label *put_label; + sljit_uw flags; + sljit_ins *inst; + sljit_uw addr; + + if (reg != 0) { + jump = (struct sljit_jump*)dst; + flags = jump->flags; + inst = (sljit_ins*)jump->addr; + addr = (flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->addr; + } else { + put_label = (struct sljit_put_label*)dst; + flags = put_label->flags; + inst = (sljit_ins*)put_label->addr; + addr = put_label->label->addr; + reg = *inst; + } + + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= S32_MAX); + inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + } else if (flags & PATCH_ABS52) { + inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + inst[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); + inst += 1; + } else { + inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + inst[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); + inst[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52); + inst += 2; + } + + if (jump != NULL) { + SLJIT_ASSERT((inst[1] & OPC_2RI16(0x3f)) == JIRL); + inst[1] = (inst[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2); + } else + inst[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end 
= buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + if (jump && jump->addr == word_count) { + word_count += 3; + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + + code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + word_count += 3; + + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) { + load_addr_to_reg(jump, TMP_REG1); + break; + } + + addr = (jump->flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->addr; + buf_ptr = (sljit_ins *)jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + + if (jump->flags & PATCH_B) { + SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX); + buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2); + break; + } + + if (jump->flags & PATCH_REL32) { + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + buf_ptr[0] = PCADDU12I | RD(TMP_REG1) | (sljit_ins)((sljit_sw)addr & ~0xfff); + SLJIT_ASSERT((buf_ptr[1] & OPC_2RI16(0x3f)) == JIRL); + buf_ptr[1] |= IMM_I16((addr & 0xfff) >> 2); + break; + } + + SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX); + if (jump->flags & IS_CALL) + buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2); + else + buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2); + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + load_addr_to_reg(put_label, 0); + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) + { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + /* Available by default. 
/* Always returns 0 on this backend; `type` is ignored. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
	SLJIT_UNUSED_ARG(type);

	return 0;
}

/* --------------------------------------------------------------------- */
/*  Entry, exit                                                          */
/* --------------------------------------------------------------------- */

/* Creates an index in data_transfer_insts array. */
#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
/* Separates integer and floating point registers */
#define GPR_REG 0x0f
#define DOUBLE_DATA 0x10
#define SINGLE_DATA 0x12

/* Mask covering all of the data-type bits above. */
#define MEM_MASK 0x1f

/* Flag bits above MEM_MASK. */
#define ARG_TEST 0x00020
#define ALT_KEEP_CACHE 0x00040
#define CUMULATIVE_OP 0x00080
#define IMM_OP 0x00100
#define MOVE_OP 0x00200
#define SRC2_IMM 0x00400

#define UNUSED_DEST 0x00800
#define REG_DEST 0x01000
#define REG1_SOURCE 0x02000
#define REG2_SOURCE 0x04000
#define SLOW_SRC1 0x08000
#define SLOW_SRC2 0x10000
#define SLOW_DEST 0x20000

/* Stack slots are saved and restored with 64-bit stores and loads. */
#define STACK_STORE ST_D
#define STACK_LOAD LD_D
/* Largest stack adjustment done by a single ADDI_D in the prologue (0x800). */
#define STACK_MAX_DISTANCE (-I12_MIN)

static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);

/*
 * Emits the function prologue. It allocates the stack frame, stores the
 * return address, the saved registers and the saved float registers, and,
 * unless SLJIT_ENTER_REG_ARG is set, moves incoming word arguments into
 * saved registers.
 *
 * The frame size (locals + saved registers + SLJIT_LOCALS_OFFSET, rounded up
 * to 16 bytes) is stored in compiler->local_size.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, offset;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

	/* Round the whole frame up to a multiple of 16 bytes. */
	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
	compiler->local_size = local_size;

	if (local_size <= STACK_MAX_DISTANCE) {
		/* Frequent case. */
		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
		offset = local_size - SSIZE_OF(sw);
		local_size = 0;
	} else {
		/* IMM_I12 keeps only the low 12 bits, so 0x800 is encoded as the
		   12-bit two's-complement value -0x800 (I12_MIN): this lowers SP by
		   STACK_MAX_DISTANCE. The rest of the frame is allocated after the
		   register saves below. */
		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
		local_size -= STACK_MAX_DISTANCE;

		/* Remainder too large for an immediate: preload it into TMP_REG1. */
		if (local_size > STACK_MAX_DISTANCE)
			FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
		offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
	}

	/* The return address takes the highest slot, then the saves go downwards. */
	FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	/* Scratch registers numbered SLJIT_FIRST_SAVED_REG and above are stored too. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
	}

	/* Allocate whatever part of a large frame is still outstanding. */
	if (local_size > STACK_MAX_DISTANCE)
		FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
	else if (local_size > 0)
		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));

	if (options & SLJIT_ENTER_REG_ARG)
		return SLJIT_SUCCESS;

	arg_types >>= SLJIT_ARG_SHIFT;
	saved_arg_count = 0;
	tmp = SLJIT_R0;

	/* Walk the argument list. Each non-float argument occupies the next
	   register starting at SLJIT_R0; unless it is marked as a scratch
	   argument it is copied (ADDI_D with 0) into the next saved register. */
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}

#undef STACK_MAX_DISTANCE

/*
 * Records the frame layout without emitting any code. It stores the same
 * rounded frame size in compiler->local_size that sljit_emit_enter() computes.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;

	return SLJIT_SUCCESS;
}
RD(i) | RJ(SLJIT_SP) | IMM_I12(offset))); + } + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset))); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset))); + } + + return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)); +} + +#undef STACK_MAX_DISTANCE + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ST_D /* st.d */, +/* u w l */ LD_D /* ld.d */, +/* u b s */ ST_B /* st.b */, +/* u b l */ LD_BU /* ld.bu */, +/* u h s */ ST_H /* st.h */, +/* u h l */ LD_HU /* 
ld.hu */, +/* u i s */ ST_W /* st.w */, +/* u i l */ LD_WU /* ld.wu */, + +/* s w s */ ST_D /* st.d */, +/* s w l */ LD_D /* ld.d */, +/* s b s */ ST_B /* st.b */, +/* s b l */ LD_B /* ld.b */, +/* s h s */ ST_H /* st.h */, +/* s h l */ LD_H /* ld.h */, +/* s i s */ ST_W /* st.w */, +/* s i l */ LD_W /* ld.w */, + +/* d s */ FST_D /* fst.d */, +/* d l */ FLD_D /* fld.d */, +/* s s */ FST_S /* fst.s */, +/* s l */ FLD_S /* fld.s */, +}; + +static const sljit_ins data_transfer_insts_x[16 + 4] = { +/* u w s */ STX_D /* stx.d */, +/* u w l */ LDX_D /* ldx.d */, +/* u b s */ STX_B /* stx.b */, +/* u b l */ LDX_BU /* ldx.bu */, +/* u h s */ STX_H /* stx.h */, +/* u h l */ LDX_HU /* ldx.hu */, +/* u i s */ STX_W /* stx.w */, +/* u i l */ LDX_WU /* ldx.wu */, + +/* s w s */ STX_D /* stx.d */, +/* s w l */ LDX_D /* ldx.d */, +/* s b s */ STX_B /* stx.b */, +/* s b l */ LDX_B /* ldx.b */, +/* s h s */ STX_H /* stx.h */, +/* s h l */ LDX_H /* ldx.h */, +/* s i s */ STX_W /* stx.w */, +/* s i l */ LDX_W /* ldx.w */, + +/* d s */ FSTX_D /* fstx.d */, +/* d l */ FLDX_D /* fldx.d */, +/* s s */ FSTX_S /* fstx.s */, +/* s l */ FLDX_S /* fldx.s */, +}; + +static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_ins ins; + sljit_s32 base = arg & REG_MASK; + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (arg & OFFS_REG_MASK) { + sljit_s32 offs = OFFS_REG(arg); + + SLJIT_ASSERT(!argw); + ins = data_transfer_insts_x[flags & MEM_MASK] | + ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) | + RJ(base) | RK(offs); + } else { + SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN); + + ins = data_transfer_insts[flags & MEM_MASK] | + ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) | + RJ(base) | IMM_I12(argw); + } + return push_inst(compiler, ins); +} + +/* Can perform an operation using at most 1 instruction. 
*/ +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* argw == 0 (ldx/stx rd, rj, rk) can be used. + * argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */ + if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) { + /* Works for both absolute and relative addresses. */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw)); + return -1; + } + return 0; +} + +#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0)) + +/* See getput_arg below. + Note: can_cache is called only for binary operators. */ +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + if (arg & OFFS_REG_MASK) + return 0; + + if (arg == next_arg) { + if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN) + || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw)) + return 1; + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + sljit_s32 base = arg & REG_MASK; + sljit_s32 tmp_r = TMP_REG1; + sljit_sw offset; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + /* Since tmp can be the same as base or offset registers, + * these might be unavailable after modifying tmp. 
*/ + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + tmp_r = reg; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) + FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw))); + return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN) + return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw); + + if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) { + offset = argw - compiler->cache_argw; + } else { + sljit_sw argw_hi=TO_ARGW_HI(argw); + compiler->cache_arg = SLJIT_MEM; + + if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + compiler->cache_argw = argw; + offset = 0; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi)); + compiler->cache_argw = argw_hi; + offset = argw & 0xfff; + argw = argw_hi; + } + } + + if (!base) + return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset); + + if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base))); + return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset); + } + + if (!offset) + return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0); + + FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base))); + return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset); +} + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_s32 base = arg & REG_MASK; + sljit_s32 tmp_r = TMP_REG1; + + 
if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + tmp_r = reg; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) + FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw))); + return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0); + } else { + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + + if (base != 0) + return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0); + return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0); + } +} + +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +#define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? 
(v) : (32 + (v)))) + +/* andi/ori/xori are zero-extended */ +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) {\ + FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \ + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \ + } \ + if (!(flags & UNUSED_DEST)) { \ + if (dst == src1) { \ + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \ + } else { \ + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \ + } \ + } \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \ + } \ + while (0) + +#define EMIT_SHIFT(imm, reg) \ + op_imm = (imm); \ + op_reg = (reg) + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; + sljit_ins op_imm, op_reg; + sljit_ins word_size = ((op & SLJIT_32) ? 
32 : 64); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, INST(CLZ, 
op) | RD(dst) | RJ(src2)); + + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2)); + + case SLJIT_REV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2)); + + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); + return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst)); + + case SLJIT_REV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); + return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16)); + + case SLJIT_REV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); + return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0)); + + case SLJIT_REV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); + return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16)); + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0))); + else { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1))); + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); + } + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + if (is_overflow || carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0))); + carry_src_r = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (is_overflow || carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG)); + + case SLJIT_ADDC: + carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2))); + } else { + if (carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0))); + carry_src_r = 
EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r))); + } + + FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG))); + + if (carry_src_r == 0) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG))); + /* Set carry flag. */ + return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG)); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == I12_MIN) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS) { + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { + FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + break; + case SLJIT_GREATER: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1))); + break; + case SLJIT_SIG_LESS: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + break; + case SLJIT_SIG_GREATER: + 
FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1))); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0))); + else { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1))); + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); + } + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG)); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == I12_MIN) { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); + + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG))); + + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1)); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) + return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2)); + + if (op & SLJIT_32) { + FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2))); + return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)); + } + + 
FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2))); + FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12((63)))); + return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG)); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + case SLJIT_MSHL: + if (op & SLJIT_32) { + EMIT_SHIFT(SLLI_W, SLL_W); + } else { + EMIT_SHIFT(SLLI_D, SLL_D); + } + break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (op & SLJIT_32) { + EMIT_SHIFT(SRLI_W, SRL_W); + } else { + EMIT_SHIFT(SRLI_D, SRL_D); + } + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + if (op & SLJIT_32) { + EMIT_SHIFT(SRAI_W, SRA_W); + } else { + EMIT_SHIFT(SRAI_D, SRA_D); + } + break; + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & SRC2_IMM) { + SLJIT_ASSERT(src2 != 0); + + if (GET_OPCODE(op) == SLJIT_ROTL) + src2 = word_size - src2; + return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)); + + } + + if (src2 == TMP_ZERO) { + if (dst != src1) + return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0)); + return SLJIT_SUCCESS; + } + + if (GET_OPCODE(op) == SLJIT_ROTL) { + FAIL_IF(push_inst(compiler, INST(SUB, op)| RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2))); + src2 = OTHER_FLAG; + } + return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2)); + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2)); + } + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_reg | 
RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2)); +} + +#undef IMM_EXTEND + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (dst == 0) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + dst = TMP_REG2; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (flags & MOVE_OP) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } + else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 == SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + else + src1_r = TMP_ZERO; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. 
*/ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; + } + else if (src2 == SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + else { + src2_r = TMP_ZERO; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BREAK); + case SLJIT_NOP: 
+ return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0)); + case SLJIT_LMUL_UW: + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1))); + return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1)); + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1))); + return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1)); + case SLJIT_DIVMOD_UW: + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1))); + return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1)); + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1))); + return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1)); + case SLJIT_DIV_UW: + return push_inst(compiler, ((op & SLJIT_32)? 
DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)); + case SLJIT_DIV_SW: + return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)); + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); + + case SLJIT_MOV_S32: + /* Logical operators have no W variant, so sign extended input is necessary for them. */ + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); + + case SLJIT_MOV_U8: + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U32: + case SLJIT_REV_S32: + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (op & SLJIT_32) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 == SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 == SLJIT_IMM) + src2w = (sljit_s32)src2w; + } + + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (src2 == SLJIT_IMM) { + if (op & SLJIT_32) + src2w 
&= 0x1f; + else + src2w &= 0x3f; + } + + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_s32 is_left; + sljit_ins ins1, ins2, ins3; + sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; + + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? 
SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + if (src3 == SLJIT_IMM) { + src3w &= bit_length - 1; + + if (src3w == 0) + return SLJIT_SUCCESS; + + if (is_left) { + ins1 = INST(SLLI, op) | IMM_I12(src3w); + src3w = bit_length - src3w; + ins2 = INST(SRLI, op) | IMM_I12(src3w); + } else { + ins1 = INST(SRLI, op) | IMM_I12(src3w); + src3w = bit_length - src3w; + ins2 = INST(SLLI, op) | IMM_I12(src3w); + } + + FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg))); + FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg))); + return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1)); + } + + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w)); + src3 = TMP_REG2; + } else if (dst_reg == src3) { + push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0)); + src3 = TMP_REG2; + } + + if (is_left) { + ins1 = INST(SLL, op); + ins2 = INST(SRLI, op); + ins3 = INST(SRL, op); + } else { + ins1 = INST(SRL, op); + ins2 = INST(SLLI, op); + ins3 = INST(SLL, op); + } + + FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1))); + FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1))); + src2_reg = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3))); + + FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2))); + return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 base = src & REG_MASK; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + 
ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0))); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) { + srcw &= 0x3; + if (SLJIT_UNLIKELY(srcw)) + FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw))); + FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1))); + } else { + if (base && srcw <= I12_MAX && srcw >= I12_MIN) + return push_inst(compiler,PRELD | RJ(base) | IMM_I12(srcw)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + if (base != 0) + FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1))); + } + return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1)); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0)); + + SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2); + break; + case SLJIT_GET_RETURN_ADDRESS: + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw))); + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER) + return -1; + + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ +#define SET_COND(cond) (sljit_ins)(cond << 15) + +#define COND_CUN SET_COND(0x8) /* UN */ +#define COND_CEQ SET_COND(0x4) /* EQ */ +#define COND_CUEQ SET_COND(0xc) /* UN EQ */ +#define COND_CLT SET_COND(0x2) /* LT */ +#define COND_CULT SET_COND(0xa) /* UN LT */ +#define COND_CLE SET_COND(0x6) /* LT EQ */ +#define COND_CULE SET_COND(0xe) /* UN LT EQ */ +#define COND_CNE SET_COND(0x10) /* GT LT */ +#define COND_CUNE SET_COND(0x18) /* UN GT LT */ +#define COND_COR SET_COND(0x14) /* GT LT EQ */ + +#define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? 
inst##_S : inst##_D) +#define FCD(cd) (sljit_ins)(cd & 0x7) +#define FCJ(cj) (sljit_ins)((cj & 0x7) << 5) +#define FCA(ca) (sljit_ins)((ca & 0x7) << 15) +#define F_OTHER_FLAG 1 + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) + +/* convert to inter exact toward zero */ +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; + sljit_u32 word_data = 0; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + switch (GET_OPCODE(op)) + { + case SLJIT_CONV_SW_FROM_F64: + word_data = 1; + inst = FINST(FTINTRZ_L, op); + break; + case SLJIT_CONV_S32_FROM_F64: + inst = FINST(FTINTRZ_W, op); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src))); + FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; + sljit_u32 word_data = 0; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + switch (GET_OPCODE(op)) + { + case SLJIT_CONV_F64_FROM_SW: + word_data = 1; + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L); + break; + case SLJIT_CONV_F64_FROM_S32: + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, (word_data ? 
WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + } else if (src == SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src))); + FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; + sljit_u32 word_data = 0; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + switch (GET_OPCODE(op)) + { + case SLJIT_CONV_F64_FROM_UW: + word_data = 1; + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L); + break; + case SLJIT_CONV_F64_FROM_U32: + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + } else if (src == SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) + srcw = (sljit_u32)srcw; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + if (!word_data) + FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0))); + + FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4))); + + FAIL_IF(push_inst(compiler, (word_data ? 
MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src))); + FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r))); + FAIL_IF(push_inst(compiler, B | IMM_I26(7))); + + FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1))); + FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1))); + FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2))); + FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1))); + FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r))); + FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG))); + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1))); + break; + case SLJIT_UNORDERED_OR_GREATER: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | 
FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1))); + break; + case SLJIT_UNORDERED_OR_LESS: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_UNORDERED_OR_EQUAL: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + default: /* SLJIT_UNORDERED */ + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + } + return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */ + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 
FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = 
TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0)); + src2 = TMP_FREG1; + } + + if (src1 & SLJIT_MEM) { + reg = (dst_freg == src2) ? 
TMP_FREG1 : dst_freg; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0)); + src1 = reg; + } + + return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) + inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg); + else + inst = ((op & SLJIT_32) ? 
MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg); + return push_inst(compiler, inst); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +static sljit_ins get_jump_instruction(sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: + return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO); + case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_STORED: + return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO); + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO); + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO); + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_UNORDERED: + return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO); + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + 
case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED: + return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO); + default: + /* Not conditional branch. */ + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + inst = get_jump_instruction(type); + + if (inst != 0) { + PTR_FAIL_IF(push_inst(compiler, inst)); + jump->flags |= IS_COND; + } + + jump->addr = compiler->size; + inst = JIRL | RJ(TMP_REG1) | IMM_I16(0); + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_CALL; + inst |= RD(RETURN_ADDR_REG); + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += 3; + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_s32 flags; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + 
flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0)); + src2 = TMP_REG2; + } + + if (src1 == SLJIT_IMM) { + if (src1w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1 = TMP_REG1; + } + else + src1 = TMP_ZERO; + } + + if (src2 == SLJIT_IMM) { + if (src2w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w)); + src2 = TMP_REG2; + } + else + src2 = TMP_ZERO; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND)); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + inst = BNE | RJ(src1) | RD(src2); + break; + case SLJIT_NOT_EQUAL: + inst = BEQ | RJ(src1) | RD(src2); + break; + case SLJIT_LESS: + inst = BGEU | RJ(src1) | RD(src2); + break; + case SLJIT_GREATER_EQUAL: + inst = BLTU | RJ(src1) | RD(src2); + break; + case SLJIT_GREATER: + inst = BGEU | RJ(src2) | RD(src1); + break; + case SLJIT_LESS_EQUAL: + inst = BLTU | RJ(src2) | RD(src1); + break; + case SLJIT_SIG_LESS: + inst = BGE | RJ(src1) | RD(src2); + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLT | RJ(src1) | RD(src2); + break; + case SLJIT_SIG_GREATER: + inst = BGE | RJ(src2) | RD(src1); + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLT | RJ(src2) | RD(src1); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += 3; + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src != SLJIT_IMM) { + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0)); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += 3; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_r, dst_r, invert; + sljit_s32 saved_op = op; + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); + + if (type < SLJIT_F_EQUAL) { + src_r = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1))); + src_r = dst_r; + break; + case SLJIT_ATOMIC_STORED: + case SLJIT_ATOMIC_NOT_STORED: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1))); + src_r = dst_r; + invert ^= 0x1; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_r = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1))); + src_r = dst_r; + invert ^= 0x1; + break; + } + } else { + invert = 0; + src_r = OTHER_FLAG; + + switch (type) { + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED: + invert = 1; + break; + } + } + + if (invert) { + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1))); + src_r = dst_r; + } + + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_r, dst, dstw); + + if (src_r != dst_r) + return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0)); + return SLJIT_SUCCESS; + } + + mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_ins *ptr; + sljit_uw size; + sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(dst_reg) | IMM_I12(0))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG2; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + } + + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0))); + } + } + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w)); + } else if (src1 == SLJIT_IMM) { + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; + FAIL_IF(load_immediate(compiler, dst_reg, src1w)); + } else + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0))); + + *ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_s32 invert = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) { + if ((type & ~SLJIT_32) == SLJIT_EQUAL) + invert = 1; + FAIL_IF(push_inst(compiler, MOVGR2CF 
| FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG))); + } + else + FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG))); + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w)); + if (invert) + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(dst_freg) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(dst_freg) | FCA(F_OTHER_FLAG)); + } else { + if (invert) + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG)); + } +} + +#undef FLOAT_DATA + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + if (SLJIT_UNLIKELY(memw != 0)) { + FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw))); + FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK))); + } else + FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem)))); + + mem = TMP_REG1; + memw = 0; + } else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) { + if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw))); + memw &= 0xfff; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + memw = 0; + } + + if (mem & REG_MASK) + FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK))); + + mem = TMP_REG1; + } else { + mem &= REG_MASK; + memw &= 0xfff; + } + + SLJIT_ASSERT((memw >= 0 && memw <= 
I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff)); + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff)); + return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw); + } + + flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0); + + FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw)); + return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff); +} + +#undef TO_ARGW_HI + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features())) + return SLJIT_ERR_UNSUPPORTED; + + switch(GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = LD_BU; + break; + case SLJIT_MOV_U16: + ins = LD_HU; + break; + case SLJIT_MOV32: + ins = LD_W; + break; + case SLJIT_MOV_U32: + ins = LD_WU; + break; + default: + ins = LD_D; + break; + } + + return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins = 0; + sljit_ins unsign = 0; + sljit_s32 tmp = temp_reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features())) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = AMCAS_B; + unsign = BSTRPICK_D | (7 << 16); + break; + case SLJIT_MOV_U16: + ins = AMCAS_H; + unsign = BSTRPICK_D | (15 << 16); + break; + case SLJIT_MOV32: + 
ins = AMCAS_W; + break; + case SLJIT_MOV_U32: + ins = AMCAS_W; + unsign = BSTRPICK_D | (31 << 16); + break; + default: + ins = AMCAS_D; + break; + } + + if (op & SLJIT_SET_ATOMIC_STORED) { + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO))); + tmp = TMP_REG1; + } + FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg))); + if (!(op & SLJIT_SET_ATOMIC_STORED)) + return SLJIT_SUCCESS; + + if (unsign) + FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp))); + + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg))); + return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1)); +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) +{ + SLJIT_UNUSED_ARG(last_ins); + + FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5))); + FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5))); + FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52)))); + return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); + + SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W); + inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5); + + SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D); + inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(sljit_ins)(((new_target >> 32) & 0xfffff) << 5); + + SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D); + inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52); + + SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI 
|| (inst[3] & OPC_2RI16(0x3f)) == JIRL); + if ((inst[3] & OPC_2RI12(0x3ff)) == ORI) + inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target); + else + inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); + + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); + + compiler->size += 3; + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_32.c old mode 100644 new mode 100755 index 1691905db7..9620b945f6 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_32.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_32.c @@ -26,6 +26,49 @@ /* mips 32-bit arch dependent functions. */ +static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op, + sljit_sw src1, sljit_sw src2, sljit_sw dst) +{ + int is_32 = (op & SLJIT_32); + sljit_ins mfhc = MFC1, mthc = MTC1; + sljit_ins src1_r = FS(src1), src2_r = FS(src2), dst_r = FS(dst); + + if (!is_32) { + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + mfhc = MFHC1; + mthc = MTHC1; + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + src1_r |= (1 << 11); + src2_r |= (1 << 11); + dst_r |= (1 << 11); + break; + } + } + + FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG1) | src1_r, DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG2) | src2_r, DR(TMP_REG2))); + if (!is_32 && src1 != dst) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(src1) | FD(dst), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + else + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, XOR | T(TMP_REG1) | D(TMP_REG2) | S(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); + 
FAIL_IF(push_inst(compiler, SLL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, XOR | T(TMP_REG2) | D(TMP_REG1) | S(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, mthc | T(TMP_REG1) | dst_r, MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (mthc == MTC1) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) { if (!(imm & ~0xffff)) @@ -44,33 +87,106 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + struct { +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + sljit_s32 lo; + sljit_s32 hi; +#else /* !SLJIT_LITTLE_ENDIAN */ + sljit_s32 hi; + sljit_s32 lo; +#endif /* SLJIT_LITTLE_ENDIAN */ + } bin; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.bin.lo != 0) + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.bin.lo)); + if (u.bin.hi != 0) + FAIL_IF(load_immediate(compiler, DR(TMP_REG2), u.bin.hi)); + + FAIL_IF(push_inst(compiler, MTC1 | (u.bin.lo != 0 ? T(TMP_REG1) : TA(0)) | FS(freg), MOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + return push_inst(compiler, MTHC1 | (u.bin.hi != 0 ? T(TMP_REG2) : TA(0)) | FS(freg), MOVABLE_INS); +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | (u.bin.hi != 0 ? 
T(TMP_REG2) : TA(0)) | FS(freg) | (1 << 11), MOVABLE_INS)); + break; + } +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { - sljit_s32 reg2; - sljit_ins inst; + sljit_s32 reg2 = 0; + sljit_ins inst = FS(freg); + sljit_ins mthc = MTC1, mfhc = MFC1; + int is_32 = (op & SLJIT_32); CHECK_ERROR(); CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + op = GET_OPCODE(op); if (reg & REG_PAIR_MASK) { reg2 = REG_PAIR_SECOND(reg); reg = REG_PAIR_FIRST(reg); - inst = T(reg2) | FS(freg) | (1 << 11); + inst |= T(reg2); if (op == SLJIT_COPY_TO_F64) FAIL_IF(push_inst(compiler, MTC1 | inst, MOVABLE_INS)); else FAIL_IF(push_inst(compiler, MFC1 | inst, DR(reg2))); + + inst = FS(freg) | (1 << 11); +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + if (cpu_feature_list & CPU_FEATURE_FR) { + mthc = MTHC1; + mfhc = MFHC1; + inst = FS(freg); + } +#endif /* SLJIT_MIPS_REV >= 2 */ } - inst = T(reg) | FS(freg); + inst |= T(reg); + if (!is_32 && !reg2) { + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + mthc = MTHC1; + mfhc = MFHC1; + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + inst |= (1 << 11); + break; + } + } - if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) - return push_inst(compiler, MTC1 | inst, MOVABLE_INS); + if (op == SLJIT_COPY_TO_F64) + FAIL_IF(push_inst(compiler, mthc | inst, MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, mfhc | inst, DR(reg))); - return push_inst(compiler, MFC1 | inst, DR(reg)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (mthc == MTC1 || mfhc == MFC1) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw 
new_target, sljit_sw executable_offset) @@ -103,6 +219,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_ins ins = NOP; sljit_u8 offsets[4]; sljit_u8 *offsets_ptr = offsets; +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + sljit_ins f64_hi = TA(7), f64_lo = TA(6); +#else + sljit_ins f64_hi = TA(6), f64_lo = TA(7); +#endif /* SLJIT_LITTLE_ENDIAN */ SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); @@ -167,20 +288,28 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: - if (*offsets_ptr < 4 * sizeof (sljit_sw)) { + if (*offsets_ptr < 4 * sizeof(sljit_sw)) { if (prev_ins != NOP) FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); /* Must be preceded by at least one other argument, * and its starting offset must be 8 because of alignment. */ SLJIT_ASSERT((*offsets_ptr >> 2) == 2); - - prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11); - ins = MFC1 | TA(7) | FS(float_arg_count); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + prev_ins = MFHC1 | f64_hi | FS(float_arg_count); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + prev_ins = MFC1 | f64_hi | FS(float_arg_count) | (1 << 11); + break; + } + ins = MFC1 | f64_lo | FS(float_arg_count); } else if (*offsets_ptr < 254) ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); else if (*offsets_ptr == 254) - ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1); float_arg_count--; break; @@ -190,7 +319,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t else if (*offsets_ptr < 254) ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); else if (*offsets_ptr == 254) - ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | 
FD(TMP_FREG1); float_arg_count--; break; @@ -314,7 +443,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); else if (src != PIC_ADDR_REG) FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_64.c old mode 100644 new mode 100755 index a29fe0730d..52a0d3fb7a --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_64.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_64.c @@ -26,6 +26,23 @@ /* mips 64-bit arch dependent functions. */ +static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_s32 src2, sljit_s32 dst) +{ + FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG1) | FS(src1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG2) | FS(src2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG2) | T(TMP_REG1) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | T(TMP_REG1) | FS(dst), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (!(op & SLJIT_32)) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) { sljit_s32 shift = 32; @@ -128,6 +145,35 
@@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) { + FAIL_IF(push_inst(compiler, DMTC1 | TA(0) | FS(freg), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; + } + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.imm)); + FAIL_IF(push_inst(compiler, DMTC1 | T(TMP_REG1) | FS(freg), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { @@ -139,9 +185,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi inst = T(reg) | FS(freg); if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) - return push_inst(compiler, ((op & SLJIT_32) ? MTC1 : DMTC1) | inst, MOVABLE_INS); + FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | inst, MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | inst, DR(reg))); - return push_inst(compiler, ((op & SLJIT_32) ? 
MFC1 : DMFC1) | inst, DR(reg)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (!(op & SLJIT_32)) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) @@ -199,17 +251,17 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: if (arg_count != float_arg_count) - ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count); + ins = MOV_fmt(FMT_D) | FS(float_arg_count) | FD(arg_count); else if (arg_count == 1) - ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1); arg_count--; float_arg_count--; break; case SLJIT_ARG_TYPE_F32: if (arg_count != float_arg_count) - ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); + ins = MOV_fmt(FMT_S) | FS(float_arg_count) | FD(arg_count); else if (arg_count == 1) - ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | FD(TMP_FREG1); arg_count--; float_arg_count--; break; @@ -316,7 +368,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); else if (src != PIC_ADDR_REG) FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_common.c old mode 100644 new mode 100755 index 2b00d4f16d..eda4a1a64e --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_common.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_common.c @@ -26,9 +26,12 @@ /* Latest MIPS architecture. 
*/ -#ifndef __mips_hard_float +#ifdef HAVE_PRCTL +#include +#endif + +#if !defined(__mips_hard_float) || defined(__mips_single_float) /* Disable automatic detection, covers both -msoft-float and -mno-float */ -#undef SLJIT_IS_FPU_AVAILABLE #define SLJIT_IS_FPU_AVAILABLE 0 #endif @@ -42,6 +45,14 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) return "MIPS64-R6" SLJIT_CPUINFO; #endif /* SLJIT_CONFIG_MIPS_32 */ +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 5) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R5" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R5" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + #elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -83,27 +94,31 @@ typedef sljit_u32 sljit_ins; #define EQUAL_FLAG 3 #define OTHER_FLAG 1 +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31, 3, 1 +}; + #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) #define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3) -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31 -}; - #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { - 0, 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, 12, 10, 16 +static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3) << 1) + 1] = { + 0, + 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, + 12, 10, 16, + 1, 15, 3, 5, 7, 9, 19, 31, 29, 27, 25, 23, 21, + 13, 11, 17 }; -#else +#else /* !SLJIT_CONFIG_MIPS_32 */ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { 0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10 }; -#endif +#endif /* SLJIT_CONFIG_MIPS_32 */ /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -200,12 +215,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define DMULTU (HI(0) | LO(29)) #endif /* SLJIT_MIPS_REV >= 6 */ #define DIV_S (HI(17) | FMT_S | LO(3)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 #define DINSU (HI(31) | LO(6)) -#define DMFC1 (HI(17) | (1 << 21) | LO(0)) -#define DMTC1 (HI(17) | (5 << 21) | LO(0)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#define DMFC1 (HI(17) | (1 << 21)) +#define DMTC1 (HI(17) | (5 << 21)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 #define DROTR (HI(0) | (1 << 21) | LO(58)) #define DROTR32 (HI(0) | (1 << 21) | LO(62)) #define DROTRV (HI(0) | (1 << 6) | LO(22)) +#define DSBH (HI(31) | (2 << 6) | LO(36)) +#define DSHD (HI(31) | (5 << 6) | LO(36)) +#endif /* SLJIT_MIPS_REV >= 2 */ #define DSLL (HI(0) | LO(56)) #define DSLL32 (HI(0) | LO(60)) #define DSLLV (HI(0) | LO(20)) @@ -233,7 +254,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define LWL (HI(34)) #define LWR (HI(38)) #define LWC1 (HI(49)) -#define MFC1 (HI(17) | (0 << 21)) +#define MFC1 (HI(17)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define MFHC1 (HI(17) | (3 << 21)) +#endif /* SLJIT_MIPS_REV >= 2 */ #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) #define MOD (HI(0) | (3 << 6) | LO(26)) #define MODU (HI(0) | (3 << 6) | LO(27)) @@ -241,8 +265,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define MFHI (HI(0) | LO(16)) #define MFLO (HI(0) | LO(18)) #endif /* SLJIT_MIPS_REV >= 6 */ -#define MOV_S (HI(17) | FMT_S | LO(6)) #define MTC1 (HI(17) | (4 << 21)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define MTHC1 (HI(17) | (7 << 21)) +#endif /* SLJIT_MIPS_REV >= 2 */ #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) #define MUH (HI(0) 
| (3 << 6) | LO(24)) #define MUHU (HI(0) | (3 << 6) | LO(25)) @@ -258,8 +284,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define NOR (HI(0) | LO(39)) #define OR (HI(0) | LO(37)) #define ORI (HI(13)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 #define ROTR (HI(0) | (1 << 21) | LO(2)) #define ROTRV (HI(0) | (1 << 6) | LO(6)) +#endif /* SLJIT_MIPS_REV >= 2 */ #define SD (HI(63)) #define SDL (HI(44)) #define SDR (HI(45)) @@ -281,6 +309,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define SWR (HI(46)) #define SWC1 (HI(57)) #define TRUNC_W_S (HI(17) | FMT_S | LO(13)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define WSBH (HI(31) | (2 << 6) | LO(32)) +#endif /* SLJIT_MIPS_REV >= 2 */ #define XOR (HI(0) | LO(38)) #define XORI (HI(14)) @@ -291,15 +322,21 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #else /* SLJIT_MIPS_REV < 6 */ #define DCLZ (HI(28) | LO(36)) #define MOVF (HI(0) | (0 << 16) | LO(1)) +#define MOVF_S (HI(17) | FMT_S | (0 << 16) | LO(17)) #define MOVN (HI(0) | LO(11)) +#define MOVN_S (HI(17) | FMT_S | LO(19)) #define MOVT (HI(0) | (1 << 16) | LO(1)) +#define MOVT_S (HI(17) | FMT_S | (1 << 16) | LO(17)) #define MOVZ (HI(0) | LO(10)) +#define MOVZ_S (HI(17) | FMT_S | LO(18)) #define MUL (HI(28) | LO(2)) #endif /* SLJIT_MIPS_REV >= 6 */ #define PREF (HI(51)) #define PREFX (HI(19) | LO(15)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 #define SEB (HI(31) | (16 << 6) | LO(32)) #define SEH (HI(31) | (24 << 6) | LO(32)) +#endif /* SLJIT_MIPS_REV >= 2 */ #endif /* SLJIT_MIPS_REV >= 1 */ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -320,10 +357,107 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define LOAD_W LD #endif +#define MOV_fmt(f) (HI(17) | f | LO(6)) + #define SIMM_MAX (0x7fff) #define SIMM_MIN (-0x8000) #define UIMM_MAX (0xffff) +#define CPU_FEATURE_DETECTED (1 << 0) +#define CPU_FEATURE_FPU (1 
<< 1) +#define CPU_FEATURE_FP64 (1 << 2) +#define CPU_FEATURE_FR (1 << 3) + +static sljit_u32 cpu_feature_list = 0; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32) +{ + if (compiler->scratches == -1) + return 0; + + if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) + fr -= SLJIT_F64_SECOND(0); + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) + || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); +} + +#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_ARGUMENT_CHECKS */ + +static void get_cpu_features(void) +{ +#if !defined(SLJIT_IS_FPU_AVAILABLE) && defined(__GNUC__) + sljit_u32 fir = 0; +#endif /* !SLJIT_IS_FPU_AVAILABLE && __GNUC__ */ + sljit_u32 feature_list = CPU_FEATURE_DETECTED; + +#if defined(SLJIT_IS_FPU_AVAILABLE) +#if SLJIT_IS_FPU_AVAILABLE + feature_list |= CPU_FEATURE_FPU; +#if SLJIT_IS_FPU_AVAILABLE == 64 + feature_list |= CPU_FEATURE_FP64; +#endif /* SLJIT_IS_FPU_AVAILABLE == 64 */ +#endif /* SLJIT_IS_FPU_AVAILABLE */ +#elif defined(__GNUC__) + __asm__ ("cfc1 %0, $0" : "=r"(fir)); + if ((fir & (0x3 << 16)) == (0x3 << 16)) + feature_list |= CPU_FEATURE_FPU; + +#if (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \ + && (!defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV < 2) + if ((feature_list & CPU_FEATURE_FPU)) + feature_list |= CPU_FEATURE_FP64; +#else /* SLJIT_CONFIG_MIPS32 || SLJIT_MIPS_REV >= 2 */ + if ((fir & (1 << 22))) + feature_list |= CPU_FEATURE_FP64; +#endif /* SLJIT_CONFIG_MIPS_64 && SLJIT_MIPS_REV < 2 */ +#endif /* SLJIT_IS_FPU_AVAILABLE */ + + if ((feature_list & CPU_FEATURE_FPU) && (feature_list & CPU_FEATURE_FP64)) { +#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32 +#if defined(SLJIT_MIPS_REV) && 
SLJIT_MIPS_REV >= 6 + feature_list |= CPU_FEATURE_FR; +#elif defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 0 +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 5 + feature_list |= CPU_FEATURE_FR; +#endif /* SLJIT_MIPS_REV >= 5 */ +#else + sljit_s32 flag = -1; +#ifndef FR_GET_FP_MODE + sljit_f64 zero = 0.0; +#else /* PR_GET_FP_MODE */ + flag = prctl(PR_GET_FP_MODE); + + if (flag > 0) + feature_list |= CPU_FEATURE_FR; +#endif /* FP_GET_PR_MODE */ +#if ((defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 2) \ + || (!defined(PR_GET_FP_MODE) && (!defined(SLJIT_DETECT_FR) || SLJIT_DETECT_FR >= 1))) \ + && (defined(__GNUC__) && (defined(__mips) && __mips >= 2)) + if (flag < 0) { + __asm__ (".set oddspreg\n" + "lwc1 $f17, %0\n" + "ldc1 $f16, %1\n" + "swc1 $f17, %0\n" + : "+m" (flag) : "m" (zero) : "$f16", "$f17"); + if (flag) + feature_list |= CPU_FEATURE_FR; + } +#endif /* (!PR_GET_FP_MODE || (PR_GET_FP_MODE && SLJIT_DETECT_FR == 2)) && __GNUC__ */ +#endif /* SLJIT_MIPS_REV >= 6 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ + /* StatusFR=1 is the only mode supported by the code in MIPS64 */ + feature_list |= CPU_FEATURE_FR; +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + cpu_feature_list = feature_list; +} + /* dest_reg is the absolute name of the register Useful for reordering instructions in the delay slot. 
*/ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) @@ -370,7 +504,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } @@ -501,72 +635,63 @@ static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) { - if (max_label < 0x80000000l) { - put_label->flags = PATCH_ABS32; + sljit_uw addr; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (addr < 0x80000000l) { + jump->flags |= PATCH_ABS32; return 1; } - if (max_label < 0x800000000000l) { - put_label->flags = PATCH_ABS48; + if (addr < 0x800000000000l) { + jump->flags |= PATCH_ABS48; return 3; } - put_label->flags = 0; return 5; } #endif /* SLJIT_CONFIG_MIPS_64 */ -static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump) { - struct sljit_jump *jump; - struct sljit_put_label *put_label; - sljit_uw flags; - sljit_ins *inst; - sljit_uw addr; - - if (reg != 0) { - jump = (struct sljit_jump*)dst; - flags = jump->flags; - inst = (sljit_ins*)jump->addr; - addr = (flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; - } else { - put_label = (struct sljit_put_label*)dst; -#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - flags = put_label->flags; -#endif - inst = (sljit_ins*)put_label->addr; - addr = put_label->label->addr; - reg = *inst; - } + sljit_uw flags = jump->flags; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : PIC_ADDR_REG; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - inst[0] = LUI | T(reg) | IMM(addr >> 16); + ins[0] = LUI | T(reg) | IMM(addr >> 16); #else /* !SLJIT_CONFIG_MIPS_32 */ if (flags & PATCH_ABS32) { SLJIT_ASSERT(addr < 0x80000000l); - inst[0] = LUI | T(reg) | IMM(addr >> 16); + ins[0] = LUI | T(reg) | IMM(addr >> 16); } else if (flags & PATCH_ABS48) { SLJIT_ASSERT(addr < 0x800000000000l); - inst[0] = LUI | T(reg) | IMM(addr >> 32); - inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); - inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); - inst += 2; + ins[0] = LUI | T(reg) | IMM(addr >> 32); + ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins += 2; } else { - inst[0] = LUI | T(reg) | IMM(addr >> 48); - inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff); - inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); - inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); - inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); - inst += 4; + ins[0] = LUI | T(reg) | IMM(addr >> 48); + ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff); + ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + ins[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins += 4; } #endif /* SLJIT_CONFIG_MIPS_32 */ - inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); + ins[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); } SLJIT_API_FUNC_ATTRIBUTE void* 
sljit_generate_code(struct sljit_compiler *compiler) @@ -577,14 +702,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_uw addr; - struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); @@ -596,58 +719,61 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; - next_addr = 0; executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. 
*/ - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - word_count += 2; -#else - word_count += 6; -#endif - jump->addr = (sljit_uw)(code_ptr - 1); - code_ptr = detect_jump_type(jump, code, executable_offset); + word_count += 2; +#else /* !SLJIT_CONFIG_MIPS_32 */ + word_count += 6; +#endif /* SLJIT_CONFIG_MIPS_32 */ + jump->addr = (sljit_uw)(code_ptr - 1); + code_ptr = detect_jump_type(jump, code, executable_offset); + } else { + jump->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + code_ptr += 1; + word_count += 1; +#else /* !SLJIT_CONFIG_MIPS_32 */ + code_ptr += mov_addr_get_length(jump, code, executable_offset); + word_count += 5; +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + jump = jump->next; - } - if (const_ && const_->addr == word_count) { + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - code_ptr += 1; - word_count += 1; -#else - code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - word_count += 5; -#endif - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + 
SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -657,7 +783,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->u.addr = (sljit_uw)code_ptr; label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -665,13 +791,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { @@ -687,15 +812,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } - load_addr_to_reg(jump, PIC_ADDR_REG); + load_addr_to_reg(jump); } while (0); - jump = jump->next; - } - put_label = compiler->put_labels; - while (put_label) { - load_addr_to_reg(put_label, 0); - put_label = put_label->next; + jump = jump->next; } compiler->error = SLJIT_ERR_COMPILED; @@ -717,20 +837,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) { -#if defined(__GNUC__) && !defined(SLJIT_IS_FPU_AVAILABLE) - sljit_sw fir = 0; -#endif /* __GNUC__ && !SLJIT_IS_FPU_AVAILABLE */ - switch (feature_type) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && (!defined(SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE) + case SLJIT_HAS_F64_AS_F32_PAIR: + if (!cpu_feature_list) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_FR) != 0; +#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_IS_FPU_AVAILABLE */ case SLJIT_HAS_FPU: -#ifdef SLJIT_IS_FPU_AVAILABLE - return 
SLJIT_IS_FPU_AVAILABLE; -#elif defined(__GNUC__) - __asm__ ("cfc1 %0, $0" : "=r"(fir)); - return (fir >> 22) & 0x1; -#else -#error "FIR check is not implemented for this architecture" -#endif + if (!cpu_feature_list) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_FPU) != 0; case SLJIT_HAS_ZERO_REGISTER: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: @@ -745,6 +865,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) return 2; #endif /* SLJIT_MIPS_REV >= 1 */ #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + case SLJIT_HAS_REV: case SLJIT_HAS_ROT: return 1; #endif /* SLJIT_MIPS_REV >= 2 */ @@ -755,7 +876,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) { - return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); + SLJIT_UNUSED_ARG(type); + return 0; } /* --------------------------------------------------------------------- */ @@ -795,6 +917,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SELECT_OP(a, b) (b) +#else +#define SELECT_OP(a, b) (!(op & SLJIT_32) ? 
a : b) +#endif + #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #include "sljitNativeMIPS_32.c" #else @@ -922,10 +1050,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (word_arg_count == 0 && float_arg_count <= 2) { if (float_arg_count == 1) - FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); } else if (arg_count < 4) { FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + FAIL_IF(push_inst(compiler, MTHC1 | TA(5 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS)); + break; + } } else FAIL_IF(push_inst(compiler, LDC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); arg_count++; @@ -935,7 +1072,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (word_arg_count == 0 && float_arg_count <= 2) { if (float_arg_count == 1) - FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); } else if (arg_count < 4) FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); else @@ -970,16 +1107,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi case SLJIT_ARG_TYPE_F64: float_arg_count++; if (arg_count != float_arg_count) - FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + 
FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); else if (arg_count == 1) - FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); break; case SLJIT_ARG_TYPE_F32: float_arg_count++; if (arg_count != float_arg_count) - FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); else if (arg_count == 1) - FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); break; default: word_arg_count++; @@ -1142,7 +1279,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c FAIL_IF(emit_stack_frame_release(compiler, 1, &ins)); - if (!(src & SLJIT_IMM)) { + if (src != SLJIT_IMM) { FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); return push_inst(compiler, ins, UNMOVABLE_INS); } @@ -1392,16 +1529,12 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SELECT_OP(a, b) (b) - #define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \ op_imm = (imm); \ op_v = (v); #else /* !SLJIT_CONFIG_MIPS_32 */ -#define SELECT_OP(a, b) \ - (!(op & SLJIT_32) ? a : b) #define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \ op_dimm = (dimm); \ @@ -1418,10 +1551,10 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj { sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ); #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - sljit_ins max = (op & SLJIT_32) ? 32 : 64; -#else /* !SLJIT_CONFIG_RISCV_64 */ - sljit_ins max = 32; -#endif /* SLJIT_CONFIG_RISCV_64 */ + sljit_ins word_size = (op & SLJIT_32) ? 
32 : 64; +#else /* !SLJIT_CONFIG_MIPS_64 */ + sljit_ins word_size = 32; +#endif /* SLJIT_CONFIG_MIPS_64 */ /* The TMP_REG2 is the next value. */ if (src != TMP_REG2) @@ -1429,7 +1562,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG2) | TA(0) | IMM(is_clz ? 13 : 14), UNMOVABLE_INS)); /* The OTHER_FLAG is the counter. Delay slot. */ - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(max), OTHER_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(word_size), OTHER_FLAG)); if (!is_clz) { FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG2) | T(TMP_REG1) | IMM(1), DR(TMP_REG1))); @@ -1441,7 +1574,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(0), OTHER_FLAG)); /* The TMP_REG1 is the next shift. */ - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(TMP_REG1) | IMM(max), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(TMP_REG1) | IMM(word_size), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(TMP_REG2) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); @@ -1465,18 +1598,39 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) { - SLJIT_UNUSED_ARG(op); +#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64 + int is_32 = (op & SLJIT_32); +#endif /* SLJIT_CONFIG_MIPS_64 */ + op = GET_OPCODE(op); +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64 + if (!is_32 && (op == SLJIT_REV)) { + FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst))); + 
return push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst)); + } + if (op != SLJIT_REV && src != TMP_REG2) { + FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG1), DR(TMP_REG1))); + src = TMP_REG1; + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, ROTR | T(dst) | D(dst) | SH_IMM(16), DR(dst))); +#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64 + if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3) + FAIL_IF(push_inst(compiler, DINSU | T(dst) | SA(0) | (31 << 11), DR(dst))); +#endif /* SLJIT_CONFIG_MIPS_64 */ +#else /* SLJIT_MIPS_REV < 2 */ #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (!(op & SLJIT_32)) { + if (!is_32) { FAIL_IF(push_inst(compiler, DSRL32 | T(src) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, ORI | SA(0) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG)); FAIL_IF(push_inst(compiler, DSLL32 | T(src) | D(dst) | SH_IMM(0), DR(dst))); FAIL_IF(push_inst(compiler, DSLL32 | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(0), OTHER_FLAG)); FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG)); FAIL_IF(push_inst(compiler, DSRL | T(dst) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG)); FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, DSLL | TA(OTHER_FLAG) | DA(EQUAL_FLAG) | SH_IMM(8), EQUAL_FLAG)); @@ -1490,6 +1644,11 @@ static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(8), DR(dst))); return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); } + + if (op != SLJIT_REV && src != 
TMP_REG2) { + FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG2) | SH_IMM(0), DR(TMP_REG2))); + src = TMP_REG2; + } #endif /* SLJIT_CONFIG_MIPS_64 */ FAIL_IF(push_inst(compiler, SRL | T(src) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1))); @@ -1502,7 +1661,37 @@ static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, SLL | T(dst) | D(dst) | SH_IMM(8), DR(dst))); + FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3) { + FAIL_IF(push_inst(compiler, DSLL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst))); + FAIL_IF(push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst))); + } +#endif /* SLJIT_CONFIG_MIPS_64 */ +#endif /* SLJIT_MIPS_REV >= 2 */ + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32 + FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst))); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst))); +#endif /* SLJIT_CONFIG_MIPS_32 */ + if (GET_OPCODE(op) == SLJIT_REV_U16) + return push_inst(compiler, ANDI | S(dst) | T(dst) | 0xffff, DR(dst)); + else + return push_inst(compiler, SEH | T(dst) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 2 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(src) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(src) | D(dst) | SH_IMM(24), DR(dst))); + FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG1) | T(TMP_REG1) | 0xff, DR(TMP_REG1))); +
FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SELECT_OP(DSRL32, SRL) : SELECT_OP(DSRA32, SRA)) | T(dst) | D(dst) | SH_IMM(16), DR(dst))); return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ } static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, @@ -1532,17 +1721,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); -#else /* SLJIT_MIPS_REV < 1 */ +#else /* SLJIT_MIPS_REV < 2 */ FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ +#endif /* SLJIT_MIPS_REV >= 2 */ #else /* !SLJIT_CONFIG_MIPS_32 */ -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) if (op & SLJIT_32) return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ +#endif /* SLJIT_MIPS_REV >= 2 */ FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); #endif /* SLJIT_CONFIG_MIPS_32 */ @@ -1561,17 +1750,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) return 
push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); -#else /* SLJIT_MIPS_REV < 1 */ +#else /* SLJIT_MIPS_REV < 2 */ FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ +#endif /* SLJIT_MIPS_REV >= 2 */ #else /* !SLJIT_CONFIG_MIPS_32 */ -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) if (op & SLJIT_32) return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ +#endif /* SLJIT_MIPS_REV >= 2 */ FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); #endif /* SLJIT_CONFIG_MIPS_32 */ @@ -1585,7 +1774,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) if (dst == src2) - return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst)); + return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11), DR(dst)); #endif /* SLJIT_MIPS_REV >= 2 */ FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); @@ -1630,9 +1819,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #endif /* SLJIT_MIPS_REV >= 1 */ case SLJIT_REV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && src2 != TMP_REG1 && dst != TMP_REG1); return emit_rev(compiler, op, dst, src2); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return emit_rev16(compiler, op, dst, src2); + case SLJIT_ADD: /* Overflow computation (both add 
and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; @@ -2080,9 +2276,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 compiler->cache_argw = 0; } - if (dst == TMP_REG2) { + if (dst == 0) { SLJIT_ASSERT(HAS_FLAGS(op)); flags |= UNUSED_DEST; + dst = TMP_REG2; } else if (FAST_IS_REG(dst)) { dst_r = dst; @@ -2094,10 +2291,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= SLOW_DEST; if (flags & IMM_OP) { - if ((src2 & SLJIT_IMM) && src2w != 0 && CHECK_IMM(flags, src2w)) { + if (src2 == SLJIT_IMM && src2w != 0 && CHECK_IMM(flags, src2w)) { flags |= SRC2_IMM; src2_r = src2w; - } else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && CHECK_IMM(flags, src1w)) { + } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && CHECK_IMM(flags, src1w)) { flags |= SRC2_IMM; src2_r = src1w; @@ -2114,7 +2311,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src1_r = src1; flags |= REG1_SOURCE; } - else if (src1 & SLJIT_IMM) { + else if (src1 == SLJIT_IMM) { if (src1w) { FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); src1_r = TMP_REG1; @@ -2137,7 +2334,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = (sljit_s32)src2_r; } - else if (src2 & SLJIT_IMM) { + else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); @@ -2325,29 +2522,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) case SLJIT_MOV_U32: - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_u32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: case SLJIT_MOV32: - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif case SLJIT_MOV_U8: - return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U32: + case SLJIT_REV_S32: + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); } SLJIT_UNREACHABLE(); @@ -2370,9 +2575,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (op & SLJIT_32) { flags |= INT_DATA | SIGNED_DATA; - if (src1 & SLJIT_IMM) + if (src1 == SLJIT_IMM) src1w = (sljit_s32)src1w; - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) src2w = (sljit_s32)src2w; } #endif @@ -2393,7 +2598,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_XOR: - if (((src1 & SLJIT_IMM) && src1w == -1) || ((src2 & SLJIT_IMM) && src2w == -1)) { + if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) { return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); } /* fallthrough */ @@ -2410,10 +2615,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_ROTL: case SLJIT_ROTR: #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) src2w &= 0x1f; #else - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { if (op & SLJIT_32) src2w &= 0x1f; else @@ -2435,7 +2640,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, 
src2, src2w); } #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) @@ -2474,7 +2679,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * ADJUST_LOCAL_OFFSET(src3, src3w); - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { src3w &= bit_length - 1; if (src3w == 0) @@ -2591,21 +2796,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; -} + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER) + return -1; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return FR(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -2617,14 +2825,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* --------------------------------------------------------------------- */ #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) -#define FMT(op) ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) << (21 - 8)) +#define FMT(op) (FMT_S | (~(sljit_ins)op & SLJIT_32) << (21 - (5 + 3))) static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags (sljit_u32)0 + sljit_u32 flags = 0; #else sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == 
SLJIT_CONV_SW_FROM_F64)) << 21; #endif @@ -2638,18 +2846,13 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp if (FAST_IS_REG(dst)) { FAIL_IF(push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS)); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif +#endif /* MIPS III */ return SLJIT_SUCCESS; } - /* Store the integer value from a VFP register. */ return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0); - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags -#endif } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, @@ -2657,43 +2860,158 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags (sljit_u32)0 + sljit_u32 flags = 0; #else sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; #endif - sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - if (FAST_IS_REG(src)) { - FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif - } else if (src & SLJIT_MEM) { - /* Load the integer value into a VFP register. */ + if (src & SLJIT_MEM) FAIL_IF(emit_op_mem2(compiler, (flags ? 
DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); - } else { + if (src == SLJIT_IMM) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - srcw = (sljit_s32)srcw; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; #endif - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); - FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif +#endif /* MIPS III */ } - FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); if (dst & SLJIT_MEM) return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); return SLJIT_SUCCESS; +} +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags + sljit_u32 flags = 0; +#else + sljit_u32 flags = 1 << 21; #endif + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW ? 
WORD_DATA : INT_DATA) | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw)); + src = TMP_REG1; + } else if (src == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) + srcw = (sljit_u32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + src = TMP_REG1; + } + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + if (src != TMP_REG1) { + FAIL_IF(push_inst(compiler, DSLL32 | T(src) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, DSRL32 | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1))); + } + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; + } +#else /* !SLJIT_CONFIG_MIPS_64 */ + if (!(op & SLJIT_32)) { + FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG2) | SH_IMM(1), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(1), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG2) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | 1 | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, BGEZ | S(src) | 5, UNMOVABLE_INS)); +#else /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, BGEZ | S(src) | 4, UNMOVABLE_INS)); +#endif /* SLJIT_MIPS_REV <= 1 */ + + FAIL_IF(push_inst(compiler, LUI |
T(TMP_REG2) | IMM(0x41e0), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, MTC1 | TA(0) | FS(TMP_FREG2), UNMOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + FAIL_IF(push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(TMP_FREG2), UNMOVABLE_INS)); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(TMP_FREG2) | (1 << 11), UNMOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + break; + } + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(TMP_FREG2) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, BLTZ | S(src) | 5, UNMOVABLE_INS)); +#else /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, BLTZ | S(src) | 4, UNMOVABLE_INS)); +#endif /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, ANDI | S(src) | T(TMP_REG2) | IMM(1), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, BEQ | 6, UNMOVABLE_INS)); +#else /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, BEQ | 5, UNMOVABLE_INS)); +#endif /* SLJIT_MIPS_REV < 1 */ + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, DSRL | T(src) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); +#else /* !SLJIT_CONFIG_MIPS_64 */ + 
FAIL_IF(push_inst(compiler, SRL | T(src) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + + FAIL_IF(push_inst(compiler, OR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(dst_r) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, @@ -2772,7 +3090,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MOV_fmt(FMT(op)) | FS(src) | FD(dst_r), MOVABLE_INS)); else dst_r = src; } @@ -2853,18 +3171,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil case SLJIT_ADD_F64: FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_SUB_F64: FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_MUL_F64: FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; + case SLJIT_COPYSIGN_F64: + return emit_copysign(compiler, op, src1, src2, dst_r); } if (dst_r == TMP_FREG2) @@ -2873,8 +3190,25 @@ SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return SLJIT_SUCCESS; } -#undef FLOAT_DATA -#undef FMT +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, MTC1 | TA(0) | FS(freg), MOVABLE_INS); + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.imm)); + return push_inst(compiler, MTC1 | T(TMP_REG1) | FS(freg), MOVABLE_INS); +} /* --------------------------------------------------------------------- */ /* Conditional instructions */ @@ -3032,7 +3366,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile } #define RESOLVE_IMM1() \ - if (src1 & SLJIT_IMM) { \ + if (src1 == SLJIT_IMM) { \ if (src1w) { \ PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \ src1 = TMP_REG1; \ @@ -3042,7 +3376,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile } #define RESOLVE_IMM2() \ - if (src2 & SLJIT_IMM) { \ + if (src2 == SLJIT_IMM) { \ if (src2w) { \ PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ src2 = TMP_REG2; \ @@ -3094,10 +3428,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) jump->flags |= IS_MOVABLE; PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? 
BNE : BEQ) | S(src1) | T(src2) | BRANCH_LENGTH, UNMOVABLE_INS)); - } - else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { + } else if (type >= SLJIT_SIG_LESS && ((src1 == SLJIT_IMM && src1w == 0) || (src2 == SLJIT_IMM && src2w == 0))) { inst = NOP; - if ((src1 & SLJIT_IMM) && (src1w == 0)) { + if (src1 == SLJIT_IMM && src1w == 0) { RESOLVE_IMM2(); switch (type) { case SLJIT_SIG_LESS: @@ -3145,7 +3478,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler else { if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) { RESOLVE_IMM1(); - if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN) + if (src2 == SLJIT_IMM && src2w <= SIMM_MAX && src2w >= SIMM_MIN) PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); else { RESOLVE_IMM2(); @@ -3155,7 +3488,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } else { RESOLVE_IMM2(); - if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN) + if (src1 == SLJIT_IMM && src1w <= SIMM_MAX && src1w >= SIMM_MIN) PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? 
SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); else { RESOLVE_IMM1(); @@ -3190,9 +3523,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler #undef BR_T #undef BR_F -#undef FLOAT_DATA -#undef FMT - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump = NULL; @@ -3200,7 +3530,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); @@ -3232,8 +3562,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi #endif } - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - return SLJIT_SUCCESS; + return push_inst(compiler, NOP, UNMOVABLE_INS); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, @@ -3335,50 +3664,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0); } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + +static sljit_ins get_select_cc(sljit_s32 type, sljit_s32 is_float) { -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) - sljit_ins ins; -#endif /* SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6 */ - - CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) - - if (SLJIT_UNLIKELY(src & SLJIT_IMM)) 
{ -#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (type & SLJIT_32) - srcw = (sljit_s32)srcw; -#endif - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); - src = TMP_REG1; - srcw = 0; - } - switch (type & ~SLJIT_32) { case SLJIT_EQUAL: - ins = MOVZ | TA(EQUAL_FLAG); - break; + return (is_float ? MOVZ_S : MOVZ) | TA(EQUAL_FLAG); case SLJIT_NOT_EQUAL: - ins = MOVN | TA(EQUAL_FLAG); - break; + return (is_float ? MOVN_S : MOVN) | TA(EQUAL_FLAG); case SLJIT_LESS: case SLJIT_GREATER: case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: - ins = MOVN | TA(OTHER_FLAG); - break; + case SLJIT_CARRY: + return (is_float ? MOVN_S : MOVN) | TA(OTHER_FLAG); case SLJIT_GREATER_EQUAL: case SLJIT_LESS_EQUAL: case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: - ins = MOVZ | TA(OTHER_FLAG); - break; + case SLJIT_NOT_CARRY: + return (is_float ? MOVZ_S : MOVZ) | TA(OTHER_FLAG); case SLJIT_F_EQUAL: case SLJIT_F_LESS: case SLJIT_F_LESS_EQUAL: @@ -3389,8 +3697,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil case SLJIT_UNORDERED_OR_LESS_EQUAL: case SLJIT_ORDERED_LESS_EQUAL: case SLJIT_UNORDERED: - ins = MOVT; - break; + return is_float ? MOVT_S : MOVT; case SLJIT_F_NOT_EQUAL: case SLJIT_F_GREATER_EQUAL: case SLJIT_F_GREATER: @@ -3401,21 +3708,159 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil case SLJIT_ORDERED_GREATER: case SLJIT_UNORDERED_OR_GREATER: case SLJIT_ORDERED: - ins = MOVF; - break; + return is_float ? MOVF_S : MOVF; default: - ins = MOVZ | TA(OTHER_FLAG); SLJIT_UNREACHABLE(); - break; + return (is_float ? 
MOVZ_S : MOVZ) | TA(OTHER_FLAG); + } +} + +#endif /* SLJIT_MIPS_REV >= 1 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_ins mov_ins = (type & SLJIT_32) ? ADDU : DADDU; +#else /* !SLJIT_CONFIG_MIPS_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; + sljit_ins mov_ins = ADDU; +#endif /* SLJIT_CONFIG_MIPS_64 */ + +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + struct sljit_label *label; + struct sljit_jump *jump; +#endif /* !(SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + ADJUST_LOCAL_OFFSET(src1, src1w); + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src1, src1w)); + src1 = TMP_REG2; + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1 = TMP_REG1; } - return push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg)); + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg))); + } + + return push_inst(compiler, get_select_cc(type, 0) | S(src1) | D(dst_reg), DR(dst_reg)); #else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */ - return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + 
FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG2; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + } + + FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg))); + } + } + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); + FAIL_IF(!jump); + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(dst_reg), src1, src1w)); + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(load_immediate(compiler, DR(dst_reg), src1w)); + } else + FAIL_IF(push_inst(compiler, mov_ins | S(src1) | TA(0) | D(dst_reg), DR(dst_reg))); + + SLJIT_SKIP_CHECKS(compiler); + label = sljit_emit_label(compiler); + FAIL_IF(!label); + + sljit_set_label(jump, label); + return SLJIT_SUCCESS; #endif /* SLJIT_MIPS_REV >= 1 */ } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + struct sljit_label *label; + struct sljit_jump *jump; +#endif /* !(SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src2_freg) | FD(dst_freg), MOVABLE_INS)); + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, 
FR(TMP_FREG1), src1, src1w)); + src1 = TMP_FREG1; + } + + return push_inst(compiler, get_select_cc(type, 1) | FMT(type) | FS(src1) | FD(dst_freg), MOVABLE_INS); + +#else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */ + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); + FAIL_IF(!jump); + + if (src1 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(dst_freg), src1, src1w)); + else + FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src1) | FD(dst_freg), MOVABLE_INS)); + + SLJIT_SKIP_CHECKS(compiler); + label = sljit_emit_label(compiler); + FAIL_IF(!label); + + sljit_set_label(jump, label); + return SLJIT_SUCCESS; +#endif /* SLJIT_MIPS_REV >= 1 */ +} + +#undef FLOAT_DATA +#undef FMT + static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s16 max_offset) { sljit_s32 arg = *mem; @@ -3458,21 +3903,33 @@ static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem } #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) -#define MEM16_IMM_FIRST(memw) IMM((memw) + 1) -#define MEM16_IMM_SECOND(memw) IMM(memw) -#define MEMF64_FS_FIRST(freg) FS(freg) -#define MEMF64_FS_SECOND(freg) (FS(freg) | ((sljit_ins)1 << 11)) +#define IMM_LEFT(memw) IMM((memw) + SSIZE_OF(sw) - 1) +#define IMM_RIGHT(memw) IMM(memw) +#define IMM_32_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_32_RIGHT(memw) IMM(memw) +#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_F64_FIRST_RIGHT(memw) IMM(memw) +#define IMM_F64_SECOND_LEFT(memw) IMM((memw) + SSIZE_OF(f64) - 1) +#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32)) +#define IMM_16_FIRST(memw) IMM((memw) + 1) +#define IMM_16_SECOND(memw) IMM(memw) #else /* !SLJIT_LITTLE_ENDIAN */ -#define MEM16_IMM_FIRST(memw) IMM(memw) -#define MEM16_IMM_SECOND(memw) IMM((memw) + 1) -#define MEMF64_FS_FIRST(freg) (FS(freg) | ((sljit_ins)1 << 11)) -#define MEMF64_FS_SECOND(freg) 
FS(freg) +#define IMM_LEFT(memw) IMM(memw) +#define IMM_RIGHT(memw) IMM((memw) + SSIZE_OF(sw) - 1) +#define IMM_32_LEFT(memw) IMM(memw) +#define IMM_32_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32)) +#define IMM_F64_FIRST_RIGHT(memw) IMM((memw) + SSIZE_OF(f64) - 1) +#define IMM_F64_SECOND_LEFT(memw) IMM(memw) +#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_16_FIRST(memw) IMM(memw) +#define IMM_16_SECOND(memw) IMM((memw) + 1) #endif /* SLJIT_LITTLE_ENDIAN */ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16)) +#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16)) #else /* !SLJIT_CONFIG_MIPS_32 */ -#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)) +#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) #endif /* SLJIT_CONFIG_MIPS_32 */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, @@ -3509,10 +3966,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile ins_right = ((type & SLJIT_MEM_STORE) ? 
SDR : LDR) | S(mem); #endif /* SLJIT_CONFIG_MIPS_32 */ - FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg)))); - FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM(memw + (SSIZE_OF(sw) - 1)), DR(REG_PAIR_FIRST(reg)))); - FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)))); - return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM((memw + 2 * SSIZE_OF(sw) - 1)), DR(REG_PAIR_SECOND(reg))); + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM_LEFT(memw), DR(REG_PAIR_FIRST(reg)))); + FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM_RIGHT(memw), DR(REG_PAIR_FIRST(reg)))); + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM_LEFT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)))); + return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM_RIGHT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))); } #endif /* !(SLJIT_MIPS_REV >= 6) */ @@ -3553,8 +4010,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (type & SLJIT_MEM_STORE) { FAIL_IF(push_inst(compiler, SRA_W | T(reg) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), MOVABLE_INS)); - return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), MOVABLE_INS); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), MOVABLE_INS)); + return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), MOVABLE_INS); } flags = BYTE_DATA | LOAD_DATA; @@ -3562,15 +4019,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (op == SLJIT_MOV_S16) flags |= SIGNED_DATA; - FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | 
MEM16_IMM_FIRST(memw), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), DR(reg))); + FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), DR(reg))); FAIL_IF(push_inst(compiler, SLL_W | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); return push_inst(compiler, OR | S(reg) | T(TMP_REG2) | D(reg), DR(reg)); case SLJIT_MOV: case SLJIT_MOV_P: #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - if (type & SLJIT_MEM_UNALIGNED_32) { + if (type & SLJIT_MEM_ALIGNED_32) { flags = WORD_DATA; if (!(type & SLJIT_MEM_STORE)) flags |= LOAD_DATA; @@ -3582,8 +4039,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); if (type & SLJIT_MEM_STORE) { - FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS)); - return push_inst(compiler, SDR | S(mem) | T(reg) | IMM(memw + 7), MOVABLE_INS); + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(reg) | IMM_RIGHT(memw), MOVABLE_INS); } if (mem == reg) { @@ -3591,8 +4048,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile mem = TMP_REG1; } - FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM(memw), DR(reg))); - return push_inst(compiler, LDR | S(mem) | T(reg) | IMM(memw + 7), DR(reg)); + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM_LEFT(memw), DR(reg))); + return push_inst(compiler, LDR | S(mem) | T(reg) | IMM_RIGHT(memw), DR(reg)); #endif /* SLJIT_CONFIG_MIPS_32 */ } @@ -3600,8 +4057,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); if (type & 
SLJIT_MEM_STORE) { - FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS)); - return push_inst(compiler, SWR | S(mem) | T(reg) | IMM(memw + 3), MOVABLE_INS); + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM_32_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), MOVABLE_INS); } if (mem == reg) { @@ -3609,18 +4066,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile mem = TMP_REG1; } - FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM(memw), DR(reg))); + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM_32_LEFT(memw), DR(reg))); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg)); + return push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg)); #else /* !SLJIT_CONFIG_MIPS_32 */ - FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg))); if (op != SLJIT_MOV_U32) return SLJIT_SUCCESS; #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) - return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11) | (0 << 11), DR(reg)); -#else /* SLJIT_MIPS_REV < 1 */ + return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11), DR(reg)); +#else /* SLJIT_MIPS_REV < 2 */ FAIL_IF(push_inst(compiler, DSLL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg))); return push_inst(compiler, DSRL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg)); #endif /* SLJIT_MIPS_REV >= 2 */ @@ -3643,77 +4100,97 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil if (type & SLJIT_MEM_STORE) { if (type & SLJIT_32) { FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) FAIL_IF(push_inst(compiler, NOP, 
UNMOVABLE_INS)); -#endif - FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); - return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), MOVABLE_INS); } #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), DR(TMP_REG2))); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), MOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + FAIL_IF(push_inst(compiler, MFHC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg) | (1 << 11), DR(TMP_REG2))); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); #endif - FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS)); + break; + } - FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), DR(TMP_REG2))); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif - FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), MOVABLE_INS)); - return push_inst(compiler, SWR | S(mem) | 
T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS); + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), MOVABLE_INS); #else /* !SLJIT_CONFIG_MIPS_32 */ - FAIL_IF(push_inst(compiler, MFC1 | (1 << 21) | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, DMFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif - FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); - return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), MOVABLE_INS); #endif /* SLJIT_CONFIG_MIPS_32 */ } if (type & SLJIT_32) { - FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), DR(TMP_REG2))); FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif +#endif /* MIPS III */ return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, MTC1 | 
T(TMP_REG2) | MEMF64_FS_FIRST(freg), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), MOVABLE_INS)); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), DR(TMP_REG2))); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + return push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS); +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg) | (1 << 11), MOVABLE_INS)); + break; + } #else /* !SLJIT_CONFIG_MIPS_32 */ - FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, MTC1 | (1 << 21) | T(TMP_REG2) | FS(freg), MOVABLE_INS)); -#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) - FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); -#endif + FAIL_IF(push_inst(compiler, DMTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); #endif /* SLJIT_CONFIG_MIPS_32 */ +#if !defined(SLJIT_MIPS_REV) 
|| SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ return SLJIT_SUCCESS; } #endif /* !SLJIT_MIPS_REV || SLJIT_MIPS_REV < 6 */ -#undef MEM16_IMM_FIRST -#undef MEM16_IMM_SECOND -#undef MEMF64_FS_FIRST -#undef MEMF64_FS_SECOND +#undef IMM_16_SECOND +#undef IMM_16_FIRST +#undef IMM_F64_SECOND_RIGHT +#undef IMM_F64_SECOND_LEFT +#undef IMM_F64_FIRST_RIGHT +#undef IMM_F64_FIRST_LEFT +#undef IMM_32_RIGHT +#undef IMM_32_LEFT +#undef IMM_RIGHT +#undef IMM_LEFT #undef MEM_CHECK_UNALIGNED #undef TO_ARGW_HI @@ -3740,18 +4217,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS)); @@ -3764,5 +4241,5 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); - return put_label; + return jump; } diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_32.c old mode 100644 new mode 100755 index 25cfcb9072..2352fad5d4 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_32.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_32.c @@ -325,6 +325,109 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_s32 invert_sign = 1; + + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000)); + src = TMP_REG1; + invert_sign = 0; + } else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + /* First, a special double precision floating point value is constructed: + (2^53 + (src xor (2^31))) + The upper 32 bits of this number is a constant, and the lower 32 bits + is simply the value of the source argument. The xor 2^31 operation adds + 0x80000000 to the source argument, which moves it into the 0 - 0xffffffff + range. Finally we substract 2^53 + 2^31 to get the converted value. 
*/ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + if (invert_sign) + FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + /* First, a special double precision floating point value is constructed: + (2^53 + src) + The upper 32 bits of this number is a constant, and the lower 32 bits + is simply the value of the source argument. Finally we substract 2^53 + to get the converted value. 
*/ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + FAIL_IF(push_inst(compiler, STW | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI)); + + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STW | S(TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_s32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm[0] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0])); + if (u.imm[1] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1])); + + /* Saved in the same endianness. */ + FAIL_IF(push_inst(compiler, STW | S(u.imm[0] != 0 ? TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STW | S(u.imm[1] != 0 ? 
TMP_REG2 : TMP_ZERO) | A(SLJIT_SP) | (TMP_MEM_OFFSET + sizeof(sljit_s32)))); + return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { @@ -352,9 +455,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, STW | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI)); if (reg2 != 0) - FAIL_IF(push_inst(compiler, STW | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW)); + FAIL_IF(push_inst(compiler, STW | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); else - FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW)); + FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); } @@ -362,7 +465,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET)); if (reg2 != 0) - FAIL_IF(push_inst(compiler, LWZ | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW)); + FAIL_IF(push_inst(compiler, LWZ | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); return push_inst(compiler, LWZ | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI); } diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_64.c old mode 100644 new mode 100755 index 8d774cf57a..b3cf9d074d --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_64.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_64.c @@ -49,7 +49,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); - if (!(imm & ~0xffff)) + if (((sljit_uw)imm >> 16) == 0) return push_inst(compiler, ORI | S(TMP_ZERO) | 
A(reg) | IMM(imm)); if (imm <= 0x7fffffffl && imm >= -0x80000000l) { @@ -57,6 +57,11 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; } + if (((sljit_uw)imm >> 32) == 0) { + FAIL_IF(push_inst(compiler, ORIS | S(TMP_ZERO) | A(reg) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; + } + /* Count leading zeroes. */ tmp = (sljit_uw)((imm >= 0) ? imm : ~imm); ASM_SLJIT_CLZ(tmp, shift); @@ -563,6 +568,126 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src == SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); + else + FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + } else + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1)); + + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return 
SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_u32)srcw)); + src = TMP_REG1; + } else { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, CLRLDI(TMP_REG1, src, 32))); + else + FAIL_IF(emit_op_mem(compiler, INT_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + } else { + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } else if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | 1) | A(src) | 0)); + FAIL_IF(push_inst(compiler, BCx | (12 << 21) | (0 << 16) | 20)); + FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + FAIL_IF(push_inst(compiler, Bx | ((op & SLJIT_32) ? 36 : 32))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, RLWINM | S(src) | A(TMP_REG2) | RLWI_SH(10) | RLWI_MBE(10, 21))); + else + FAIL_IF(push_inst(compiler, ANDI | S(src) | A(TMP_REG2) | 0x1)); + + /* Shift right. 
*/ + FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(63) | RLDI_MB(1))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, RLDICR | S(TMP_REG1) | A(TMP_REG1) | RLDI_SH(0) | RLDI_ME(53))); + + FAIL_IF(push_inst(compiler, OR | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2))); + + FAIL_IF(push_inst(compiler, STD | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + FAIL_IF(push_inst(compiler, FADD | FD(dst_r) | FA(dst_r) | FB(dst_r))); + } + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + + FAIL_IF(push_inst(compiler, STD | S(u.imm != 0 ? TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_common.c old mode 100644 new mode 100755 index 81ba7d36b0..d40764826e --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_common.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_common.c @@ -132,7 +132,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { OE and Rc flag (see ALT_SET_FLAGS). 
*/ #define OE(flags) ((flags) & ALT_SET_FLAGS) /* Rc flag (see ALT_SET_FLAGS). */ -#define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10) +#define RC(flags) ((sljit_ins)((flags) & ALT_SET_FLAGS) >> 10) #define HI(opcode) ((sljit_ins)(opcode) << 26) #define LO(opcode) ((sljit_ins)(opcode) << 1) @@ -150,6 +150,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define BCx (HI(16)) #define BCCTR (HI(19) | LO(528) | (3 << 11)) #define BLR (HI(19) | LO(16) | (0x14 << 21)) +#if defined(_ARCH_PWR10) && _ARCH_PWR10 +#define BRD (HI(31) | LO(187)) +#endif /* POWER10 */ #define CNTLZD (HI(31) | LO(58)) #define CNTLZW (HI(31) | LO(26)) #define CMP (HI(31) | LO(0)) @@ -184,6 +187,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define LD (HI(58) | 0) #define LFD (HI(50)) #define LFS (HI(48)) +#if defined(_ARCH_PWR7) && _ARCH_PWR7 +#define LDBRX (HI(31) | LO(532)) +#endif /* POWER7 */ +#define LHBRX (HI(31) | LO(790)) #define LWBRX (HI(31) | LO(534)) #define LWZ (HI(32)) #define MFCR (HI(31) | LO(19)) @@ -221,11 +228,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SRD (HI(31) | LO(539)) #define SRW (HI(31) | LO(536)) #define STD (HI(62) | 0) +#if defined(_ARCH_PWR7) && _ARCH_PWR7 +#define STDBRX (HI(31) | LO(660)) +#endif /* POWER7 */ #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) #define STFD (HI(54)) #define STFIWX (HI(31) | LO(983)) #define STFS (HI(52)) +#define STHBRX (HI(31) | LO(918)) #define STW (HI(36)) #define STWBRX (HI(31) | LO(662)) #define STWU (HI(37)) @@ -264,11 +275,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #endif /* SLJIT_CONFIG_PPC_32 */ #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) -#define TMP_MEM_OFFSET_LOW TMP_MEM_OFFSET -#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET + sizeof(sljit_s32)) +#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET) +#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET + sizeof(sljit_s32)) +#define 
LWBRX_FIRST_REG S(TMP_REG1) +#define LWBRX_SECOND_REG S(dst) #else /* !SLJIT_LITTLE_ENDIAN */ -#define TMP_MEM_OFFSET_LOW (TMP_MEM_OFFSET + sizeof(sljit_s32)) -#define TMP_MEM_OFFSET_HI TMP_MEM_OFFSET +#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET + sizeof(sljit_s32)) +#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET) +#define LWBRX_FIRST_REG S(dst) +#define LWBRX_SECOND_REG S(TMP_REG1) #endif /* SLJIT_LITTLE_ENDIAN */ #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) @@ -295,24 +310,23 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; - sljit_uw extra_jump_flags; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) - return 0; + goto exit; #else if (jump->flags & SLJIT_REWRITABLE_JUMP) - return 0; + goto exit; #endif if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } @@ -321,99 +335,254 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in goto keep_address; #endif - diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l; + diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; - extra_jump_flags = 0; if (jump->flags & IS_COND) { if (diff <= 0x7fff && diff >= -0x8000) { jump->flags |= PATCH_B; - return 1; + return code_ptr; } if (target_addr <= 0xffff) { jump->flags |= PATCH_B | PATCH_ABS_B; - return 1; + return code_ptr; } 
- extra_jump_flags = REMOVE_COND; diff -= SSIZE_OF(ins); } if (diff <= 0x01ffffff && diff >= -0x02000000) { - jump->flags |= PATCH_B | extra_jump_flags; - return 1; + jump->flags |= PATCH_B; + } else if (target_addr <= 0x01ffffff) { + jump->flags |= PATCH_B | PATCH_ABS_B; } - if (target_addr <= 0x03ffffff) { - jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; - return 1; + if (jump->flags & PATCH_B) { + if (!(jump->flags & IS_COND)) + return code_ptr; + + code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); + code_ptr[1] = Bx; + jump->addr += sizeof(sljit_ins); + jump->flags -= IS_COND; + return code_ptr + 1; } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) keep_address: -#endif - if (target_addr <= 0x7fffffff) { +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + if (target_addr < 0x80000000l) { jump->flags |= PATCH_ABS32; - return 1; + code_ptr[2] = MTCTR | S(TMP_CALL_REG); + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; } - if (target_addr <= 0x7fffffffffffl) { + if (target_addr < 0x800000000000l) { jump->flags |= PATCH_ABS48; - return 1; + code_ptr[4] = MTCTR | S(TMP_CALL_REG); + code_ptr[5] = code_ptr[0]; + return code_ptr + 5; } -#endif +#endif /* SLJIT_CONFIG_PPC_64 */ - return 0; +exit: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + code_ptr[2] = MTCTR | S(TMP_CALL_REG); + code_ptr[3] = code_ptr[0]; +#else /* !SLJIT_CONFIG_PPC_32 */ + code_ptr[5] = MTCTR | S(TMP_CALL_REG); + code_ptr[6] = code_ptr[0]; +#endif /* SLJIT_CONFIG_PPC_32 */ + return code_ptr + JUMP_MAX_SIZE - 1; } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) { - if (max_label < 0x100000000l) { - put_label->flags = 0; + sljit_uw addr; + 
SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)5 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (addr < 0x80000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS32; return 1; } - if (max_label < 0x1000000000000l) { - put_label->flags = 1; + if (addr < 0x800000000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS48; return 3; } - put_label->flags = 2; + SLJIT_ASSERT(jump->flags >= ((sljit_uw)4 << JUMP_SIZE_SHIFT)); return 4; } -static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) +#endif /* SLJIT_CONFIG_PPC_64 */ + +static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) { - sljit_uw addr = put_label->label->addr; - sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_u32 reg = *inst; + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_s32 reg; + SLJIT_UNUSED_ARG(executable_offset); - if (put_label->flags == 0) { - SLJIT_ASSERT(addr < 0x100000000l); - inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 16); - } - else { - if (put_label->flags == 1) { - SLJIT_ASSERT(addr < 0x1000000000000l); - inst[0] = ORI | S(TMP_ZERO) | A(reg) | IMM(addr >> 32); - } - else { - inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48); - inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff); - inst++; + if (flags & PATCH_B) { + if (flags & IS_COND) { + if (!(flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); + ins[0] = BCx | ((sljit_ins)addr & 0xfffc) | (ins[0] & 0x03ff0001); + } else { + SLJIT_ASSERT(addr <= 0xffff); + ins[0] = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*ins) & 0x03ff0001); + } + return; } - inst[1] = SLDI(32) | S(reg) | A(reg); - inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff); - inst += 2; + if (!(flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); + ins[0] = Bx | ((sljit_ins)addr & 0x03fffffc) | (ins[0] & 0x1); + } else { + SLJIT_ASSERT(addr <= 0x03ffffff); + ins[0] = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | (ins[0] & 0x1); + } + return; } - inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff); + reg = (flags & JUMP_MOV_ADDR) ? (sljit_s32)ins[0] : TMP_CALL_REG; + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 16); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr); +#else /* !SLJIT_CONFIG_PPC_32 */ + + /* The TMP_ZERO cannot be used because it is restored for tail calls. 
*/ + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr < 0x80000000l); + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 16); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr); + return; + } + + if (flags & PATCH_ABS48) { + SLJIT_ASSERT(addr < 0x800000000000l); + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 32); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr >> 16); + ins[2] = SLDI(16) | S(reg) | A(reg); + ins[3] = ORI | S(reg) | A(reg) | IMM(addr); + return; + } + + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 48); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr >> 32); + ins[2] = SLDI(32) | S(reg) | A(reg); + ins[3] = ORIS | S(reg) | A(reg) | IMM(addr >> 16); + ins[4] = ORI | S(reg) | A(reg) | IMM(addr); +#endif /* SLJIT_CONFIG_PPC_32 */ } +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE - 1; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { + if (jump->u.target <= 0x01ffffff) + total_size = 1 - 1; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + else if (jump->u.target < 0x80000000l) + total_size = 4 - 1; + else if (jump->u.target < 0x800000000000l) + total_size = 6 - 1; #endif 
/* SLJIT_CONFIG_PPC_64 */ + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (jump->flags & IS_COND) { + if (diff <= (0x7fff / SSIZE_OF(ins)) && diff >= (-0x8000 / SSIZE_OF(ins))) + total_size = 1 - 1; + else if ((diff - 1) <= (0x01ffffff / SSIZE_OF(ins)) && (diff - 1) >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 2 - 1; + } else if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 1 - 1; + } + } + + size_reduce += (JUMP_MAX_SIZE - 1) - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } else { + total_size = (sljit_uw)4 << JUMP_SIZE_SHIFT; + + if (jump->flags & JUMP_ADDR) { + if (jump->u.target < 0x80000000l) { + total_size = (sljit_uw)1 << JUMP_SIZE_SHIFT; + size_reduce += 3; + } else if (jump->u.target < 0x800000000000l) { + total_size = (sljit_uw)3 << JUMP_SIZE_SHIFT; + size_reduce += 1; + } + } + jump->flags |= total_size; +#endif /* SLJIT_CONFIG_PPC_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { @@ -423,20 +592,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; - sljit_uw addr; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); + + reduce_code_size(compiler); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + /* add to compiler->size additional instruction space to hold the trampoline and padding */ #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) compiler->size += (compiler->size 
& 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); #else @@ -445,91 +614,64 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #endif code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { + if (next_min_addr == next_label_size) { /* Just recording the address. 
*/ - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - jump->addr = (sljit_uw)(code_ptr - 3); -#else - jump->addr = (sljit_uw)(code_ptr - 6); -#endif - if (detect_jump_type(jump, code_ptr, code, executable_offset)) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - code_ptr[-3] = code_ptr[0]; - code_ptr -= 3; -#else - if (jump->flags & PATCH_ABS32) { - code_ptr -= 3; - code_ptr[-1] = code_ptr[2]; - code_ptr[0] = code_ptr[3]; - } - else if (jump->flags & PATCH_ABS48) { - code_ptr--; - code_ptr[-1] = code_ptr[0]; - code_ptr[0] = code_ptr[1]; - /* rldicr rX,rX,32,31 -> rX,rX,16,47 */ - SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6); - code_ptr[-3] ^= 0x8422; - /* oris -> ori */ - code_ptr[-2] ^= 0x4000000; - } - else { - code_ptr[-6] = code_ptr[0]; - code_ptr -= 6; - } -#endif - if (jump->flags & REMOVE_COND) { - code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); - code_ptr++; - jump->addr += sizeof(sljit_ins); - code_ptr[0] = Bx; - jump->flags -= IS_COND; - } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT(((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + jump->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + word_count += jump->flags >> JUMP_SIZE_SHIFT; + code_ptr += mov_addr_get_length(jump, code, executable_offset); +#else /* !SLJIT_CONFIG_PPC_64 */ + word_count++; + code_ptr++; +#endif /* SLJIT_CONFIG_PPC_64 */ } 
jump = jump->next; - } - if (const_ && const_->addr == word_count) { + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - word_count += 4; -#endif - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -539,7 +681,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -547,7 +689,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) SLJIT_ASSERT(code_ptr - code <= (sljit_sw)(compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)))); @@ -557,87 +698,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - do { - addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins *)jump->addr; - - if (jump->flags & PATCH_B) { - if (jump->flags & IS_COND) { - if (!(jump->flags & PATCH_ABS_B)) { - addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); - SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); - *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); - } - else { - SLJIT_ASSERT(addr <= 0xffff); - *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); - } - } - else { - if (!(jump->flags & PATCH_ABS_B)) { - addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); - SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); - *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | ((*buf_ptr) & 0x1); - } - else { - SLJIT_ASSERT(addr <= 0x03ffffff); - *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); - } - } - break; - } - - /* Set the fields of immediate loads. */ -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[1] |= (sljit_ins)addr & 0xffff; -#else - if (jump->flags & PATCH_ABS32) { - SLJIT_ASSERT(addr <= 0x7fffffff); - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[1] |= (sljit_ins)addr & 0xffff; - break; - } - - if (jump->flags & PATCH_ABS48) { - SLJIT_ASSERT(addr <= 0x7fffffffffff); - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; - buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[3] |= (sljit_ins)addr & 0xffff; - break; - } - - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[4]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; - buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; - buf_ptr[3] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[4] |= 
(sljit_ins)addr & 0xffff; -#endif - } while (0); + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - addr = put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; - - SLJIT_ASSERT((buf_ptr[0] & 0xfc1f0000) == ADDIS && (buf_ptr[1] & 0xfc000000) == ORI); - buf_ptr[0] |= (addr >> 16) & 0xffff; - buf_ptr[1] |= addr & 0xffff; -#else - put_label_set(put_label); -#endif - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); @@ -655,8 +721,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins) + sizeof(struct sljit_function_context); return code_ptr; #else + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); return code; #endif } @@ -666,12 +734,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) switch (feature_type) { case SLJIT_HAS_FPU: #ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; + return (SLJIT_IS_FPU_AVAILABLE) != 0; #else /* Available by default. */ return 1; #endif - + case SLJIT_HAS_REV: +#if defined(_ARCH_PWR10) && _ARCH_PWR10 + return 1; +#else /* !POWER10 */ + return 2; +#endif /* POWER10 */ /* A saved register is set to a zero value. 
*/ case SLJIT_HAS_ZERO_REGISTER: case SLJIT_HAS_CLZ: @@ -680,7 +753,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) return 1; case SLJIT_HAS_CTZ: - case SLJIT_HAS_REV: return 2; default: @@ -690,7 +762,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) { - return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL); + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + return 1; + } + + return 0; } /* --------------------------------------------------------------------- */ @@ -714,6 +796,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) #define MEM_MASK 0x7f +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 6)) + /* Other inp_flags. */ /* Integer opertion and set flags -> requires exts on 64 bit systems. */ @@ -737,6 +821,9 @@ ALT_FORM1 0x001000 ... 
ALT_FORM5 0x010000 */ +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg); + #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) #include "sljitNativePPC_32.c" #else @@ -759,9 +846,6 @@ ALT_FORM5 0x010000 */ #define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET) -static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, - sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg); - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) @@ -1237,7 +1321,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src1_r = src1; flags |= REG1_SOURCE; } - else if (src1 & SLJIT_IMM) { + else if (src1 == SLJIT_IMM) { src1_r = TMP_ZERO; if (src1w != 0) { FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); @@ -1257,7 +1341,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) dst_r = src2_r; } - else if (src2 & SLJIT_IMM) { + else if (src2 == SLJIT_IMM) { src2_r = TMP_ZERO; if (src2w != 0) { FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); @@ -1327,30 +1411,58 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return SLJIT_SUCCESS; } -static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op_flags, +static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { sljit_s32 mem, offs_reg, inp_flags; sljit_sw memw; - SLJIT_UNUSED_ARG(op_flags); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 is_32 = op & SLJIT_32; + + op = GET_OPCODE(op); +#endif /* SLJIT_CONFIG_PPC_64 */ if (!((dst | src) & 
SLJIT_MEM)) { /* Both are registers. */ + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (src == dst) { + FAIL_IF(push_inst(compiler, RLWIMI | S(dst) | A(dst) | RLWI_SH(16) | RLWI_MBE(8, 15))); + FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | RLWI_SH(24) | RLWI_MBE(16, 31))); + } else { + FAIL_IF(push_inst(compiler, RLWINM | S(src) | A(dst) | RLWI_SH(8) | RLWI_MBE(16, 23))); + FAIL_IF(push_inst(compiler, RLWIMI | S(src) | A(dst) | RLWI_SH(24) | RLWI_MBE(24, 31))); + } + + if (op == SLJIT_REV_U16) + return SLJIT_SUCCESS; + return push_inst(compiler, EXTSH | S(dst) | A(dst)); + } + #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (!(op_flags & SLJIT_32)) { + if (!is_32) { +#if defined(_ARCH_PWR10) && _ARCH_PWR10 + return push_inst(compiler, BRD | S(src) | A(dst)); +#else /* !POWER10 */ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_HI))); FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32))); FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(SLJIT_SP) | B(TMP_REG2))); - FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_LOW))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_LO))); FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(SLJIT_SP) | B(TMP_REG2))); return push_inst(compiler, LD | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET); +#endif /* POWER10 */ } #endif /* SLJIT_CONFIG_PPC_64 */ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET))); FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(SLJIT_SP) | B(TMP_REG2))); - return push_inst(compiler, LWZ | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET); + FAIL_IF(push_inst(compiler, LWZ | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op == SLJIT_REV_S32) + return push_inst(compiler, EXTSW | S(dst) | A(dst)); +#endif /* SLJIT_CONFIG_PPC_64 */ + return SLJIT_SUCCESS; } mem = src; @@ -1361,11 +1473,16 @@ static 
sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op_flags, memw = dstw; if (src & SLJIT_MEM) { + inp_flags = HALF_DATA | LOAD_DATA; + + if (op != SLJIT_REV_U16 && op != SLJIT_REV_S16) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - inp_flags = ((op_flags & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + inp_flags = (is_32 ? INT_DATA : WORD_DATA) | LOAD_DATA; #else /* !SLJIT_CONFIG_PPC_64 */ - inp_flags = WORD_DATA | LOAD_DATA; + inp_flags = WORD_DATA | LOAD_DATA; #endif /* SLJIT_CONFIG_PPC_64 */ + } + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src, srcw, TMP_REG2)); src = TMP_REG1; } @@ -1395,30 +1512,60 @@ static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op_flags, offs_reg = TMP_REG2; } + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (dst & SLJIT_MEM) + return push_inst(compiler, STHBRX | S(src) | A(mem) | B(offs_reg)); + + FAIL_IF(push_inst(compiler, LHBRX | S(dst) | A(mem) | B(offs_reg))); + + if (op == SLJIT_REV_U16) + return SLJIT_SUCCESS; + return push_inst(compiler, EXTSH | S(dst) | A(dst)); + } + #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (!(op_flags & SLJIT_32)) { + if (!is_32) { if (dst & SLJIT_MEM) { +#if defined(_ARCH_PWR7) && _ARCH_PWR7 + return push_inst(compiler, STDBRX | S(src) | A(mem) | B(offs_reg)); +#else /* !POWER7 */ +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32))); + FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(mem) | B(offs_reg))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32)))); + return push_inst(compiler, STWBRX | S(src) | A(mem) | B(TMP_REG2)); +#else /* !SLJIT_LITTLE_ENDIAN */ FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(mem) | B(offs_reg))); FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32))); FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | 
IMM(SSIZE_OF(s32)))); return push_inst(compiler, STWBRX | S(TMP_REG1) | A(mem) | B(TMP_REG2)); +#endif /* SLJIT_LITTLE_ENDIAN */ +#endif /* POWER7 */ } - - FAIL_IF(push_inst(compiler, LWBRX | S(dst) | A(mem) | B(offs_reg))); +#if defined(_ARCH_PWR7) && _ARCH_PWR7 + return push_inst(compiler, LDBRX | S(dst) | A(mem) | B(offs_reg)); +#else /* !POWER7 */ + FAIL_IF(push_inst(compiler, LWBRX | LWBRX_FIRST_REG | A(mem) | B(offs_reg))); FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32)))); - FAIL_IF(push_inst(compiler, LWBRX | S(TMP_REG1) | A(mem) | B(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWBRX | LWBRX_SECOND_REG | A(mem) | B(TMP_REG2))); return push_inst(compiler, RLDIMI | S(TMP_REG1) | A(dst) | RLDI_SH(32) | RLDI_MB(0)); +#endif /* POWER7 */ } #endif /* SLJIT_CONFIG_PPC_64 */ if (dst & SLJIT_MEM) return push_inst(compiler, STWBRX | S(src) | A(mem) | B(offs_reg)); - return push_inst(compiler, LWBRX | S(dst) | A(mem) | B(offs_reg)); + FAIL_IF(push_inst(compiler, LWBRX | S(dst) | A(mem) | B(offs_reg))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op == SLJIT_REV_S32) + return push_inst(compiler, EXTSW | S(dst) | A(dst)); +#endif /* SLJIT_CONFIG_PPC_64 */ + return SLJIT_SUCCESS; } #define EMIT_MOV(type, type_flags, type_cast) \ - emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw) + emit_op(compiler, (src == SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? 
type_cast srcw : srcw) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, @@ -1449,7 +1596,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (op == SLJIT_MOV_S32) op = SLJIT_MOV_U32; } - else if (src & SLJIT_IMM) { + else if (src == SLJIT_IMM) { if (op == SLJIT_MOV_U32) op = SLJIT_MOV_S32; } @@ -1497,12 +1644,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_CLZ: case SLJIT_CTZ: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return emit_op(compiler, op, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); -#else + if (op_flags & SLJIT_32) + flags |= ALT_FORM1; +#endif /* SLJIT_CONFIG_PPC_64 */ return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); -#endif + case SLJIT_REV_U32: + case SLJIT_REV_S32: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + op |= SLJIT_32; +#endif /* SLJIT_CONFIG_PPC_64 */ + /* fallthrough */ case SLJIT_REV: - return emit_rev(compiler, op_flags, dst, dstw, src, srcw); + case SLJIT_REV_U16: + case SLJIT_REV_S16: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + op |= (op_flags & SLJIT_32); +#endif /* SLJIT_CONFIG_PPC_64 */ + return emit_rev(compiler, op, dst, dstw, src, srcw); } return SLJIT_SUCCESS; @@ -1510,40 +1668,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #undef EMIT_MOV +/* Macros for checking different operand types / values. 
*/ #define TEST_SL_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) - + ((src) == SLJIT_IMM && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) #define TEST_UL_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~0xffff)) - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define TEST_SH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l) -#else -#define TEST_SH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & 0xffff)) -#endif - + ((src) == SLJIT_IMM && !((srcw) & ~0xffff)) #define TEST_UH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000)) + ((src) == SLJIT_IMM && !((srcw) & ~(sljit_sw)0xffff0000)) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_SH_IMM(src, srcw) \ + ((src) == SLJIT_IMM && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l) #define TEST_ADD_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l) -#else -#define TEST_ADD_IMM(src, srcw) \ - ((src) & SLJIT_IMM) -#endif - -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + ((src) == SLJIT_IMM && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l) #define TEST_UI_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff)) -#else -#define TEST_UI_IMM(src, srcw) \ - ((src) & SLJIT_IMM) -#endif + ((src) == SLJIT_IMM && !((srcw) & ~0xffffffff)) -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_ADD_FORM1(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ || (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z | SLJIT_SET_CARRY)) @@ -1553,14 +1693,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #define TEST_SUB_FORM3(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ || (op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) -#else + +#else /* !SLJIT_CONFIG_PPC_64 */ +#define TEST_SH_IMM(src, srcw) \ + ((src) 
== SLJIT_IMM && !((srcw) & 0xffff)) +#define TEST_ADD_IMM(src, srcw) \ + ((src) == SLJIT_IMM) +#define TEST_UI_IMM(src, srcw) \ + ((src) == SLJIT_IMM) + #define TEST_ADD_FORM1(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) #define TEST_SUB_FORM2(op) \ (GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) #define TEST_SUB_FORM3(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) -#endif +#endif /* SLJIT_CONFIG_PPC_64 */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, @@ -1579,9 +1727,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (op & SLJIT_32) { /* Most operations expect sign extended arguments. */ flags |= INT_DATA | SIGNED_DATA; - if (src1 & SLJIT_IMM) + if (src1 == SLJIT_IMM) src1w = (sljit_s32)(src1w); - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) src2w = (sljit_s32)(src2w); if (HAS_FLAGS(op)) flags |= ALT_SIGN_EXT; @@ -1597,7 +1745,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (TEST_ADD_FORM1(op)) return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); - if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { + if (!HAS_FLAGS(op) && (src1 == SLJIT_IMM || src2 == SLJIT_IMM)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1666,7 +1814,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); } - if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) { + if (src2 == SLJIT_IMM && src2w >= 0 && src2w <= (SIMM_MAX + 1)) { compiler->imm = (sljit_ins)src2w; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } @@ -1682,7 +1830,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile } if (TEST_SUB_FORM2(op)) { - if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { + if (src2 == SLJIT_IMM && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } @@ -1741,10 +1889,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_XOR: - if ((src2 & SLJIT_IMM) && src2w == -1) { + if (src2 == SLJIT_IMM && src2w == -1) { return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src1, src1w); } - if ((src1 & SLJIT_IMM) && src1w == -1) { + if (src1 == SLJIT_IMM && src1w == -1) { return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src2, src2w); } /* fallthrough */ @@ -1794,7 +1942,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (op & SLJIT_32) flags |= ALT_FORM2; #endif - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { compiler->imm = (sljit_ins)src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } @@ -1846,7 +1994,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * ADJUST_LOCAL_OFFSET(src3, src3w); - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { src3w &= bit_length - 1; if (src3w == 0) @@ -1985,21 +2133,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; -} + 
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER) + return -1; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return freg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -2010,7 +2161,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 6)) #define SELECT_FOP(op, single, double) ((sljit_ins)((op & SLJIT_32) ? single : double)) static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -2068,83 +2218,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw)); } -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw) -{ -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - - sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; - - if (src & SLJIT_IMM) { - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - srcw = (sljit_s32)srcw; - - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; - } else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { - if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); - else - FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); - src = TMP_REG1; - } - - if (FAST_IS_REG(src)) { - FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET)); - FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); - } else - FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1)); - - FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); - - if (dst & SLJIT_MEM) - return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); - - if (op & SLJIT_32) - return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); - return SLJIT_SUCCESS; - -#else /* !SLJIT_CONFIG_PPC_64 */ - - sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_s32 invert_sign = 1; - - if (src & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000)); - src = TMP_REG1; - invert_sign = 0; - } else if (!FAST_IS_REG(src)) { - FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); - src = TMP_REG1; - } - - /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31))) - The double precision format has exactly 53 bit precision, so the lower 32 bit represents - the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000 - to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating - point value, we need to subtract 2^53 + 2^31 from the constructed value. 
*/ - FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); - if (invert_sign) - FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); - FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI)); - FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW)); - FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); - FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); - FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW)); - FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET)); - - FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); - - if (dst & SLJIT_MEM) - return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); - if (op & SLJIT_32) - return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); - return SLJIT_SUCCESS; - -#endif /* SLJIT_CONFIG_PPC_64 */ -} - static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -2252,18 +2325,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil case SLJIT_ADD_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2))); break; - case SLJIT_SUB_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2))); break; - case SLJIT_MUL_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); break; - case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2))); break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 
STFS : STFD) | FS(src2) | A(SLJIT_SP) | TMP_MEM_OFFSET)); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, LWZ | S(TMP_REG1) | A(SLJIT_SP) | ((op & SLJIT_32) ? TMP_MEM_OFFSET : TMP_MEM_OFFSET_HI))); +#else /* !SLJIT_CONFIG_PPC_32 */ + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? LWZ : LD) | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); +#endif /* SLJIT_CONFIG_PPC_32 */ + FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src1))); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(TMP_REG1) | 0)); +#else /* !SLJIT_CONFIG_PPC_32 */ + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((op & SLJIT_32) ? 0 : 1)) | A(TMP_REG1) | 0)); +#endif /* SLJIT_CONFIG_PPC_32 */ + FAIL_IF(push_inst(compiler, BCx | (4 << 21) | (0 << 16) | 8)); + return push_inst(compiler, FNEG | FD(dst_r) | FB(dst_r)); } if (dst & SLJIT_MEM) @@ -2274,6 +2359,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil #undef SELECT_FOP +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + + FAIL_IF(push_inst(compiler, STW | S(u.imm != 0 ? 
TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, LFS | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); +} + /* --------------------------------------------------------------------- */ /* Conditional instructions */ /* --------------------------------------------------------------------- */ @@ -2394,7 +2499,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - if (type == SLJIT_CARRY || type == SLJIT_NOT_CARRY) + if ((type | 0x1) == SLJIT_NOT_CARRY) PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); /* In PPC, we don't need to touch the arguments. */ @@ -2405,16 +2510,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile jump->flags |= IS_CALL; #endif - PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); - PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG))); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; return jump; } SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); @@ -2440,18 +2548,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - if (FAST_IS_REG(src)) { -#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) - if (type >= SLJIT_CALL && src != TMP_CALL_REG) { - FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); - src_r = TMP_CALL_REG; - } - else - src_r = src; -#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ - src_r = src; -#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ - } else if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { /* These jumps are converted to jump/call instructions when possible. */ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); @@ -2463,8 +2560,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump->flags |= IS_CALL; #endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ - FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); - src_r = TMP_CALL_REG; + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src)) { +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL && src != TMP_CALL_REG) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src_r = TMP_CALL_REG; + } else + src_r = src; +#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + src_r = src; +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ } else { ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); @@ -2472,8 +2585,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi } FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); - if (jump) - jump->addr = compiler->size; return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); } @@ -2481,6 +2592,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); @@ -2663,14 +2776,106 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0); } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { - CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + sljit_ins *ptr; + sljit_uw size; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 inp_flags = ((type & SLJIT_32) ? 
INT_DATA : WORD_DATA) | LOAD_DATA; +#else /* !SLJIT_CONFIG_PPC_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; +#endif /* SLJIT_CONFIG_PPC_64 */ - return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(push_inst(compiler, OR | S(dst_reg) | A(TMP_REG2) | B(dst_reg))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG2; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + } + + FAIL_IF(push_inst(compiler, OR | S(src2_reg) | A(dst_reg) | B(src2_reg))); + } + } + + if (((type & ~SLJIT_32) | 0x1) == SLJIT_NOT_CARRY) + FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w, TMP_REG1)); + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; +#endif /* SLJIT_CONFIG_RISCV_64 */ + FAIL_IF(load_immediate(compiler, dst_reg, src1w)); + } else + FAIL_IF(push_inst(compiler, OR | S(src1) | A(dst_reg) | B(src1))); + + *ptr = BCx | get_bo_bi_flags(compiler, (type ^ 0x1) & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins *ptr; + sljit_uw size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, 
src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, FMR | FD(dst_freg) | FB(src2_freg))); + } + + if (((type & ~SLJIT_32) | 0x1) == SLJIT_NOT_CARRY) + FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w, TMP_REG1)); + else + FAIL_IF(push_inst(compiler, FMR | FD(dst_freg) | FB(src1))); + + *ptr = BCx | get_bo_bi_flags(compiler, (type ^ 0x1) & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 2); + return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) @@ -2909,31 +3114,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); -#else PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + compiler->size++; +#else compiler->size += 4; #endif if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_32.c old mode 100644 new mode 100755 index 4490be2aaf..396c956c19 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_32.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_32.c @@ -27,7 +27,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) { SLJIT_UNUSED_ARG(tmp_r); - SLJIT_ASSERT(dst_r != tmp_r); if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); @@ -43,6 +42,31 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_s32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm[0] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0], TMP_REG3)); + if (u.imm[1] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1], TMP_REG3)); + + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-16))); + FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | 
RS2(u.imm[0] != 0 ? TMP_REG1 : TMP_ZERO) | (8 << 7))); + FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(u.imm[1] != 0 ? TMP_REG2 : TMP_ZERO) | (12 << 7))); + FAIL_IF(push_inst(compiler, FLD | FRD(freg) | RS1(SLJIT_SP) | IMM_I(8))); + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(16)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_64.c old mode 100644 new mode 100755 index f93d6ff667..7fcf2c5273 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_64.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_64.c @@ -28,8 +28,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r { sljit_sw high; - SLJIT_ASSERT(dst_r != tmp_r); - if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); @@ -81,6 +79,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r return SLJIT_SUCCESS; } + SLJIT_ASSERT(dst_r != tmp_r); + high = imm >> 32; imm = (sljit_s32)imm; @@ -126,6 +126,26 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, FMV_W_X | (1 << 25) | RS1(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm, TMP_REG3)); + return push_inst(compiler, FMV_W_X | (1 << 25) | RS1(TMP_REG1) | FRD(freg)); +} + 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_common.c old mode 100644 new mode 100755 index 473e06040a..a0996276a3 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_common.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeRISCV_common.c @@ -100,6 +100,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { /* These conversion opcodes are partly defined. */ #define FCVT_S_D (F7(0x20) | OPC(0x53)) #define FCVT_S_W (F7(0x68) | OPC(0x53)) +#define FCVT_S_WU (F7(0x68) | F12(0x1) | OPC(0x53)) #define FCVT_W_S (F7(0x60) | F3(0x1) | OPC(0x53)) #define FMUL_S (F7(0x8) | F3(0x7) | OPC(0x53)) #define FMV_X_W (F7(0x70) | F3(0x0) | OPC(0x53)) @@ -180,24 +181,23 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; if (jump->flags & IS_COND) { - inst--; diff += SSIZE_OF(ins); if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) { - jump->flags |= PATCH_B; + inst--; inst[0] = (inst[0] & 0x1fff07f) ^ 0x1000; + jump->flags |= PATCH_B; jump->addr = (sljit_uw)inst; return inst; } - inst++; diff -= SSIZE_OF(ins); } @@ -264,113 +264,236 @@ exit: #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) -static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { - if (max_label <= 
(sljit_uw)S32_MAX) { - put_label->flags = PATCH_ABS32; + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if (diff >= S32_MIN && diff <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_REL32; return 1; } - if (max_label <= S44_MAX) { - put_label->flags = PATCH_ABS44; + if (addr <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS32; + return 1; + } + + if (addr <= S44_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS44; return 3; } - if (max_label <= S52_MAX) { - put_label->flags = PATCH_ABS52; + if (addr <= S52_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)4 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS52; return 4; } - put_label->flags = 0; + SLJIT_ASSERT(jump->flags >= ((sljit_uw)5 << JUMP_SIZE_SHIFT)); return 5; } #endif /* SLJIT_CONFIG_RISCV_64 */ -static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset) { - struct sljit_jump *jump = NULL; - struct sljit_put_label *put_label; - sljit_uw flags; - sljit_ins *inst; + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? 
*ins : TMP_REG1; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) sljit_sw high; #endif - sljit_uw addr; + SLJIT_UNUSED_ARG(executable_offset); - if (reg != 0) { - jump = (struct sljit_jump*)dst; - flags = jump->flags; - inst = (sljit_ins*)jump->addr; - addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - } else { - put_label = (struct sljit_put_label*)dst; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - flags = put_label->flags; -#endif - inst = (sljit_ins*)put_label->addr; - addr = put_label->label->addr; - reg = *inst; + if (flags & PATCH_REL32) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + if ((addr & 0x800) != 0) + addr += 0x1000; + + ins[0] = AUIPC | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & 0x707f) == JALR); + ins[1] = (ins[1] & 0xfffff) | IMM_I(addr); + } else + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); + return; } +#endif if ((addr & 0x800) != 0) addr += 0x1000; #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); #else /* !SLJIT_CONFIG_RISCV_32 */ if (flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= S32_MAX); - inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); } else if (flags & PATCH_ABS44) { high = (sljit_sw)addr >> 12; SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff); if (high > S32_MAX) { SLJIT_ASSERT((high & 0x800) != 0); - inst[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; - inst[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); + ins[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; + ins[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); } else { if ((high & 0x800) != 0) high += 0x1000; - inst[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); - 
inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); + ins[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); } - inst[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); - inst += 2; + ins[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); + ins += 2; } else { high = (sljit_sw)addr >> 32; if ((addr & 0x80000000l) != 0) high = ~high; - if ((high & 0x800) != 0) - high += 0x1000; - if (flags & PATCH_ABS52) { SLJIT_ASSERT(addr <= S52_MAX); - inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12); + ins[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12); } else { - inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff); - inst[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high); - inst++; + if ((high & 0x800) != 0) + high += 0x1000; + ins[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff); + ins[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high); + ins++; } - inst[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); - inst[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 20 : 32); - inst[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3); - inst += 3; + ins[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + ins[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 
20 : 32); + ins[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3); + ins += 3; } #endif /* !SLJIT_CONFIG_RISCV_32 */ - if (jump != NULL) { - SLJIT_ASSERT((inst[1] & 0x707f) == JALR); - inst[1] = (inst[1] & 0xfffff) | IMM_I(addr); + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & 0x707f) == JALR); + ins[1] = (ins[1] & 0xfffff) | IMM_I(addr); } else - inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (jump->u.target <= S32_MAX) + total_size = 2; + else if (jump->u.target <= S44_MAX) + total_size = 4; + else if (jump->u.target <= S52_MAX) + total_size = 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + } else { + /* Unit size: instruction. 
*/ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH_MIN / SSIZE_OF(ins))) + total_size = 0; + else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins))) + total_size = 1; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 2; +#endif /* SLJIT_CONFIG_RISCV_64 */ + } + } + + size_reduce += JUMP_MAX_SIZE - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + } else { + total_size = 5; + + if (!(jump->flags & JUMP_ADDR)) { + /* Real size minus 1. Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < S32_MAX) + total_size = 1; + else if (jump->u.target < S44_MAX) + total_size = 3; + else if (jump->u.target <= S52_MAX) + total_size = 4; + + size_reduce += 5 - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#endif /* !SLJIT_CONFIG_RISCV_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; } SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) @@ -381,77 +504,80 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; - sljit_uw next_addr; + SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); + + reduce_code_size(compiler); code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * 
sizeof(sljit_ins), compiler->exec_allocator_data); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; word_count = 0; - next_addr = 0; executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); do { *code_ptr = *buf_ptr++; - if (next_addr == word_count) { + if (next_min_addr == word_count) { SLJIT_ASSERT(!label || label->size >= word_count); SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); } - if (jump && jump->addr == word_count) { + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - word_count += 1; -#else - word_count += 5; -#endif - jump->addr = (sljit_uw)code_ptr; - code_ptr = detect_jump_type(jump, code, executable_offset); + word_count += 1; + jump->addr = (sljit_uw)code_ptr; + code_ptr += 1; +#else /* !SLJIT_CONFIG_RISCV_32 */ + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr 
= (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; +#endif /* SLJIT_CONFIG_RISCV_32 */ + } jump = jump->next; - } - if (const_ && const_->addr == word_count) { + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { const_->addr = (sljit_uw)code_ptr; const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - code_ptr += 1; - word_count += 1; -#else - code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); - word_count += 5; -#endif - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); + + SLJIT_GET_NEXT_MIN(); } code_ptr++; word_count++; @@ -461,7 +587,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->u.addr = (sljit_uw)code_ptr; label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -469,18 +595,17 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); jump = compiler->jumps; while (jump) { do { - if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) { - load_addr_to_reg(jump, TMP_REG1); + if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) { + load_addr_to_reg(jump, executable_offset); break; } - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (jump->flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr; buf_ptr = (sljit_ins *)jump->addr; addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); @@ -491,31 +616,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } -#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - if (jump->flags & PATCH_REL32) { - SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); - - if ((addr & 0x800) != 0) - addr += 0x1000; - - buf_ptr[0] = AUIPC | RD(TMP_REG1) | (sljit_ins)((sljit_sw)addr & ~0xfff); - SLJIT_ASSERT((buf_ptr[1] & 0x707f) == JALR); - buf_ptr[1] |= IMM_I(addr); - break; - } -#endif - SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX); addr = (addr & 0xff000) | ((addr & 0x800) << 9) | ((addr & 0x7fe) << 20) | ((addr & 0x100000) << 11); buf_ptr[0] = JAL | RD((jump->flags & IS_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | (sljit_ins)addr; } while (0); - jump = jump->next; - } - put_label = compiler->put_labels; - while (put_label) { - load_addr_to_reg(put_label, 0); - put_label = put_label->next; + jump = jump->next; } compiler->error = SLJIT_ERR_COMPILED; @@ -535,7 +641,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) switch (feature_type) { case SLJIT_HAS_FPU: #ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; + return (SLJIT_IS_FPU_AVAILABLE) != 0; #elif defined(__riscv_float_abi_soft) return 0; #else @@ -554,7 +660,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) { - return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 2; + + case SLJIT_UNORDERED: + case SLJIT_ORDERED: + return 1; + } + + return 0; } /* --------------------------------------------------------------------- */ @@ -929,7 +1045,7 @@ static sljit_s32 getput_arg(struct 
sljit_compiler *compiler, sljit_s32 flags, sl /* Since tmp can be the same as base or offset registers, * these might be unavailable after modifying tmp. */ - if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA) && reg == TMP_REG2) tmp_r = reg; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { @@ -1057,16 +1173,16 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj { sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ); #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - sljit_ins word = (op & SLJIT_32) >> 5; - sljit_ins max = (op & SLJIT_32) ? 32 : 64; + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; + sljit_ins word_size = (op & SLJIT_32) ? 32 : 64; #else /* !SLJIT_CONFIG_RISCV_64 */ - sljit_ins max = 32; + sljit_ins word_size = 32; #endif /* SLJIT_CONFIG_RISCV_64 */ SLJIT_ASSERT(WORD == 0 || WORD == 0x8); /* The OTHER_FLAG is the counter. */ - FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(max))); + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(word_size))); /* The TMP_REG2 is the next value. */ if (src != TMP_REG2) @@ -1082,7 +1198,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, slj FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20))); /* The TMP_REG1 is the next shift. 
*/ - FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(max))); + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(word_size))); FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(TMP_REG2) | IMM_I(0))); FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1))); @@ -1140,6 +1256,22 @@ static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)); } +static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; + sljit_ins word_size = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_ins word_size = 32; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(src) | IMM_I(8))); + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src) | IMM_I(word_size - 8))); + FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(0xff))); + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? 
SRLI : SRAI) | WORD | RD(dst) | RS1(dst) | IMM_I(word_size - 16))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)); +} + #define EMIT_LOGICAL(op_imm, op_reg) \ if (flags & SRC2_IMM) { \ if (op & SLJIT_SET_Z) \ @@ -1164,7 +1296,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; sljit_ins op_imm, op_reg; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - sljit_ins word = (op & SLJIT_32) >> 5; + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; #endif /* SLJIT_CONFIG_RISCV_64 */ SLJIT_ASSERT(WORD == 0 || WORD == 0x8); @@ -1234,9 +1366,28 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return emit_clz_ctz(compiler, op, dst, src2); case SLJIT_REV: + case SLJIT_REV_S32: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + case SLJIT_REV_U32: +#endif /* SLJIT_CONFIG_RISCV_32 */ SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); return emit_rev(compiler, op, dst, src2); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return emit_rev16(compiler, op, dst, src2); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_REV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1); + FAIL_IF(emit_rev(compiler, op, dst, src2)); + if (dst == TMP_REG2) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(32))); + return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); +#endif /* SLJIT_CONFIG_RISCV_32 */ + case SLJIT_ADD: /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; @@ -1593,9 +1744,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 compiler->cache_argw = 0; } - if (dst == TMP_REG2) { + if (dst == 0) { 
SLJIT_ASSERT(HAS_FLAGS(op)); flags |= UNUSED_DEST; + dst = TMP_REG2; } else if (FAST_IS_REG(dst)) { dst_r = dst; @@ -1607,11 +1759,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= SLOW_DEST; if (flags & IMM_OP) { - if ((src2 & SLJIT_IMM) && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) { + if (src2 == SLJIT_IMM && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) { flags |= SRC2_IMM; src2_r = src2w; } - else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) { + else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) { flags |= SRC2_IMM; src2_r = src1w; @@ -1628,7 +1780,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src1_r = src1; flags |= REG1_SOURCE; } - else if (src1 & SLJIT_IMM) { + else if (src1 == SLJIT_IMM) { if (src1w) { FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); src1_r = TMP_REG1; @@ -1651,7 +1803,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = (sljit_s32)src2_r; } - else if (src2 & SLJIT_IMM) { + else if (src2 == SLJIT_IMM) { if (!(flags & SRC2_IMM)) { if (src2w) { FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3)); @@ -1708,7 +1860,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - sljit_ins word = (op & SLJIT_32) >> 5; + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; SLJIT_ASSERT(word == 0 || word == 0x8); #endif /* SLJIT_CONFIG_RISCV_64 */ @@ -1777,30 +1929,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_MOV_U32: - 
return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: /* Logical operators have no W variant, so sign extended input is necessary for them. */ case SLJIT_MOV32: - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif case SLJIT_MOV_U8: - return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); case SLJIT_CLZ: case SLJIT_CTZ: case SLJIT_REV: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U32: + case SLJIT_REV_S32: + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); } SLJIT_UNREACHABLE(); @@ -1823,9 +1983,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) if (op & SLJIT_32) { flags |= INT_DATA | SIGNED_DATA; - if (src1 & SLJIT_IMM) + if (src1 == SLJIT_IMM) src1w = (sljit_s32)src1w; - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) src2w = (sljit_s32)src2w; } #endif @@ -1858,7 +2018,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_MASHR: case SLJIT_ROTL: case SLJIT_ROTR: - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) src2w &= 0x1f; #else /* !SLJIT_CONFIG_RISCV_32 */ @@ -1884,7 +2044,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); + return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, @@ -1896,7 +2056,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * sljit_s32 is_left; sljit_ins ins1, ins2, ins3; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - sljit_ins word = (op & SLJIT_32) >> 5; + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; sljit_s32 inp_flags = ((op & SLJIT_32) ? 
INT_DATA : WORD_DATA) | LOAD_DATA; sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; #else /* !SLJIT_CONFIG_RISCV_64 */ @@ -1918,7 +2078,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * ADJUST_LOCAL_OFFSET(src3, src3w); - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { src3w &= bit_length - 1; if (src3w == 0) @@ -1970,8 +2130,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * return push_inst(compiler, OR | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG1)); } -#undef WORD - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -2027,21 +2185,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; -} + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER) + return -1; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return freg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -2088,51 +2249,73 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp #endif } -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_sw dstw, sljit_s32 
src, sljit_sw srcw) { - sljit_ins inst; -#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; -#endif - sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); -#else - FAIL_IF(emit_op_mem2(compiler, (flags ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); -#endif +#else /* SLJIT_CONFIG_RISCV_32 */ + FAIL_IF(emit_op_mem2(compiler, ((ins & (1 << 21)) ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); +#endif /* !SLJIT_CONFIG_RISCV_32 */ src = TMP_REG1; - } else if (src & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - srcw = (sljit_s32)srcw; -#endif - + } else if (src == SLJIT_IMM) { FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3)); src = TMP_REG1; } - inst = FCVT_S_W | FMT(op) | FRD(dst_r) | RS1(src); + FAIL_IF(push_inst(compiler, ins | FRD(dst_r) | RS1(src))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, DOUBLE_DATA | ((sljit_s32)(~ins >> 24) & 0x2), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins = FCVT_S_W | FMT(op); #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) if (op & SLJIT_32) - inst |= F3(0x7); -#else - inst |= flags; + ins |= F3(0x7); +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + ins |= (1 << 21); + else if (src == SLJIT_IMM) + srcw = (sljit_s32)srcw; if (op != SLJIT_CONV_F64_FROM_S32) - inst |= F3(0x7); -#endif + ins |= F3(0x7); +#endif /* SLJIT_CONFIG_RISCV_32 */ - FAIL_IF(push_inst(compiler, inst)); + 
return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); +} - if (dst & SLJIT_MEM) - return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); - return SLJIT_SUCCESS; +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins = FCVT_S_WU | FMT(op); + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (op & SLJIT_32) + ins |= F3(0x7); +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW) + ins |= (1 << 21); + else if (src == SLJIT_IMM) + srcw = (sljit_u32)srcw; + + if (op != SLJIT_CONV_F64_FROM_S32) + ins |= F3(0x7); +#endif /* SLJIT_CONFIG_RISCV_32 */ + + return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, @@ -2170,14 +2353,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile case SLJIT_UNORDERED_OR_LESS: inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); break; - case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. 
*/ + case SLJIT_UNORDERED_OR_EQUAL: FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2))); FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src1))); inst = OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1); break; default: /* SLJIT_UNORDERED */ - FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(TMP_FREG1) | FRS1(src1) | FRS2(src2))); - inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(TMP_FREG1) | FRS2(TMP_FREG1); + if (src1 == src2) { + inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1); + break; + } + FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1))); + FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src2))); + inst = AND | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1); break; } @@ -2304,6 +2492,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, FDIV_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); break; + + case SLJIT_COPYSIGN_F64: + return push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)); } if (dst_r == TMP_FREG2) @@ -2312,8 +2503,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return SLJIT_SUCCESS; } -#undef FLOAT_DATA -#undef FMT +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, FMV_W_X | RS1(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm, TMP_REG3)); + return push_inst(compiler, FMV_W_X | RS1(TMP_REG1) | FRD(freg)); +} /* --------------------------------------------------------------------- */ /* Conditional instructions */ @@ -2341,6 +2549,54 @@ 
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define BRANCH_LENGTH ((sljit_ins)(7 * sizeof(sljit_ins)) << 7) #endif +static sljit_ins get_jump_instruction(sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + return BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO); + case SLJIT_NOT_EQUAL: + return BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO); + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_ORDERED_GREATER: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED: + return BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED: + return BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO); + default: + /* Not conditional branch. 
*/ + return 0; + } +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { struct sljit_jump *jump; @@ -2354,57 +2610,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - switch (type) { - case SLJIT_EQUAL: - inst = BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; - break; - case SLJIT_NOT_EQUAL: - inst = BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; - break; - case SLJIT_LESS: - case SLJIT_GREATER: - case SLJIT_SIG_LESS: - case SLJIT_SIG_GREATER: - case SLJIT_OVERFLOW: - case SLJIT_CARRY: - case SLJIT_F_EQUAL: - case SLJIT_ORDERED_EQUAL: - case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */ - case SLJIT_F_LESS: - case SLJIT_ORDERED_LESS: - case SLJIT_ORDERED_GREATER: - case SLJIT_F_LESS_EQUAL: - case SLJIT_ORDERED_LESS_EQUAL: - case SLJIT_ORDERED_GREATER_EQUAL: - case SLJIT_ORDERED: - inst = BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; - break; - case SLJIT_GREATER_EQUAL: - case SLJIT_LESS_EQUAL: - case SLJIT_SIG_GREATER_EQUAL: - case SLJIT_SIG_LESS_EQUAL: - case SLJIT_NOT_OVERFLOW: - case SLJIT_NOT_CARRY: - case SLJIT_F_NOT_EQUAL: - case SLJIT_UNORDERED_OR_NOT_EQUAL: - case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ - case SLJIT_F_GREATER_EQUAL: - case SLJIT_UNORDERED_OR_GREATER_EQUAL: - case SLJIT_UNORDERED_OR_LESS_EQUAL: - case SLJIT_F_GREATER: - case SLJIT_UNORDERED_OR_GREATER: - case SLJIT_UNORDERED_OR_LESS: - case SLJIT_UNORDERED: - inst = BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; - break; - default: - /* Not conditional branch. 
*/ - inst = 0; - break; - } + inst = get_jump_instruction(type); if (inst != 0) { - PTR_FAIL_IF(push_inst(compiler, inst)); + PTR_FAIL_IF(push_inst(compiler, inst | BRANCH_LENGTH)); jump->flags |= IS_COND; } @@ -2419,11 +2628,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, inst)); /* Maximum number of instructions required for generating a constant. */ -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - compiler->size += 1; -#else - compiler->size += 5; -#endif + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2474,7 +2679,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler src2 = TMP_REG2; } - if (src1 & SLJIT_IMM) { + if (src1 == SLJIT_IMM) { if (src1w != 0) { PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); src1 = TMP_REG1; @@ -2483,7 +2688,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler src1 = TMP_ZERO; } - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { if (src2w != 0) { PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3)); src2 = TMP_REG2; @@ -2536,11 +2741,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler PTR_FAIL_IF(push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); /* Maximum number of instructions required for generating a constant. 
*/ -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - compiler->size += 1; -#else - compiler->size += 5; -#endif + compiler->size += JUMP_MAX_SIZE - 1; return jump; } @@ -2553,7 +2754,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - if (!(src & SLJIT_IMM)) { + if (src != SLJIT_IMM) { if (src & SLJIT_MEM) { ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); @@ -2572,11 +2773,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); /* Maximum number of instructions required for generating a constant. */ -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - compiler->size += 1; -#else - compiler->size += 5; -#endif + compiler->size += JUMP_MAX_SIZE - 1; return SLJIT_SUCCESS; } @@ -2695,16 +2892,110 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0); } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { - CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + sljit_ins *ptr; + sljit_uw size; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(type & SLJIT_32) >> 5; + sljit_s32 inp_flags = ((type & SLJIT_32) ? 
INT_DATA : WORD_DATA) | LOAD_DATA; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; +#endif /* SLJIT_CONFIG_RISCV_64 */ - return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(dst_reg) | IMM_I(0))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG2; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2); + } + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src2_reg) | IMM_I(0))); + } + } + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w)); + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (word) + src1w = (sljit_s32)src1w; +#endif /* SLJIT_CONFIG_RISCV_64 */ + FAIL_IF(load_immediate(compiler, dst_reg, src1w, TMP_REG1)); + } else + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src1) | IMM_I(0))); + + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9); + return SLJIT_SUCCESS; } +#undef WORD + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins *ptr; + sljit_uw size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_freg != src2_freg) { + 
if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src2_freg) | FRS2(src2_freg))); + } + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w)); + else + FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src1) | FRS2(src1))); + + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 9); + return SLJIT_SUCCESS; +} + +#undef FLOAT_DATA +#undef FMT + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) @@ -2783,31 +3074,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_s32 dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) compiler->size += 1; -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ compiler->size += 5; -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); - return put_label; + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeS390X.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeS390X.c old mode 100644 new mode 100755 index 8d86d072b1..0a7df4a684 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeS390X.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeS390X.c @@ -38,17 +38,14 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) return "s390x" SLJIT_CPUINFO; } -/* Instructions. */ +/* Instructions are stored as 64 bit values regardless their size. */ typedef sljit_uw sljit_ins; -/* Instruction tags (most significant halfword). 
*/ -static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; - #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14 }; /* there are also a[2-15] available, but they are slower to access and @@ -83,7 +80,7 @@ static const sljit_gpr r10 = 10; /* reg_map[9] */ static const sljit_gpr r11 = 11; /* reg_map[10] */ static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ -static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ +static const sljit_gpr r14 = 14; /* reg_map[0]: return address */ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */ @@ -96,20 +93,16 @@ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac #define tmp0 r0 #define tmp1 r1 -/* TODO(carenas): flags should move to a different register so that - * link register doesn't need to change - */ - /* When reg cannot be unused. */ #define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP) /* Link register. 
*/ static const sljit_gpr link_r = 14; /* r14 */ -#define TMP_FREG1 (0) +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { - 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { + 0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1 }; #define R0A(r) (r) @@ -126,7 +119,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #define F0(r) ((sljit_ins)freg_map[r]) #define F4(r) (R4A((sljit_ins)freg_map[r])) +#define F12(r) (R12A((sljit_ins)freg_map[r])) #define F20(r) (R20A((sljit_ins)freg_map[r])) +#define F28(r) (R28A((sljit_ins)freg_map[r])) +#define F32(r) (R32A((sljit_ins)freg_map[r])) #define F36(r) (R36A((sljit_ins)freg_map[r])) struct sljit_s390x_const { @@ -141,50 +137,21 @@ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) return reg_map[r]; } -/* Size of instruction in bytes. Tags must already be cleared. */ -static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) -{ - /* keep faulting instructions */ - if (ins == 0) - return 2; - - if ((ins & 0x00000000ffffL) == ins) - return 2; - if ((ins & 0x0000ffffffffL) == ins) - return 4; - if ((ins & 0xffffffffffffL) == ins) - return 6; - - SLJIT_UNREACHABLE(); - return (sljit_uw)-1; -} - static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ibuf); *ibuf = ins; + + SLJIT_ASSERT(ins <= 0xffffffffffffL); + compiler->size++; - return SLJIT_SUCCESS; -} + if (ins & 0xffff00000000L) + compiler->size++; -static sljit_s32 encode_inst(void **ptr, sljit_ins ins) -{ - sljit_u16 *ibuf = (sljit_u16 *)*ptr; - sljit_uw size = sizeof_ins(ins); + if (ins & 0xffffffff0000L) + compiler->size++; - SLJIT_ASSERT((size & 6) == size); - switch (size) { - case 6: - *ibuf++ = (sljit_u16)(ins >> 32); - /* fallthrough */ - case 4: - *ibuf++ = (sljit_u16)(ins >> 16); - 
/* fallthrough */ - case 2: - *ibuf++ = (sljit_u16)(ins); - } - *ptr = (void*)ibuf; return SLJIT_SUCCESS; } @@ -211,6 +178,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t } /* fallthrough */ + case SLJIT_ATOMIC_STORED: case SLJIT_F_EQUAL: case SLJIT_ORDERED_EQUAL: return cc0; @@ -230,6 +198,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc1 | cc2 | cc3); case SLJIT_LESS: + case SLJIT_ATOMIC_NOT_STORED: return cc1; case SLJIT_GREATER_EQUAL: @@ -448,10 +417,12 @@ HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY) static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d) { + sljit_uw dh, dl; + SLJIT_ASSERT(is_s20(d)); - sljit_uw dh = (d >> 12) & 0xff; - sljit_uw dl = (d << 8) & 0xfff00; + dh = (d >> 12) & 0xff; + dl = ((sljit_uw)d << 8) & 0xfff00; return (dh | dl) << 8; } @@ -893,23 +864,17 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0) return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48))); - /* 6 byte instructions (requires extended immediate facility) */ - if (have_eimm()) { - if (is_s32(v)) - return push_inst(compiler, lgfi(target, (sljit_s32)v)); + if (is_s32(v)) + return push_inst(compiler, lgfi(target, (sljit_s32)v)); - if (((sljit_uw)v >> 32) == 0) - return push_inst(compiler, llilf(target, (sljit_u32)v)); + if (((sljit_uw)v >> 32) == 0) + return push_inst(compiler, llilf(target, (sljit_u32)v)); - if (((sljit_uw)v << 32) == 0) - return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32))); + if (((sljit_uw)v << 32) == 0) + return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32))); - FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v))); - return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); - } - - /* TODO(mundaym): instruction sequences that don't use extended immediates */ - abort(); + FAIL_IF(push_inst(compiler, 
llilf(target, (sljit_u32)v))); + return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); } struct addr { @@ -989,24 +954,47 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, (cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr) /* May clobber tmp1. */ -static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r, +static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg, + sljit_s32 mem, sljit_sw memw, + sljit_s32 is_32bit, const sljit_ins* forms) +{ + struct addr addr; + + SLJIT_ASSERT(mem & SLJIT_MEM); + + if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) { + FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1)); + return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1)); + return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +static const sljit_ins load_forms[3] = { + 0x58000000 /* l */, + 0xe30000000058 /* ly */, + 0xe30000000004 /* lg */ +}; + +static const sljit_ins store_forms[3] = { + 0x50000000 /* st */, + 0xe30000000050 /* sty */, + 0xe30000000024 /* stg */ +}; + +static const sljit_ins load_halfword_forms[3] = { + 0x48000000 /* lh */, + 0xe30000000078 /* lhy */, + 0xe30000000015 /* lgh */ +}; + +/* May clobber tmp1. 
*/ +static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r, sljit_s32 src, sljit_sw srcw, sljit_s32 is_32bit) { - struct addr addr; - sljit_ins ins; - - SLJIT_ASSERT(src & SLJIT_MEM); - - if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) { - FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); - return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); - } - - FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); - - ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */; - return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); + return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms); } /* May clobber tmp1. */ @@ -1026,24 +1014,11 @@ static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr d } /* May clobber tmp1. */ -static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r, +static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r, sljit_s32 dst, sljit_sw dstw, sljit_s32 is_32bit) { - struct addr addr; - sljit_ins ins; - - SLJIT_ASSERT(dst & SLJIT_MEM); - - if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) { - FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1)); - return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); - } - - FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1)); - - ins = is_32bit ? 
0xe30000000050 /* sty */ : 0xe30000000024 /* stg */; - return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); + return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms); } #undef WHEN @@ -1052,15 +1027,17 @@ static sljit_s32 emit_move(struct sljit_compiler *compiler, sljit_gpr dst_r, sljit_s32 src, sljit_sw srcw) { + sljit_gpr src_r; + SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK)); - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) return push_load_imm_inst(compiler, dst_r, srcw); if (src & SLJIT_MEM) return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0); - sljit_gpr src_r = gpr(src & REG_MASK); + src_r = gpr(src & REG_MASK); return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); } @@ -1253,10 +1230,10 @@ static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_sw dstw, sljit_sw srcw) { - SLJIT_ASSERT(dst & SLJIT_MEM); - sljit_gpr dst_r = tmp1; + SLJIT_ASSERT(dst & SLJIT_MEM); + if (dst & OFFS_REG_MASK) { sljit_gpr index = tmp1; @@ -1419,90 +1396,54 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil { struct sljit_label *label; struct sljit_jump *jump; - struct sljit_s390x_const *const_; - struct sljit_put_label *put_label; + struct sljit_const *const_; sljit_sw executable_offset; - sljit_uw ins_size = 0; /* instructions */ + sljit_uw ins_size = compiler->size << 1; sljit_uw pool_size = 0; /* literal pool */ sljit_uw pad_size; - sljit_uw i, j = 0; + sljit_uw half_count; + SLJIT_NEXT_DEFINE_TYPES; struct sljit_memory_fragment *buf; - void *code, *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_u16 *code; + sljit_u16 *code_ptr; sljit_uw *pool, *pool_ptr; - sljit_sw source, offset; /* TODO(carenas): only need 32 bit */ + sljit_ins ins; + sljit_sw source, offset; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); 
reverse_buf(compiler); - /* branch handling */ - label = compiler->labels; jump = compiler->jumps; - put_label = compiler->put_labels; - - /* TODO(carenas): compiler->executable_size could be calculated - * before to avoid the following loop (except for - * pool_size) - */ - /* calculate the size of the code */ - for (buf = compiler->buf; buf != NULL; buf = buf->next) { - sljit_uw len = buf->used_size / sizeof(sljit_ins); - sljit_ins *ibuf = (sljit_ins *)buf->memory; - for (i = 0; i < len; ++i, ++j) { - sljit_ins ins = ibuf[i]; - - /* TODO(carenas): instruction tag vs size/addr == j - * using instruction tags for const is creative - * but unlike all other architectures, and is not - * done consistently for all other objects. - * This might need reviewing later. - */ - if (ins & sljit_ins_const) { - pool_size += sizeof(*pool); - ins &= ~sljit_ins_const; - } - if (label && label->size == j) { - label->size = ins_size; - label = label->next; - } - if (jump && jump->addr == j) { - if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { - /* encoded: */ - /* brasl %r14, (or brcl , ) */ - /* replace with: */ - /* lgrl %r1, */ - /* bras %r14, %r1 (or bcr , %r1) */ - pool_size += sizeof(*pool); - ins_size += 2; - } - jump = jump->next; - } - if (put_label && put_label->addr == j) { - pool_size += sizeof(*pool); - put_label = put_label->next; - } - ins_size += sizeof_ins(ins); + while (jump != NULL) { + if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) { + /* encoded: */ + /* brasl %r14, (or brcl , ) */ + /* replace with: */ + /* lgrl %r1, */ + /* bras %r14, %r1 (or bcr , %r1) */ + pool_size += sizeof(*pool); + if (!(jump->flags & JUMP_MOV_ADDR)) + ins_size += 2; } + jump = jump->next; } - /* emit trailing label */ - if (label && label->size == j) { - label->size = ins_size; - label = label->next; + const_ = compiler->consts; + while (const_) { + pool_size += sizeof(*pool); + const_ = const_->next; } - SLJIT_ASSERT(!label); - 
SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!put_label); - /* pad code size to 8 bytes so is accessible with half word offsets */ /* the literal pool needs to be doubleword aligned */ pad_size = ((ins_size + 7UL) & ~7UL) - ins_size; SLJIT_ASSERT(pad_size < 8UL); /* allocate target buffer */ - code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size, - compiler->exec_allocator_data); + code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size, compiler->exec_allocator_data); PTR_FAIL_WITH_EXEC_IF(code); code_ptr = code; executable_offset = SLJIT_EXEC_OFFSET(code); @@ -1514,126 +1455,158 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil */ pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size); pool_ptr = pool; - const_ = (struct sljit_s390x_const *)compiler->consts; + buf = compiler->buf; + half_count = 0; - /* update label addresses */ label = compiler->labels; - while (label) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET( - (sljit_uw)code_ptr + label->size, executable_offset); - label = label->next; - } - - /* reset jumps */ jump = compiler->jumps; - put_label = compiler->put_labels; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); - /* emit the code */ - j = 0; - for (buf = compiler->buf; buf != NULL; buf = buf->next) { - sljit_uw len = buf->used_size / sizeof(sljit_ins); - sljit_ins *ibuf = (sljit_ins *)buf->memory; - for (i = 0; i < len; ++i, ++j) { - sljit_ins ins = ibuf[i]; - if (ins & sljit_ins_const) { - /* clear the const tag */ - ins &= ~sljit_ins_const; + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 3); + do { + ins = *buf_ptr++; - /* update instruction with relative address of constant */ - source = (sljit_sw)code_ptr; - offset = (sljit_sw)pool_ptr - source; + if (next_min_addr == half_count) { + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); - 
SLJIT_ASSERT(!(offset & 1)); - offset >>= 1; /* halfword (not byte) offset */ - SLJIT_ASSERT(is_s32(offset)); + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } - ins |= (sljit_ins)offset & 0xffffffff; + if (next_min_addr == next_jump_addr) { + if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) { + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - /* update address */ - const_->const_.addr = (sljit_uw)pool_ptr; + jump->addr = (sljit_uw)pool_ptr; - /* store initial value into pool and update pool address */ - *(pool_ptr++) = (sljit_uw)const_->init_value; + /* store target into pool */ + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + pool_ptr++; - /* move to next constant */ - const_ = (struct sljit_s390x_const *)const_->const_.next; - } - if (jump && jump->addr == j) { - sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target); - if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { - jump->addr = (sljit_uw)pool_ptr; + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; + } else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) { + sljit_ins arg; - /* load address into tmp1 */ - source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + jump->addr = (sljit_uw)pool_ptr; - SLJIT_ASSERT(!(offset & 1)); - offset >>= 1; + /* load address into tmp1 */ + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + + code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */); + code_ptr[1] = (sljit_u16)(offset >> 16); + code_ptr[2] = (sljit_u16)offset; + code_ptr += 3; + pool_ptr++; + + /* branch to tmp1 */ + arg = (ins >> 36) & 0xf; + if (((ins >> 32) & 0xf) == 4) { + /* brcl -> bcr */ + ins = bcr(arg, tmp1); + } else { + SLJIT_ASSERT(((ins >> 32) & 0xf) == 5); + /* brasl -> basr */ + ins = basr(arg, tmp1); + } + + /* Adjust half_count. 
*/ + half_count += 2; + } else + jump->addr = (sljit_uw)code_ptr; + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + /* update instruction with relative address of constant */ + source = (sljit_sw)code_ptr; + offset = (sljit_sw)pool_ptr - source; + + SLJIT_ASSERT(!(offset & 0x1)); + offset >>= 1; /* halfword (not byte) offset */ SLJIT_ASSERT(is_s32(offset)); - encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff)); - - /* store jump target into pool and update pool address */ - *(pool_ptr++) = (sljit_uw)target; - - /* branch to tmp1 */ - sljit_ins op = (ins >> 32) & 0xf; - sljit_ins arg = (ins >> 36) & 0xf; - switch (op) { - case 4: /* brcl -> bcr */ - ins = bcr(arg, tmp1); - break; - case 5: /* brasl -> basr */ - ins = basr(arg, tmp1); - break; - default: - abort(); - } - } - else { - jump->addr = (sljit_uw)code_ptr + 2; - source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - offset = target - source; - - /* offset must be halfword aligned */ - SLJIT_ASSERT(!(offset & 1)); - offset >>= 1; - SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */ - - /* patch jump target */ ins |= (sljit_ins)offset & 0xffffffff; + + /* update address */ + const_->addr = (sljit_uw)pool_ptr; + + /* store initial value into pool and update pool address */ + *(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value); + + /* move to next constant */ + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); } - jump = jump->next; + + SLJIT_GET_NEXT_MIN(); } - if (put_label && put_label->addr == j) { - source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; - - /* store target into pool */ - *pool_ptr = put_label->label->addr; - offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; - pool_ptr++; - - SLJIT_ASSERT(!(offset & 1)); - 
offset >>= 1; - SLJIT_ASSERT(is_s32(offset)); - ins |= (sljit_ins)offset & 0xffffffff; - - put_label = put_label->next; + if (ins & 0xffff00000000L) { + *code_ptr++ = (sljit_u16)(ins >> 32); + half_count++; } - encode_inst(&code_ptr, ins); - } - } - SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr); + + if (ins & 0xffffffff0000L) { + *code_ptr++ = (sljit_u16)(ins >> 16); + half_count++; + } + + *code_ptr++ = (sljit_u16)ins; + half_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr); SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr); + jump = compiler->jumps; + while (jump != NULL) { + offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); + + if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) { + /* Store jump target into pool. */ + *(sljit_uw*)(jump->addr) = (sljit_uw)offset; + } else { + code_ptr = (sljit_u16*)jump->addr; + offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + /* offset must be halfword aligned */ + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */ + + code_ptr[1] = (sljit_u16)(offset >> 16); + code_ptr[2] = (sljit_u16)offset; + } + jump = jump->next; + } + compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = ins_size; - code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset); - code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (pool_size) + compiler->executable_size += (pad_size + pool_size); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); SLJIT_CACHE_FLUSH(code, code_ptr); SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); return code; @@ -1644,15 +1617,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 
feature_type) /* TODO(mundaym): implement all */ switch (feature_type) { case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + return 1; +#endif /* SLJIT_IS_FPU_AVAILABLE */ + case SLJIT_HAS_CLZ: case SLJIT_HAS_REV: case SLJIT_HAS_ROT: case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_SIMD: + case SLJIT_HAS_ATOMIC: return 1; + case SLJIT_HAS_CTZ: return 2; + case SLJIT_HAS_CMOV: return have_lscond1() ? 1 : 0; } @@ -1661,7 +1644,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) { - return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL); + SLJIT_UNUSED_ARG(type); + return 0; } /* --------------------------------------------------------------------- */ @@ -1738,7 +1722,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; compiler->local_size = local_size; - FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); + if (is_s20(-local_size)) + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); + else + FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size)); if (options & SLJIT_ENTER_REG_ARG) return SLJIT_SUCCESS; @@ -1783,8 +1770,10 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (is_u12(local_size)) FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size)); - else + else if (is_s20(local_size)) FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size))); + else + FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size)); offset = 2 * SSIZE_OF(sw); if (saveds + scratches 
>= SLJIT_NUMBER_OF_REGISTERS) { @@ -2015,16 +2004,23 @@ static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op, struct addr addr; sljit_gpr reg; sljit_ins ins; + sljit_s32 opcode = GET_OPCODE(op); + sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16); if (dst & SLJIT_MEM) { if (src & SLJIT_MEM) { - FAIL_IF(load_word(compiler, tmp0, src, srcw, op & SLJIT_32)); + FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms)); reg = tmp0; } else reg = gpr(src); FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1)); - ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */; + + if (is_16bit) + ins = 0xe3000000003f /* strvh */; + else + ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */; + return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); } @@ -2032,12 +2028,46 @@ static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op, if (src & SLJIT_MEM) { FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); - ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */; - return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); + + if (is_16bit) + ins = 0xe3000000001f /* lrvh */; + else + ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */; + + FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset))); + + if (opcode == SLJIT_REV) + return SLJIT_SUCCESS; + + if (is_16bit) { + if (op & SLJIT_32) + ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */; + else + ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */; + } else + ins = (opcode == SLJIT_REV_U32) ? 
0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */; + + return push_inst(compiler, ins | R4A(reg) | R0A(reg)); } ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */; - return push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))); + FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src)))); + + if (opcode == SLJIT_REV) + return SLJIT_SUCCESS; + + if (!is_16bit) { + ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */; + return push_inst(compiler, ins | R4A(reg) | R0A(reg)); + } + + if (op & SLJIT_32) { + ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */; + return push_inst(compiler, ins | R20A(reg) | 16); + } + + ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */; + return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16)); } /* LEVAL will be defined later with different parameters as needed */ @@ -2116,7 +2146,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } /* LOAD IMMEDIATE */ - if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) { + if (FAST_IS_REG(dst) && src == SLJIT_IMM) { switch (opcode) { case SLJIT_MOV_U8: srcw = (sljit_sw)((sljit_u8)(srcw)); @@ -2195,14 +2225,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } /* STORE and STORE IMMEDIATE */ - if ((dst & SLJIT_MEM) - && (FAST_IS_REG(src) || (src & SLJIT_IMM))) { + if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) { + struct addr mem; sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0; - if (src & SLJIT_IMM) { + + if (src == SLJIT_IMM) { /* TODO(mundaym): MOVE IMMEDIATE? 
*/ FAIL_IF(push_load_imm_inst(compiler, reg, srcw)); } - struct addr mem; FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); switch (opcode) { case SLJIT_MOV_U8: @@ -2269,7 +2299,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNREACHABLE(); } - SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */ + SLJIT_ASSERT(src != SLJIT_IMM); dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; src_r = FAST_IS_REG(src) ? gpr(src) : tmp0; @@ -2285,7 +2315,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r)); break; + case SLJIT_REV_U32: + case SLJIT_REV_S32: + op |= SLJIT_32; + /* fallthrough */ case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: return sljit_emit_rev(compiler, op, dst, dstw, src, srcw); default: SLJIT_UNREACHABLE(); @@ -2341,7 +2377,7 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, const struct ins_forms *forms; sljit_ins ins; - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { if (sets_overflow) ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; @@ -2426,9 +2462,8 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; - if (src2 & SLJIT_IMM) { - if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) - { + if (src2 == SLJIT_IMM) { + if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) { if ((op & SLJIT_32) || is_s32(src2w)) { ins = (op & SLJIT_32) ? 
0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); @@ -2469,7 +2504,7 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, goto done; } - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { sljit_sw neg_src2w = -src2w; if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { @@ -2577,7 +2612,7 @@ static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w); } - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { if (is_s16(src2w)) { ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); @@ -2684,7 +2719,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o sljit_s32 type = GET_OPCODE(op); const struct ins_forms *forms; - if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { + if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { sljit_s32 count16 = 0; sljit_uw imm = (sljit_uw)src2w; @@ -2748,7 +2783,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, else FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); - if (!(src2 & SLJIT_IMM)) { + if (src2 != SLJIT_IMM) { if (FAST_IS_REG(src2)) base_r = gpr(src2); else { @@ -2808,7 +2843,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op else FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); - if (!(src2 & SLJIT_IMM)) { + if (src2 != SLJIT_IMM) { if (FAST_IS_REG(src2)) base_r = gpr(src2); else { @@ -2818,7 +2853,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op } if (GET_OPCODE(op) == SLJIT_ROTR) { - if (!(src2 & SLJIT_IMM)) { + if (src2 != SLJIT_IMM) { ins = (op & SLJIT_32) ? 
0x1300 /* lcr */ : 0xb9030000 /* lcgr */; FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r))); base_r = tmp1; @@ -2826,7 +2861,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op src2w = -src2w; } - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f)); ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */; @@ -2867,7 +2902,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->mode = op & SLJIT_32; compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); - if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { + if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) { src1 ^= src2; src2 ^= src1; src1 ^= src2; @@ -2960,7 +2995,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * ADJUST_LOCAL_OFFSET(src3, src3w); - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { src3w &= bit_length - 1; if (src3w == 0) @@ -3113,15 +3148,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return (sljit_s32)gpr(reg); -} + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return (sljit_s32)gpr(reg); + + if (type != SLJIT_FLOAT_REGISTER) + return -1; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return (sljit_s32)freg_map[reg]; } @@ -3213,33 +3249,61 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct 
sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_ins ins; - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); src = (sljit_s32)tmp0; } else if (src & SLJIT_MEM) { - FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32)); + FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000)); src = (sljit_s32)tmp0; } + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src))); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */; else ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */; - FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src))); + return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); +} - if (dst & SLJIT_MEM) - return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; - return SLJIT_SUCCESS; + if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) + srcw = (sljit_u32)srcw; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW) + ins = (op & SLJIT_32) ? 
0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */; + else + ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */; + + return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, @@ -3391,6 +3455,66 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w)); + src2 = TMP_FREG1; + } + + if (src1 & SLJIT_MEM) { + reg = (dst_freg == src2) ? 
TMP_FREG1 : dst_freg; + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w)); + src1 = reg; + } + + return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32)))); + return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm)); + return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { @@ -3440,14 +3564,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { + struct sljit_jump *jump; sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? 
get_cc(compiler, type & 0xff) : 0xf; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); /* record jump */ - struct sljit_jump *jump = (struct sljit_jump *) - ensure_abuf(compiler, sizeof(struct sljit_jump)); + jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); jump->addr = compiler->size; @@ -3485,7 +3609,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */ FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); } @@ -3505,6 +3629,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); @@ -3536,13 +3662,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr loc_r = tmp1; sljit_u8 mask = get_cc(compiler, type); CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); - sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; - sljit_gpr loc_r = tmp1; switch (GET_OPCODE(op)) { case SLJIT_AND: case SLJIT_OR: @@ -3602,37 +3728,125 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { - sljit_ins mask = get_cc(compiler, type & ~SLJIT_32); + sljit_ins mask; sljit_gpr src_r; + sljit_gpr dst_r = gpr(dst_reg); sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); - if (type & SLJIT_32) - srcw = (sljit_s32)srcw; + ADJUST_LOCAL_OFFSET(src1, src1w); - if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) { - ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */; - return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16); + if (dst_reg != src2_reg) { + if (src1 == dst_reg) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32)); + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 
0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg)))); + } } - if (src & SLJIT_IMM) { - FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); + mask = get_cc(compiler, type & ~SLJIT_32); + + if (src1 & SLJIT_MEM) { + if (src1 & OFFS_REG_MASK) { + src_r = gpr(OFFS_REG(src1)); + + if (src1w != 0) { + FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16))); + src_r = tmp1; + } + + FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK)))); + src_r = tmp1; + src1w = 0; + } else if (!is_s20(src1w)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w)); + + if (src1 & REG_MASK) + FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK)))); + + src_r = tmp1; + src1w = 0; + } else + src_r = gpr(src1 & REG_MASK); + + ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */; + return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w)); + } + + if (src1 == SLJIT_IMM) { + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; + + if (have_lscond2() && is_s16(src1w)) { + ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */; + return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16); + } + + FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w)); src_r = tmp0; } else - src_r = gpr(src); + src_r = gpr(src1); - if (have_lscond1()) { - ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */; - return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r)); + ins = (type & SLJIT_32) ? 
0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */; + return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins ins; + struct sljit_label *label; + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else { + ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg))); + } } - return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); + FAIL_IF(!jump); + + if (!(src1 & SLJIT_MEM)) { + ins = (type & SLJIT_32) ? 
0x3800 /* ler */ : 0x2800 /* ldr */; + FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1))); + } else + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w)); + + SLJIT_SKIP_CHECKS(compiler); + label = sljit_emit_label(compiler); + FAIL_IF(!label); + + sljit_set_label(jump, label); + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, @@ -3694,6 +3908,502 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + struct addr addr; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (!(srcdst & SLJIT_MEM)) { + if (type & SLJIT_SIMD_STORE) + ins = F36(srcdst) | F32(freg); + else + ins = F36(freg) | F32(srcdst); + + return push_inst(compiler, 0xe70000000056 /* vlr */ | ins); + } + + FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1)); + ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + + if (alignment >= 4) + ins |= 4 << 12; + else if (alignment == 3) + ins |= 3 << 12; + + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 
0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + struct addr addr; + sljit_gpr reg; + sljit_sw sign_ext; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg) + | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)); + + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12)); + } + + if (src == SLJIT_IMM) { + sign_ext = 0x10000; + + switch (elem_size) { + case 0: + srcw &= 0xff; + sign_ext = (sljit_s8)srcw; + break; + case 1: + srcw &= 0xffff; + sign_ext = (sljit_s16)srcw; + break; + case 2: + if ((sljit_s32)srcw == (sljit_s16)srcw) { + srcw &= 0xffff; + sign_ext = (sljit_s16)srcw; + } else + srcw &= 0xffffffff; + break; + default: + if (srcw == (sljit_s16)srcw) { + srcw &= 0xffff; + sign_ext = (sljit_s16)srcw; + } + break; + } + + if (sign_ext != 0x10000) { + if (sign_ext == 0 || sign_ext == -1) + return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg) + | (sign_ext == 0 ? 
0 : ((sljit_ins)0xffff << 16))); + + return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg) + | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12)); + } + + push_load_imm_inst(compiler, tmp0, srcw); + reg = tmp0; + } else + reg = gpr(src); + + FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12))); + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + struct addr addr; + sljit_gpr reg; + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (srcdst & SLJIT_MEM) { + FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1)); + ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + } + + if (type & SLJIT_SIMD_LANE_ZERO) { + if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1)) + return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12)); + + if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg))); + srcdst = TMP_FREG1; + srcdstw = 0; + } + + FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg))); + } + + if (srcdst & SLJIT_MEM) { + switch (elem_size) { + case 0: + ins |= 0xe70000000000 /* vleb */; + break; + case 1: + ins |= 0xe70000000001 /* vleh */; + 
break; + case 2: + ins |= 0xe70000000003 /* vlef */; + break; + default: + ins |= 0xe70000000002 /* vleg */; + break; + } + + /* Convert to vsteb - vsteg */ + if (type & SLJIT_SIMD_STORE) + ins |= 0x8; + + return push_inst(compiler, ins | ((sljit_ins)lane_index << 12)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (type & SLJIT_SIMD_STORE) + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12)); + + if (elem_size == 3) { + if (lane_index == 0) + ins = F32(srcdst) | F28(freg) | (1 << 12); + else + ins = F32(freg) | F28(srcdst); + + return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins); + } + + FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12))); + return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12)); + } + + if (srcdst == SLJIT_IMM) { + switch (elem_size) { + case 0: + ins = 0xe70000000040 /* vleib */; + srcdstw &= 0xff; + break; + case 1: + ins = 0xe70000000041 /* vleih */; + srcdstw &= 0xffff; + break; + case 2: + if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) { + srcdstw &= 0xffff; + ins = 0xe70000000043 /* vleif */; + } else + srcdstw &= 0xffffffff; + break; + default: + if (srcdstw == (sljit_s16)srcdstw) { + srcdstw &= 0xffff; + ins = 0xe70000000042 /* vleig */; + } + break; + } + + if (ins != 0) + return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12)); + + push_load_imm_inst(compiler, tmp0, srcdstw); + reg = tmp0; + } else + reg = gpr(srcdst); + + ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12); + + if (!(type & SLJIT_SIMD_STORE)) + return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins); + + FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins)); + + if (!(type & SLJIT_SIMD_LANE_SIGNED) || 
elem_size >= 3) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + ins = 0xb9060000 /* lgbr */; + break; + case 1: + ins = 0xb9070000 /* lghr */; + break; + default: + ins = 0xb9140000 /* lgfr */; + break; + } + + return push_inst(compiler, ins | R4A(reg) | R0A(reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) + | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + struct addr addr; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + + switch (elem2_size - elem_size) { + case 1: + ins |= 0xe70000000002 /* vleg */; + break; + case 2: + ins |= 
0xe70000000003 /* vlef */; + break; + default: + ins |= 0xe70000000001 /* vleh */; + break; + } + + FAIL_IF(push_inst(compiler, ins)); + src = freg; + } + + if (type & SLJIT_SIMD_FLOAT) { + FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12))); + FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12))); + return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12)); + } + + ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg); + + do { + FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12))); + src = freg; + } while (++elem_size < elem2_size); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_gpr dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078); + push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038); + FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0))); + break; + case 1: + push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070); + break; + case 2: + push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060); + break; + default: + push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040); + break; + } + + if (elem_size != 0) + FAIL_IF(push_inst(compiler, 
0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12))); + + FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1))); + + dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1) + | (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16)))); + + if (dst_r == tmp0) + return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = 0xe70000000068 /* vn */; + break; + case SLJIT_SIMD_OP2_OR: + ins = 0xe7000000006a /* vo */; + break; + case SLJIT_SIMD_OP2_XOR: + ins = 0xe7000000006d /* vx */; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 
src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins mask; + sljit_gpr tmp_r = gpr(temp_reg); + sljit_gpr mem_r = gpr(mem_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r)); + case SLJIT_MOV_U8: + mask = 0xff; + break; + case SLJIT_MOV_U16: + mask = 0xffff; + break; + default: + return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r)); + } + + /* tmp0 = (src_reg ^ tmp_r) & mask */ + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask)); + FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r))); + FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc)); + FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1))); + + /* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */ + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 
0x18 : 0x10))); + FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r))); + FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8))); + FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1))); + + /* Already computed: tmp_r = mem_r & ~0x3 */ + + FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r))); + FAIL_IF(push_inst(compiler, 0x1700 /* x */ | R4A(tmp0) | R0A(tmp1))); + return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r)); +} + /* --------------------------------------------------------------------- */ /* Other instructions */ /* --------------------------------------------------------------------- */ @@ -3724,9 +4434,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; if (have_genext()) - PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0))); + PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0))); else { - PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0))); PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); } @@ -3753,20 +4463,18 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( - struct sljit_compiler *compiler, - sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_gpr dst_r; CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - put_label = 
(struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; @@ -3780,7 +4488,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( if (dst & SLJIT_MEM) PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0)); - return put_label; + return jump; } /* TODO(carenas): EVAL probably should move up or be refactored */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_32.c old mode 100644 new mode 100755 index 69c917101f..d7399f8210 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_32.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_32.c @@ -62,21 +62,19 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw /* Both size flags cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); /* SSE2 and immediate is not possible. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); - SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) - && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) - && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2)); + SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT); size &= 0xf; - inst_size = size; + /* The mod r/m byte is always present. 
*/ + inst_size = size + 1; - if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) - inst_size++; - if (flags & EX86_PREF_66) + if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) inst_size++; /* Calculate size of b. */ - inst_size += 1; /* mod r/m byte. */ if (b & SLJIT_MEM) { if (!(b & REG_MASK)) inst_size += sizeof(sljit_sw); @@ -87,8 +85,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst_size += sizeof(sljit_s8); else inst_size += sizeof(sljit_sw); - } - else if (reg_map[b & REG_MASK] == 5) { + } else if (reg_map[b & REG_MASK] == 5) { /* Swap registers if possible. */ if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5) b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); @@ -105,15 +102,14 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw } /* Calculate size of a. */ - if (a & SLJIT_IMM) { + if (a == SLJIT_IMM) { if (flags & EX86_BIN_INS) { if (imma <= 127 && imma >= -128) { inst_size += 1; flags |= EX86_BYTE_ARG; } else inst_size += 4; - } - else if (flags & EX86_SHIFT_INS) { + } else if (flags & EX86_SHIFT_INS) { SLJIT_ASSERT(imma <= 0x1f); if (imma != 1) { inst_size++; @@ -125,8 +121,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst_size += sizeof(short); else inst_size += sizeof(sljit_sw); - } - else + } else SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); @@ -136,27 +131,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw INC_SIZE(inst_size); if (flags & EX86_PREF_F2) *inst++ = 0xf2; - if (flags & EX86_PREF_F3) + else if (flags & EX86_PREF_F3) *inst++ = 0xf3; - if (flags & EX86_PREF_66) + else if (flags & EX86_PREF_66) *inst++ = 0x66; buf_ptr = inst + size; /* Encode mod/rm byte. 
*/ if (!(flags & EX86_SHIFT_INS)) { - if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + if ((flags & EX86_BIN_INS) && a == SLJIT_IMM) *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; - if (a & SLJIT_IMM) + if (a == SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = U8(reg_map[a] << 3); else - *buf_ptr = U8(a << 3); - } - else { - if (a & SLJIT_IMM) { + *buf_ptr = U8(freg_map[a] << 3); + } else { + if (a == SLJIT_IMM) { if (imma == 1) *inst = GROUP_SHIFT_1; else @@ -167,7 +161,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw } if (!(b & SLJIT_MEM)) { - *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b)); + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : freg_map[b])); buf_ptr++; } else if (b & REG_MASK) { reg_map_b = reg_map[b & REG_MASK]; @@ -183,8 +177,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw if (!(b & OFFS_REG_MASK)) *buf_ptr++ |= reg_map_b; else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3)); + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3)); + buf_ptr += 2; } if (immb != 0 || reg_map_b == 5) { @@ -195,25 +190,24 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw buf_ptr += sizeof(sljit_sw); } } - } - else { + } else { if (reg_map_b == 5) *buf_ptr |= 0x40; - *buf_ptr++ |= 0x04; - *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr += 2; if (reg_map_b == 5) *buf_ptr++ = 0; } - } - else { + } else { *buf_ptr++ |= 0x05; sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. 
*/ buf_ptr += sizeof(sljit_sw); } - if (a & SLJIT_IMM) { + if (a == SLJIT_IMM) { if (flags & EX86_BYTE_ARG) *buf_ptr = U8(imma); else if (flags & EX86_HALF_ARG) @@ -222,35 +216,92 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw sljit_unaligned_store_sw(buf_ptr, imma); } - return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); + return inst; +} + +static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op, + /* The first and second register operand. */ + sljit_s32 a, sljit_s32 v, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 vex = 0; + sljit_u8 vex_m = 0; + sljit_uw size; + + SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + + if (op & VEX_OP_0F38) + vex_m = 0x2; + else if (op & VEX_OP_0F3A) + vex_m = 0x3; + + if (op & VEX_W) { + if (vex_m == 0) + vex_m = 0x1; + + vex |= 0x80; + } + + if (op & EX86_PREF_66) + vex |= 0x1; + else if (op & EX86_PREF_F2) + vex |= 0x3; + else if (op & EX86_PREF_F3) + vex |= 0x2; + + op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3); + + if (op & VEX_256) + vex |= 0x4; + + vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3)); + + size = op & ~(sljit_uw)0xff; + size |= (vex_m == 0) ? 
3 : 4; + + inst = emit_x86_instruction(compiler, size, a, 0, b, immb); + FAIL_IF(!inst); + + if (vex_m == 0) { + inst[0] = 0xc5; + inst[1] = U8(vex | 0x80); + inst[2] = U8(op); + return SLJIT_SUCCESS; + } + + inst[0] = 0xc4; + inst[1] = U8(vex_m | 0xe0); + inst[2] = vex; + inst[3] = U8(op); + return SLJIT_SUCCESS; } /* --------------------------------------------------------------------- */ /* Enter / return */ /* --------------------------------------------------------------------- */ -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) { sljit_uw type = jump->flags >> TYPE_SHIFT; if (type == SLJIT_JUMP) { *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { + } else if (type >= SLJIT_FAST_CALL) { *code_ptr++ = CALL_i32; - jump->addr++; - } - else { + } else { *code_ptr++ = GROUP_0F; *code_ptr++ = get_jump_code(type); - jump->addr += 2; } - if (jump->flags & JUMP_LABEL) - jump->flags |= PATCH_MW; - else + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_ADDR) sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset)); + else + jump->flags |= PATCH_MW; code_ptr += 4; return code_ptr; @@ -578,8 +629,6 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) { - sljit_u8 *inst; - CHECK_ERROR(); CHECK(check_sljit_emit_return_void(compiler)); @@ -588,11 +637,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler FAIL_IF(emit_stack_frame_release(compiler, 0)); - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - RET(); - return SLJIT_SUCCESS; + return emit_byte(compiler, RET_near); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_return_to(struct sljit_compiler *compiler, @@ -782,7 +827,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, offset = stack_size + compiler->local_size; - if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (src != SLJIT_IMM && src != SLJIT_R0) { if (word_arg_count >= 1) { EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); r2_offset = sizeof(sljit_sw); @@ -836,7 +881,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, stack_size = args_size + SSIZE_OF(sw); - if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) { + if (word_arg_count >= 1 && src != SLJIT_IMM && src != SLJIT_R0) { r2_offset = SSIZE_OF(sw); stack_size += SSIZE_OF(sw); } @@ -865,7 +910,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset); } - if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (src != SLJIT_IMM && src != SLJIT_R0) { if (word_arg_count >= 1) { SLJIT_ASSERT(r2_offset == sizeof(sljit_sw)); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); @@ -952,13 +997,7 @@ static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 e sljit_u8 *inst; BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - RET(); - - return SLJIT_SUCCESS; + return emit_byte(compiler, RET_near); } static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) @@ -1075,7 +1114,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi stack_size = type; FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw)); - if (!(src & SLJIT_IMM)) { + if (src != SLJIT_IMM) { src = SLJIT_R0; srcw = 0; } @@ -1148,15 +1187,9 @@ static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, CHECK_EXTRA_REGS(dst, dstw, (void)0); - if (FAST_IS_REG(dst)) { - /* Unused dest is 
possible here. */ - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - - INC_SIZE(1); - POP_REG(reg_map[dst]); - return SLJIT_SUCCESS; - } + /* Unused dest is possible here. */ + if (FAST_IS_REG(dst)) + return emit_byte(compiler, U8(POP_r + reg_map[dst])); /* Memory. */ inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); @@ -1291,6 +1324,201 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile return SLJIT_SUCCESS; } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_u8 *inst, *jump_inst1, *jump_inst2; + sljit_uw size1, size2; + + /* Binary representation of 0x80000000. */ + static const sljit_f64 f64_high_bit = (sljit_f64)0x80000000ul; + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (!(op & SLJIT_32)) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= ROL; + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = U8(get_jump_code(SLJIT_NOT_CARRY) - 0x10); + + size1 = compiler->size; + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_PREF_F2 | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit)); + + inst[1] = U8(compiler->size - size1); + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, 0, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (!FAST_IS_REG(src)) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + } + + BINARY_IMM32(CMP, 0, src, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + 
FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JL_i8; + jump_inst1 = inst; + + size1 = compiler->size; + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JMP_i8; + jump_inst2 = inst; + + size2 = compiler->size; + + jump_inst1[1] = U8(size2 - size1); + + if (src != TMP_REG1) + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JNC_i8; + jump_inst1 = inst; + + size1 = compiler->size; + + BINARY_IMM32(OR, 1, TMP_REG1, 0); + jump_inst1[1] = U8(compiler->size - size1); + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0)); + + jump_inst2[1] = U8(compiler->size - size2); + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + sljit_u8 *inst; + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + + inst[0] = GROUP_66; + inst[1] = GROUP_0F; + + if (u.imm == 0) { + inst[2] = PXOR_x_xm; + inst[3] = U8(freg | (freg << 3) | MOD_REG); + } else { + inst[2] = MOVD_x_rm; + inst[3] = U8(reg_map[TMP_REG1] | (freg << 3) | MOD_REG); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct 
sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + sljit_u8 *inst; + sljit_s32 tmp_freg = freg; + union { + sljit_s32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm[0] == 0) { + if (u.imm[1] == 0) + return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]); + } else + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]); + + FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0)); + + if (u.imm[1] == 0) + return SLJIT_SUCCESS; + + if (u.imm[0] == 0) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + + inst[0] = GROUP_0F; + inst[1] = SHUFPS_x_xm; + inst[2] = U8(MOD_REG | (freg << 3) | freg); + inst[3] = 0x51; + return SLJIT_SUCCESS; + } + + if (u.imm[0] != u.imm[1]) { + SLJIT_ASSERT(u.imm[1] != 0 && cpu_feature_list != 0); + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]); + + if (cpu_feature_list & CPU_FEATURE_SSE41) { + FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0)); + return emit_byte(compiler, 1); + } + + FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0)); + tmp_freg = TMP_FREG; + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + + inst[0] = GROUP_0F; + inst[1] = UNPCKLPS_x_xm; + inst[2] = U8(MOD_REG | (freg << 3) | tmp_freg); + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { @@ -1305,6 +1533,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi reg2 = 0; reg2w = 0; + SLJIT_ASSERT(cpu_feature_list != 0); + + if (!(op & SLJIT_32) && (cpu_feature_list & CPU_FEATURE_SSE41)) { + if (reg & REG_PAIR_MASK) { + 
reg2 = REG_PAIR_FIRST(reg); + reg = REG_PAIR_SECOND(reg); + + CHECK_EXTRA_REGS(reg, regw, (void)0); + + FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x) + | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw)); + } else + reg2 = reg; + + CHECK_EXTRA_REGS(reg2, reg2w, (void)0); + + FAIL_IF(emit_groupf_ext(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8) + | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, reg2, reg2w)); + return emit_byte(compiler, 1); + } + if (reg & REG_PAIR_MASK) { reg2 = REG_PAIR_SECOND(reg); reg = REG_PAIR_FIRST(reg); @@ -1317,12 +1566,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi CHECK_EXTRA_REGS(reg, regw, (void)0); - if (op & SLJIT_32) { - inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, reg, regw); - inst[0] = GROUP_0F; - inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x; - return SLJIT_SUCCESS; - } + if (op & SLJIT_32) + return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x) + | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw); if (op == SLJIT_COPY_FROM_F64) { inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); @@ -1334,32 +1580,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi inst[2] = PSHUFD_x_xm; inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg); inst[4] = 1; - } else if (reg != 0) { - inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, 0, reg, regw); - inst[0] = GROUP_0F; - inst[1] = MOVD_x_rm; - } + } else if (reg != 0) + FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); - if (reg2 != 0) { - inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, reg2, reg2w); - inst[0] = GROUP_0F; - inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? 
MOVD_x_rm : MOVD_rm_x; - } + if (reg2 != 0) + FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x) + | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg2, reg2w)); if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); FAIL_IF(!inst); - INC_SIZE(4); + INC_SIZE(3); - inst[0] = GROUP_66; - inst[1] = GROUP_0F; - inst[2] = PUNPCKLDQ_x_xm; - inst[3] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG)); - } else { - inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, 0, reg, regw); inst[0] = GROUP_0F; - inst[1] = MOVD_rm_x; - } + inst[1] = UNPCKLPS_x_xm; + inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG)); + } else + FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); return SLJIT_SUCCESS; } diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_64.c old mode 100644 new mode 100755 index b5efc1fda1..b537a1a3f8 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_64.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_64.c @@ -38,7 +38,7 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, FAIL_IF(!inst); INC_SIZE(2 + sizeof(sljit_sw)); inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); - inst[1] = U8(MOV_r_i32 | (reg_map[reg] & 0x7)); + inst[1] = U8(MOV_r_i32 | reg_lmap[reg]); sljit_unaligned_store_sw(inst + 2, imm); return SLJIT_SUCCESS; } @@ -72,7 +72,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw sljit_uw inst_size; /* The immediate operand must be 32 bit. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma)); /* Both cannot be switched on. 
*/ SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); /* Size flags not allowed for typed instructions. */ @@ -80,26 +80,24 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw /* Both size flags cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); /* SSE2 and immediate is not possible. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); - SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) - && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) - && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2)); + SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT); size &= 0xf; - inst_size = size; + /* The mod r/m byte is always present. */ + inst_size = size + 1; if (!compiler->mode32 && !(flags & EX86_NO_REXW)) rex |= REX_W; else if (flags & EX86_REX) rex |= REX; - if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) - inst_size++; - if (flags & EX86_PREF_66) + if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) inst_size++; /* Calculate size of b. */ - inst_size += 1; /* mod r/m byte. */ if (b & SLJIT_MEM) { if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); @@ -119,8 +117,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst_size += sizeof(sljit_s8); else inst_size += sizeof(sljit_s32); - } - else if (reg_lmap[b & REG_MASK] == 5) { + } else if (reg_lmap[b & REG_MASK] == 5) { /* Swap registers if possible. 
*/ if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5) b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); @@ -140,23 +137,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw rex |= REX_X; } } - } - else if (!(flags & EX86_SSE2_OP2)) { + } else if (!(flags & EX86_SSE2_OP2)) { if (reg_map[b] >= 8) rex |= REX_B; - } - else if (freg_map[b] >= 8) + } else if (freg_map[b] >= 8) rex |= REX_B; - if (a & SLJIT_IMM) { + if ((flags & EX86_VEX_EXT) && (rex & 0x3)) { + SLJIT_ASSERT(size == 2); + size++; + inst_size++; + } + + if (a == SLJIT_IMM) { if (flags & EX86_BIN_INS) { if (imma <= 127 && imma >= -128) { inst_size += 1; flags |= EX86_BYTE_ARG; } else inst_size += 4; - } - else if (flags & EX86_SHIFT_INS) { + } else if (flags & EX86_SHIFT_INS) { SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f)); if (imma != 1) { inst_size++; @@ -168,8 +168,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst_size += sizeof(short); else inst_size += sizeof(sljit_s32); - } - else { + } else { SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ if (!(flags & EX86_SSE2_OP1)) { @@ -186,14 +185,16 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); PTR_FAIL_IF(!inst); - /* Encoding the byte. */ + /* Encoding prefixes. */ INC_SIZE(inst_size); if (flags & EX86_PREF_F2) *inst++ = 0xf2; - if (flags & EX86_PREF_F3) + else if (flags & EX86_PREF_F3) *inst++ = 0xf3; - if (flags & EX86_PREF_66) + else if (flags & EX86_PREF_66) *inst++ = 0x66; + + /* Rex is always the last prefix. */ if (rex) *inst++ = rex; @@ -201,18 +202,17 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw /* Encode mod/rm byte. 
*/ if (!(flags & EX86_SHIFT_INS)) { - if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + if ((flags & EX86_BIN_INS) && a == SLJIT_IMM) *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; - if (a & SLJIT_IMM) + if (a == SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = U8(reg_lmap[a] << 3); else *buf_ptr = U8(freg_lmap[a] << 3); - } - else { - if (a & SLJIT_IMM) { + } else { + if (a == SLJIT_IMM) { if (imma == 1) *inst = GROUP_SHIFT_1; else @@ -239,8 +239,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw if (!(b & OFFS_REG_MASK)) *buf_ptr++ |= reg_lmap_b; else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); + buf_ptr += 2; } if (immb != 0 || reg_lmap_b == 5) { @@ -251,26 +252,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw buf_ptr += sizeof(sljit_s32); } } - } - else { + } else { if (reg_lmap_b == 5) *buf_ptr |= 0x40; - *buf_ptr++ |= 0x04; - *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr += 2; if (reg_lmap_b == 5) *buf_ptr++ = 0; } - } - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = 0x25; + } else { + buf_ptr[0] |= 0x04; + buf_ptr[1] = 0x25; + buf_ptr += 2; sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_s32); } - if (a & SLJIT_IMM) { + if (a == SLJIT_IMM) { if (flags & EX86_BYTE_ARG) *buf_ptr = U8(imma); else if (flags & EX86_HALF_ARG) @@ -279,33 +280,106 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma); } - return !(flags & EX86_SHIFT_INS) ? 
inst : (inst + 1); + return inst; +} + +static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op, + /* The first and second register operand. */ + sljit_s32 a, sljit_s32 v, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 vex = 0; + sljit_u8 vex_m = 0; + sljit_uw size; + + SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + + op |= EX86_REX; + + if (op & VEX_OP_0F38) + vex_m = 0x2; + else if (op & VEX_OP_0F3A) + vex_m = 0x3; + + if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) { + if (vex_m == 0) + vex_m = 0x1; + + vex |= 0x80; + } + + if (op & EX86_PREF_66) + vex |= 0x1; + else if (op & EX86_PREF_F2) + vex |= 0x3; + else if (op & EX86_PREF_F3) + vex |= 0x2; + + op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3); + + if (op & VEX_256) + vex |= 0x4; + + vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3)); + + size = op & ~(sljit_uw)0xff; + size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3; + + inst = emit_x86_instruction(compiler, size, a, 0, b, immb); + FAIL_IF(!inst); + + SLJIT_ASSERT((inst[-1] & 0xf0) == REX); + + /* If X or B is present in REX prefix. 
*/ + if (vex_m == 0 && inst[-1] & 0x3) + vex_m = 0x1; + + if (vex_m == 0) { + vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7); + + inst[-1] = 0xc5; + inst[0] = vex; + inst[1] = U8(op); + return SLJIT_SUCCESS; + } + + vex_m |= U8((inst[-1] ^ 0x7) << 5); + inst[-1] = 0xc4; + inst[0] = vex_m; + inst[1] = vex; + inst[2] = U8(op); + return SLJIT_SUCCESS; } /* --------------------------------------------------------------------- */ /* Enter / return */ /* --------------------------------------------------------------------- */ -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr) { sljit_uw type = jump->flags >> TYPE_SHIFT; - int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); + int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff); /* The relative jump below specialized for this case. */ SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); if (type < SLJIT_JUMP) { /* Invert type. */ - *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10); - *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); + code_ptr[0] = U8(get_jump_code(type ^ 0x1) - 0x10); + code_ptr[1] = short_addr ? (6 + 3) : (10 + 3); + code_ptr += 2; } - *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B); - *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; + code_ptr[0] = short_addr ? REX_B : (REX_W | REX_B); + code_ptr[1] = MOV_r_i32 | reg_lmap[TMP_REG2]; + code_ptr += 2; jump->addr = (sljit_uw)code_ptr; - if (jump->flags & JUMP_LABEL) + if (!(jump->flags & JUMP_ADDR)) jump->flags |= PATCH_MD; else if (short_addr) sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); @@ -314,60 +388,62 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ code_ptr += short_addr ? 
sizeof(sljit_s32) : sizeof(sljit_sw); - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); + code_ptr[0] = REX_B; + code_ptr[1] = GROUP_FF; + code_ptr[2] = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); - return code_ptr; + return code_ptr + 3; } -static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label) +static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { - if (max_label > HALFWORD_MAX) { - put_label->addr -= put_label->flags; - put_label->flags = PATCH_MD; + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size; + + if (addr > 0xffffffffl) { + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7); + code_ptr -= SSIZE_OF(s32) - 1; + + SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32); + + code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2)); + code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5); + code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m; + + jump->flags |= PATCH_MW; + return code_ptr; + } + + jump->flags |= PATCH_MD; return code_ptr; } - if (put_label->flags == 0) { - /* Destination is register. 
*/ - code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw); - - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); - SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); - - if ((code_ptr[0] & 0x07) != 0) { - code_ptr[0] = U8(code_ptr[0] & ~0x08); - code_ptr += 2 + sizeof(sljit_s32); - } - else { - code_ptr[0] = code_ptr[1]; - code_ptr += 1 + sizeof(sljit_s32); - } - - put_label->addr = (sljit_uw)code_ptr; - return code_ptr; - } - - code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); - SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); + code_ptr -= 2 + sizeof(sljit_uw); SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); - if ((code_ptr[1] & 0xf8) == MOV_r_i32) { - code_ptr += 2 + sizeof(sljit_uw); - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + if ((code_ptr[0] & 0x07) != 0) { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6); + code_ptr[0] = U8(code_ptr[0] & ~0x08); + code_ptr += 2 + sizeof(sljit_s32); + } else { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5); + code_ptr[0] = code_ptr[1]; + code_ptr += 1 + sizeof(sljit_s32); } - SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); - - code_ptr[0] = U8(code_ptr[0] & ~0x4); - code_ptr[1] = MOV_rm_i32; - code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3)); - - code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); - put_label->addr = (sljit_uw)code_ptr; - put_label->flags = 0; return code_ptr; } @@ -539,16 +615,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi tmp = SLJIT_FS0 - fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - inst[0] = GROUP_0F; - inst[1] = MOVAPS_xm_x; + FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; 
i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - inst[0] = GROUP_0F; - inst[1] = MOVAPS_xm_x; + FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } } @@ -606,16 +678,12 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit tmp = SLJIT_FS0 - fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - inst[0] = GROUP_0F; - inst[1] = MOVAPS_x_xm; + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - inst[0] = GROUP_0F; - inst[1] = MOVAPS_x_xm; + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } @@ -663,20 +731,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) { - sljit_u8 *inst; - CHECK_ERROR(); CHECK(check_sljit_emit_return_void(compiler)); compiler->mode32 = 0; FAIL_IF(emit_stack_frame_release(compiler, 0)); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - RET(); - return SLJIT_SUCCESS; + return emit_byte(compiler, RET_near); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, @@ -875,13 +936,8 @@ static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_u8 *inst; if (FAST_IS_REG(dst)) { - if (reg_map[dst] < 8) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - 
POP_REG(reg_lmap[dst]); - return SLJIT_SUCCESS; - } + if (reg_map[dst] < 8) + return emit_byte(compiler, U8(POP_r + reg_lmap[dst])); inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); @@ -1040,15 +1096,15 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, compiler->mode32 = 0; - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { if (FAST_IS_REG(dst)) { - if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - return SLJIT_SUCCESS; - } - return emit_load_imm64(compiler, dst, srcw); + if (!sign || ((sljit_u32)srcw <= 0x7fffffff)) + return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw); + + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; } compiler->mode32 = 1; inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); @@ -1069,7 +1125,7 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, *inst = MOVSXD_r_rm; } else { compiler->mode32 = 1; - FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); + EMIT_MOV(compiler, dst_r, 0, src, srcw); compiler->mode32 = 0; } } @@ -1085,6 +1141,168 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, return SLJIT_SUCCESS; } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG; + sljit_u8 *inst, *jump_inst1, *jump_inst2; + sljit_uw size1, size2; + + compiler->mode32 = 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + if (src != SLJIT_IMM) { + compiler->mode32 = 1; + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + compiler->mode32 = 0; + } else + FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw)); + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + + compiler->mode32 = 1; + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (!FAST_IS_REG(src)) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + } + + BINARY_IMM32(CMP, 0, src, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JL_i8; + jump_inst1 = inst; + + size1 = compiler->size; + + compiler->mode32 = 0; + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JMP_i8; + jump_inst2 = inst; + + size2 = compiler->size; + + jump_inst1[1] = U8(size2 - size1); + + if (src != TMP_REG1) + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + + EMIT_MOV(compiler, TMP_REG2, 0, src, 0); + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + + compiler->mode32 = 1; + BINARY_IMM32(AND, 1, TMP_REG2, 0); + + compiler->mode32 = 0; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0); + FAIL_IF(!inst); + inst[0] = OR_r_rm; + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + compiler->mode32 = 1; + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0)); + + jump_inst2[1] = U8(compiler->size - size2); + + 
if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero) +{ + sljit_u8 *inst; + sljit_u32 size; + + if (is_zero) { + rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0; + } else { + if (freg_map[freg] >= 8) + rex |= REX_R; + if (reg_map[TMP_REG1] >= 8) + rex |= REX_B; + } + + size = (rex != 0) ? 5 : 4; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + + *inst++ = GROUP_66; + if (rex != 0) + *inst++ = rex; + inst[0] = GROUP_0F; + + if (is_zero) { + inst[1] = PXOR_x_xm; + inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG); + } else { + inst[1] = MOVD_x_rm; + inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) { + compiler->mode32 = 1; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + } + + return sljit_emit_fset(compiler, freg, 0, u.imm == 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) { + compiler->mode32 = 0; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + } + + return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 freg, sljit_s32 reg) { diff --git 
a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_common.c old mode 100644 new mode 100755 index a9645bc175..5e63067124 --- a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_common.c +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_common.c @@ -61,17 +61,20 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 15 - R15 */ -#define TMP_FREG (0) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -/* Last register + 1. */ -#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3 }; +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { + 0, 1, 2, 3, 4, 5, 6, 7, 0 +}; + #define CHECK_EXTRA_REGS(p, w, do) \ if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ @@ -81,12 +84,10 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { #else /* SLJIT_CONFIG_X86_32 */ -/* Last register + 1. */ -#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present - Note: avoid to use r12 and r13 for memory addessing + Note: avoid to use r12 and r13 for memory addressing therefore r12 is better to be a higher saved register. */ #ifndef _WIN64 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */ @@ -95,7 +96,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { }; /* low-map. reg_map & 0x7. */ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 + 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 }; #else /* Args: rcx(=1), rdx(=2), r8, r9. 
Scratches: rax(=0), r10, r11 */ @@ -109,12 +110,12 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { #endif /* Args: xmm0-xmm3 */ -static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { - 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { + 0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4 }; /* low-map. freg_map & 0x7. */ -static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { - 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 +static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { + 0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4 }; #define REX_W 0x48 @@ -140,161 +141,242 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #define U8(v) ((sljit_u8)(v)) - /* Size flags for emit_x86_instruction: */ -#define EX86_BIN_INS 0x0010 -#define EX86_SHIFT_INS 0x0020 -#define EX86_REX 0x0040 -#define EX86_NO_REXW 0x0080 -#define EX86_BYTE_ARG 0x0100 -#define EX86_HALF_ARG 0x0200 -#define EX86_PREF_66 0x0400 -#define EX86_PREF_F2 0x0800 -#define EX86_PREF_F3 0x1000 -#define EX86_SSE2_OP1 0x2000 -#define EX86_SSE2_OP2 0x4000 +#define EX86_BIN_INS ((sljit_uw)0x000010) +#define EX86_SHIFT_INS ((sljit_uw)0x000020) +#define EX86_BYTE_ARG ((sljit_uw)0x000040) +#define EX86_HALF_ARG ((sljit_uw)0x000080) +/* Size flags for both emit_x86_instruction and emit_vex_instruction: */ +#define EX86_REX ((sljit_uw)0x000100) +#define EX86_NO_REXW ((sljit_uw)0x000200) +#define EX86_PREF_66 ((sljit_uw)0x000400) +#define EX86_PREF_F2 ((sljit_uw)0x000800) +#define EX86_PREF_F3 ((sljit_uw)0x001000) +#define EX86_SSE2_OP1 ((sljit_uw)0x002000) +#define EX86_SSE2_OP2 ((sljit_uw)0x004000) #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) +#define EX86_VEX_EXT ((sljit_uw)0x008000) +/* Op flags for emit_vex_instruction: */ +#define VEX_OP_0F38 ((sljit_uw)0x010000) +#define VEX_OP_0F3A ((sljit_uw)0x020000) +#define VEX_SSE2_OPV 
((sljit_uw)0x040000) +#define VEX_AUTO_W ((sljit_uw)0x080000) +#define VEX_W ((sljit_uw)0x100000) +#define VEX_256 ((sljit_uw)0x200000) + +#define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66) +#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) /* --------------------------------------------------------------------- */ -/* Instrucion forms */ +/* Instruction forms */ /* --------------------------------------------------------------------- */ -#define ADD (/* BINARY */ 0 << 3) -#define ADD_EAX_i32 0x05 -#define ADD_r_rm 0x03 -#define ADD_rm_r 0x01 -#define ADDSD_x_xm 0x58 -#define ADC (/* BINARY */ 2 << 3) -#define ADC_EAX_i32 0x15 -#define ADC_r_rm 0x13 -#define ADC_rm_r 0x11 -#define AND (/* BINARY */ 4 << 3) -#define AND_EAX_i32 0x25 -#define AND_r_rm 0x23 -#define AND_rm_r 0x21 -#define ANDPD_x_xm 0x54 -#define BSR_r_rm (/* GROUP_0F */ 0xbd) -#define BSF_r_rm (/* GROUP_0F */ 0xbc) -#define BSWAP_r (/* GROUP_0F */ 0xc8) -#define CALL_i32 0xe8 -#define CALL_rm (/* GROUP_FF */ 2 << 3) -#define CDQ 0x99 -#define CMOVE_r_rm (/* GROUP_0F */ 0x44) -#define CMP (/* BINARY */ 7 << 3) -#define CMP_EAX_i32 0x3d -#define CMP_r_rm 0x3b -#define CMP_rm_r 0x39 -#define CVTPD2PS_x_xm 0x5a -#define CVTSI2SD_x_rm 0x2a -#define CVTTSD2SI_r_xm 0x2c -#define DIV (/* GROUP_F7 */ 6 << 3) -#define DIVSD_x_xm 0x5e -#define FLDS 0xd9 -#define FLDL 0xdd -#define FSTPS 0xd9 -#define FSTPD 0xdd -#define INT3 0xcc -#define IDIV (/* GROUP_F7 */ 7 << 3) -#define IMUL (/* GROUP_F7 */ 5 << 3) -#define IMUL_r_rm (/* GROUP_0F */ 0xaf) -#define IMUL_r_rm_i8 0x6b -#define IMUL_r_rm_i32 0x69 -#define JE_i8 0x74 -#define JNE_i8 0x75 -#define JMP_i8 0xeb -#define JMP_i32 0xe9 -#define JMP_rm (/* GROUP_FF */ 4 << 3) -#define LEA_r_m 0x8d -#define LOOP_i8 0xe2 -#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd) -#define MOV_r_rm 0x8b -#define MOV_r_i32 0xb8 -#define MOV_rm_r 0x89 -#define MOV_rm_i32 0xc7 -#define MOV_rm8_i8 0xc6 -#define MOV_rm8_r8 0x88 
-#define MOVAPS_x_xm 0x28 -#define MOVAPS_xm_x 0x29 -#define MOVD_x_rm 0x6e -#define MOVD_rm_x 0x7e -#define MOVSD_x_xm 0x10 -#define MOVSD_xm_x 0x11 -#define MOVSXD_r_rm 0x63 -#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) -#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) -#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) -#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) -#define MUL (/* GROUP_F7 */ 4 << 3) -#define MULSD_x_xm 0x59 -#define NEG_rm (/* GROUP_F7 */ 3 << 3) -#define NOP 0x90 -#define NOT_rm (/* GROUP_F7 */ 2 << 3) -#define OR (/* BINARY */ 1 << 3) -#define OR_r_rm 0x0b -#define OR_EAX_i32 0x0d -#define OR_rm_r 0x09 -#define OR_rm8_r8 0x08 -#define POP_r 0x58 -#define POP_rm 0x8f -#define POPF 0x9d -#define PREFETCH 0x18 -#define PSHUFD_x_xm 0x70 -#define PUNPCKLDQ_x_xm 0x62 -#define PUSH_i32 0x68 -#define PUSH_r 0x50 -#define PUSH_rm (/* GROUP_FF */ 6 << 3) -#define PUSHF 0x9c -#define ROL (/* SHIFT */ 0 << 3) -#define ROR (/* SHIFT */ 1 << 3) -#define RET_near 0xc3 -#define RET_i16 0xc2 -#define SBB (/* BINARY */ 3 << 3) -#define SBB_EAX_i32 0x1d -#define SBB_r_rm 0x1b -#define SBB_rm_r 0x19 -#define SAR (/* SHIFT */ 7 << 3) -#define SHL (/* SHIFT */ 4 << 3) -#define SHLD (/* GROUP_0F */ 0xa5) -#define SHRD (/* GROUP_0F */ 0xad) -#define SHR (/* SHIFT */ 5 << 3) -#define SUB (/* BINARY */ 5 << 3) -#define SUB_EAX_i32 0x2d -#define SUB_r_rm 0x2b -#define SUB_rm_r 0x29 -#define SUBSD_x_xm 0x5c -#define TEST_EAX_i32 0xa9 -#define TEST_rm_r 0x85 -#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc) -#define UCOMISD_x_xm 0x2e -#define UNPCKLPD_x_xm 0x14 -#define XCHG_EAX_r 0x90 -#define XCHG_r_rm 0x87 -#define XOR (/* BINARY */ 6 << 3) -#define XOR_EAX_i32 0x35 -#define XOR_r_rm 0x33 -#define XOR_rm_r 0x31 -#define XORPD_x_xm 0x57 +#define ADD (/* BINARY */ 0 << 3) +#define ADD_EAX_i32 0x05 +#define ADD_r_rm 0x03 +#define ADD_rm_r 0x01 +#define ADDSD_x_xm 0x58 +#define ADC (/* BINARY */ 2 << 3) +#define ADC_EAX_i32 0x15 +#define ADC_r_rm 0x13 +#define ADC_rm_r 0x11 +#define 
AND (/* BINARY */ 4 << 3) +#define AND_EAX_i32 0x25 +#define AND_r_rm 0x23 +#define AND_rm_r 0x21 +#define ANDPD_x_xm 0x54 +#define BSR_r_rm (/* GROUP_0F */ 0xbd) +#define BSF_r_rm (/* GROUP_0F */ 0xbc) +#define BSWAP_r (/* GROUP_0F */ 0xc8) +#define CALL_i32 0xe8 +#define CALL_rm (/* GROUP_FF */ 2 << 3) +#define CDQ 0x99 +#define CMOVE_r_rm (/* GROUP_0F */ 0x44) +#define CMP (/* BINARY */ 7 << 3) +#define CMP_EAX_i32 0x3d +#define CMP_r_rm 0x3b +#define CMP_rm_r 0x39 +#define CMPS_x_xm 0xc2 +#define CMPXCHG_rm_r 0xb1 +#define CMPXCHG_rm8_r 0xb0 +#define CVTPD2PS_x_xm 0x5a +#define CVTPS2PD_x_xm 0x5a +#define CVTSI2SD_x_rm 0x2a +#define CVTTSD2SI_r_xm 0x2c +#define DIV (/* GROUP_F7 */ 6 << 3) +#define DIVSD_x_xm 0x5e +#define EXTRACTPS_x_xm 0x17 +#define FLDS 0xd9 +#define FLDL 0xdd +#define FSTPS 0xd9 +#define FSTPD 0xdd +#define INSERTPS_x_xm 0x21 +#define INT3 0xcc +#define IDIV (/* GROUP_F7 */ 7 << 3) +#define IMUL (/* GROUP_F7 */ 5 << 3) +#define IMUL_r_rm (/* GROUP_0F */ 0xaf) +#define IMUL_r_rm_i8 0x6b +#define IMUL_r_rm_i32 0x69 +#define JL_i8 0x7c +#define JE_i8 0x74 +#define JNC_i8 0x73 +#define JNE_i8 0x75 +#define JMP_i8 0xeb +#define JMP_i32 0xe9 +#define JMP_rm (/* GROUP_FF */ 4 << 3) +#define LEA_r_m 0x8d +#define LOOP_i8 0xe2 +#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd) +#define MOV_r_rm 0x8b +#define MOV_r_i32 0xb8 +#define MOV_rm_r 0x89 +#define MOV_rm_i32 0xc7 +#define MOV_rm8_i8 0xc6 +#define MOV_rm8_r8 0x88 +#define MOVAPS_x_xm 0x28 +#define MOVAPS_xm_x 0x29 +#define MOVD_x_rm 0x6e +#define MOVD_rm_x 0x7e +#define MOVDDUP_x_xm 0x12 +#define MOVDQA_x_xm 0x6f +#define MOVDQA_xm_x 0x7f +#define MOVHLPS_x_x 0x12 +#define MOVHPD_m_x 0x17 +#define MOVHPD_x_m 0x16 +#define MOVLHPS_x_x 0x16 +#define MOVLPD_m_x 0x13 +#define MOVLPD_x_m 0x12 +#define MOVMSKPS_r_x (/* GROUP_0F */ 0x50) +#define MOVQ_x_xm (/* GROUP_0F */ 0x7e) +#define MOVSD_x_xm 0x10 +#define MOVSD_xm_x 0x11 +#define MOVSHDUP_x_xm 0x16 +#define MOVSXD_r_rm 0x63 +#define 
MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) +#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) +#define MOVUPS_x_xm 0x10 +#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) +#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) +#define MUL (/* GROUP_F7 */ 4 << 3) +#define MULSD_x_xm 0x59 +#define NEG_rm (/* GROUP_F7 */ 3 << 3) +#define NOP 0x90 +#define NOT_rm (/* GROUP_F7 */ 2 << 3) +#define OR (/* BINARY */ 1 << 3) +#define OR_r_rm 0x0b +#define OR_EAX_i32 0x0d +#define OR_rm_r 0x09 +#define OR_rm8_r8 0x08 +#define ORPD_x_xm 0x56 +#define PACKSSWB_x_xm (/* GROUP_0F */ 0x63) +#define PAND_x_xm 0xdb +#define PCMPEQD_x_xm 0x76 +#define PINSRB_x_rm_i8 0x20 +#define PINSRW_x_rm_i8 0xc4 +#define PINSRD_x_rm_i8 0x22 +#define PEXTRB_rm_x_i8 0x14 +#define PEXTRW_rm_x_i8 0x15 +#define PEXTRD_rm_x_i8 0x16 +#define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7) +#define PMOVSXBD_x_xm 0x21 +#define PMOVSXBQ_x_xm 0x22 +#define PMOVSXBW_x_xm 0x20 +#define PMOVSXDQ_x_xm 0x25 +#define PMOVSXWD_x_xm 0x23 +#define PMOVSXWQ_x_xm 0x24 +#define PMOVZXBD_x_xm 0x31 +#define PMOVZXBQ_x_xm 0x32 +#define PMOVZXBW_x_xm 0x30 +#define PMOVZXDQ_x_xm 0x35 +#define PMOVZXWD_x_xm 0x33 +#define PMOVZXWQ_x_xm 0x34 +#define POP_r 0x58 +#define POP_rm 0x8f +#define POPF 0x9d +#define POR_x_xm 0xeb +#define PREFETCH 0x18 +#define PSHUFB_x_xm 0x00 +#define PSHUFD_x_xm 0x70 +#define PSHUFLW_x_xm 0x70 +#define PSRLDQ_x 0x73 +#define PSLLD_x_i8 0x72 +#define PSLLQ_x_i8 0x73 +#define PUSH_i32 0x68 +#define PUSH_r 0x50 +#define PUSH_rm (/* GROUP_FF */ 6 << 3) +#define PUSHF 0x9c +#define PXOR_x_xm 0xef +#define ROL (/* SHIFT */ 0 << 3) +#define ROR (/* SHIFT */ 1 << 3) +#define RET_near 0xc3 +#define RET_i16 0xc2 +#define SBB (/* BINARY */ 3 << 3) +#define SBB_EAX_i32 0x1d +#define SBB_r_rm 0x1b +#define SBB_rm_r 0x19 +#define SAR (/* SHIFT */ 7 << 3) +#define SHL (/* SHIFT */ 4 << 3) +#define SHLD (/* GROUP_0F */ 0xa5) +#define SHRD (/* GROUP_0F */ 0xad) +#define SHR (/* SHIFT */ 5 << 3) +#define SHUFPS_x_xm 0xc6 +#define SUB (/* BINARY */ 5 << 3) 
+#define SUB_EAX_i32 0x2d +#define SUB_r_rm 0x2b +#define SUB_rm_r 0x29 +#define SUBSD_x_xm 0x5c +#define TEST_EAX_i32 0xa9 +#define TEST_rm_r 0x85 +#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc) +#define UCOMISD_x_xm 0x2e +#define UNPCKLPD_x_xm 0x14 +#define UNPCKLPS_x_xm 0x14 +#define VBROADCASTSD_x_xm 0x19 +#define VBROADCASTSS_x_xm 0x18 +#define VEXTRACTF128_x_ym 0x19 +#define VEXTRACTI128_x_ym 0x39 +#define VINSERTF128_y_y_xm 0x18 +#define VINSERTI128_y_y_xm 0x38 +#define VPBROADCASTB_x_xm 0x78 +#define VPBROADCASTD_x_xm 0x58 +#define VPBROADCASTQ_x_xm 0x59 +#define VPBROADCASTW_x_xm 0x79 +#define VPERMPD_y_ym 0x01 +#define VPERMQ_y_ym 0x00 +#define XCHG_EAX_r 0x90 +#define XCHG_r_rm 0x87 +#define XOR (/* BINARY */ 6 << 3) +#define XOR_EAX_i32 0x35 +#define XOR_r_rm 0x33 +#define XOR_rm_r 0x31 +#define XORPD_x_xm 0x57 -#define GROUP_0F 0x0f -#define GROUP_66 0x66 -#define GROUP_F3 0xf3 -#define GROUP_F7 0xf7 -#define GROUP_FF 0xff -#define GROUP_BINARY_81 0x81 -#define GROUP_BINARY_83 0x83 -#define GROUP_SHIFT_1 0xd1 -#define GROUP_SHIFT_N 0xc1 -#define GROUP_SHIFT_CL 0xd3 +#define GROUP_0F 0x0f +#define GROUP_66 0x66 +#define GROUP_F3 0xf3 +#define GROUP_F7 0xf7 +#define GROUP_FF 0xff +#define GROUP_BINARY_81 0x81 +#define GROUP_BINARY_83 0x83 +#define GROUP_SHIFT_1 0xd1 +#define GROUP_SHIFT_N 0xc1 +#define GROUP_SHIFT_CL 0xd3 +#define GROUP_LOCK 0xf0 -#define MOD_REG 0xc0 -#define MOD_DISP8 0x40 +#define MOD_REG 0xc0 +#define MOD_DISP8 0x40 -#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) +#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) -#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) -#define POP_REG(r) (*inst++ = U8(POP_r + (r))) -#define RET() (*inst++ = RET_near) -#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) +#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) +#define POP_REG(r) (*inst++ = U8(POP_r + (r))) +#define RET() (*inst++ = RET_near) +#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), 
*inst++ = 0) + +#define SLJIT_INST_LABEL 255 +#define SLJIT_INST_JUMP 254 +#define SLJIT_INST_MOV_ADDR 253 +#define SLJIT_INST_CONST 252 /* Multithreading does not affect these static variables, since they store built-in CPU features. Therefore they can be overwritten by different threads @@ -303,9 +385,12 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) #define CPU_FEATURE_SSE2 0x002 #endif -#define CPU_FEATURE_LZCNT 0x004 -#define CPU_FEATURE_TZCNT 0x008 -#define CPU_FEATURE_CMOV 0x010 +#define CPU_FEATURE_SSE41 0x004 +#define CPU_FEATURE_LZCNT 0x008 +#define CPU_FEATURE_TZCNT 0x010 +#define CPU_FEATURE_CMOV 0x020 +#define CPU_FEATURE_AVX 0x040 +#define CPU_FEATURE_AVX2 0x080 static sljit_u32 cpu_feature_list = 0; @@ -338,124 +423,117 @@ static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) /* Utility functions */ /******************************************************/ -static void get_cpu_features(void) +static void execute_cpu_id(sljit_u32 info[4]) { - sljit_u32 feature_list = CPU_FEATURE_DETECTED; - sljit_u32 value; - #if defined(_MSC_VER) && _MSC_VER >= 1400 - int CPUInfo[4]; - - __cpuid(CPUInfo, 0); - if (CPUInfo[0] >= 7) { - __cpuidex(CPUInfo, 7, 0); - if (CPUInfo[1] & 0x8) - feature_list |= CPU_FEATURE_TZCNT; - } - - __cpuid(CPUInfo, (int)0x80000001); - if (CPUInfo[2] & 0x20) - feature_list |= CPU_FEATURE_LZCNT; - - __cpuid(CPUInfo, 1); - value = (sljit_u32)CPUInfo[3]; + __cpuidex((int*)info, (int)info[0], (int)info[2]); #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__) /* AT&T syntax. 
*/ __asm__ ( - "movl $0x0, %%eax\n" - "lzcnt %%eax, %%eax\n" - "setnz %%al\n" - "movl %%eax, %0\n" - : "=g" (value) - : #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - : "eax" -#else - : "rax" -#endif - ); - - if (value & 0x1) - feature_list |= CPU_FEATURE_LZCNT; - - __asm__ ( - "movl $0x0, %%eax\n" - "tzcnt %%eax, %%eax\n" - "setnz %%al\n" - "movl %%eax, %0\n" - : "=g" (value) - : -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - : "eax" -#else - : "rax" -#endif - ); - - if (value & 0x1) - feature_list |= CPU_FEATURE_TZCNT; - - __asm__ ( - "movl $0x1, %%eax\n" -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - /* On x86-32, there is no red zone, so this - should work (no need for a local variable). */ - "push %%ebx\n" -#endif + "movl %0, %%esi\n" + "movl (%%esi), %%eax\n" + "movl 8(%%esi), %%ecx\n" + "pushl %%ebx\n" "cpuid\n" -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - "pop %%ebx\n" -#endif - "movl %%edx, %0\n" - : "=g" (value) + "movl %%eax, (%%esi)\n" + "movl %%ebx, 4(%%esi)\n" + "popl %%ebx\n" + "movl %%ecx, 8(%%esi)\n" + "movl %%edx, 12(%%esi)\n" +#else /* !SLJIT_CONFIG_X86_32 */ + "movq %0, %%rsi\n" + "movl (%%rsi), %%eax\n" + "movl 8(%%rsi), %%ecx\n" + "cpuid\n" + "movl %%eax, (%%rsi)\n" + "movl %%ebx, 4(%%rsi)\n" + "movl %%ecx, 8(%%rsi)\n" + "movl %%edx, 12(%%rsi)\n" +#endif /* SLJIT_CONFIG_X86_32 */ : + : "r" (info) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - : "eax", "ecx", "edx" -#else - : "rax", "rbx", "rcx", "rdx" -#endif + : "memory", "eax", "ecx", "edx", "esi" +#else /* !SLJIT_CONFIG_X86_32 */ + : "memory", "rax", "rbx", "rcx", "rdx", "rsi" +#endif /* SLJIT_CONFIG_X86_32 */ ); -#else /* _MSC_VER && _MSC_VER >= 1400 */ +#else /* _MSC_VER < 1400 */ /* Intel syntax. 
*/ __asm { - mov eax, 0 - lzcnt eax, eax - setnz al - mov value, eax - } - - if (value & 0x1) - feature_list |= CPU_FEATURE_LZCNT; - - __asm { - mov eax, 0 - tzcnt eax, eax - setnz al - mov value, eax - } - - if (value & 0x1) - feature_list |= CPU_FEATURE_TZCNT; - - __asm { - mov eax, 1 +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + mov esi, info + mov eax, [esi] + mov ecx, [esi + 8] cpuid - mov value, edx + mov [esi], eax + mov [esi + 4], ebx + mov [esi + 8], ecx + mov [esi + 12], edx +#else /* !SLJIT_CONFIG_X86_32 */ + mov rsi, info + mov eax, [rsi] + mov ecx, [rsi + 8] + cpuid + mov [rsi], eax + mov [rsi + 4], ebx + mov [rsi + 8], ecx + mov [rsi + 12], edx +#endif /* SLJIT_CONFIG_X86_32 */ } #endif /* _MSC_VER && _MSC_VER >= 1400 */ +} +static void get_cpu_features(void) +{ + sljit_u32 feature_list = CPU_FEATURE_DETECTED; + sljit_u32 info[4]; + sljit_u32 max_id; + + info[0] = 0; + execute_cpu_id(info); + max_id = info[0]; + + if (max_id >= 7) { + info[0] = 7; + info[2] = 0; + execute_cpu_id(info); + + if (info[1] & 0x8) + feature_list |= CPU_FEATURE_TZCNT; + if (info[1] & 0x20) + feature_list |= CPU_FEATURE_AVX2; + } + + if (max_id >= 1) { + info[0] = 1; + execute_cpu_id(info); + + if (info[2] & 0x80000) + feature_list |= CPU_FEATURE_SSE41; + if (info[2] & 0x10000000) + feature_list |= CPU_FEATURE_AVX; #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) - if (value & 0x4000000) - feature_list |= CPU_FEATURE_SSE2; + if (info[3] & 0x4000000) + feature_list |= CPU_FEATURE_SSE2; #endif - if (value & 0x8000) - feature_list |= CPU_FEATURE_CMOV; + if (info[3] & 0x8000) + feature_list |= CPU_FEATURE_CMOV; + } + + info[0] = 0x80000001; + info[2] = 0; /* Silences an incorrect compiler warning. 
*/ + execute_cpu_id(info); + + if (info[2] & 0x20) + feature_list |= CPU_FEATURE_LZCNT; cpu_feature_list = feature_list; } @@ -464,15 +542,15 @@ static sljit_u8 get_jump_code(sljit_uw type) { switch (type) { case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: case SLJIT_F_EQUAL: case SLJIT_UNORDERED_OR_EQUAL: - case SLJIT_ORDERED_EQUAL: /* Not supported. */ return 0x84 /* je */; case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: case SLJIT_F_NOT_EQUAL: case SLJIT_ORDERED_NOT_EQUAL: - case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */ return 0x85 /* jne */; case SLJIT_LESS: @@ -520,61 +598,58 @@ static sljit_u8 get_jump_code(sljit_uw type) return 0x81 /* jno */; case SLJIT_UNORDERED: + case SLJIT_ORDERED_EQUAL: /* NaN. */ return 0x8a /* jp */; case SLJIT_ORDERED: + case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */ return 0x8b /* jpo */; } return 0; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); -#else -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr); -static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label); -#endif +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); +#else /* !SLJIT_CONFIG_X86_32 */ +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr); +static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset); +#endif /* SLJIT_CONFIG_X86_32 */ -static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) +static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { sljit_uw type = jump->flags >> TYPE_SHIFT; sljit_s32 short_jump; sljit_uw 
label_addr; - if (jump->flags & JUMP_LABEL) - label_addr = (sljit_uw)(code + jump->u.label->size); - else + if (jump->flags & JUMP_ADDR) label_addr = jump->u.target - (sljit_uw)executable_offset; + else + label_addr = (sljit_uw)(code + jump->u.label->size); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) - return generate_far_jump_code(jump, code_ptr); -#endif + if ((sljit_sw)(label_addr - (sljit_uw)(code_ptr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 5)) < HALFWORD_MIN) + return detect_far_jump_type(jump, code_ptr); +#endif /* SLJIT_CONFIG_X86_64 */ - short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; + short_jump = (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) >= -0x80 && (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) <= 0x7f; if (type == SLJIT_JUMP) { if (short_jump) *code_ptr++ = JMP_i8; else *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { + } else if (type > SLJIT_JUMP) { short_jump = 0; *code_ptr++ = CALL_i32; - jump->addr++; - } - else if (short_jump) { + } else if (short_jump) { *code_ptr++ = U8(get_jump_code(type) - 0x10); - jump->addr++; - } - else { + } else { *code_ptr++ = GROUP_0F; *code_ptr++ = get_jump_code(type); - jump->addr += 2; } + jump->addr = (sljit_uw)code_ptr; + if (short_jump) { jump->flags |= PATCH_MB; code_ptr += sizeof(sljit_s8); @@ -586,6 +661,171 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code return code_ptr; } +static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr; + sljit_uw jump_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + + if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); +#else /* SLJIT_CONFIG_X86_32 */ + if (flags & PATCH_MD) { + SLJIT_ASSERT(addr > HALFWORD_MAX); + sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); + return; + } + + if (flags & PATCH_MW) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); + } else { + SLJIT_ASSERT(addr <= HALFWORD_MAX); + } + sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr); +#endif /* !SLJIT_CONFIG_X86_32 */ + return; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (SLJIT_UNLIKELY(flags & PATCH_MD)) { + SLJIT_ASSERT(!(flags & JUMP_ADDR)); + sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); + return; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); + + if (flags & PATCH_MB) { + addr -= sizeof(sljit_s8); + SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80); + *(sljit_u8*)jump_addr = U8(addr); + return; + } else if (flags & PATCH_MW) { + addr -= sizeof(sljit_s32); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); +#else /* !SLJIT_CONFIG_X86_32 */ + SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); + sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr); +#endif /* SLJIT_CONFIG_X86_32 */ + } +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + sljit_uw next_label_size; + sljit_uw next_jump_addr; + sljit_uw next_min_addr; + 
sljit_uw size_reduce = 0; + sljit_sw diff; + sljit_uw type; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_uw size_reduce_max; +#endif /* SLJIT_DEBUG */ + + label = compiler->labels; + jump = compiler->jumps; + + next_label_size = SLJIT_GET_NEXT_SIZE(label); + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + + while (1) { + next_min_addr = next_label_size; + if (next_jump_addr < next_min_addr) + next_min_addr = next_jump_addr; + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr != next_jump_addr) + continue; + + if (!(jump->flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE); +#endif /* SLJIT_DEBUG */ + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (jump->u.target <= 0xffffffffl) + size_reduce += sizeof(sljit_s32); +#endif /* SLJIT_CONFIG_X86_64 */ + } else { + /* Unit size: instruction. 
*/ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce); + type = jump->flags >> TYPE_SHIFT; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (type == SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += JUMP_MAX_SIZE - 2; + else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) + size_reduce += JUMP_MAX_SIZE - 5; + } else if (type < SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += CJUMP_MAX_SIZE - 2; + else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6) + size_reduce += CJUMP_MAX_SIZE - 6; + } else { + if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) + size_reduce += JUMP_MAX_SIZE - 5; + } +#else /* !SLJIT_CONFIG_X86_64 */ + if (type == SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += JUMP_MAX_SIZE - 2; + } else if (type < SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += CJUMP_MAX_SIZE - 2; + } +#endif /* SLJIT_CONFIG_X86_64 */ + } + } + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; +#endif /* SLJIT_DEBUG */ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + } else { +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + size_reduce_max = size_reduce + 10; +#endif /* SLJIT_DEBUG */ + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce - 3); + + if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) + size_reduce += 3; + } else if (jump->u.target <= 0xffffffffl) + size_reduce += (jump->flags & MOV_ADDR_HI) ? 
4 : 5; + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; +#endif /* SLJIT_DEBUG */ +#endif /* SLJIT_CONFIG_X86_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { struct sljit_memory_fragment *buf; @@ -595,27 +835,30 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u8 *buf_end; sljit_u8 len; sljit_sw executable_offset; - sljit_uw jump_addr; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_uw addr; +#endif /* SLJIT_DEBUG */ struct sljit_label *label; struct sljit_jump *jump; struct sljit_const *const_; - struct sljit_put_label *put_label; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); + + reduce_code_size(compiler); /* Second code generation pass. */ code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data); PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); buf = compiler->buf; code_ptr = code; label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; - put_label = compiler->put_labels; executable_offset = SLJIT_EXEC_OFFSET(code); do { @@ -623,49 +866,52 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_end = buf_ptr + buf->used_size; do { len = *buf_ptr++; - if (len > 0) { + SLJIT_ASSERT(len > 0); + if (len < SLJIT_INST_CONST) { /* The code is already generated. 
*/ SLJIT_MEMCPY(code_ptr, buf_ptr, len); code_ptr += len; buf_ptr += len; - } - else { - switch (*buf_ptr) { - case 0: - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + } else { + switch (len) { + case SLJIT_INST_LABEL: + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = (sljit_uw)(code_ptr - code); label = label->next; break; - case 1: - jump->addr = (sljit_uw)code_ptr; + case SLJIT_INST_JUMP: +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + addr = (sljit_uw)code_ptr; +#endif /* SLJIT_DEBUG */ if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset); + code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset); else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset); -#else - code_ptr = generate_far_jump_code(jump, code_ptr); -#endif + code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset); +#else /* !SLJIT_CONFIG_X86_32 */ + code_ptr = detect_far_jump_type(jump, code_ptr); +#endif /* SLJIT_CONFIG_X86_32 */ } + + SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f)); jump = jump->next; break; - case 2: + case SLJIT_INST_MOV_ADDR: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset); +#endif /* SLJIT_CONFIG_X86_64 */ + jump->addr = (sljit_uw)code_ptr; + jump = jump->next; + break; + default: + SLJIT_ASSERT(len == SLJIT_INST_CONST); const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); const_ = const_->next; break; - default: - SLJIT_ASSERT(*buf_ptr == 3); - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + 
put_label->label->size); -#endif - put_label = put_label->next; - break; } - buf_ptr++; } } while (buf_ptr < buf_end); + SLJIT_ASSERT(buf_ptr == buf_end); buf = buf->next; } while (buf); @@ -673,61 +919,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!label); SLJIT_ASSERT(!jump); SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); SLJIT_ASSERT(code_ptr <= code + compiler->size); jump = compiler->jumps; while (jump) { - if (jump->flags & (PATCH_MB | PATCH_MW)) { - if (jump->flags & JUMP_LABEL) - jump_addr = jump->u.label->addr; - else - jump_addr = jump->u.target; - - jump_addr -= jump->addr + (sljit_uw)executable_offset; - - if (jump->flags & PATCH_MB) { - jump_addr -= sizeof(sljit_s8); - SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127); - *(sljit_u8*)jump->addr = U8(jump_addr); - } else { - jump_addr -= sizeof(sljit_s32); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr); -#else - SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX); - sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr); -#endif - } - } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - else if (jump->flags & PATCH_MD) { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr); - } -#endif - + generate_jump_or_mov_addr(jump, executable_offset); jump = jump->next; } - put_label = compiler->put_labels; - while (put_label) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); -#else - if (put_label->flags & PATCH_MD) { - SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX); - sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); - } - else { - 
SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX); - sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr); - } -#endif - - put_label = put_label->next; - } - compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; compiler->executable_size = (sljit_uw)(code_ptr - code); @@ -743,7 +942,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) switch (feature_type) { case SLJIT_HAS_FPU: #ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; + return (SLJIT_IS_FPU_AVAILABLE) != 0; #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) if (cpu_feature_list == 0) get_cpu_features(); @@ -779,17 +978,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: return 1; - case SLJIT_HAS_SSE2: -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE + case SLJIT_HAS_AVX: if (cpu_feature_list == 0) get_cpu_features(); - return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; -#else /* !SLJIT_DETECT_SSE2 */ - return 1; -#endif /* SLJIT_DETECT_SSE2 */ - + return (cpu_feature_list & CPU_FEATURE_AVX) != 0; + case SLJIT_HAS_AVX2: + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_AVX2) != 0; + case SLJIT_HAS_SIMD: + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_SSE41) != 0; +#endif /* SLJIT_IS_FPU_AVAILABLE */ default: return 0; } @@ -797,16 +1002,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) { - if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL) - return 0; - switch (type) { case SLJIT_ORDERED_EQUAL: case SLJIT_UNORDERED_OR_NOT_EQUAL: - return 0; + return 2; } - return 1; + 
return 0; } /* --------------------------------------------------------------------- */ @@ -850,6 +1052,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) #endif /* SLJIT_CONFIG_X86_64 */ +static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte) +{ + sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = byte; + return SLJIT_SUCCESS; +} + static sljit_s32 emit_mov(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw); @@ -857,6 +1068,14 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); +static sljit_s32 emit_groupf(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw srcw); + +static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw srcw); + static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src); @@ -867,6 +1086,10 @@ static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); +static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw); + static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler) { #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) @@ -911,7 +1134,11 @@ static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_ #endif inst[0] = GROUP_0F; inst[1] = 0x1e; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]); +#else + inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]); +#endif return SLJIT_SUCCESS; } @@ -1021,7 +1248,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, *inst 
= MOV_rm_r; return SLJIT_SUCCESS; } - if (src & SLJIT_IMM) { + + if (src == SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); @@ -1068,6 +1296,27 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } +static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_uw size; + + SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10); + + size = compiler->size; + EMIT_MOV(compiler, dst_reg, 0, src, srcw); + + inst[1] = U8(compiler->size - size); + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { sljit_u8 *inst; @@ -1080,17 +1329,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = INT3; - break; + return emit_byte(compiler, INT3); case SLJIT_NOP: - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = NOP; - break; + return emit_byte(compiler, NOP); case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: case SLJIT_DIVMOD_UW: @@ -1131,23 +1372,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = CDQ; + FAIL_IF(emit_byte(compiler, CDQ)); #else - if (compiler->mode32) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = CDQ; - } else { + if (!compiler->mode32) { inst = 
(sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); INC_SIZE(2); inst[0] = REX_W; inst[1] = CDQ; - } + } else + FAIL_IF(emit_byte(compiler, CDQ)); #endif } @@ -1213,29 +1447,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return SLJIT_SUCCESS; } -#define ENCODE_PREFIX(prefix) \ - do { \ - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ - FAIL_IF(!inst); \ - INC_SIZE(1); \ - *inst = U8(prefix); \ - } while (0) - static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { sljit_u8* inst; sljit_s32 dst_r; -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_s32 work_r; -#endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; #endif - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); @@ -1264,100 +1487,33 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, #else dst_r = src; #endif - } + } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - else if (FAST_IS_REG(src) && reg_map[src] >= 4) { - /* src, dst are registers. */ - SLJIT_ASSERT(FAST_IS_REG(dst)); - if (reg_map[dst] < 4) { - if (dst != src) - EMIT_MOV(compiler, dst, 0, src, 0); - inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = sign ? 
MOVSX_r_rm8 : MOVZX_r_rm8; - } - else { - if (dst != src) - EMIT_MOV(compiler, dst, 0, src, 0); - if (sign) { - /* shl reg, 24 */ - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!inst); - *inst |= SHL; - /* sar reg, 24 */ - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!inst); - *inst |= SAR; - } - else { + if (FAST_IS_REG(src) && reg_map[src] >= 4) { + /* Both src and dst are registers. */ + SLJIT_ASSERT(FAST_IS_REG(dst)); + + if (src == dst && !sign) { inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); FAIL_IF(!inst); *(inst + 1) |= AND; + return SLJIT_SUCCESS; } + + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + src = TMP_REG1; + srcw = 0; } - return SLJIT_SUCCESS; - } -#endif - else { +#endif /* !SLJIT_CONFIG_X86_32 */ + /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; + FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw)); } if (dst & SLJIT_MEM) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst_r == TMP_REG1) { - /* Find a non-used register, whose reg_map[src] < 4. 
*/ - if ((dst & REG_MASK) == SLJIT_R0) { - if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) - work_r = SLJIT_R2; - else - work_r = SLJIT_R1; - } - else { - if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) - work_r = SLJIT_R0; - else if ((dst & REG_MASK) == SLJIT_R1) - work_r = SLJIT_R2; - else - work_r = SLJIT_R1; - } - - if (work_r == SLJIT_R0) { - ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); - } - else { - inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!inst); - *inst = XCHG_r_rm; - } - - inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm8_r8; - - if (work_r == SLJIT_R0) { - ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); - } - else { - inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!inst); - *inst = XCHG_r_rm; - } - } - else { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm8_r8; - } -#else inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); FAIL_IF(!inst); *inst = MOV_rm8_r8; -#endif } return SLJIT_SUCCESS; @@ -1398,7 +1554,7 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, compiler->mode32 = 0; #endif - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); @@ -1419,12 +1575,8 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) dst_r = src; - else { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; - } + else + FAIL_IF(emit_groupf(compiler, sign ? 
MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw)); if (dst & SLJIT_MEM) { inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); @@ -1481,26 +1633,19 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, sljit_s32 dst_r; sljit_sw max; - if (cpu_feature_list == 0) - get_cpu_features(); + SLJIT_ASSERT(cpu_feature_list != 0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) { - inst = emit_x86_instruction(compiler, 2 | EX86_PREF_F3, dst_r, 0, src, srcw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = is_clz ? LZCNT_r_rm : TZCNT_r_rm; + FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw)); if (dst & SLJIT_MEM) EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); return SLJIT_SUCCESS; } - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = is_clz ? BSR_r_rm : BSF_r_rm; + FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw)); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) max = is_clz ? 
(32 + 31) : 32; @@ -1518,7 +1663,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, inst[1] = CMOVE_r_rm; } else - FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); + FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); if (is_clz) { inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); @@ -1533,14 +1678,9 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, if (cpu_feature_list & CPU_FEATURE_CMOV) { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max); - - inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = CMOVE_r_rm; - } - else - FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); + FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0)); + } else + FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); if (is_clz) { inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0); @@ -1555,6 +1695,7 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, } static sljit_s32 emit_bswap(struct sljit_compiler *compiler, + sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { @@ -1563,10 +1704,24 @@ static sljit_s32 emit_bswap(struct sljit_compiler *compiler, sljit_uw size; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_u8 rex = 0; +#else /* !SLJIT_CONFIG_X86_64 */ + sljit_s32 dst_is_ereg = op & SLJIT_32; #endif /* SLJIT_CONFIG_X86_64 */ - if (src != dst_r) - EMIT_MOV(compiler, dst_r, 0, src, srcw); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32) + compiler->mode32 = 1; +#else /* !SLJIT_CONFIG_X86_64 */ + op &= ~SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (src != dst_r) { + /* Only the lower 16 bit is read for eregs. 
*/ + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) + FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw)); + else + EMIT_MOV(compiler, dst_r, 0, src, srcw); + } size = 2; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -1595,8 +1750,41 @@ static sljit_s32 emit_bswap(struct sljit_compiler *compiler, inst[1] = BSWAP_r | reg_map[dst_r]; #endif /* SLJIT_CONFIG_X86_64 */ - if (dst & SLJIT_MEM) - EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = compiler->mode32 ? 16 : 48; +#else /* !SLJIT_CONFIG_X86_64 */ + size = 16; +#endif /* SLJIT_CONFIG_X86_64 */ + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0); + FAIL_IF(!inst); + if (op == SLJIT_REV_U16) + inst[1] |= SHR; + else + inst[1] |= SAR; + } + + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_is_ereg) + op = SLJIT_REV; +#endif /* SLJIT_CONFIG_X86_32 */ + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) + return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0); + + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_REV_S32) { + compiler->mode32 = 0; + inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst = MOVSXD_r_rm; + } +#endif /* SLJIT_CONFIG_X86_64 */ + return SLJIT_SUCCESS; } @@ -1639,14 +1827,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (op == SLJIT_MOV_S32) op = SLJIT_MOV_U32; } - else if (src & SLJIT_IMM) { + else if (src == SLJIT_IMM) { if (op == SLJIT_MOV_U32) op = SLJIT_MOV_S32; } } #endif /* SLJIT_CONFIG_X86_64 */ - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { switch (op) { case SLJIT_MOV_U8: srcw = (sljit_u8)srcw; @@ -1731,7 +1919,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case 
SLJIT_CTZ: return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw); case SLJIT_REV: - return emit_bswap(compiler, dst, dstw, src, srcw); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + case SLJIT_REV_U32: + case SLJIT_REV_S32: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_is_ereg) + op |= SLJIT_32; +#endif /* SLJIT_CONFIG_X86_32 */ + return emit_bswap(compiler, op, dst, dstw, src, srcw); } return SLJIT_SUCCESS; @@ -1750,7 +1946,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, sljit_u8 op_imm = U8(op_types & 0xff); if (dst == src1 && dstw == src1w) { - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else @@ -1784,7 +1980,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, /* Only for cumulative operations. */ if (dst == src2 && dstw == src2w) { - if (src1 & SLJIT_IMM) { + if (src1 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else @@ -1818,7 +2014,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, /* General version. */ if (FAST_IS_REG(dst)) { EMIT_MOV(compiler, dst, 0, src1, src1w); - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { BINARY_IMM(op_imm, op_mr, src2w, dst, 0); } else { @@ -1830,7 +2026,7 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, else { /* This version requires less memory writing. 
*/ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); } else { @@ -1857,7 +2053,7 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, sljit_u8 op_imm = U8(op_types & 0xff); if (dst == src1 && dstw == src1w) { - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else @@ -1891,7 +2087,7 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, /* General version. */ if (FAST_IS_REG(dst) && dst != src2) { EMIT_MOV(compiler, dst, 0, src1, src1w); - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { BINARY_IMM(op_imm, op_mr, src2w, dst, 0); } else { @@ -1903,7 +2099,7 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, else { /* This version requires less memory writing. */ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); } else { @@ -1926,20 +2122,12 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; /* Register destination. 
*/ - if (dst_r == src1 && !(src2 & SLJIT_IMM)) { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = IMUL_r_rm; - } - else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = IMUL_r_rm; - } - else if (src1 & SLJIT_IMM) { - if (src2 & SLJIT_IMM) { + if (dst_r == src1 && src2 != SLJIT_IMM) { + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w)); + } else if (dst_r == src2 && src1 != SLJIT_IMM) { + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w)); + } else if (src1 == SLJIT_IMM) { + if (src2 == SLJIT_IMM) { EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); src2 = dst_r; src2w = 0; @@ -1949,10 +2137,8 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); FAIL_IF(!inst); *inst = IMUL_r_rm_i8; - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = U8(src1w); + + FAIL_IF(emit_byte(compiler, U8(src1w))); } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { @@ -1978,30 +2164,26 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, if (dst_r != src2) EMIT_MOV(compiler, dst_r, 0, src2, src2w); FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); - inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = IMUL_r_rm; + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0)); } #endif } - else if (src2 & SLJIT_IMM) { + else if (src2 == SLJIT_IMM) { /* Note: src1 is NOT immediate. 
*/ if (src2w <= 127 && src2w >= -128) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); FAIL_IF(!inst); *inst = IMUL_r_rm_i8; - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = U8(src2w); + + FAIL_IF(emit_byte(compiler, U8(src2w))); } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); FAIL_IF(!inst); *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); @@ -2012,31 +2194,24 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); FAIL_IF(!inst); *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); sljit_unaligned_store_s32(inst, (sljit_s32)src2w); - } - else { + } else { if (dst_r != src1) EMIT_MOV(compiler, dst_r, 0, src1, src1w); FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = IMUL_r_rm; + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0)); } #endif - } - else { + } else { /* Neither argument is immediate. 
*/ if (ADDRESSING_DEPENDS_ON(src2, dst_r)) dst_r = TMP_REG1; EMIT_MOV(compiler, dst_r, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = IMUL_r_rm; + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w)); } if (dst & SLJIT_MEM) @@ -2069,10 +2244,10 @@ static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, done = 1; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); #else - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); #endif FAIL_IF(!inst); @@ -2082,10 +2257,10 @@ static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, } else if (FAST_IS_REG(src2)) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); #else - if (src1 & SLJIT_IMM) { + if (src1 == SLJIT_IMM) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); #endif FAIL_IF(!inst); @@ -2109,16 +2284,16 @@ static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, sljit_u8* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 
|| src2w < -128)) { #endif BINARY_EAX_IMM(CMP_EAX_i32, src2w); return SLJIT_SUCCESS; } if (FAST_IS_REG(src1)) { - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); } else { @@ -2129,15 +2304,15 @@ static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } - if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { + if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) { inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); FAIL_IF(!inst); *inst = CMP_rm_r; return SLJIT_SUCCESS; } - if (src2 & SLJIT_IMM) { - if (src1 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { + if (src1 == SLJIT_IMM) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); src1 = TMP_REG1; src1w = 0; @@ -2160,25 +2335,25 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, sljit_u8* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src2w); return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { + if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src1w); return SLJIT_SUCCESS; } - if (!(src1 & SLJIT_IMM)) { - if (src2 & SLJIT_IMM) { + if (src1 != SLJIT_IMM) 
{ + if (src2 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); @@ -2206,8 +2381,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, } } - if (!(src2 & SLJIT_IMM)) { - if (src1 & SLJIT_IMM) { + if (src2 != SLJIT_IMM) { + if (src1 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src1w) || compiler->mode32) { inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); @@ -2236,7 +2411,7 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, } EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); @@ -2274,18 +2449,18 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, #endif sljit_u8* inst; - if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { + if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) { if (dst == src1 && dstw == src1w) { inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); FAIL_IF(!inst); - *inst |= mode; + inst[1] |= mode; return SLJIT_SUCCESS; } if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); FAIL_IF(!inst); - *inst |= mode; + inst[1] |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); return SLJIT_SUCCESS; } @@ -2293,14 +2468,14 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, dst, 0, src1, src1w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); FAIL_IF(!inst); - *inst |= mode; + inst[1] |= mode; return SLJIT_SUCCESS; } 
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); FAIL_IF(!inst); - *inst |= mode; + inst[1] |= mode; EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); return SLJIT_SUCCESS; } @@ -2310,7 +2485,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); FAIL_IF(!inst); - *inst |= mode; + inst[1] |= mode; return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); } @@ -2328,7 +2503,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); FAIL_IF(!inst); - *inst |= mode; + inst[1] |= mode; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; #endif @@ -2354,7 +2529,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); FAIL_IF(!inst); - *inst |= mode; + inst[1] |= mode; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); @@ -2377,7 +2552,7 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, sljit_s32 src2, sljit_sw src2w) { /* The CPU does not set flags if the shift count is 0. */ - if (src2 & SLJIT_IMM) { + if (src2 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) src2w &= compiler->mode32 ? 
0x1f : 0x3f; #else /* !SLJIT_CONFIG_X86_64 */ @@ -2442,7 +2617,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); if (!HAS_FLAGS(op)) { - if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; if (FAST_IS_REG(dst) && src2 == dst) { FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); @@ -2465,9 +2640,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile dst, dstw, src1, src1w, src2, src2w); case SLJIT_XOR: if (!HAS_FLAGS(op)) { - if ((src2 & SLJIT_IMM) && src2w == -1) + if (src2 == SLJIT_IMM && src2w == -1) return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w); - if ((src1 & SLJIT_IMM) && src1w == -1) + if (src1 == SLJIT_IMM && src1w == -1) return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w); } @@ -2553,7 +2728,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * compiler->mode32 = op & SLJIT_32; #endif /* SLJIT_CONFIG_X86_64 */ - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) src3w &= 0x1f; #else /* !SLJIT_CONFIG_X86_32 */ @@ -2580,7 +2755,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * } #endif /* SLJIT_CONFIG_X86_32 */ - if (dst_reg == SLJIT_PREF_SHIFT_REG && !(src3 & SLJIT_IMM) && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) { + if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w); src1_reg = TMP_REG1; @@ -2605,7 +2780,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * if (src3 != SLJIT_PREF_SHIFT_REG) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w); } else { - if (src2_reg == SLJIT_PREF_SHIFT_REG && !(src3 & SLJIT_IMM) && src3 != SLJIT_PREF_SHIFT_REG) { + if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; #endif /* SLJIT_CONFIG_X86_64 */ @@ -2653,7 +2828,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * } #endif /* SLJIT_CONFIG_X86_64 */ - if (!(src3 & SLJIT_IMM) && src3 != SLJIT_PREF_SHIFT_REG) { + if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) { if (!restore_ecx) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; @@ -2684,14 +2859,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler * FAIL_IF(!inst); inst[0] = GROUP_0F; - if (src3 & SLJIT_IMM) { + if (src3 == SLJIT_IMM) { inst[1] = U8((is_left ? SHLD : SHRD) - 1); - /* Immedate argument is added separately. */ - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - *inst = U8(src3w); + /* Immediate argument is added separately. */ + FAIL_IF(emit_byte(compiler, U8(src3w))); } else inst[1] = U8(is_left ? 
SHLD : SHRD); @@ -2765,24 +2937,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (reg >= SLJIT_R3 && reg <= SLJIT_R8) - return -1; -#endif - return reg_map[reg]; -} + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + if (type == SLJIT_GP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return reg; -#else + if (reg >= SLJIT_R3 && reg <= SLJIT_R8) + return -1; +#endif /* SLJIT_CONFIG_X86_32 */ + return reg_map[reg]; + } + + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512) + return -1; + return freg_map[reg]; -#endif } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -2810,6 +2980,8 @@ static sljit_u32 *sse2_buffer; static void init_compiler(void) { + get_cpu_features(); + /* Align to 16 bytes. */ sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf); @@ -2823,40 +2995,43 @@ static void init_compiler(void) sse2_buffer[13] = 0x7fffffff; } -static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, - sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) +static sljit_s32 emit_groupf(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw srcw) { - sljit_u8 *inst; - - inst = emit_x86_instruction(compiler, 2 | (single ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw); FAIL_IF(!inst); inst[0] = GROUP_0F; - inst[1] = opcode; + inst[1] = op & 0xff; return SLJIT_SUCCESS; } -static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, - sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) +static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw srcw) { sljit_u8 *inst; - inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A))); + + inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw); FAIL_IF(!inst); inst[0] = GROUP_0F; - inst[1] = opcode; + inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A); + inst[2] = op & 0xff; return SLJIT_SUCCESS; } static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) { - return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); + return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw); } static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) { - return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); + return emit_groupf(compiler, MOVSD_xm_x | (single ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -2864,7 +3039,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r; - sljit_u8 *inst; CHECK_EXTRA_REGS(dst, dstw, (void)0); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; @@ -2874,10 +3048,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp compiler->mode32 = 0; #endif - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = CVTTSD2SI_r_xm; + FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw)); if (dst & SLJIT_MEM) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); @@ -2889,7 +3060,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; - sljit_u8 *inst; CHECK_EXTRA_REGS(src, srcw, (void)0); @@ -2898,7 +3068,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp compiler->mode32 = 0; #endif - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) srcw = (sljit_s32)srcw; @@ -2908,10 +3078,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp srcw = 0; } - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); - FAIL_IF(!inst); - inst[0] = GROUP_0F; - inst[1] = CVTSI2SD_x_rm; + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw)); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; @@ -2926,14 +3093,28 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { switch (GET_FLAG_TYPE(op)) { + case SLJIT_ORDERED_EQUAL: + /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */ + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w)); + + /* EQ */ + FAIL_IF(emit_byte(compiler, 0)); + + src1 = TMP_FREG; + src2 = TMP_FREG; + src2w = 0; + break; + case SLJIT_ORDERED_LESS: case SLJIT_UNORDERED_OR_GREATER: + /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */ if (!FAST_IS_REG(src2)) { FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); src2 = TMP_FREG; } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w); + return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w); } if (!FAST_IS_REG(src1)) { @@ -2941,7 +3122,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile src1 = TMP_FREG; } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w); + return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -2949,6 +3130,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r; + sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; @@ -2972,42 +3154,57 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* We overwrite the high bits of source. From SLJIT point of view, this is not an issue. Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ - FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0)); - } - else { + FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0)); + } else { FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw)); src = TMP_FREG; } - FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0)); + FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0)); if (dst_r == TMP_FREG) return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } if (FAST_IS_REG(dst)) { - dst_r = dst; - if (dst != src) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); - } - else { - dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); + dst_r = (dst == src) ? TMP_FREG : dst; + + if (src & SLJIT_MEM) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + + FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0)); + + inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0); + inst[0] = GROUP_0F; + /* Same as PSRLD_x / PSRLQ_x */ + inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8; + + if (GET_OPCODE(op) == SLJIT_ABS_F64) { + inst[2] |= 2 << 3; + FAIL_IF(emit_byte(compiler, 1)); + } else { + inst[2] |= 6 << 3; + FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63))); + } + + if (dst_r != TMP_FREG) + dst_r = (src & SLJIT_MEM) ? TMP_FREG : src; + return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? 
XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0); } + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + switch (GET_OPCODE(op)) { case SLJIT_NEG_F64: - FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8))); + FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8))); break; case SLJIT_ABS_F64: - FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12))); + FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12))); break; } - if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); - return SLJIT_SUCCESS; + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, @@ -3050,19 +3247,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); break; case SLJIT_SUB_F64: - FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); break; case SLJIT_MUL_F64: - FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); break; case SLJIT_DIV_F64: - FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + FAIL_IF(emit_groupf(compiler, 
DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); break; } @@ -3071,6 +3268,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_uw pref; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (dst_freg == src1) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); + pref = EX86_SELECT_66(op) | EX86_SSE2; + FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w)); + FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8))); + return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0); + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; + src1w = 0; + } + + if (dst_freg != src2) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w)); + + pref = EX86_SELECT_66(op) | EX86_SSE2; + FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w)); + FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? 
sse2_buffer : sse2_buffer + 8))); + return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w); +} + /* --------------------------------------------------------------------- */ /* Conditional instructions */ /* --------------------------------------------------------------------- */ @@ -3090,10 +3326,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi PTR_FAIL_IF(!label); set_label(label, compiler); - inst = (sljit_u8*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF(!inst); - inst[0] = 0; - inst[1] = 0; + inst[0] = SLJIT_INST_LABEL; return label; } @@ -3111,18 +3346,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); type &= 0xff; + jump->addr = compiler->size; /* Worst case size. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; -#else - compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); -#endif - - inst = (sljit_u8*)ensure_buf(compiler, 2); + compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE; + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF_NULL(inst); - inst[0] = 0; - inst[1] = 1; + inst[0] = SLJIT_INST_JUMP; return jump; } @@ -3143,20 +3373,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); jump->u.target = (sljit_uw)srcw; + jump->addr = compiler->size; /* Worst case size. 
*/ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->size += 5; -#else - compiler->size += 10 + 3; -#endif - - inst = (sljit_u8*)ensure_buf(compiler, 2); + compiler->size += JUMP_MAX_SIZE; + inst = (sljit_u8*)ensure_buf(compiler, 1); FAIL_IF_NULL(inst); - inst[0] = 0; - inst[1] = 1; - } - else { + inst[0] = SLJIT_INST_JUMP; + } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) /* REX_W is not necessary (src is not immediate). */ compiler->mode32 = 1; @@ -3289,43 +3513,1256 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #endif /* SLJIT_CONFIG_X86_64 */ } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { - sljit_u8* inst; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 dst = dst_reg; + sljit_sw dstw = 0; +#endif /* SLJIT_CONFIG_X86_32 */ + sljit_sw src2w = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - type &= ~SLJIT_32; + ADJUST_LOCAL_OFFSET(src1, src1w); - if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3)) - return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); -#else - if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) - return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); -#endif - - /* ADJUST_LOCAL_OFFSET is not needed. 
*/ - CHECK_EXTRA_REGS(src, srcw, (void)0); + CHECK_EXTRA_REGS(dst, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2_reg, src2w, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = type & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ type &= ~SLJIT_32; -#endif - if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst & SLJIT_MEM) { + if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else + EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w); + + dst_reg = TMP_REG1; + } else { +#endif /* SLJIT_CONFIG_X86_32 */ + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + EMIT_MOV(compiler, dst_reg, 0, src1, src1w); + src1 = src2_reg; + src1w = src2w; + type ^= 0x1; + } else + EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w); + } + } + + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + SLJIT_ASSERT(dst_reg != TMP_REG1); + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = TMP_REG1; + src1w = 0; + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + } +#endif /* SLJIT_CONFIG_X86_32 */ + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w)); + else + FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w)); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_reg == TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_32 */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 
src2_freg) +{ + sljit_u8* inst; + sljit_uw size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0)); + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10); + + size = compiler->size; + FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w)); + + inst[1] = U8(compiler->size - size); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_uw op; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + switch (reg_size) { + case 4: + op = EX86_SSE2; + break; + case 5: + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + op = EX86_SSE2 | VEX_256; + break; + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (!(srcdst & SLJIT_MEM)) + alignment = reg_size; + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 2 || elem_size == 3) { + op |= alignment >= reg_size ? 
MOVAPS_x_xm : MOVUPS_x_xm; + + if (elem_size == 3) + op |= EX86_PREF_66; + + if (type & SLJIT_SIMD_STORE) + op += 1; + } else + return SLJIT_ERR_UNSUPPORTED; + } else { + op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm) + | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3); + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (op & VEX_256) + return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw); + + return emit_groupf(compiler, op, freg, srcdst, srcdstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_u8 *inst; + sljit_u8 opcode = 0; + sljit_uw size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (!(type & SLJIT_SIMD_FLOAT)) { + CHECK_EXTRA_REGS(src, srcw, (void)0); + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if ((type & SLJIT_SIMD_FLOAT) ? 
(elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; +#else /* !SLJIT_CONFIG_X86_32 */ + compiler->mode32 = 1; + + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (cpu_feature_list & CPU_FEATURE_AVX2) { + if (reg_size < 4 || reg_size > 5) + return SLJIT_ERR_UNSUPPORTED; + + if (src != SLJIT_IMM && (reg_size == 5 || elem_size < 3 || !(type & SLJIT_SIMD_FLOAT))) { + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size >= 3) + compiler->mode32 = 0; +#endif /* SLJIT_CONFIG_X86_64 */ + FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, src, srcw)); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + src = freg; + srcw = 0; + } + + switch (elem_size) { + case 0: + size = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 1: + size = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 2: + size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSS_x_xm : VPBROADCASTD_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + default: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + size = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; +#else /* !SLJIT_CONFIG_X86_32 */ + size = ((type & SLJIT_SIMD_FLOAT) ? 
VBROADCASTSD_x_xm : VPBROADCASTQ_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; +#endif /* SLJIT_CONFIG_X86_32 */ + break; + } + + if (reg_size == 5) + size |= VEX_256; + + return emit_vex_instruction(compiler, size, freg, 0, src, srcw); + } + } else if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) { + if (reg_size == 5) + return emit_vex_instruction(compiler, XORPD_x_xm | VEX_256 | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + + return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0); + } + + if (elem_size == 2 && freg != src) { + FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw)); + src = freg; + srcw = 0; + } + + FAIL_IF(emit_groupf(compiler, (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2, freg, src, srcw)); + + if (elem_size == 2) + return emit_byte(compiler, 0); + return SLJIT_SUCCESS; + } + + if (src == SLJIT_IMM) { + if (elem_size == 0) { + srcw = (sljit_u8)srcw; + srcw |= srcw << 8; + srcw |= srcw << 16; + elem_size = 2; + } else if (elem_size == 1) { + srcw = (sljit_u16)srcw; + srcw |= srcw << 16; + elem_size = 2; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size == 2 && (sljit_s32)srcw == -1) + srcw = -1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (srcw == 0 || srcw == -1) { + if (reg_size == 5) + return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + + return emit_groupf(compiler, (srcw == 0 ? 
PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size == 3) + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); + else +#endif /* SLJIT_CONFIG_X86_64 */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; srcw = 0; } - inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); + size = 2; + opcode = MOVD_x_rm; + + switch (elem_size) { + case 0: + if (!FAST_IS_REG(src)) { + opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */; + size = 3; + } + break; + case 1: + if (!FAST_IS_REG(src)) + opcode = PINSRW_x_rm_i8; + break; + case 2: + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case 3: + /* MOVQ */ + compiler->mode32 = 0; + break; +#endif /* SLJIT_CONFIG_X86_64 */ + } + + inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw); FAIL_IF(!inst); inst[0] = GROUP_0F; - inst[1] = U8(get_jump_code((sljit_uw)type) - 0x40); + inst[1] = opcode; + + if (reg_size == 5) { + SLJIT_ASSERT(opcode == MOVD_x_rm); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + size = VPBROADCASTD_x_xm; +#else /* !SLJIT_CONFIG_X86_32 */ + size = (elem_size == 3) ? 
VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm; +#endif /* SLJIT_CONFIG_X86_32 */ + return emit_vex_instruction(compiler, size | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + } + + if (size == 3) { + SLJIT_ASSERT(opcode == 0x3a); + inst[2] = PINSRB_x_rm_i8; + } + + if (opcode != MOVD_x_rm) + FAIL_IF(emit_byte(compiler, 0)); + + switch (elem_size) { + case 0: + FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); + return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); + case 1: + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0)); + FAIL_IF(emit_byte(compiler, 0)); + /* fallthrough */ + default: + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + return emit_byte(compiler, 0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case 3: + compiler->mode32 = 1; + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + return emit_byte(compiler, 0x44); +#endif /* SLJIT_CONFIG_X86_64 */ + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_u8 *inst; + sljit_u8 opcode = 0; + sljit_uw size; + sljit_s32 freg_orig = freg; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 srcdst_is_ereg = 0; + sljit_s32 srcdst_orig = 0; + sljit_sw srcdstw_orig = 0; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + } else if (reg_size != 4) + return 
SLJIT_ERR_UNSUPPORTED; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2) + return SLJIT_ERR_UNSUPPORTED; +#else /* SLJIT_CONFIG_X86_32 */ + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#else /* !SLJIT_CONFIG_X86_64 */ + if (!(type & SLJIT_SIMD_FLOAT)) { + CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1); + + if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) { + srcdst_orig = srcdst; + srcdstw_orig = srcdstw; + srcdst = TMP_REG1; + srcdstw = 0; + } + } +#endif /* SLJIT_CONFIG_X86_64 */ + + if (type & SLJIT_SIMD_LANE_ZERO) { + if (lane_index == 0) { + if (!(type & SLJIT_SIMD_FLOAT)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size == 3) { + compiler->mode32 = 0; + elem_size = 2; + } +#endif /* SLJIT_CONFIG_X86_64 */ + if (srcdst == SLJIT_IMM) { + if (elem_size == 0) + srcdstw = (sljit_u8)srcdstw; + else if (elem_size == 1) + srcdstw = (sljit_u16)srcdstw; + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw); + srcdst = TMP_REG1; + srcdstw = 0; + elem_size = 2; + } + + if (elem_size == 2) { + if (reg_size == 4) + return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw); + return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + } + } else if (srcdst & SLJIT_MEM) { + SLJIT_ASSERT(elem_size == 2 || elem_size == 3); + + if (reg_size == 4) + return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw); + return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw); + } else if (elem_size == 3) { + if (reg_size == 4) + return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0); + return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0); + } + } + + if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { + freg = TMP_FREG; + lane_index -= (1 << (4 - elem_size)); + } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw)); + srcdst = TMP_FREG; + srcdstw = 0; + } + + size = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0) + | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2; + + if (reg_size == 5) + FAIL_IF(emit_vex_instruction(compiler, size | VEX_256 | VEX_SSE2_OPV, freg, freg, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, size, freg, freg, 0)); + } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { + FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0)); + FAIL_IF(emit_byte(compiler, 1)); + + freg = TMP_FREG; + lane_index -= (1 << (4 - elem_size)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3) { + if (srcdst & SLJIT_MEM) { + if (type & SLJIT_SIMD_STORE) + size = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x; + else + size = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m; + + FAIL_IF(emit_groupf(compiler, size | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw)); + + /* In case of store, freg is not TMP_FREG. 
*/ + } else if (type & SLJIT_SIMD_STORE) { + if (lane_index == 1) + return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0); + return emit_sse2_load(compiler, 0, srcdst, freg, 0); + } else { + if (lane_index == 1) + FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0)); + else + FAIL_IF(emit_sse2_store(compiler, 0, freg, 0, srcdst)); + } + } else if (type & SLJIT_SIMD_STORE) { + if (lane_index == 0) + return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg); + + if (srcdst & SLJIT_MEM) { + FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + return emit_byte(compiler, U8(lane_index)); + } + + if (srcdst == freg) + size = SHUFPS_x_xm | EX86_SSE2; + else { + if (cpu_feature_list & CPU_FEATURE_AVX) { + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0)); + return emit_byte(compiler, U8(lane_index)); + } + + switch (lane_index) { + case 1: + size = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2; + break; + case 2: + size = MOVHLPS_x_x | EX86_SSE2; + break; + default: + SLJIT_ASSERT(lane_index == 3); + size = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2; + break; + } + } + + FAIL_IF(emit_groupf(compiler, size, srcdst, freg, 0)); + + size &= 0xff; + if (size == SHUFPS_x_xm || size == PSHUFD_x_xm) + return emit_byte(compiler, U8(lane_index)); + + return SLJIT_SUCCESS; + } else { + if (lane_index != 0 || (srcdst & SLJIT_MEM)) { + FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + FAIL_IF(emit_byte(compiler, U8(lane_index << 4))); + } else + FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst)); + } + + if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE)) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(reg_size == 5); + + if (type & SLJIT_SIMD_LANE_ZERO) { + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 
0, TMP_FREG, 0)); + return emit_byte(compiler, 0x4e); + } + + FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0)); + return emit_byte(compiler, 1); + } + + if (srcdst == SLJIT_IMM) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw); + srcdst = TMP_REG1; + srcdstw = 0; + } + + size = 3; + + switch (elem_size) { + case 0: + opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8; + break; + case 1: + if (!(type & SLJIT_SIMD_STORE)) { + size = 2; + opcode = PINSRW_x_rm_i8; + } else + opcode = PEXTRW_rm_x_i8; + break; + case 2: + opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8; + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case 3: + /* PINSRQ / PEXTRQ */ + opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8; + compiler->mode32 = 0; + break; +#endif /* SLJIT_CONFIG_X86_64 */ + } + + inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + + if (size == 3) { + inst[1] = 0x3a; + inst[2] = opcode; + } else + inst[1] = opcode; + + FAIL_IF(emit_byte(compiler, U8(lane_index))); + + if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) { + if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) { + SLJIT_ASSERT(reg_size == 5); + + if (type & SLJIT_SIMD_LANE_ZERO) { + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0)); + return emit_byte(compiler, 0x4e); + } + + FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0)); + return emit_byte(compiler, 1); + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (srcdst_orig & SLJIT_MEM) + return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0); 
+#endif /* SLJIT_CONFIG_X86_32 */ + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size >= 3) + return SLJIT_SUCCESS; + + compiler->mode32 = (type & SLJIT_32); + + size = 2; + + if (elem_size == 0) + size |= EX86_REX; + + if (elem_size == 2) { + if (type & SLJIT_32) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(!(compiler->mode32)); + size = 1; + } + + inst = emit_x86_instruction(compiler, size, srcdst, 0, srcdst, 0); + FAIL_IF(!inst); + + if (size != 1) { + inst[0] = GROUP_0F; + inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16); + } else + inst[0] = MOVSXD_r_rm; +#else /* !SLJIT_CONFIG_X86_64 */ + if (elem_size >= 2) + return SLJIT_SUCCESS; + + FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16, + (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0)); + + if (srcdst_orig & SLJIT_MEM) + return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_64 */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_uw pref; + sljit_u8 byte; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 opcode3 = TMP_REG1; +#else /* !SLJIT_CONFIG_X86_32 */ + sljit_s32 opcode3 = SLJIT_S0; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + SLJIT_ASSERT(reg_map[opcode3] == 3); + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + } else if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (type & 
SLJIT_SIMD_FLOAT) { + pref = 0; + byte = U8(src_lane_index); + + if (elem_size == 3) { + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 5) { + if (src_lane_index == 0) + return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + + byte = U8(byte | (byte << 2)); + return emit_byte(compiler, U8(byte | (byte << 4))); + } + + if (src_lane_index == 0) + return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0); + + /* Changes it to SHUFPD_x_xm. */ + pref = EX86_PREF_66; + } else if (elem_size != 2) + return SLJIT_ERR_UNSUPPORTED; + else if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 5) { + SLJIT_ASSERT(elem_size == 2); + + if (src_lane_index == 0) + return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + + byte = 0x44; + if (src_lane_index >= 4) { + byte = 0xee; + src_lane_index -= 4; + } + + FAIL_IF(emit_byte(compiler, byte)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0)); + byte = U8(src_lane_index); + } else if (freg != src && (cpu_feature_list & CPU_FEATURE_AVX)) { + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0)); + } else { + if (freg != src) + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0)); + + FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0)); + } + + if (elem_size == 2) { + byte = U8(byte | (byte << 2)); + byte = U8(byte | (byte << 4)); + } else + byte = U8(byte | (byte << 1)); + + return 
emit_byte(compiler, U8(byte)); + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (elem_size == 0) { + if (reg_size == 5 && src_lane_index >= 16) { + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa)); + src_lane_index &= 0x7; + src = freg; + } + + if ((freg != src && !(cpu_feature_list & CPU_FEATURE_AVX2)) || src_lane_index != 0) { + pref = 0; + + if ((src_lane_index & 0x3) == 0) { + pref = EX86_PREF_66; + byte = U8(src_lane_index >> 2); + } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) { + pref = EX86_PREF_F2; + byte = U8(src_lane_index >> 1); + } else { + if (freg == src || !(cpu_feature_list & CPU_FEATURE_AVX2)) { + if (freg != src) + FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); + + FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0)); + } else + FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0)); + + FAIL_IF(emit_byte(compiler, U8(src_lane_index))); + } + + if (pref != 0) { + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + FAIL_IF(emit_byte(compiler, byte)); + } + + src = freg; + } + + if (cpu_feature_list & CPU_FEATURE_AVX2) + return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? 
VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + + SLJIT_ASSERT(reg_size == 4); + FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); + return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); + } + + if ((cpu_feature_list & CPU_FEATURE_AVX2) && src_lane_index == 0 && elem_size <= 3) { + switch (elem_size) { + case 1: + pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 2: + pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + default: + pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + } + + if (reg_size == 5) + pref |= VEX_256; + + return emit_vex_instruction(compiler, pref, freg, 0, src, 0); + } + + if (reg_size == 5) { + switch (elem_size) { + case 1: + byte = U8(src_lane_index & 0x3); + src_lane_index >>= 2; + pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2; + break; + case 2: + byte = U8(src_lane_index & 0x3); + src_lane_index >>= 1; + pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2; + break; + case 3: + pref = 0; + break; + default: + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + return emit_byte(compiler, U8(src_lane_index == 0 ? 
0x44 : 0xee)); + } + + if (pref != 0) { + FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0)); + byte = U8(byte | (byte << 2)); + FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); + + if (src_lane_index == 0) + return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + + src = freg; + } + + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + byte = U8(src_lane_index); + byte = U8(byte | (byte << 2)); + return emit_byte(compiler, U8(byte | (byte << 4))); + } + + switch (elem_size) { + case 1: + byte = U8(src_lane_index & 0x3); + src_lane_index >>= 1; + pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3; + + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + byte = U8(byte | (byte << 2)); + FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); + + if ((cpu_feature_list & CPU_FEATURE_AVX2) && pref == EX86_PREF_F2) + return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + + src = freg; + /* fallthrough */ + case 2: + byte = U8(src_lane_index); + byte = U8(byte | (byte << 2)); + break; + default: + byte = U8(src_lane_index << 1); + byte = U8(byte | (byte << 2) | 0x4); + break; + } + + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); + return emit_byte(compiler, U8(byte | (byte << 4))); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_u8 opcode; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + +#if 
(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + } else if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size != 2 || elem2_size != 3) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw); + return emit_vex_instruction(compiler, CVTPS2PD_x_xm | VEX_256 | EX86_SSE2, freg, 0, src, srcw); + } + + switch (elem_size) { + case 0: + if (elem2_size == 1) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm; + else if (elem2_size == 2) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm; + else if (elem2_size == 3) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm; + else + return SLJIT_ERR_UNSUPPORTED; + break; + case 1: + if (elem2_size == 2) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm; + else if (elem2_size == 3) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm; + else + return SLJIT_ERR_UNSUPPORTED; + break; + case 2: + if (elem2_size == 3) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? 
PMOVSXDQ_x_xm : PMOVZXDQ_x_xm; + else + return SLJIT_ERR_UNSUPPORTED; + break; + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw); + return emit_vex_instruction(compiler, opcode | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 dst_r; + sljit_uw pref; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 4) { + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + pref = EX86_PREF_66 | EX86_SSE2_OP2; + + switch (elem_size) { + case 1: + FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0)); + freg = TMP_FREG; + break; + case 2: + pref = EX86_SSE2_OP2; + break; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + FAIL_IF(emit_groupf(compiler, (elem_size < 2 ? 
PMOVMSKB_r_x : MOVMSKPS_r_x) | pref, dst_r, freg, 0)); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = type & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (elem_size == 1) { + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + } + + if (dst_r == TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; + } + + if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (elem_size == 1) { + FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0)); + FAIL_IF(emit_byte(compiler, 1)); + FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0)); + FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0)); + } else { + pref = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2; + + if (elem_size == 0) + pref = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2; + else if (elem_size == 3) + pref |= EX86_PREF_66; + + FAIL_IF(emit_vex_instruction(compiler, pref, dst_r, 0, freg, 0)); + } + + if (dst_r == TMP_REG1) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = type & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src_freg) +{ + sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? 
MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2; + + SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4); + + if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3) + op |= EX86_PREF_66; + + return emit_groupf(compiler, op, dst_freg, src_freg, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 needs_move = 0; + sljit_uw op = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + } else if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm; + + if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) + op |= EX86_PREF_66; + break; + case SLJIT_SIMD_OP2_OR: + op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm; + + if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) + op |= EX86_PREF_66; + break; + case SLJIT_SIMD_OP2_XOR: + op = (type & SLJIT_SIMD_FLOAT) ? 
XORPD_x_xm : PXOR_x_xm; + + if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) + op |= EX86_PREF_66; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + needs_move = dst_freg != src1_freg && dst_freg != src2_freg; + + if (reg_size == 5 || (needs_move && (cpu_feature_list & CPU_FEATURE_AVX2))) { + if (reg_size == 5) + op |= VEX_256; + + return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0); + } + + if (needs_move) { + FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg)); + } else if (dst_freg != src1_freg) { + SLJIT_ASSERT(dst_freg == src2_freg); + src2_freg = src1_freg; + } + + FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_uw pref; + sljit_s32 free_reg = TMP_REG1; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_sw srcw = 0; + sljit_sw tempw = 0; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + CHECK_EXTRA_REGS(src_reg, srcw, (void)0); + CHECK_EXTRA_REGS(temp_reg, tempw, (void)0); + + SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP)); + SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP)); + + op = GET_OPCODE(op); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) { + /* Src is virtual 
register or its low byte is not accessible. */ + SLJIT_ASSERT(src_reg != SLJIT_R1); + free_reg = src_reg; + + EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw); + src_reg = TMP_REG1; + + if (mem_reg == src_reg) + mem_reg = TMP_REG1; + } +#endif /* SLJIT_CONFIG_X86_32 */ + + if (temp_reg != SLJIT_R0) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + + EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0); + + if (src_reg == SLJIT_R0) + src_reg = free_reg; + if (mem_reg == SLJIT_R0) + mem_reg = free_reg; +#else /* !SLJIT_CONFIG_X86_64 */ + if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0); + EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); + + mem_reg = SLJIT_R1; + free_reg = SLJIT_R1; + } else { + EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); + + if (src_reg == SLJIT_R0) + src_reg = free_reg; + if (mem_reg == SLJIT_R0) + mem_reg = free_reg; + } +#endif /* SLJIT_CONFIG_X86_64 */ + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P; +#endif /* SLJIT_CONFIG_X86_64 */ + + /* Lock prefix. */ + FAIL_IF(emit_byte(compiler, GROUP_LOCK)); + + pref = 0; + if (op == SLJIT_MOV_U16) + pref = EX86_HALF_ARG | EX86_PREF_66; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_MOV_U8) + pref = EX86_REX; +#endif /* SLJIT_CONFIG_X86_64 */ + + FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? 
CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0)); + + if (temp_reg != SLJIT_R0) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + return emit_mov(compiler, SLJIT_R0, 0, TMP_REG1, 0); +#else /* !SLJIT_CONFIG_X86_64 */ + EMIT_MOV(compiler, SLJIT_R0, 0, free_reg, 0); + if (free_reg != TMP_REG1) + return emit_mov(compiler, free_reg, 0, (free_reg == SLJIT_R1) ? SLJIT_MEM1(SLJIT_SP) : TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_64 */ + } return SLJIT_SUCCESS; } @@ -3389,11 +4826,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return NULL; #endif - inst = (sljit_u8*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF(!inst); - inst[0] = 0; - inst[1] = 2; + inst[0] = SLJIT_INST_CONST; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (dst & SLJIT_MEM) @@ -3404,52 +4840,48 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { - struct sljit_put_label *put_label; + struct sljit_jump *jump; sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_s32 reg; - sljit_uw start_size; -#endif +#endif /* SLJIT_CONFIG_X86_64 */ CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); CHECK_EXTRA_REGS(dst, dstw, (void)0); - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 
0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; reg = FAST_IS_REG(dst) ? dst : TMP_REG1; - if (emit_load_imm64(compiler, reg, 0)) - return NULL; -#else - if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0)) - return NULL; -#endif + PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0)); + jump->addr = compiler->size; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (dst & SLJIT_MEM) { - start_size = compiler->size; - if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) - return NULL; - put_label->flags = compiler->size - start_size; - } -#endif + if (reg_map[reg] >= 8) + jump->flags |= MOV_ADDR_HI; +#else /* !SLJIT_CONFIG_X86_64 */ + PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0)); +#endif /* SLJIT_CONFIG_X86_64 */ - inst = (sljit_u8*)ensure_buf(compiler, 2); + inst = (sljit_u8*)ensure_buf(compiler, 1); PTR_FAIL_IF(!inst); - inst[0] = 0; - inst[1] = 3; + inst[0] = SLJIT_INST_MOV_ADDR; - return put_label; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0)); +#endif /* SLJIT_CONFIG_X86_64 */ + + return jump; } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitSerialize.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitSerialize.c new file mode 100755 index 0000000000..4392b2da9a --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitSerialize.c @@ -0,0 +1,516 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump) +{ + return !(jump->flags & JUMP_ADDR) && (jump->u.label != NULL); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump) +{ + return (jump->flags & JUMP_ADDR) != 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump) +{ + return (jump->flags & JUMP_MOV_ADDR) != 0; +} + +#define SLJIT_SERIALIZE_DEBUG ((sljit_u16)0x1) + +struct sljit_serialized_compiler { + sljit_u32 signature; + sljit_u16 version; + sljit_u16 cpu_type; + + sljit_uw buf_segment_count; + sljit_uw label_count; + sljit_uw jump_count; + sljit_uw const_count; + + sljit_s32 options; + sljit_s32 scratches; + sljit_s32 saveds; + sljit_s32 fscratches; + sljit_s32 fsaveds; + sljit_s32 local_size; + sljit_uw size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 args_size; +#endif /* SLJIT_CONFIG_X86_32 */ + +#if ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_uw args_size; +#endif /* (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_uw cpool_diff; + sljit_uw cpool_fill; + sljit_uw patches; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + sljit_s32 delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + +}; + +struct sljit_serialized_debug_info { + sljit_sw last_flags; + sljit_s32 last_return; + sljit_s32 logical_local_size; +}; + +struct sljit_serialized_label { + sljit_uw size; +}; + +struct sljit_serialized_jump { + sljit_uw addr; + sljit_uw flags; + sljit_uw value; +}; + +struct sljit_serialized_const { + sljit_uw addr; +}; + +#define SLJIT_SERIALIZE_ALIGN(v) (((v) + 
sizeof(sljit_uw) - 1) & ~(sljit_uw)(sizeof(sljit_uw) - 1)) +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_SERIALIZE_SIGNATURE 0x534c4a54 +#else /* !SLJIT_LITTLE_ENDIAN */ +#define SLJIT_SERIALIZE_SIGNATURE 0x544a4c53 +#endif /* SLJIT_LITTLE_ENDIAN */ +#define SLJIT_SERIALIZE_VERSION 1 + +SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, + sljit_s32 options, sljit_uw *size) +{ + sljit_uw total_size = sizeof(struct sljit_serialized_compiler); + struct sljit_memory_fragment *buf; + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_serialized_compiler *serialized_compiler; + struct sljit_serialized_label *serialized_label; + struct sljit_serialized_jump *serialized_jump; + struct sljit_serialized_const *serialized_const; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + struct sljit_serialized_debug_info *serialized_debug_info; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + sljit_uw counter, used_size; + sljit_u8 *result; + sljit_u8 *ptr; + SLJIT_UNUSED_ARG(options); + + if (size != NULL) + *size = 0; + + PTR_FAIL_IF(compiler->error); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) + total_size += sizeof(struct sljit_serialized_debug_info); +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + total_size += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + /* Compute the size of the data. 
*/ + buf = compiler->buf; + while (buf != NULL) { + total_size += sizeof(sljit_uw) + SLJIT_SERIALIZE_ALIGN(buf->used_size); + buf = buf->next; + } + + total_size += compiler->label_count * sizeof(struct sljit_serialized_label); + + jump = compiler->jumps; + while (jump != NULL) { + total_size += sizeof(struct sljit_serialized_jump); + jump = jump->next; + } + + const_ = compiler->consts; + while (const_ != NULL) { + total_size += sizeof(struct sljit_serialized_const); + const_ = const_->next; + } + + result = (sljit_u8*)SLJIT_MALLOC(total_size, compiler->allocator_data); + PTR_FAIL_IF_NULL(result); + + if (size != NULL) + *size = total_size; + + ptr = result; + serialized_compiler = (struct sljit_serialized_compiler*)ptr; + ptr += sizeof(struct sljit_serialized_compiler); + + serialized_compiler->signature = SLJIT_SERIALIZE_SIGNATURE; + serialized_compiler->version = SLJIT_SERIALIZE_VERSION; + serialized_compiler->cpu_type = 0; + serialized_compiler->label_count = compiler->label_count; + serialized_compiler->options = compiler->options; + serialized_compiler->scratches = compiler->scratches; + serialized_compiler->saveds = compiler->saveds; + serialized_compiler->fscratches = compiler->fscratches; + serialized_compiler->fsaveds = compiler->fsaveds; + serialized_compiler->local_size = compiler->local_size; + serialized_compiler->size = compiler->size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + serialized_compiler->status_flags_state = compiler->status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + serialized_compiler->args_size = compiler->args_size; +#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + 
serialized_compiler->cpool_diff = compiler->cpool_diff; + serialized_compiler->cpool_fill = compiler->cpool_fill; + serialized_compiler->patches = compiler->patches; + + SLJIT_MEMCPY(ptr, compiler->cpool, compiler->cpool_fill * sizeof(sljit_uw)); + SLJIT_MEMCPY(ptr + compiler->cpool_fill * sizeof(sljit_uw), compiler->cpool_unique, compiler->cpool_fill); + ptr += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + serialized_compiler->delay_slot = compiler->delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + + buf = compiler->buf; + counter = 0; + while (buf != NULL) { + used_size = buf->used_size; + *(sljit_uw*)ptr = used_size; + ptr += sizeof(sljit_uw); + SLJIT_MEMCPY(ptr, buf->memory, used_size); + ptr += SLJIT_SERIALIZE_ALIGN(used_size); + buf = buf->next; + counter++; + } + serialized_compiler->buf_segment_count = counter; + + label = compiler->labels; + while (label != NULL) { + serialized_label = (struct sljit_serialized_label*)ptr; + serialized_label->size = label->size; + ptr += sizeof(struct sljit_serialized_label); + label = label->next; + } + + jump = compiler->jumps; + counter = 0; + while (jump != NULL) { + serialized_jump = (struct sljit_serialized_jump*)ptr; + serialized_jump->addr = jump->addr; + serialized_jump->flags = jump->flags; + + if (jump->flags & JUMP_ADDR) + serialized_jump->value = jump->u.target; + else if (jump->u.label != NULL) + serialized_jump->value = jump->u.label->u.index; + else + serialized_jump->value = SLJIT_MAX_ADDRESS; + + ptr += sizeof(struct sljit_serialized_jump); + jump = jump->next; + counter++; + } + serialized_compiler->jump_count = counter; + + const_ = compiler->consts; + counter = 0; + while (const_ != NULL) { + serialized_const = (struct sljit_serialized_const*)ptr; + serialized_const->addr = const_->addr; + ptr += sizeof(struct sljit_serialized_const); + const_ = const_->next; + counter++; + } + 
serialized_compiler->const_count = counter; + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) { + serialized_debug_info = (struct sljit_serialized_debug_info*)ptr; + serialized_debug_info->last_flags = compiler->last_flags; + serialized_debug_info->last_return = compiler->last_return; + serialized_debug_info->logical_local_size = compiler->logical_local_size; + serialized_compiler->cpu_type |= SLJIT_SERIALIZE_DEBUG; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + ptr += sizeof(struct sljit_serialized_debug_info); +#endif /* SLJIT_DEBUG */ + } +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + SLJIT_ASSERT((sljit_uw)(ptr - result) == total_size); + return (sljit_uw*)result; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size, + sljit_s32 options, void *allocator_data, void *exec_allocator_data) +{ + struct sljit_compiler *compiler; + struct sljit_serialized_compiler *serialized_compiler; + struct sljit_serialized_label *serialized_label; + struct sljit_serialized_jump *serialized_jump; + struct sljit_serialized_const *serialized_const; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + struct sljit_serialized_debug_info *serialized_debug_info; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *last_buf; + struct sljit_label *label; + struct sljit_label *last_label; + struct sljit_label **label_list = NULL; + struct sljit_jump *jump; + struct sljit_jump *last_jump; + struct sljit_const *const_; + struct sljit_const *last_const; + sljit_u8 *ptr = (sljit_u8*)buffer; + sljit_u8 *end = ptr + size; + sljit_uw i, used_size, aligned_size, label_count; + SLJIT_UNUSED_ARG(options); + + if (size < sizeof(struct sljit_serialized_compiler) || (size & (sizeof(sljit_uw) - 1)) != 0) + return 
NULL; + + serialized_compiler = (struct sljit_serialized_compiler*)ptr; + + if (serialized_compiler->signature != SLJIT_SERIALIZE_SIGNATURE || serialized_compiler->version != SLJIT_SERIALIZE_VERSION) + return NULL; + + compiler = sljit_create_compiler(allocator_data, exec_allocator_data); + PTR_FAIL_IF(compiler == NULL); + + compiler->label_count = serialized_compiler->label_count; + compiler->options = serialized_compiler->options; + compiler->scratches = serialized_compiler->scratches; + compiler->saveds = serialized_compiler->saveds; + compiler->fscratches = serialized_compiler->fscratches; + compiler->fsaveds = serialized_compiler->fsaveds; + compiler->local_size = serialized_compiler->local_size; + compiler->size = serialized_compiler->size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = serialized_compiler->status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->args_size = serialized_compiler->args_size; +#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + used_size = serialized_compiler->cpool_fill; + aligned_size = SLJIT_SERIALIZE_ALIGN(used_size * (sizeof(sljit_uw) + 1)); + compiler->cpool_diff = serialized_compiler->cpool_diff; + compiler->cpool_fill = used_size; + compiler->patches = serialized_compiler->patches; + + if ((sljit_uw)(end - ptr) < aligned_size) + goto error; + + SLJIT_MEMCPY(compiler->cpool, ptr, used_size * sizeof(sljit_uw)); + SLJIT_MEMCPY(compiler->cpool_unique, ptr + used_size * sizeof(sljit_uw), used_size); + ptr += aligned_size; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + compiler->delay_slot = 
serialized_compiler->delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(serialized_compiler->cpu_type & SLJIT_SERIALIZE_DEBUG)) + goto error; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + ptr += sizeof(struct sljit_serialized_compiler); + i = serialized_compiler->buf_segment_count; + last_buf = NULL; + while (i > 0) { + if ((sljit_uw)(end - ptr) < sizeof(sljit_uw)) + goto error; + + used_size = *(sljit_uw*)ptr; + aligned_size = SLJIT_SERIALIZE_ALIGN(used_size); + ptr += sizeof(sljit_uw); + + if ((sljit_uw)(end - ptr) < aligned_size) + goto error; + + if (last_buf == NULL) { + SLJIT_ASSERT(compiler->buf != NULL && compiler->buf->next == NULL); + buf = compiler->buf; + } else { + buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + if (!buf) + goto error; + buf->next = NULL; + } + + buf->used_size = used_size; + SLJIT_MEMCPY(buf->memory, ptr, used_size); + + if (last_buf != NULL) + last_buf->next = buf; + last_buf = buf; + + ptr += aligned_size; + i--; + } + + last_label = NULL; + label_count = serialized_compiler->label_count; + if ((sljit_uw)(end - ptr) < label_count * sizeof(struct sljit_serialized_label)) + goto error; + + label_list = (struct sljit_label **)SLJIT_MALLOC(label_count * sizeof(struct sljit_label*), allocator_data); + if (label_list == NULL) + goto error; + + for (i = 0; i < label_count; i++) { + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + if (label == NULL) + goto error; + + serialized_label = (struct sljit_serialized_label*)ptr; + label->next = NULL; + label->u.index = i; + label->size = serialized_label->size; + + if (last_label != NULL) + last_label->next = label; + else + compiler->labels = label; + last_label = label; + + label_list[i] = label; + ptr += sizeof(struct sljit_serialized_label); + } + compiler->last_label = last_label; + + last_jump = NULL; + i = 
serialized_compiler->jump_count; + if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_jump)) + goto error; + + while (i > 0) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + if (jump == NULL) + goto error; + + serialized_jump = (struct sljit_serialized_jump*)ptr; + jump->next = NULL; + jump->addr = serialized_jump->addr; + jump->flags = serialized_jump->flags; + + if (!(serialized_jump->flags & JUMP_ADDR)) { + if (serialized_jump->value != SLJIT_MAX_ADDRESS) { + if (serialized_jump->value >= label_count) + goto error; + jump->u.label = label_list[serialized_jump->value]; + } else + jump->u.label = NULL; + } else + jump->u.target = serialized_jump->value; + + if (last_jump != NULL) + last_jump->next = jump; + else + compiler->jumps = jump; + last_jump = jump; + + ptr += sizeof(struct sljit_serialized_jump); + i--; + } + compiler->last_jump = last_jump; + + SLJIT_FREE(label_list, allocator_data); + label_list = NULL; + + last_const = NULL; + i = serialized_compiler->const_count; + if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_const)) + goto error; + + while (i > 0) { + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + if (const_ == NULL) + goto error; + + serialized_const = (struct sljit_serialized_const*)ptr; + const_->next = NULL; + const_->addr = serialized_const->addr; + + if (last_const != NULL) + last_const->next = const_; + else + compiler->consts = const_; + last_const = const_; + + ptr += sizeof(struct sljit_serialized_const); + i--; + } + compiler->last_const = last_const; + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if ((sljit_uw)(end - ptr) < sizeof(struct sljit_serialized_debug_info)) + goto error; + + serialized_debug_info = (struct sljit_serialized_debug_info*)ptr; + compiler->last_flags = (sljit_s32)serialized_debug_info->last_flags; + compiler->last_return = 
serialized_debug_info->last_return; + compiler->logical_local_size = serialized_debug_info->logical_local_size; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + return compiler; + +error: + sljit_free_compiler(compiler); + if (label_list != NULL) + SLJIT_FREE(label_list, allocator_data); + return NULL; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitUtils.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitUtils.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPost.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPost.h old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPre.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPre.h old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitMain.c b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitMain.c old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTest.c b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTest.c old mode 100644 new mode 100755 index 1a133a7a4d..b46cbe1218 --- a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTest.c +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTest.c @@ -55,28 +55,30 @@ union executable_code { sljit_sw (SLJIT_FUNC *func2)(sljit_sw a, sljit_sw b); sljit_sw (SLJIT_FUNC *func3)(sljit_sw a, sljit_sw b, sljit_sw c); - void (SLJIT_FUNC *test70_f1)(sljit_s32 a, sljit_uw b, sljit_u32 c, sljit_sw d); - void (SLJIT_FUNC *test70_f2)(sljit_s32 a, sljit_u32 b, sljit_sw c, sljit_sw d); - void (SLJIT_FUNC *test70_f3)(sljit_s32 a, sljit_f32 b, sljit_uw c, sljit_f64 d); - void (SLJIT_FUNC *test70_f4)(sljit_f32 a, sljit_f64 b, sljit_f32 c, sljit_s32 d); - void (SLJIT_FUNC *test70_f5)(sljit_f64 a, sljit_f32 b, sljit_u32 c, sljit_f32 d); - void (SLJIT_FUNC *test70_f6)(sljit_f64 a, sljit_s32 b, sljit_f32 c, sljit_f64 d); - void 
(SLJIT_FUNC *test70_f7)(sljit_f32 a, sljit_s32 b, sljit_uw c, sljit_u32 d); - void (SLJIT_FUNC *test70_f8)(sljit_f64 a, sljit_f64 b, sljit_uw c, sljit_sw d); - void (SLJIT_FUNC *test70_f9)(sljit_f64 a, sljit_f64 b, sljit_uw c, sljit_f64 d); - void (SLJIT_FUNC *test70_f10)(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_s32 d); + void (SLJIT_FUNC *test57_f1)(sljit_s32 a, sljit_uw b, sljit_u32 c, sljit_sw d); + void (SLJIT_FUNC *test57_f2)(sljit_s32 a, sljit_u32 b, sljit_sw c, sljit_sw d); - sljit_sw (SLJIT_FUNC *test71_f1)(sljit_f32 a, sljit_f64 b); - sljit_sw (SLJIT_FUNC *test71_f2)(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_f64 d); - sljit_sw (SLJIT_FUNC *test71_f3)(sljit_f64 a, sljit_f64 b, sljit_f64 c); + void (SLJIT_FUNC *test58_f1)(sljit_s32 a, sljit_sw b, sljit_sw c, sljit_s32 d); + void (SLJIT_FUNC *test58_f2)(sljit_sw a, sljit_sw b, sljit_s32 c, sljit_s32 d); - void (SLJIT_FUNC *test73_f1)(sljit_s32 a, sljit_sw b, sljit_sw c, sljit_s32 d); - void (SLJIT_FUNC *test73_f2)(sljit_sw a, sljit_sw b, sljit_s32 c, sljit_s32 d); - void (SLJIT_FUNC *test73_f3)(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_sw d); - void (SLJIT_FUNC *test73_f4)(sljit_f64 a, sljit_f64 b, sljit_sw c, sljit_sw d); + void (SLJIT_FUNC *test_float12_f1)(sljit_s32 a, sljit_f32 b, sljit_uw c, sljit_f64 d); + void (SLJIT_FUNC *test_float12_f2)(sljit_f32 a, sljit_f64 b, sljit_f32 c, sljit_s32 d); + void (SLJIT_FUNC *test_float12_f3)(sljit_f64 a, sljit_f32 b, sljit_u32 c, sljit_f32 d); + void (SLJIT_FUNC *test_float12_f4)(sljit_f64 a, sljit_s32 b, sljit_f32 c, sljit_f64 d); + void (SLJIT_FUNC *test_float12_f5)(sljit_f32 a, sljit_s32 b, sljit_uw c, sljit_u32 d); + void (SLJIT_FUNC *test_float12_f6)(sljit_f64 a, sljit_f64 b, sljit_uw c, sljit_sw d); + void (SLJIT_FUNC *test_float12_f7)(sljit_f64 a, sljit_f64 b, sljit_uw c, sljit_f64 d); + void (SLJIT_FUNC *test_float12_f8)(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_s32 d); - sljit_f32 (SLJIT_FUNC *test81_f1)(sljit_sw a); - sljit_f64 (SLJIT_FUNC 
*test81_f2)(sljit_sw a); + void (SLJIT_FUNC *test_float14_f1)(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_sw d); + void (SLJIT_FUNC *test_float14_f2)(sljit_f64 a, sljit_f64 b, sljit_sw c, sljit_sw d); + + sljit_sw (SLJIT_FUNC *test_call6_f1)(sljit_f32 a, sljit_f64 b); + sljit_sw (SLJIT_FUNC *test_call6_f2)(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_f64 d); + sljit_sw (SLJIT_FUNC *test_call6_f3)(sljit_f64 a, sljit_f64 b, sljit_f64 c); + + sljit_f32 (SLJIT_FUNC *test_call10_f1)(sljit_sw a); + sljit_f64 (SLJIT_FUNC *test_call10_f2)(sljit_sw a); }; typedef union executable_code executable_code; @@ -98,11 +100,33 @@ static sljit_s32 silent = 0; } #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define IS_32BIT 0 +#define IS_64BIT 1 #define WCONST(const64, const32) ((sljit_sw)SLJIT_W(const64)) #else /* !SLJIT_64BIT_ARCHITECTURE */ +#define IS_32BIT 1 +#define IS_64BIT 0 #define WCONST(const64, const32) ((sljit_sw)const32) #endif /* SLJIT_64BIT_ARCHITECTURE */ +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define IS_X86 1 +#else /* !SLJIT_CONFIG_X86 */ +#define IS_X86 0 +#endif /* SLJIT_CONFIG_X86 */ + +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#define IS_ARM 1 +#else /* !SLJIT_CONFIG_ARM */ +#define IS_ARM 0 +#endif /* SLJIT_CONFIG_ARM */ + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define LITTLE_BIG(a, b) (a) +#else /* !SLJIT_LITTLE_ENDIAN */ +#define LITTLE_BIG(a, b) (b) +#endif /* SLJIT_LITTLE_ENDIAN */ + static void cond_set(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { /* Testing both sljit_emit_op_flags and sljit_emit_jump. */ @@ -118,7 +142,7 @@ static void cond_set(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw ds #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) -/* For interface testing and for test64. */ +/* For interface testing and for test51. 
*/ void *sljit_test_malloc_exec(sljit_uw size, void *exec_allocator_data) { if (exec_allocator_data) @@ -188,6 +212,45 @@ static void test_exec_allocator(void) #endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ +static void test_macros(void) +{ + SLJIT_ASSERT(SLJIT_IS_MEM(SLJIT_MEM0())); + SLJIT_ASSERT(SLJIT_IS_MEM(SLJIT_MEM1(SLJIT_S0))); + SLJIT_ASSERT(SLJIT_IS_MEM(SLJIT_MEM2(SLJIT_R0, SLJIT_S0))); + SLJIT_ASSERT(SLJIT_IS_MEM0(SLJIT_MEM0())); + SLJIT_ASSERT(!SLJIT_IS_MEM0(SLJIT_MEM1(SLJIT_S0))); + SLJIT_ASSERT(!SLJIT_IS_MEM0(SLJIT_MEM2(SLJIT_R0, SLJIT_S0))); + SLJIT_ASSERT(!SLJIT_IS_MEM1(SLJIT_MEM0())); + SLJIT_ASSERT(SLJIT_IS_MEM1(SLJIT_MEM1(SLJIT_S0))); + SLJIT_ASSERT(!SLJIT_IS_MEM1(SLJIT_MEM2(SLJIT_R0, SLJIT_S0))); + SLJIT_ASSERT(!SLJIT_IS_MEM2(SLJIT_MEM0())); + SLJIT_ASSERT(!SLJIT_IS_MEM2(SLJIT_MEM1(SLJIT_R0))); + SLJIT_ASSERT(SLJIT_IS_MEM2(SLJIT_MEM2(SLJIT_R0, SLJIT_R1))); + + SLJIT_ASSERT(!SLJIT_IS_REG(SLJIT_IMM)); + SLJIT_ASSERT(!SLJIT_IS_MEM(SLJIT_IMM)); + SLJIT_ASSERT(SLJIT_IS_IMM(SLJIT_IMM)); + SLJIT_ASSERT(!SLJIT_IS_REG_PAIR(SLJIT_IMM)); + SLJIT_ASSERT(SLJIT_IS_REG(SLJIT_S0)); + SLJIT_ASSERT(!SLJIT_IS_MEM(SLJIT_S0)); + SLJIT_ASSERT(!SLJIT_IS_IMM(SLJIT_S0)); + SLJIT_ASSERT(!SLJIT_IS_REG_PAIR(SLJIT_S0)); + SLJIT_ASSERT(!SLJIT_IS_REG(SLJIT_REG_PAIR(SLJIT_R0, SLJIT_S0))); + SLJIT_ASSERT(!SLJIT_IS_MEM(SLJIT_REG_PAIR(SLJIT_R0, SLJIT_S0))); + SLJIT_ASSERT(!SLJIT_IS_IMM(SLJIT_REG_PAIR(SLJIT_R0, SLJIT_S0))); + SLJIT_ASSERT(SLJIT_IS_REG_PAIR(SLJIT_REG_PAIR(SLJIT_R0, SLJIT_S0))); + + SLJIT_ASSERT(SLJIT_EXTRACT_REG(SLJIT_R2) == SLJIT_R2); + SLJIT_ASSERT(SLJIT_EXTRACT_REG(SLJIT_FR1) == SLJIT_FR1); + SLJIT_ASSERT(SLJIT_EXTRACT_REG(SLJIT_MEM1(SLJIT_S2)) == SLJIT_S2); + SLJIT_ASSERT(SLJIT_EXTRACT_REG(SLJIT_MEM2(SLJIT_S1, SLJIT_S2)) == SLJIT_S1); + SLJIT_ASSERT(SLJIT_EXTRACT_REG(SLJIT_REG_PAIR(SLJIT_R3, SLJIT_S3)) == SLJIT_R3); + SLJIT_ASSERT(SLJIT_EXTRACT_REG(SLJIT_REG_PAIR(SLJIT_FR2, SLJIT_FR4)) == SLJIT_FR2); + 
SLJIT_ASSERT(SLJIT_EXTRACT_SECOND_REG(SLJIT_MEM2(SLJIT_S1, SLJIT_S2)) == SLJIT_S2); + SLJIT_ASSERT(SLJIT_EXTRACT_SECOND_REG(SLJIT_REG_PAIR(SLJIT_R3, SLJIT_S3)) == SLJIT_S3); + SLJIT_ASSERT(SLJIT_EXTRACT_SECOND_REG(SLJIT_REG_PAIR(SLJIT_FR2, SLJIT_FR4)) == SLJIT_FR4); +} + static void test1(void) { /* Enter and return from an sljit function. */ @@ -222,7 +285,8 @@ static void test2(void) /* Test mov. */ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - sljit_sw buf[10]; + sljit_sw buf[12]; + sljit_s32 i; static sljit_sw data[2] = { 0, -9876 }; if (verbose) @@ -230,14 +294,10 @@ static void test2(void) FAILED(!compiler, "cannot create compiler\n"); + for (i = 0; i < 12; i++) + buf[i] = 0; buf[0] = 5678; - buf[1] = 0; - buf[2] = 0; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = 0; - buf[7] = 0; + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 3, 2, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 9999); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_S0, 0); @@ -273,6 +333,13 @@ static void test2(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -2450); /* buf[9] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), -0xfff100, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&buf - 0xffef01 + 10 * (sljit_sw)sizeof(sljit_sw)); + /* buf[10] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 0xffef01, SLJIT_IMM, 8796); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&buf + 0xffef01 + 11 * (sljit_sw)sizeof(sljit_sw)); + /* buf[11] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), -0xffef01, SLJIT_IMM, 5704); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R2, 0); code.code = sljit_generate_code(compiler); @@ -289,6 +356,8 @@ static void test2(void) FAILED(buf[7] != 3456, "test2 case 8 failed\n"); FAILED(buf[8] != 7896, "test2 case 9 failed\n"); FAILED(buf[9] != -2450, "test2 case 10 
failed\n"); + FAILED(buf[10] != 8796, "test2 case 11 failed\n"); + FAILED(buf[11] != 5704, "test2 case 12 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; @@ -456,22 +525,16 @@ static void test6(void) /* Test addc, sub, subc. */ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - sljit_sw buf[11]; + sljit_sw buf[21]; + sljit_s32 i; if (verbose) printf("Run test6\n"); FAILED(!compiler, "cannot create compiler\n"); - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = 0; - buf[7] = 0; - buf[8] = 0; - buf[9] = 0; + + for (i = 0; i < 21; i++) + buf[i] = 0; buf[10] = 4000; sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 3, 1, 0, 0, 0); @@ -517,6 +580,47 @@ static void test6(void) /* buf[10] */ sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_R1, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_R1, 0); + /* buf[11] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_ADDC, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 11, SLJIT_IMM, 0xff00ff, SLJIT_R1, 0); + /* buf[12] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 12, SLJIT_R1, 0, SLJIT_IMM, 0xff00ff); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4); + /* buf[13] */ + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 13, SLJIT_R0, 0, SLJIT_IMM, -2); + /* buf[14] */ + sljit_emit_op2(compiler, SLJIT_ADDC, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 14, SLJIT_R1, 0, 
SLJIT_IMM, -2); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -4); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, -2); + sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, -2); + /* buf[15] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 15, SLJIT_R0, 0); + /* buf[16] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 16, SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 4); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, -4); + sljit_emit_op2(compiler, SLJIT_ADDC, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, -4); + /* buf[17] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 17, SLJIT_R1, 0); + /* buf[18] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 18, SLJIT_R2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -4); + /* buf[19] */ + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 19, SLJIT_R1, 0, SLJIT_IMM, -4); + /* buf[20] */ + sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 20, SLJIT_R2, 0, SLJIT_IMM, -4); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 10); sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 5); sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 2); @@ -539,6 +643,16 @@ static void test6(void) FAILED(buf[8] != 100 + 32767, "test6 case 10 failed\n"); FAILED(buf[9] != 0x152aa42e, "test6 case 11 failed\n"); FAILED(buf[10] != -2000, "test6 case 12 failed\n"); + 
FAILED(buf[11] != 0xff0100, "test6 case 13 failed\n"); + FAILED(buf[12] != -0xff0101, "test6 case 14 failed\n"); + FAILED(buf[13] != 3, "test6 case 15 failed\n"); + FAILED(buf[14] != 3, "test6 case 16 failed\n"); + FAILED(buf[15] != -3, "test6 case 17 failed\n"); + FAILED(buf[16] != -3, "test6 case 18 failed\n"); + FAILED(buf[17] != -1, "test6 case 19 failed\n"); + FAILED(buf[18] != 0, "test6 case 20 failed\n"); + FAILED(buf[19] != 1, "test6 case 21 failed\n"); + FAILED(buf[20] != 0, "test6 case 22 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; @@ -629,7 +743,7 @@ static void test8(void) for (i = 1; i < 21; i++) buf[i] = 3; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 2, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 20); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 10); sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_IMM, 6, SLJIT_IMM, 5); @@ -772,7 +886,7 @@ static void test9(void) buf[4] = 1 << 10; buf[9] = 3; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 4, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 4, 2, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xf); sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 3); sljit_emit_op2(compiler, SLJIT_LSHR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1); @@ -826,12 +940,12 @@ static void test9(void) sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0x7d00); sljit_emit_op2(compiler, SLJIT_LSHR32, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 32); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); #endif sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 
0xe30000); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op2(compiler, SLJIT_ASHR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffc0); #else sljit_emit_op2(compiler, SLJIT_ASHR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffe0); @@ -839,7 +953,7 @@ static void test9(void) sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0x25000000); sljit_emit_op2(compiler, SLJIT_SHL32, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xfffe1); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); #endif /* buf[11] */ @@ -850,7 +964,7 @@ static void test9(void) sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R1, 0, SLJIT_R0, 0, shift_reg, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0xf600); sljit_emit_op2(compiler, SLJIT_LSHR32, SLJIT_R0, 0, SLJIT_R0, 0, shift_reg, 0); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT /* Alternative form of uint32 type cast. 
*/ sljit_emit_op2(compiler, SLJIT_AND, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffffffff); #endif @@ -942,7 +1056,7 @@ static void test10(void) sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R0, 0); /* buf[5] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_R0, 0); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x123456789)); /* buf[6] */ @@ -962,7 +1076,7 @@ static void test10(void) FAILED(buf[3] != -12, "test10 case 5 failed\n"); FAILED(buf[4] != 100, "test10 case 6 failed\n"); FAILED(buf[5] != 81, "test10 case 7 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT FAILED(buf[6] != SLJIT_W(0x123456789) * 3, "test10 case 8 failed\n"); #endif @@ -985,13 +1099,8 @@ static void test11(void) sljit_uw const2_addr; sljit_uw const3_addr; sljit_uw const4_addr; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw word_value1 = (sljit_sw)SLJIT_W(0xaaaaaaaaaaaaaaaa); - sljit_sw word_value2 = (sljit_sw)SLJIT_W(0xfee1deadfbadf00d); -#else - sljit_sw word_value1 = (sljit_sw)0xaaaaaaaal; - sljit_sw word_value2 = (sljit_sw)0xfbadf00dl; -#endif + sljit_sw word_value1 = WCONST(0xaaaaaaaaaaaaaaaa, 0xaaaaaaaa); + sljit_sw word_value2 = WCONST(0xfee1deadfbadf00d, 0xfbadf00d); sljit_sw buf[3]; if (verbose) @@ -1031,7 +1140,7 @@ static void test11(void) memset(value, 255, 16); } - /* Return vaue */ + /* Return value */ const4 = sljit_emit_const(compiler, SLJIT_RETURN_REG, 0, (sljit_sw)0xf7afcdb7); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); @@ -1092,7 +1201,7 @@ static void test12(void) FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, W), 3, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, W), 3, 
2, 0, 0, 0); sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_S1, 0, SLJIT_IMM, 10); jump1 = sljit_emit_jump(compiler, SLJIT_REWRITABLE_JUMP | SLJIT_SIG_GREATER); /* Default handler. */ @@ -1159,405 +1268,77 @@ static void test12(void) static void test13(void) { - /* Test fpu monadic functions. */ + /* Test emit const and jumps. */ executable_code code; - struct sljit_compiler* compiler; - sljit_f64 buf[7]; - sljit_sw buf2[6]; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *label; + struct sljit_jump *jump1; + struct sljit_jump *jump2; + struct sljit_const* const1; + struct sljit_jump *mov_addr; + sljit_sw executable_offset; + sljit_uw const_addr; + sljit_uw jump_addr; + sljit_uw label_addr; + sljit_sw buf[4]; if (verbose) printf("Run test13\n"); - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test13 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; - buf[0] = 7.75; - buf[1] = -4.5; - buf[2] = 0.0; - buf[3] = 0.0; - buf[4] = 0.0; - buf[5] = 0.0; - buf[6] = 0.0; + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 2, 0, 0, 0); - buf2[0] = 10; - buf2[1] = 10; - buf2[2] = 10; - buf2[3] = 10; - buf2[4] = 10; - buf2[5] = 10; + jump1 = sljit_emit_jump(compiler, SLJIT_JUMP); + label = sljit_emit_label(compiler); + jump2 = sljit_emit_jump(compiler, SLJIT_JUMP); + sljit_set_label(jump2, label); + label = sljit_emit_label(compiler); + sljit_set_label(jump1, label); - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 3, 2, 6, 0, 0); - /* buf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&buf[2], SLJIT_MEM0(), (sljit_sw)&buf[1]); - /* buf[3] */ - sljit_emit_fop1(compiler, SLJIT_ABS_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - /* buf[4] */ - sljit_emit_fop1(compiler, 
SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM0(), (sljit_sw)&buf[0]); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0); - sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_FR2, 0, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_FR2, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&buf[4], SLJIT_FR3, 0); - /* buf[5] */ - sljit_emit_fop1(compiler, SLJIT_ABS_F64, SLJIT_FR4, 0, SLJIT_FR1, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR4, 0); - /* buf[6] */ - sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR4, 0); + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_R2, 0); + /* buf[0] */ + const1 = sljit_emit_const(compiler, SLJIT_MEM1(SLJIT_S0), 0, -1234); - /* buf2[0] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_F_GREATER); - /* buf2[1] */ - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR5, 0); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_F_GREATER); - /* buf2[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_FR5, 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_FR1, 0); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw), SLJIT_F_EQUAL); - /* buf2[3] */ - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_FR1, 0); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_F_LESS); - /* buf2[4] */ - 
sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_F_EQUAL); - /* buf2[5] */ - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_NOT_EQUAL, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_F_NOT_EQUAL); + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, -1234); + + label = sljit_emit_label(compiler); + sljit_set_label(mov_addr, label); + + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, -56789); + jump1 = sljit_emit_jump(compiler, SLJIT_JUMP | SLJIT_REWRITABLE_JUMP); + label = sljit_emit_label(compiler); + sljit_set_label(jump1, label); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 0); + label = sljit_emit_label(compiler); sljit_emit_return_void(compiler); code.code = sljit_generate_code(compiler); CHECK(compiler); + executable_offset = sljit_get_executable_offset(compiler); + const_addr = sljit_get_const_addr(const1); + jump_addr = sljit_get_jump_addr(jump1); + label_addr = sljit_get_label_addr(label); sljit_free_compiler(compiler); - code.func2((sljit_sw)&buf, (sljit_sw)&buf2); - FAILED(buf[2] != -4.5, "test13 case 1 failed\n"); - FAILED(buf[3] != 4.5, "test13 case 2 failed\n"); - FAILED(buf[4] != -7.75, "test13 case 3 failed\n"); - FAILED(buf[5] != 4.5, "test13 case 4 failed\n"); - FAILED(buf[6] != -4.5, "test13 case 5 failed\n"); + sljit_set_const(const_addr, 87654, executable_offset); + sljit_set_jump_addr(jump_addr, label_addr, executable_offset); - FAILED(buf2[0] != 1, "test13 case 6 failed\n"); - FAILED(buf2[1] != 0, "test13 case 7 failed\n"); - FAILED(buf2[2] != 1, "test13 case 8 failed\n"); - FAILED(buf2[3] != 0, "test13 
case 9 failed\n"); - FAILED(buf2[4] != 0, "test13 case 10 failed\n"); - FAILED(buf2[5] != 1, "test13 case 11 failed\n"); + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 87654, "test13 case 1 failed\n"); + FAILED(buf[1] != -56789, "test13 case 2 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } static void test14(void) -{ - /* Test fpu diadic functions. */ - executable_code code; - struct sljit_compiler* compiler; - sljit_f64 buf[15]; - - if (verbose) - printf("Run test14\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test14 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - buf[0] = 7.25; - buf[1] = 3.5; - buf[2] = 1.75; - buf[3] = 0.0; - buf[4] = 0.0; - buf[5] = 0.0; - buf[6] = 0.0; - buf[7] = 0.0; - buf[8] = 0.0; - buf[9] = 0.0; - buf[10] = 0.0; - buf[11] = 0.0; - buf[12] = 8.0; - buf[13] = 4.0; - buf[14] = 0.0; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 1, 6, 0, 0); - - /* ADD */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); - /* buf[3] */ - sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 3, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR1, 0); - sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR1, 0, SLJIT_FR0, 0, SLJIT_FR1, 0); - /* buf[4] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 4, SLJIT_FR0, 0); - /* buf[5] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 5, SLJIT_FR1, 0); - - /* SUB */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, 
SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); - /* buf[6] */ - sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 6, SLJIT_FR3, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_F64_SHIFT); - sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_FR2, 0, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); - sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_FR3, 0, SLJIT_FR2, 0, SLJIT_FR3, 0); - /* buf[7] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 7, SLJIT_FR2, 0); - /* buf[8] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 8, SLJIT_FR3, 0); - - /* MUL */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); - /* buf[9] */ - sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 9, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_F64_SHIFT, SLJIT_FR1, 0); - sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_FR1, 0, SLJIT_FR1, 0, SLJIT_FR2, 0); - sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2, SLJIT_FR2, 0); - /* buf[10] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 10, SLJIT_FR1, 0); - /* buf[11] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 11, SLJIT_FR5, 0); - - /* DIV */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 13); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR4, 0, SLJIT_FR5, 0); - /* buf[12] */ - sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12, SLJIT_FR1, 0); - sljit_emit_fop2(compiler, 
SLJIT_DIV_F64, SLJIT_FR5, 0, SLJIT_FR5, 0, SLJIT_FR1, 0); - sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_FR4, 0, SLJIT_FR1, 0, SLJIT_FR4, 0); - /* buf[13] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 13, SLJIT_FR5, 0); - /* buf[14] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 14, SLJIT_FR4, 0); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - FAILED(buf[3] != 10.75, "test14 case 1 failed\n"); - FAILED(buf[4] != 5.25, "test14 case 2 failed\n"); - FAILED(buf[5] != 7.0, "test14 case 3 failed\n"); - FAILED(buf[6] != 0.0, "test14 case 4 failed\n"); - FAILED(buf[7] != 5.5, "test14 case 5 failed\n"); - FAILED(buf[8] != 3.75, "test14 case 6 failed\n"); - FAILED(buf[9] != 24.5, "test14 case 7 failed\n"); - FAILED(buf[10] != 38.5, "test14 case 8 failed\n"); - FAILED(buf[11] != 9.625, "test14 case 9 failed\n"); - FAILED(buf[12] != 2.0, "test14 case 10 failed\n"); - FAILED(buf[13] != 2.0, "test14 case 11 failed\n"); - FAILED(buf[14] != 0.5, "test14 case 12 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static sljit_sw func(sljit_sw a, sljit_sw b, sljit_sw c) -{ - return a + b + c + 5; -} - -static sljit_sw func4(sljit_sw a, sljit_sw b, sljit_sw c, sljit_sw d) -{ - return func(a, b, c) + d; -} - -static void test15(void) -{ - /* Test function call. 
*/ - executable_code code; - struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - struct sljit_jump* jump = NULL; - sljit_sw buf[9]; - - if (verbose) - printf("Run test15\n"); - - FAILED(!compiler, "cannot create compiler\n"); - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = 0; - buf[7] = 0; - buf[8] = SLJIT_FUNC_ADDR(func); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 4, 2, 0, 0, 0); - - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 7); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(func)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); - - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -5); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -10); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); - jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_REWRITABLE_JUMP, SLJIT_ARGS3(W, W, W, W)); - sljit_set_target(jump, (sljit_uw)-1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 40); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -60); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -30); - 
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - /* buf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 10); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 16); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R2, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - /* buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 100); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 110); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 120); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R3, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - /* buf[6] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 3); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_S1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - /* buf[7] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 3); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -6); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func4)); - sljit_emit_icall(compiler, SLJIT_CALL, 
SLJIT_ARGS4(W, W, W, W, W), SLJIT_S1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - /* buf[8] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -10); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -16); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 6); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_set_jump_addr(sljit_get_jump_addr(jump), SLJIT_FUNC_UADDR(func), sljit_get_executable_offset(compiler)); - sljit_free_compiler(compiler); - - FAILED(code.func1((sljit_sw)&buf) != -15, "test15 case 1 failed\n"); - FAILED(buf[0] != 14, "test15 case 2 failed\n"); - FAILED(buf[1] != -8, "test15 case 3 failed\n"); - FAILED(buf[2] != SLJIT_FUNC_ADDR(func) + 42, "test15 case 4 failed\n"); - FAILED(buf[3] != SLJIT_FUNC_ADDR(func) - 85, "test15 case 5 failed\n"); - FAILED(buf[4] != SLJIT_FUNC_ADDR(func) + 31, "test15 case 6 failed\n"); - FAILED(buf[5] != 335, "test15 case 7 failed\n"); - FAILED(buf[6] != 11, "test15 case 8 failed\n"); - FAILED(buf[7] != 5, "test15 case 9 failed\n"); - FAILED(buf[8] != -15, "test15 case 10 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test16(void) -{ - /* Ackermann benchmark. 
*/ - executable_code code; - struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - struct sljit_label *entry; - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_jump *jump1; - struct sljit_jump *jump2; - - if (verbose) - printf("Run test16\n"); - - FAILED(!compiler, "cannot create compiler\n"); - - entry = sljit_emit_label(compiler); - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(W, W, W), 3, 2, 0, 0, 0); - /* If x == 0. */ - sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_S0, 0, SLJIT_IMM, 0); - jump1 = sljit_emit_jump(compiler, SLJIT_EQUAL); - /* If y == 0. */ - sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_S1, 0, SLJIT_IMM, 0); - jump2 = sljit_emit_jump(compiler, SLJIT_EQUAL); - - /* Ack(x,y-1). */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); - sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 1); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); - sljit_set_label(jump, entry); - - /* Returns with Ack(x-1, Ack(x,y-1)). */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_RETURN_REG, 0); - sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); - sljit_set_label(jump, entry); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - - /* Returns with y+1. 
*/ - label = sljit_emit_label(compiler); - sljit_set_label(jump1, label); - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1, SLJIT_S1, 0); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - - /* Returns with Ack(x-1,1) */ - label = sljit_emit_label(compiler); - sljit_set_label(jump2, label); - sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); - sljit_set_label(jump, entry); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.func2(3, 3) != 61, "test16 case 1 failed\n"); - /* For benchmarking. */ - /* FAILED(code.func2(3, 11) != 16381, "test16 case 1 failed\n"); */ - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test17(void) { /* Test arm constant pool. 
*/ executable_code code; @@ -1566,14 +1347,14 @@ static void test17(void) sljit_sw buf[5]; if (verbose) - printf("Run test17\n"); + printf("Run test14\n"); FAILED(!compiler, "cannot create compiler\n"); for (i = 0; i < 5; i++) buf[i] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 1, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 1, 0, 0, 0); for (i = 0; i <= 0xfff; i++) { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)0x81818000 | i); if ((i & 0x3ff) == 0) @@ -1589,17 +1370,17 @@ static void test17(void) sljit_free_compiler(compiler); code.func1((sljit_sw)&buf); - FAILED((sljit_uw)buf[0] != 0x81818000, "test17 case 1 failed\n"); - FAILED((sljit_uw)buf[1] != 0x81818400, "test17 case 2 failed\n"); - FAILED((sljit_uw)buf[2] != 0x81818800, "test17 case 3 failed\n"); - FAILED((sljit_uw)buf[3] != 0x81818c00, "test17 case 4 failed\n"); - FAILED((sljit_uw)buf[4] != 0x81818fff, "test17 case 5 failed\n"); + FAILED((sljit_uw)buf[0] != 0x81818000, "test14 case 1 failed\n"); + FAILED((sljit_uw)buf[1] != 0x81818400, "test14 case 2 failed\n"); + FAILED((sljit_uw)buf[2] != 0x81818800, "test14 case 3 failed\n"); + FAILED((sljit_uw)buf[3] != 0x81818c00, "test14 case 4 failed\n"); + FAILED((sljit_uw)buf[4] != 0x81818fff, "test14 case 5 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test18(void) +static void test15(void) { /* Test 64 bit. 
*/ executable_code code; @@ -1607,7 +1388,7 @@ static void test18(void) sljit_sw buf[11]; if (verbose) - printf("Run test18\n"); + printf("Run test15\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; @@ -1620,15 +1401,15 @@ static void test18(void) buf[7] = 100; buf[8] = 100; buf[9] = 0; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) +#if IS_64BIT && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) buf[10] = SLJIT_W(1) << 32; #else buf[10] = 1; #endif - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 2, 0, 0, 0); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT /* buf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, SLJIT_W(0x1122334455667788)); /* buf[1] */ @@ -1668,12 +1449,12 @@ static void test18(void) sljit_emit_op2(compiler, SLJIT_SHL32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 9, SLJIT_IMM, SLJIT_W(0xffff0000), SLJIT_R0, 0); /* buf[10] */ sljit_emit_op2(compiler, SLJIT_MUL32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_IMM, -1); -#else /* !SLJIT_64BIT_ARCHITECTURE */ +#else /* !IS_64BIT */ /* buf[0] */ sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 0x11223344); /* buf[1] */ sljit_emit_op2(compiler, SLJIT_ADD32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 0x44332211); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ sljit_emit_return_void(compiler); @@ -1682,41 +1463,41 @@ static void test18(void) sljit_free_compiler(compiler); code.func1((sljit_sw)&buf); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(buf[0] != SLJIT_W(0x1122334455667788), "test18 case 1 failed\n"); +#if IS_64BIT + FAILED(buf[0] != SLJIT_W(0x1122334455667788), "test15 case 1 failed\n"); #if (defined 
SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) - FAILED(buf[1] != 0x55667788, "test18 case 2 failed\n"); + FAILED(buf[1] != 0x55667788, "test15 case 2 failed\n"); #else /* !SLJIT_LITTLE_ENDIAN */ - FAILED(buf[1] != SLJIT_W(0x5566778800000000), "test18 case 2 failed\n"); + FAILED(buf[1] != SLJIT_W(0x5566778800000000), "test15 case 2 failed\n"); #endif /* SLJIT_LITTLE_ENDIAN */ - FAILED(buf[2] != SLJIT_W(2000000000000), "test18 case 3 failed\n"); - FAILED(buf[3] != SLJIT_W(4000000000000), "test18 case 4 failed\n"); + FAILED(buf[2] != SLJIT_W(2000000000000), "test15 case 3 failed\n"); + FAILED(buf[3] != SLJIT_W(4000000000000), "test15 case 4 failed\n"); #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) - FAILED(buf[4] != 0x28282828, "test18 case 5 failed\n"); + FAILED(buf[4] != 0x28282828, "test15 case 5 failed\n"); #else /* !SLJIT_LITTLE_ENDIAN */ - FAILED(buf[4] != SLJIT_W(0x2828282800000000), "test18 case 5 failed\n"); + FAILED(buf[4] != SLJIT_W(0x2828282800000000), "test15 case 5 failed\n"); #endif /* SLJIT_LITTLE_ENDIAN */ - FAILED(buf[5] != 0, "test18 case 6 failed\n"); - FAILED(buf[6] != 1, "test18 case 7 failed\n"); - FAILED(buf[7] != 1, "test18 case 8 failed\n"); - FAILED(buf[8] != 0, "test18 case 9 failed\n"); + FAILED(buf[5] != 0, "test15 case 6 failed\n"); + FAILED(buf[6] != 1, "test15 case 7 failed\n"); + FAILED(buf[7] != 1, "test15 case 8 failed\n"); + FAILED(buf[8] != 0, "test15 case 9 failed\n"); #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) - FAILED(buf[9] != (sljit_sw)0xfff00000, "test18 case 10 failed\n"); - FAILED(buf[10] != (sljit_sw)0xffffffff, "test18 case 11 failed\n"); + FAILED(buf[9] != (sljit_sw)0xfff00000, "test15 case 10 failed\n"); + FAILED(buf[10] != (sljit_sw)0xffffffff, "test15 case 11 failed\n"); #else /* !SLJIT_LITTLE_ENDIAN */ - FAILED(buf[9] != (sljit_sw)SLJIT_W(0xfff0000000000000), "test18 case 10 failed\n"); - FAILED(buf[10] != (sljit_sw)SLJIT_W(0xffffffff00000000), "test18 case 11 failed\n"); + FAILED(buf[9] != 
(sljit_sw)SLJIT_W(0xfff0000000000000), "test15 case 10 failed\n"); + FAILED(buf[10] != (sljit_sw)SLJIT_W(0xffffffff00000000), "test15 case 11 failed\n"); #endif /* SLJIT_LITTLE_ENDIAN */ -#else /* !SLJIT_64BIT_ARCHITECTURE */ - FAILED(buf[0] != 0x11223344, "test18 case 1 failed\n"); - FAILED(buf[1] != 0x44332211, "test18 case 2 failed\n"); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#else /* !IS_64BIT */ + FAILED(buf[0] != 0x11223344, "test15 case 1 failed\n"); + FAILED(buf[1] != 0x44332211, "test15 case 2 failed\n"); +#endif /* IS_64BIT */ sljit_free_code(code.code, NULL); successful_tests++; } -static void test19(void) +static void test16(void) { /* Test arm partial instruction caching. */ executable_code code; @@ -1724,7 +1505,7 @@ static void test19(void) sljit_sw buf[10]; if (verbose) - printf("Run test19\n"); + printf("Run test16\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 6; @@ -1736,7 +1517,7 @@ static void test19(void) buf[6] = 2; buf[7] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 1, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 1, 0, 0, 0); /* buf[0] */ sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); /* buf[2] */ @@ -1762,20 +1543,20 @@ static void test19(void) sljit_free_compiler(compiler); code.func1((sljit_sw)&buf); - FAILED(buf[0] != 10, "test19 case 1 failed\n"); - FAILED(buf[1] != 4, "test19 case 2 failed\n"); - FAILED(buf[2] != 14, "test19 case 3 failed\n"); - FAILED(buf[3] != 14, "test19 case 4 failed\n"); - FAILED(buf[4] != 8, "test19 case 5 failed\n"); - FAILED(buf[5] != 6, "test19 case 6 failed\n"); - FAILED(buf[6] != 12, "test19 case 7 failed\n"); - FAILED(buf[7] != 10, "test19 case 8 failed\n"); + FAILED(buf[0] != 10, "test16 case 1 failed\n"); + FAILED(buf[1] != 4, "test16 case 2 failed\n"); + FAILED(buf[2] != 14, "test16 case 3 failed\n"); + FAILED(buf[3] != 14, "test16 case 4 failed\n"); + FAILED(buf[4] != 8, 
"test16 case 5 failed\n"); + FAILED(buf[5] != 6, "test16 case 6 failed\n"); + FAILED(buf[6] != 12, "test16 case 7 failed\n"); + FAILED(buf[7] != 10, "test16 case 8 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test20(void) +static void test17(void) { /* Test stack. */ executable_code code; @@ -1783,14 +1564,10 @@ static void test20(void) struct sljit_jump* jump; struct sljit_label* label; sljit_sw buf[6]; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw offset_value = SLJIT_W(0x1234567812345678); -#else - sljit_sw offset_value = SLJIT_W(0x12345678); -#endif + sljit_sw offset_value = WCONST(0x1234567812345678, 0x12345678); if (verbose) - printf("Run test20\n"); + printf("Run test17\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 5; @@ -1828,11 +1605,11 @@ static void test20(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func1((sljit_sw)&buf) != -12345, "test20 case 1 failed\n") + FAILED(code.func1((sljit_sw)&buf) != -12345, "test17 case 1 failed\n"); - FAILED(buf[2] != 60, "test20 case 2 failed\n"); - FAILED(buf[3] != 17, "test20 case 3 failed\n"); - FAILED(buf[4] != 7, "test20 case 4 failed\n"); + FAILED(buf[2] != 60, "test17 case 2 failed\n"); + FAILED(buf[3] != 17, "test17 case 3 failed\n"); + FAILED(buf[4] != 7, "test17 case 4 failed\n"); sljit_free_code(code.code, NULL); @@ -1858,13 +1635,13 @@ static void test20(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func3(1234, 5678, 9012) != 15924, "test20 case 5 failed\n"); + FAILED(code.func3(1234, 5678, 9012) != 15924, "test17 case 5 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test21(void) +static void test18(void) { /* Test set context. The parts of the jit code can be separated in the memory. 
*/ executable_code code1; @@ -1876,7 +1653,7 @@ static void test21(void) sljit_sw buf[4]; if (verbose) - printf("Run test21\n"); + printf("Run test18\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 9; @@ -1922,16 +1699,16 @@ static void test21(void) sljit_set_jump_addr(addr, SLJIT_FUNC_UADDR(code2.code), executable_offset); - FAILED(code1.func1((sljit_sw)&buf) != 19, "test21 case 1 failed\n"); - FAILED(buf[2] != -16, "test21 case 2 failed\n"); - FAILED(buf[3] != 100, "test21 case 3 failed\n"); + FAILED(code1.func1((sljit_sw)&buf) != 19, "test18 case 1 failed\n"); + FAILED(buf[2] != -16, "test18 case 2 failed\n"); + FAILED(buf[3] != 100, "test18 case 3 failed\n"); sljit_free_code(code1.code, NULL); sljit_free_code(code2.code, NULL); successful_tests++; } -static void test22(void) +static void test19(void) { /* Test simple byte and half-int data transfers. */ executable_code code; @@ -1941,7 +1718,7 @@ static void test22(void) sljit_s8 bbuf[5]; if (verbose) - printf("Run test22\n"); + printf("Run test19\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; @@ -1963,7 +1740,7 @@ static void test22(void) bbuf[3] = 0; bbuf[4] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 3, 3, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 3, 3, 0, 0, 0); /* sbuf[0] */ sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_IMM, -13); @@ -2023,30 +1800,30 @@ static void test22(void) sljit_free_compiler(compiler); code.func3((sljit_sw)&buf, (sljit_sw)&sbuf, (sljit_sw)&bbuf); - FAILED(buf[0] != -9, "test22 case 1 failed\n"); - FAILED(buf[1] != -56, "test22 case 2 failed\n"); - FAILED(buf[2] != 0, "test22 case 3 failed\n"); - FAILED(buf[3] != 0, "test22 case 4 failed\n"); + FAILED(buf[0] != -9, "test19 case 1 failed\n"); + FAILED(buf[1] != -56, "test19 case 2 failed\n"); + FAILED(buf[2] != 0, "test19 case 3 failed\n"); + FAILED(buf[3] != 0, "test19 case 4 failed\n"); - FAILED(sbuf[0] != -13, "test22 case 5 
failed\n"); - FAILED(sbuf[1] != 0x1234, "test22 case 6 failed\n"); - FAILED(sbuf[3] != 0x1234, "test22 case 7 failed\n"); - FAILED(sbuf[4] != 8000, "test22 case 8 failed\n"); - FAILED(sbuf[5] != -9317, "test22 case 9 failed\n"); - FAILED(sbuf[6] != -9317, "test22 case 10 failed\n"); - FAILED(sbuf[7] != -8888, "test22 case 11 failed\n"); - FAILED(sbuf[8] != -8888, "test22 case 12 failed\n"); + FAILED(sbuf[0] != -13, "test19 case 5 failed\n"); + FAILED(sbuf[1] != 0x1234, "test19 case 6 failed\n"); + FAILED(sbuf[3] != 0x1234, "test19 case 7 failed\n"); + FAILED(sbuf[4] != 8000, "test19 case 8 failed\n"); + FAILED(sbuf[5] != -9317, "test19 case 9 failed\n"); + FAILED(sbuf[6] != -9317, "test19 case 10 failed\n"); + FAILED(sbuf[7] != -8888, "test19 case 11 failed\n"); + FAILED(sbuf[8] != -8888, "test19 case 12 failed\n"); - FAILED(bbuf[0] != -45, "test22 case 13 failed\n"); - FAILED(bbuf[1] != 0x12, "test22 case 14 failed\n"); - FAILED(bbuf[3] != -56, "test22 case 15 failed\n"); - FAILED(bbuf[4] != 4, "test22 case 16 failed\n"); + FAILED(bbuf[0] != -45, "test19 case 13 failed\n"); + FAILED(bbuf[1] != 0x12, "test19 case 14 failed\n"); + FAILED(bbuf[3] != -56, "test19 case 15 failed\n"); + FAILED(bbuf[4] != 4, "test19 case 16 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test23(void) +static void test20(void) { /* Test 32 bit / 64 bit signed / unsigned int transfer and conversion. This test has do real things on 64 bit systems, but works on 32 bit systems as well. 
*/ @@ -2058,14 +1835,10 @@ static void test23(void) sljit_s32 asint; sljit_u8 asbytes[4]; } u; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw garbage = SLJIT_W(0x1234567812345678); -#else - sljit_sw garbage = 0x12345678; -#endif + sljit_sw garbage = WCONST(0x1234567812345678, 0x12345678); if (verbose) - printf("Run test23\n"); + printf("Run test20\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; @@ -2140,37 +1913,37 @@ static void test23(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func2((sljit_sw)&buf, (sljit_sw)&ibuf) != -13, "test23 case 1 failed\n"); - FAILED(buf[0] != -5791, "test23 case 2 failed\n"); - FAILED(buf[1] != 43579, "test23 case 3 failed\n"); - FAILED(buf[2] != 658923, "test23 case 4 failed\n"); - FAILED(buf[3] != 0x0f00f00, "test23 case 5 failed\n"); - FAILED(buf[4] != 0x0f00f00, "test23 case 6 failed\n"); - FAILED(buf[5] != 80, "test23 case 7 failed\n"); - FAILED(buf[6] != 0x123456, "test23 case 8 failed\n"); - FAILED(buf[7] != (sljit_sw)&buf[5], "test23 case 9 failed\n"); - FAILED(buf[8] != (sljit_sw)&buf[5] + 6, "test23 case 10 failed\n"); + FAILED(code.func2((sljit_sw)&buf, (sljit_sw)&ibuf) != -13, "test20 case 1 failed\n"); + FAILED(buf[0] != -5791, "test20 case 2 failed\n"); + FAILED(buf[1] != 43579, "test20 case 3 failed\n"); + FAILED(buf[2] != 658923, "test20 case 4 failed\n"); + FAILED(buf[3] != 0x0f00f00, "test20 case 5 failed\n"); + FAILED(buf[4] != 0x0f00f00, "test20 case 6 failed\n"); + FAILED(buf[5] != 80, "test20 case 7 failed\n"); + FAILED(buf[6] != 0x123456, "test20 case 8 failed\n"); + FAILED(buf[7] != (sljit_sw)&buf[5], "test20 case 9 failed\n"); + FAILED(buf[8] != (sljit_sw)&buf[5] + 6, "test20 case 10 failed\n"); - FAILED(ibuf[0] != 34567, "test23 case 11 failed\n"); - FAILED(ibuf[1] != -7654, "test23 case 12 failed\n"); + FAILED(ibuf[0] != 34567, "test20 case 11 failed\n"); + FAILED(ibuf[1] != -7654, "test20 case 12 failed\n"); u.asint = ibuf[4]; #if 
(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) - FAILED(u.asbytes[0] != 0x78, "test23 case 13 failed\n"); - FAILED(u.asbytes[1] != 0x56, "test23 case 14 failed\n"); - FAILED(u.asbytes[2] != 0x34, "test23 case 15 failed\n"); - FAILED(u.asbytes[3] != 0x12, "test23 case 16 failed\n"); + FAILED(u.asbytes[0] != 0x78, "test20 case 13 failed\n"); + FAILED(u.asbytes[1] != 0x56, "test20 case 14 failed\n"); + FAILED(u.asbytes[2] != 0x34, "test20 case 15 failed\n"); + FAILED(u.asbytes[3] != 0x12, "test20 case 16 failed\n"); #else - FAILED(u.asbytes[0] != 0x12, "test23 case 13 failed\n"); - FAILED(u.asbytes[1] != 0x34, "test23 case 14 failed\n"); - FAILED(u.asbytes[2] != 0x56, "test23 case 15 failed\n"); - FAILED(u.asbytes[3] != 0x78, "test23 case 16 failed\n"); + FAILED(u.asbytes[0] != 0x12, "test20 case 13 failed\n"); + FAILED(u.asbytes[1] != 0x34, "test20 case 14 failed\n"); + FAILED(u.asbytes[2] != 0x56, "test20 case 15 failed\n"); + FAILED(u.asbytes[3] != 0x78, "test20 case 16 failed\n"); #endif sljit_free_code(code.code, NULL); successful_tests++; } -static void test24(void) +static void test21(void) { /* Some complicated addressing modes. */ executable_code code; @@ -2180,7 +1953,7 @@ static void test24(void) sljit_s8 bbuf[7]; if (verbose) - printf("Run test24\n"); + printf("Run test21\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -2208,7 +1981,7 @@ static void test24(void) bbuf[5] = 0; bbuf[6] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 3, 3, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 3, 3, 0, 0, 0); /* Nothing should be updated. 
*/ /* sbuf[1] */ @@ -2261,12 +2034,12 @@ static void test24(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&bbuf + 0x7fff7fff + 5 * (sljit_sw)sizeof(sljit_s8)); /* bbuf[5] */ sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_MEM1(SLJIT_R0), -0x7fff7fff, SLJIT_MEM1(SLJIT_R0), -0x7fff8000); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&bbuf - SLJIT_W(0x123456123456)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&bbuf - SLJIT_W(0x123456123456)); /* bbuf[6] */ sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_MEM1(SLJIT_R0), SLJIT_W(0x123456123456) + 6 * sizeof(sljit_s8), SLJIT_MEM1(SLJIT_R1), SLJIT_W(0x123456123456)); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ sljit_emit_return_void(compiler); @@ -2275,40 +2048,40 @@ static void test24(void) sljit_free_compiler(compiler); code.func3((sljit_sw)&buf, (sljit_sw)&sbuf, (sljit_sw)&bbuf); - FAILED(buf[2] != 176366, "test24 case 1 failed\n"); - FAILED(buf[3] != 64, "test24 case 2 failed\n"); - FAILED(buf[4] != -100, "test24 case 3 failed\n"); - FAILED(buf[5] != 100567, "test24 case 4 failed\n"); - FAILED(buf[6] != 952467, "test24 case 5 failed\n"); - FAILED(buf[7] != 952467, "test24 case 6 failed\n"); - FAILED(buf[8] != 952467 * 2, "test24 case 7 failed\n"); + FAILED(buf[2] != 176366, "test21 case 1 failed\n"); + FAILED(buf[3] != 64, "test21 case 2 failed\n"); + FAILED(buf[4] != -100, "test21 case 3 failed\n"); + FAILED(buf[5] != 100567, "test21 case 4 failed\n"); + FAILED(buf[6] != 952467, "test21 case 5 failed\n"); + FAILED(buf[7] != 952467, "test21 case 6 failed\n"); + FAILED(buf[8] != 952467 * 2, "test21 case 7 failed\n"); - FAILED(sbuf[1] != 30000, "test24 case 8 failed\n"); - FAILED(sbuf[2] != -12345, "test24 case 9 failed\n"); - FAILED(sbuf[4] != sizeof(sljit_s16), "test24 case 10 failed\n"); + FAILED(sbuf[1] != 30000, "test21 case 8 failed\n"); + 
FAILED(sbuf[2] != -12345, "test21 case 9 failed\n"); + FAILED(sbuf[4] != sizeof(sljit_s16), "test21 case 10 failed\n"); - FAILED(bbuf[1] != -128, "test24 case 11 failed\n"); - FAILED(bbuf[2] != 99, "test24 case 12 failed\n"); - FAILED(bbuf[4] != 99, "test24 case 13 failed\n"); - FAILED(bbuf[5] != 99, "test24 case 14 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(bbuf[6] != -128, "test24 case 15 failed\n"); + FAILED(bbuf[1] != -128, "test21 case 11 failed\n"); + FAILED(bbuf[2] != 99, "test21 case 12 failed\n"); + FAILED(bbuf[4] != 99, "test21 case 13 failed\n"); + FAILED(bbuf[5] != 99, "test21 case 14 failed\n"); +#if IS_64BIT + FAILED(bbuf[6] != -128, "test21 case 15 failed\n"); #endif sljit_free_code(code.code, NULL); successful_tests++; } -static void test25(void) +static void test22(void) { -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT /* 64 bit loads. */ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); sljit_sw buf[14]; if (verbose) - printf("Run test25\n"); + printf("Run test22\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 7; @@ -2326,7 +2099,7 @@ static void test25(void) buf[12] = 0; buf[13] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 1, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 1, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 1 * sizeof(sljit_sw), SLJIT_IMM, 0x7fff); @@ -2350,27 +2123,27 @@ static void test25(void) sljit_free_compiler(compiler); code.func1((sljit_sw)&buf); - FAILED(buf[0] != 0, "test25 case 1 failed\n"); - FAILED(buf[1] != 0x7fff, "test25 case 2 failed\n"); - FAILED(buf[2] != -0x8000, "test25 case 3 failed\n"); - FAILED(buf[3] != 0x7fffffff, "test25 case 4 failed\n"); - FAILED(buf[4] != SLJIT_W(-0x80000000), "test25 case 5 failed\n"); - FAILED(buf[5] != 
SLJIT_W(0x1234567887654321), "test25 case 6 failed\n"); - FAILED(buf[6] != SLJIT_W(0xff80000000), "test25 case 7 failed\n"); - FAILED(buf[7] != SLJIT_W(0x3ff0000000), "test25 case 8 failed\n"); - FAILED((sljit_uw)buf[8] != SLJIT_W(0xfffffff800100000), "test25 case 9 failed\n"); - FAILED((sljit_uw)buf[9] != SLJIT_W(0xfffffff80010f000), "test25 case 10 failed\n"); - FAILED(buf[10] != SLJIT_W(0x07fff00000008001), "test25 case 11 failed\n"); - FAILED(buf[11] != SLJIT_W(0x07fff00080010000), "test25 case 12 failed\n"); - FAILED(buf[12] != SLJIT_W(0x07fff00080018001), "test25 case 13 failed\n"); - FAILED(buf[13] != SLJIT_W(0x07fff00ffff00000), "test25 case 14 failed\n"); + FAILED(buf[0] != 0, "test22 case 1 failed\n"); + FAILED(buf[1] != 0x7fff, "test22 case 2 failed\n"); + FAILED(buf[2] != -0x8000, "test22 case 3 failed\n"); + FAILED(buf[3] != 0x7fffffff, "test22 case 4 failed\n"); + FAILED(buf[4] != SLJIT_W(-0x80000000), "test22 case 5 failed\n"); + FAILED(buf[5] != SLJIT_W(0x1234567887654321), "test22 case 6 failed\n"); + FAILED(buf[6] != SLJIT_W(0xff80000000), "test22 case 7 failed\n"); + FAILED(buf[7] != SLJIT_W(0x3ff0000000), "test22 case 8 failed\n"); + FAILED((sljit_uw)buf[8] != SLJIT_W(0xfffffff800100000), "test22 case 9 failed\n"); + FAILED((sljit_uw)buf[9] != SLJIT_W(0xfffffff80010f000), "test22 case 10 failed\n"); + FAILED(buf[10] != SLJIT_W(0x07fff00000008001), "test22 case 11 failed\n"); + FAILED(buf[11] != SLJIT_W(0x07fff00080010000), "test22 case 12 failed\n"); + FAILED(buf[12] != SLJIT_W(0x07fff00080018001), "test22 case 13 failed\n"); + FAILED(buf[13] != SLJIT_W(0x07fff00ffff00000), "test22 case 14 failed\n"); sljit_free_code(code.code, NULL); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ successful_tests++; } -static void test26(void) +static void test23(void) { /* Aligned access without aligned offsets. 
*/ executable_code code; @@ -2380,7 +2153,7 @@ static void test26(void) sljit_f64 dbuf[4]; if (verbose) - printf("Run test26\n"); + printf("Run test23\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -2399,7 +2172,7 @@ static void test26(void) dbuf[2] = 0.0; dbuf[3] = -4.0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 3, 3, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 3, 3, 0, 0, 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 3); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, 1); @@ -2438,28 +2211,28 @@ static void test26(void) code.func3((sljit_sw)&buf, (sljit_sw)&ibuf, (sljit_sw)&dbuf); - FAILED(buf[1] != -689, "test26 case 1 failed\n"); - FAILED(buf[2] != -16, "test26 case 2 failed\n"); - FAILED(ibuf[1] != -2789, "test26 case 3 failed\n"); - FAILED(ibuf[2] != -18, "test26 case 4 failed\n"); + FAILED(buf[1] != -689, "test23 case 1 failed\n"); + FAILED(buf[2] != -16, "test23 case 2 failed\n"); + FAILED(ibuf[1] != -2789, "test23 case 3 failed\n"); + FAILED(ibuf[2] != -18, "test23 case 4 failed\n"); if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - FAILED(dbuf[1] != 5.75, "test26 case 5 failed\n"); - FAILED(dbuf[2] != 11.5, "test26 case 6 failed\n"); - FAILED(dbuf[3] != -2.875, "test26 case 7 failed\n"); + FAILED(dbuf[1] != 5.75, "test23 case 5 failed\n"); + FAILED(dbuf[2] != 11.5, "test23 case 6 failed\n"); + FAILED(dbuf[3] != -2.875, "test23 case 7 failed\n"); } sljit_free_code(code.code, NULL); successful_tests++; } -static void test27(void) +static void test24(void) { #define SET_NEXT_BYTE(type) \ cond_set(compiler, SLJIT_R2, 0, type); \ sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 1, SLJIT_R2, 0); \ sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT #define RESULT(i) i #else #define RESULT(i) (3 - i) @@ -2479,7 +2252,7 @@ static void test27(void) 
SLJIT_ASSERT(shift_reg >= SLJIT_R2 && shift_reg <= SLJIT_R3); if (verbose) - printf("Run test27\n"); + printf("Run test24\n"); for (i = 0; i < sizeof(buf); ++i) buf[i] = 10; @@ -2487,7 +2260,7 @@ static void test27(void) FAILED(!compiler, "cannot create compiler\n"); /* 3 arguments passed, 3 arguments used. */ - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 4, 3, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 4, 3, 0, 0, 0); /* buf[0] */ sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); @@ -2650,7 +2423,7 @@ static void test27(void) sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R0, 0, SLJIT_IMM, 0x1234); SET_NEXT_BYTE(SLJIT_SIG_LESS); /* buf[38] */ -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x12300000000) - 43); #else sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, -43); @@ -2670,54 +2443,54 @@ static void test27(void) code.func1((sljit_sw)&buf); - FAILED(buf[0] != RESULT(1), "test27 case 1 failed\n"); - FAILED(buf[1] != RESULT(2), "test27 case 2 failed\n"); - FAILED(buf[2] != 2, "test27 case 3 failed\n"); - FAILED(buf[3] != 1, "test27 case 4 failed\n"); - FAILED(buf[4] != RESULT(1), "test27 case 5 failed\n"); - FAILED(buf[5] != RESULT(2), "test27 case 6 failed\n"); - FAILED(buf[6] != 2, "test27 case 7 failed\n"); - FAILED(buf[7] != 1, "test27 case 8 failed\n"); + FAILED(buf[0] != RESULT(1), "test24 case 1 failed\n"); + FAILED(buf[1] != RESULT(2), "test24 case 2 failed\n"); + FAILED(buf[2] != 2, "test24 case 3 failed\n"); + FAILED(buf[3] != 1, "test24 case 4 failed\n"); + FAILED(buf[4] != RESULT(1), "test24 case 5 failed\n"); + FAILED(buf[5] != RESULT(2), "test24 case 6 failed\n"); + FAILED(buf[6] != 2, "test24 case 7 failed\n"); + FAILED(buf[7] != 1, "test24 case 8 failed\n"); - FAILED(buf[8] != 2, "test27 case 9 failed\n"); - FAILED(buf[9] != 1, "test27 case 10 failed\n"); - 
FAILED(buf[10] != 2, "test27 case 11 failed\n"); - FAILED(buf[11] != 1, "test27 case 12 failed\n"); - FAILED(buf[12] != 1, "test27 case 13 failed\n"); - FAILED(buf[13] != 2, "test27 case 14 failed\n"); - FAILED(buf[14] != 2, "test27 case 15 failed\n"); - FAILED(buf[15] != 1, "test27 case 16 failed\n"); - FAILED(buf[16] != 1, "test27 case 17 failed\n"); - FAILED(buf[17] != 2, "test27 case 18 failed\n"); - FAILED(buf[18] != 1, "test27 case 19 failed\n"); - FAILED(buf[19] != 1, "test27 case 20 failed\n"); - FAILED(buf[20] != 1, "test27 case 21 failed\n"); - FAILED(buf[21] != 2, "test27 case 22 failed\n"); + FAILED(buf[8] != 2, "test24 case 9 failed\n"); + FAILED(buf[9] != 1, "test24 case 10 failed\n"); + FAILED(buf[10] != 2, "test24 case 11 failed\n"); + FAILED(buf[11] != 1, "test24 case 12 failed\n"); + FAILED(buf[12] != 1, "test24 case 13 failed\n"); + FAILED(buf[13] != 2, "test24 case 14 failed\n"); + FAILED(buf[14] != 2, "test24 case 15 failed\n"); + FAILED(buf[15] != 1, "test24 case 16 failed\n"); + FAILED(buf[16] != 1, "test24 case 17 failed\n"); + FAILED(buf[17] != 2, "test24 case 18 failed\n"); + FAILED(buf[18] != 1, "test24 case 19 failed\n"); + FAILED(buf[19] != 1, "test24 case 20 failed\n"); + FAILED(buf[20] != 1, "test24 case 21 failed\n"); + FAILED(buf[21] != 2, "test24 case 22 failed\n"); - FAILED(buf[22] != RESULT(1), "test27 case 23 failed\n"); - FAILED(buf[23] != RESULT(2), "test27 case 24 failed\n"); - FAILED(buf[24] != 2, "test27 case 25 failed\n"); - FAILED(buf[25] != 1, "test27 case 26 failed\n"); + FAILED(buf[22] != RESULT(1), "test24 case 23 failed\n"); + FAILED(buf[23] != RESULT(2), "test24 case 24 failed\n"); + FAILED(buf[24] != 2, "test24 case 25 failed\n"); + FAILED(buf[25] != 1, "test24 case 26 failed\n"); - FAILED(buf[26] != 5, "test27 case 27 failed\n"); - FAILED(buf[27] != 9, "test27 case 28 failed\n"); + FAILED(buf[26] != 5, "test24 case 27 failed\n"); + FAILED(buf[27] != 9, "test24 case 28 failed\n"); - FAILED(buf[28] != 2, "test27 
case 29 failed\n"); - FAILED(buf[29] != 1, "test27 case 30 failed\n"); + FAILED(buf[28] != 2, "test24 case 29 failed\n"); + FAILED(buf[29] != 1, "test24 case 30 failed\n"); - FAILED(buf[30] != 1, "test27 case 31 failed\n"); - FAILED(buf[31] != 1, "test27 case 32 failed\n"); - FAILED(buf[32] != 1, "test27 case 33 failed\n"); - FAILED(buf[33] != 1, "test27 case 34 failed\n"); + FAILED(buf[30] != 1, "test24 case 31 failed\n"); + FAILED(buf[31] != 1, "test24 case 32 failed\n"); + FAILED(buf[32] != 1, "test24 case 33 failed\n"); + FAILED(buf[33] != 1, "test24 case 34 failed\n"); - FAILED(buf[34] != 1, "test27 case 35 failed\n"); - FAILED(buf[35] != 0, "test27 case 36 failed\n"); + FAILED(buf[34] != 1, "test24 case 35 failed\n"); + FAILED(buf[35] != 0, "test24 case 36 failed\n"); - FAILED(buf[36] != 2, "test27 case 37 failed\n"); - FAILED(buf[37] != 1, "test27 case 38 failed\n"); - FAILED(buf[38] != 2, "test27 case 39 failed\n"); - FAILED(buf[39] != 1, "test27 case 40 failed\n"); - FAILED(buf[40] != 10, "test27 case 41 failed\n"); + FAILED(buf[36] != 2, "test24 case 37 failed\n"); + FAILED(buf[37] != 1, "test24 case 38 failed\n"); + FAILED(buf[38] != 2, "test24 case 39 failed\n"); + FAILED(buf[39] != 1, "test24 case 40 failed\n"); + FAILED(buf[40] != 10, "test24 case 41 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; @@ -2725,7 +2498,7 @@ static void test27(void) #undef RESULT } -static void test28(void) +static void test25(void) { /* Test mov. 
*/ executable_code code; @@ -2736,7 +2509,7 @@ static void test28(void) sljit_sw buf[5]; if (verbose) - printf("Run test28\n"); + printf("Run test25\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -2780,17 +2553,17 @@ static void test28(void) sljit_free_compiler(compiler); - FAILED(code.func1((sljit_sw)&buf) != 8, "test28 case 1 failed\n"); - FAILED(buf[1] != -1872, "test28 case 2 failed\n"); - FAILED(buf[2] != 1, "test28 case 3 failed\n"); - FAILED(buf[3] != 2, "test28 case 4 failed\n"); - FAILED(buf[4] != (sljit_sw)label_addr, "test28 case 5 failed\n"); + FAILED(code.func1((sljit_sw)&buf) != 8, "test25 case 1 failed\n"); + FAILED(buf[1] != -1872, "test25 case 2 failed\n"); + FAILED(buf[2] != 1, "test25 case 3 failed\n"); + FAILED(buf[3] != 2, "test25 case 4 failed\n"); + FAILED(buf[4] != (sljit_sw)label_addr, "test25 case 5 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test29(void) +static void test26(void) { /* Test signed/unsigned bytes and halfs. 
*/ executable_code code; @@ -2799,13 +2572,13 @@ static void test29(void) sljit_s32 i; if (verbose) - printf("Run test29\n"); + printf("Run test26\n"); for (i = 0; i < 25; i++) buf[i] = 0; FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 5, 5, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 5, 5, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_IMM, -187); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); @@ -2825,7 +2598,7 @@ static void test29(void) sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R4, 0, SLJIT_IMM, 0x9cb0a6); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_uw), SLJIT_R4, 0); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-3580429715)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_uw), SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-100722768662)); @@ -2834,7 +2607,7 @@ static void test29(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_uw), SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, SLJIT_W(0xcef97a70b5)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_uw), SLJIT_R4, 0); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -187); sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_R1, 0); @@ -2862,7 +2635,7 @@ static void test29(void) sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R4, 0, SLJIT_R3, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_uw), SLJIT_R4, 0); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(-3580429715)); 
sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R1, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 20 * sizeof(sljit_uw), SLJIT_R0, 0); @@ -2875,7 +2648,7 @@ static void test29(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_W(0xcef97a70b5)); sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_R3, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_uw), SLJIT_R4, 0); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 0x9faa5); sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_S2, 0, SLJIT_S2, 0); @@ -2888,45 +2661,45 @@ static void test29(void) sljit_free_compiler(compiler); code.func1((sljit_sw)&buf); - FAILED(buf[0] != 69, "test29 case 1 failed\n"); - FAILED(buf[1] != -93, "test29 case 2 failed\n"); - FAILED(buf[2] != 200, "test29 case 3 failed\n"); - FAILED(buf[3] != 0xe5, "test29 case 4 failed\n"); - FAILED(buf[4] != 19640, "test29 case 5 failed\n"); - FAILED(buf[5] != -31005, "test29 case 6 failed\n"); - FAILED(buf[6] != 52646, "test29 case 7 failed\n"); - FAILED(buf[7] != 0xb0a6, "test29 case 8 failed\n"); + FAILED(buf[0] != 69, "test26 case 1 failed\n"); + FAILED(buf[1] != -93, "test26 case 2 failed\n"); + FAILED(buf[2] != 200, "test26 case 3 failed\n"); + FAILED(buf[3] != 0xe5, "test26 case 4 failed\n"); + FAILED(buf[4] != 19640, "test26 case 5 failed\n"); + FAILED(buf[5] != -31005, "test26 case 6 failed\n"); + FAILED(buf[6] != 52646, "test26 case 7 failed\n"); + FAILED(buf[7] != 0xb0a6, "test26 case 8 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(buf[8] != SLJIT_W(714537581), "test29 case 9 failed\n"); - FAILED(buf[9] != SLJIT_W(-1938520854), "test29 case 10 failed\n"); - FAILED(buf[10] != SLJIT_W(3236202668), "test29 case 11 failed\n"); - FAILED(buf[11] != SLJIT_W(0xf97a70b5), "test29 case 12 failed\n"); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#if IS_64BIT + 
FAILED(buf[8] != SLJIT_W(714537581), "test26 case 9 failed\n"); + FAILED(buf[9] != SLJIT_W(-1938520854), "test26 case 10 failed\n"); + FAILED(buf[10] != SLJIT_W(3236202668), "test26 case 11 failed\n"); + FAILED(buf[11] != SLJIT_W(0xf97a70b5), "test26 case 12 failed\n"); +#endif /* IS_64BIT */ - FAILED(buf[12] != 69, "test29 case 13 failed\n"); - FAILED(buf[13] != -93, "test29 case 14 failed\n"); - FAILED(buf[14] != 200, "test29 case 15 failed\n"); - FAILED(buf[15] != 0xe5, "test29 case 16 failed\n"); - FAILED(buf[16] != 19640, "test29 case 17 failed\n"); - FAILED(buf[17] != -31005, "test29 case 18 failed\n"); - FAILED(buf[18] != 52646, "test29 case 19 failed\n"); - FAILED(buf[19] != 0xb0a6, "test29 case 20 failed\n"); + FAILED(buf[12] != 69, "test26 case 13 failed\n"); + FAILED(buf[13] != -93, "test26 case 14 failed\n"); + FAILED(buf[14] != 200, "test26 case 15 failed\n"); + FAILED(buf[15] != 0xe5, "test26 case 16 failed\n"); + FAILED(buf[16] != 19640, "test26 case 17 failed\n"); + FAILED(buf[17] != -31005, "test26 case 18 failed\n"); + FAILED(buf[18] != 52646, "test26 case 19 failed\n"); + FAILED(buf[19] != 0xb0a6, "test26 case 20 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(buf[20] != SLJIT_W(714537581), "test29 case 21 failed\n"); - FAILED(buf[21] != SLJIT_W(-1938520854), "test29 case 22 failed\n"); - FAILED(buf[22] != SLJIT_W(3236202668), "test29 case 23 failed\n"); - FAILED(buf[23] != SLJIT_W(0xf97a70b5), "test29 case 24 failed\n"); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#if IS_64BIT + FAILED(buf[20] != SLJIT_W(714537581), "test26 case 21 failed\n"); + FAILED(buf[21] != SLJIT_W(-1938520854), "test26 case 22 failed\n"); + FAILED(buf[22] != SLJIT_W(3236202668), "test26 case 23 failed\n"); + FAILED(buf[23] != SLJIT_W(0xf97a70b5), "test26 case 24 failed\n"); +#endif /* IS_64BIT */ - FAILED(buf[24] != -91, "test29 case 25 failed\n"); + FAILED(buf[24] != -91, "test26 case 25 failed\n"); sljit_free_code(code.code, NULL); 
successful_tests++; } -static void test30(void) +static void test27(void) { /* Test unused results. */ executable_code code; @@ -2934,18 +2707,18 @@ static void test30(void) sljit_sw buf[1]; if (verbose) - printf("Run test30\n"); + printf("Run test27\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 5, 5, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 5, 5, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 1); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 1); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, 1); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_IMM, SLJIT_W(-0x123ffffffff)); #else sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_IMM, 1); @@ -2981,29 +2754,24 @@ static void test30(void) sljit_free_compiler(compiler); code.func1((sljit_sw)&buf); - FAILED(buf[0] != 9, "test30 case 1 failed\n"); + FAILED(buf[0] != 9, "test27 case 1 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test31(void) +static void test28(void) { /* Integer mul and set flags. 
*/ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); sljit_sw buf[12]; sljit_s32 i; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw big_word = SLJIT_W(0x7fffffff00000000); - sljit_sw big_word2 = SLJIT_W(0x7fffffff00000012); -#else - sljit_sw big_word = 0x7fffffff; - sljit_sw big_word2 = 0x00000012; -#endif + sljit_sw big_word = WCONST(0x7fffffff00000000, 0x7fffffff); + sljit_sw big_word2 = WCONST(0x7fffffff00000012, 0x00000012); if (verbose) - printf("Run test31\n"); + printf("Run test28\n"); for (i = 0; i < 12; i++) buf[i] = 3; @@ -3011,7 +2779,7 @@ static void test31(void) FAILED(!compiler, "cannot create compiler\n"); /* buf[0] */ - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 5, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 5, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); sljit_emit_op2u(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_IMM, -45); cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_NOT_OVERFLOW); @@ -3063,150 +2831,22 @@ static void test31(void) code.func1((sljit_sw)&buf); - FAILED(buf[0] != 1, "test31 case 1 failed\n"); - FAILED(buf[1] != 2, "test31 case 2 failed\n"); -/* Qemu issues for 64 bit muls. 
*/ -#if !(defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(buf[2] != 1, "test31 case 3 failed\n"); - FAILED(buf[3] != 2, "test31 case 4 failed\n"); -#endif - FAILED(buf[4] != 1, "test31 case 5 failed\n"); - FAILED((buf[5] & (sljit_sw)0xffffffff) != (sljit_sw)0x85540c10, "test31 case 6 failed\n"); - FAILED(buf[6] != 2, "test31 case 7 failed\n"); - FAILED(buf[7] != 1, "test31 case 8 failed\n"); -#if !(defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(buf[8] != 1, "test31 case 9 failed\n"); -#endif - FAILED(buf[9] != -1541, "test31 case 10 failed\n"); + FAILED(buf[0] != 1, "test28 case 1 failed\n"); + FAILED(buf[1] != 2, "test28 case 2 failed\n"); + FAILED(buf[2] != 1, "test28 case 3 failed\n"); + FAILED(buf[3] != 2, "test28 case 4 failed\n"); + FAILED(buf[4] != 1, "test28 case 5 failed\n"); + FAILED((buf[5] & (sljit_sw)0xffffffff) != (sljit_sw)0x85540c10, "test28 case 6 failed\n"); + FAILED(buf[6] != 2, "test28 case 7 failed\n"); + FAILED(buf[7] != 1, "test28 case 8 failed\n"); + FAILED(buf[8] != 1, "test28 case 9 failed\n"); + FAILED(buf[9] != -1541, "test28 case 10 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test32(void) -{ - /* Floating point set flags. 
*/ - executable_code code; - struct sljit_compiler* compiler; - sljit_s32 i; - - sljit_sw buf[16]; - union { - sljit_f64 value; - struct { - sljit_s32 value1; - sljit_s32 value2; - } u; - } dbuf[4]; - - if (verbose) - printf("Run test32\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test32 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - for (i = 0; i < 16; i++) - buf[i] = 5; - - /* Two NaNs */ - dbuf[0].u.value1 = 0x7fffffff; - dbuf[0].u.value2 = 0x7fffffff; - dbuf[1].u.value1 = 0x7fffffff; - dbuf[1].u.value2 = 0x7fffffff; - dbuf[2].value = -13.0; - dbuf[3].value = 27.0; - - SLJIT_ASSERT(sizeof(sljit_f64) == 8 && sizeof(sljit_s32) == 4 && sizeof(dbuf[0]) == 8); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 1, 2, 4, 0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); - /* buf[0] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_UNORDERED); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); - /* buf[1] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_ORDERED); - - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[2] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_UNORDERED); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[3] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_ORDERED); - 
sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[4] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_F_LESS); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[5] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_F_GREATER_EQUAL); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[6] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_F_GREATER); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_LESS_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[7] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_F_LESS_EQUAL); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[8] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_F_EQUAL); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_NOT_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - /* buf[9] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_F_NOT_EQUAL); - - sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR3, 0, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); - /* buf[10] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_UNORDERED); - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); - /* buf[11] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_F_EQUAL); - - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); - /* buf[12] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_ORDERED); - - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | 
SLJIT_SET_UNORDERED, SLJIT_FR3, 0, SLJIT_FR2, 0); - sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), 0); - /* buf[13] */ - cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_UNORDERED); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func2((sljit_sw)&buf, (sljit_sw)&dbuf); - - FAILED(buf[0] != 1, "test32 case 1 failed\n"); - FAILED(buf[1] != 2, "test32 case 2 failed\n"); - FAILED(buf[2] != 2, "test32 case 3 failed\n"); - FAILED(buf[3] != 1, "test32 case 4 failed\n"); - FAILED(buf[4] != 1, "test32 case 5 failed\n"); - FAILED(buf[5] != 2, "test32 case 6 failed\n"); - FAILED(buf[6] != 2, "test32 case 7 failed\n"); - FAILED(buf[7] != 1, "test32 case 8 failed\n"); - FAILED(buf[8] != 2, "test32 case 9 failed\n"); - FAILED(buf[9] != 1, "test32 case 10 failed\n"); - FAILED(buf[10] != 2, "test32 case 11 failed\n"); - FAILED(buf[11] != 1, "test32 case 12 failed\n"); - FAILED(buf[12] != 2, "test32 case 13 failed\n"); - FAILED(buf[13] != 1, "test32 case 14 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test33(void) +static void test29(void) { /* Test setting multiple flags. 
*/ executable_code code; @@ -3215,7 +2855,7 @@ static void test33(void) sljit_sw buf[10]; if (verbose) - printf("Run test33\n"); + printf("Run test29\n"); buf[0] = 3; buf[1] = 3; @@ -3230,7 +2870,7 @@ static void test33(void) FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 3, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 3, 0, 0, 0); /* buf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 20); @@ -3253,7 +2893,7 @@ static void test33(void) sljit_set_label(jump, sljit_emit_label(compiler)); /* buf[4-5] */ -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)SLJIT_W(0x8000000000000000)); #else sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)SLJIT_W(0x80000000)); @@ -3281,7 +2921,7 @@ static void test33(void) sljit_set_label(jump, sljit_emit_label(compiler)); /* buf[8] */ -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)SLJIT_W(0x8000000000000000)); #else sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)SLJIT_W(0x80000000)); @@ -3303,22 +2943,22 @@ static void test33(void) code.func1((sljit_sw)&buf); - FAILED(buf[0] != 0, "test33 case 1 failed\n"); - FAILED(buf[1] != 11, "test33 case 2 failed\n"); - FAILED(buf[2] != 1, "test33 case 3 failed\n"); - FAILED(buf[3] != 45, "test33 case 4 failed\n"); - FAILED(buf[4] != 13, "test33 case 5 failed\n"); - FAILED(buf[5] != 0, "test33 case 6 failed\n"); - FAILED(buf[6] != 0, "test33 case 7 failed\n"); - FAILED(buf[7] != 48, "test33 case 8 failed\n"); - FAILED(buf[8] != 50, "test33 case 9 failed\n"); - FAILED(buf[9] != 1, "test33 case 10 failed\n"); + FAILED(buf[0] != 0, "test29 case 1 failed\n"); + FAILED(buf[1] != 11, "test29 case 2 failed\n"); + FAILED(buf[2] != 1, "test29 case 3 failed\n"); + 
FAILED(buf[3] != 45, "test29 case 4 failed\n"); + FAILED(buf[4] != 13, "test29 case 5 failed\n"); + FAILED(buf[5] != 0, "test29 case 6 failed\n"); + FAILED(buf[6] != 0, "test29 case 7 failed\n"); + FAILED(buf[7] != 48, "test29 case 8 failed\n"); + FAILED(buf[8] != 50, "test29 case 9 failed\n"); + FAILED(buf[9] != 1, "test29 case 10 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test34(void) +static void test30(void) { /* Test fast calls. */ executable_code codeA; @@ -3334,7 +2974,7 @@ static void test34(void) sljit_p buf[2]; if (verbose) - printf("Run test34\n"); + printf("Run test30\n"); buf[0] = 0; buf[1] = 0; @@ -3390,6 +3030,10 @@ static void test34(void) FAILED(!compiler, "cannot create compiler\n"); sljit_set_context(compiler, 0, 1, 5, 5, 0, 0, 2 * sizeof(sljit_p)); + label = sljit_emit_label(compiler); + jump = sljit_emit_jump(compiler, SLJIT_JUMP | SLJIT_REWRITABLE_JUMP); + sljit_set_label(jump, label); + label = sljit_emit_label(compiler); sljit_emit_op0(compiler, SLJIT_ENDBR); sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 10); @@ -3398,6 +3042,7 @@ static void test34(void) codeD.code = sljit_generate_code(compiler); CHECK(compiler); + addr = sljit_get_label_addr(label); sljit_free_compiler(compiler); /* E */ @@ -3407,7 +3052,7 @@ static void test34(void) sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_S0), 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 12); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_p), SLJIT_IMM, SLJIT_FUNC_ADDR(codeD.code)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_p), SLJIT_IMM, (sljit_sw)addr); sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_p)); sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_MEM1(SLJIT_S0), 0); @@ -3430,8 +3075,8 @@ static void test34(void) addr 
= sljit_get_label_addr(label); sljit_free_compiler(compiler); - FAILED(codeF.func1((sljit_sw)&buf) != 40, "test34 case 1 failed\n"); - FAILED(buf[0] != addr - SLJIT_RETURN_ADDRESS_OFFSET, "test34 case 2 failed\n"); + FAILED(codeF.func1((sljit_sw)&buf) != 40, "test30 case 1 failed\n"); + FAILED(buf[0] != addr - SLJIT_RETURN_ADDRESS_OFFSET, "test30 case 2 failed\n"); sljit_free_code(codeA.code, NULL); sljit_free_code(codeB.code, NULL); @@ -3442,7 +3087,7 @@ static void test34(void) successful_tests++; } -static void test35(void) +static void test31(void) { /* More complicated tests for fast calls. */ executable_code codeA; @@ -3457,7 +3102,7 @@ static void test35(void) sljit_p buf[1]; if (verbose) - printf("Run test35\n"); + printf("Run test31\n"); buf[0] = 0; @@ -3512,8 +3157,8 @@ static void test35(void) return_addr = sljit_get_label_addr(label); sljit_free_compiler(compiler); - FAILED(codeC.func0() != 12, "test35 case 1 failed\n"); - FAILED(buf[0] != return_addr - SLJIT_RETURN_ADDRESS_OFFSET, "test35 case 2 failed\n"); + FAILED(codeC.func0() != 12, "test31 case 1 failed\n"); + FAILED(buf[0] != return_addr - SLJIT_RETURN_ADDRESS_OFFSET, "test31 case 2 failed\n"); sljit_free_code(codeA.code, NULL); sljit_free_code(codeB.code, NULL); @@ -3537,7 +3182,7 @@ static void cmp_test(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 } #define TEST_CASES (7 + 10 + 12 + 11 + 4) -static void test36(void) +static void test32(void) { /* Compare instruction. 
*/ executable_code code; @@ -3555,7 +3200,7 @@ static void test36(void) sljit_s32 i; if (verbose) - printf("Run test36\n"); + printf("Run test32\n"); FAILED(!compiler, "cannot create compiler\n"); for (i = 0; i < TEST_CASES; ++i) @@ -3565,7 +3210,7 @@ static void test36(void) data[2] = 43; data[3] = -13; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 3, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 3, 2, 0, 0, 0); sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 13); @@ -3631,7 +3276,7 @@ static void test36(void) cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_IMM, -1, SLJIT_R1, 0); cmp_test(compiler, SLJIT_SIG_GREATER | SLJIT_REWRITABLE_JUMP, SLJIT_R1, 0, SLJIT_IMM, -1); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0xf00000004)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_R1, 0); @@ -3642,7 +3287,7 @@ static void test36(void) sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_R0, 0); cmp_test(compiler, SLJIT_SIG_GREATER | SLJIT_32, SLJIT_R1, 0, SLJIT_IMM, 5); cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_R0, 0, SLJIT_IMM, 5); -#else /* !SLJIT_64BIT_ARCHITECTURE */ +#else /* !IS_64BIT */ sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 4); /* buf[40-43] */ cmp_test(compiler, SLJIT_LESS | SLJIT_32, SLJIT_R0, 0, SLJIT_IMM, 5); @@ -3650,7 +3295,7 @@ static void test36(void) sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)0xf0000004); cmp_test(compiler, SLJIT_SIG_GREATER | SLJIT_32, SLJIT_R0, 0, SLJIT_IMM, 5); cmp_test(compiler, SLJIT_SIG_LESS | SLJIT_32, SLJIT_R0, 0, SLJIT_IMM, 5); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ sljit_emit_return_void(compiler); @@ -3662,7 +3307,7 @@ static void test36(void) for (i = 0; i < 
TEST_CASES; ++i) if (SLJIT_UNLIKELY(buf[i] != compare_buf[i])) { - printf("test36 case %d failed\n", i + 1); + printf("test32 case %d failed\n", i + 1); return; } @@ -3671,13 +3316,13 @@ static void test36(void) } #undef TEST_CASES -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT #define BITN(n) (SLJIT_W(1) << (63 - (n))) -#else /* !SLJIT_64BIT_ARCHITECTURE */ +#else /* !IS_64BIT */ #define BITN(n) (1 << (31 - ((n) & 0x1f))) -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ -static void test37(void) +static void test33(void) { /* Test count leading zeroes. */ executable_code code; @@ -3687,7 +3332,7 @@ static void test37(void) sljit_s32 i; if (verbose) - printf("Run test37\n"); + printf("Run test33\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -3698,7 +3343,7 @@ static void test37(void) buf[2] = 0; buf[4] = BITN(13); - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 2, 3, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 2, 3, 0, 0, 0); /* buf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, BITN(27)); sljit_emit_op1(compiler, SLJIT_CLZ, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); @@ -3749,18 +3394,18 @@ static void test37(void) sljit_free_compiler(compiler); code.func2((sljit_sw)&buf, (sljit_sw)&ibuf); - FAILED(buf[0] != 27, "test37 case 1 failed\n"); - FAILED(buf[1] != WCONST(47, 15), "test37 case 2 failed\n"); - FAILED(buf[2] != WCONST(64, 32), "test37 case 3 failed\n"); - FAILED(buf[3] != 0, "test37 case 4 failed\n"); - FAILED(ibuf[0] != 32, "test37 case 5 failed\n"); - FAILED(buf[4] != 13, "test37 case 6 failed\n"); - FAILED(buf[5] != WCONST(58, 26), "test37 case 7 failed\n"); - FAILED(buf[6] != WCONST(64, 32), "test37 case 8 failed\n"); - FAILED(ibuf[1] != 4, "test37 case 9 failed\n"); - FAILED((buf[7] & (sljit_sw)0xffffffff) != 4, "test37 case 10 failed\n"); - FAILED((buf[8] & (sljit_sw)0xffffffff) != 0, "test37 case 11 failed\n"); - FAILED(ibuf[2] != 8, "test37 case 12 
failed\n"); + FAILED(buf[0] != 27, "test33 case 1 failed\n"); + FAILED(buf[1] != WCONST(47, 15), "test33 case 2 failed\n"); + FAILED(buf[2] != WCONST(64, 32), "test33 case 3 failed\n"); + FAILED(buf[3] != 0, "test33 case 4 failed\n"); + FAILED(ibuf[0] != 32, "test33 case 5 failed\n"); + FAILED(buf[4] != 13, "test33 case 6 failed\n"); + FAILED(buf[5] != WCONST(58, 26), "test33 case 7 failed\n"); + FAILED(buf[6] != WCONST(64, 32), "test33 case 8 failed\n"); + FAILED(ibuf[1] != 4, "test33 case 9 failed\n"); + FAILED((buf[7] & (sljit_sw)0xffffffff) != 4, "test33 case 10 failed\n"); + FAILED((buf[8] & (sljit_sw)0xffffffff) != 0, "test33 case 11 failed\n"); + FAILED(ibuf[2] != 8, "test33 case 12 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; @@ -3768,7 +3413,7 @@ static void test37(void) #undef BITN -static void test38(void) +static void test34(void) { #if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) /* Test stack utility. */ @@ -3785,7 +3430,7 @@ static void test38(void) struct sljit_label* label; if (verbose) - printf("Run test38\n"); + printf("Run test34\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -3845,7 +3490,7 @@ static void test38(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(VOID, P, P), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_free_stack)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(P, P), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_free_stack)); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 4567); @@ -3864,14 +3509,14 @@ static void test38(void) sljit_free_compiler(compiler); /* Just survive this. */ - FAILED(code.func0() != 4567, "test38 case 1 failed\n"); + FAILED(code.func0() != 4567, "test34 case 1 failed\n"); sljit_free_code(code.code, NULL); #endif successful_tests++; } -static void test39(void) +static void test35(void) { /* Test error handling. 
*/ executable_code code; @@ -3879,14 +3524,14 @@ static void test39(void) struct sljit_jump* jump; if (verbose) - printf("Run test39\n"); + printf("Run test35\n"); FAILED(!compiler, "cannot create compiler\n"); /* Such assignment should never happen in a regular program. */ compiler->error = -3967; - SLJIT_ASSERT(sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 6, 0, 32) == -3967); + SLJIT_ASSERT(sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 6, 0, 32) == -3967); SLJIT_ASSERT(sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R1, 0) == -3967); SLJIT_ASSERT(sljit_emit_op0(compiler, SLJIT_NOP) == -3967); SLJIT_ASSERT(sljit_emit_op0(compiler, SLJIT_ENDBR) == -3967); @@ -3911,25 +3556,25 @@ static void test39(void) SLJIT_ASSERT(!compiler->abuf->next && !compiler->abuf->used_size); sljit_set_compiler_memory_error(compiler); - FAILED(sljit_get_compiler_error(compiler) != -3967, "test39 case 1 failed\n"); + FAILED(sljit_get_compiler_error(compiler) != -3967, "test35 case 1 failed\n"); code.code = sljit_generate_code(compiler); - FAILED(sljit_get_compiler_error(compiler) != -3967, "test39 case 2 failed\n"); - FAILED(!!code.code, "test39 case 3 failed\n"); + FAILED(sljit_get_compiler_error(compiler) != -3967, "test35 case 2 failed\n"); + FAILED(!!code.code, "test35 case 3 failed\n"); sljit_free_compiler(compiler); compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - FAILED(sljit_get_compiler_error(compiler) != SLJIT_SUCCESS, "test39 case 4 failed\n"); + FAILED(sljit_get_compiler_error(compiler) != SLJIT_SUCCESS, "test35 case 4 failed\n"); sljit_set_compiler_memory_error(compiler); - FAILED(sljit_get_compiler_error(compiler) != SLJIT_ERR_ALLOC_FAILED, "test39 case 5 failed\n"); + FAILED(sljit_get_compiler_error(compiler) != SLJIT_ERR_ALLOC_FAILED, "test35 case 5 failed\n"); sljit_free_compiler(compiler); successful_tests++; } -static void test40(void) +static void test36(void) { /* Test emit_op_flags. 
*/ executable_code code; @@ -3937,7 +3582,7 @@ static void test40(void) sljit_sw buf[10]; if (verbose) - printf("Run test40\n"); + printf("Run test36\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = -100; @@ -4019,29 +3664,28 @@ static void test40(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func1((sljit_sw)&buf) != 0xbaddead, "test40 case 1 failed\n"); - FAILED(buf[0] != 0x123457, "test40 case 2 failed\n"); - FAILED(buf[1] != 1, "test40 case 3 failed\n"); - FAILED(buf[2] != 0, "test40 case 4 failed\n"); - FAILED(buf[3] != -7, "test40 case 5 failed\n"); - FAILED(buf[4] != 0, "test40 case 6 failed\n"); - FAILED(buf[5] != 0x89, "test40 case 7 failed\n"); - FAILED(buf[6] != 0, "test40 case 8 failed\n"); - FAILED(buf[7] != 1, "test40 case 9 failed\n"); - FAILED(buf[8] != 1, "test40 case 10 failed\n"); - FAILED(buf[9] != 0x123457, "test40 case 11 failed\n"); + FAILED(code.func1((sljit_sw)&buf) != 0xbaddead, "test36 case 1 failed\n"); + FAILED(buf[0] != 0x123457, "test36 case 2 failed\n"); + FAILED(buf[1] != 1, "test36 case 3 failed\n"); + FAILED(buf[2] != 0, "test36 case 4 failed\n"); + FAILED(buf[3] != -7, "test36 case 5 failed\n"); + FAILED(buf[4] != 0, "test36 case 6 failed\n"); + FAILED(buf[5] != 0x89, "test36 case 7 failed\n"); + FAILED(buf[6] != 0, "test36 case 8 failed\n"); + FAILED(buf[7] != 1, "test36 case 9 failed\n"); + FAILED(buf[8] != 1, "test36 case 10 failed\n"); + FAILED(buf[9] != 0x123457, "test36 case 11 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test41(void) +static void test37(void) { /* Test inline assembly. 
*/ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); sljit_s32 i; - sljit_f64 buf[3]; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_u8 inst[16]; #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -4052,7 +3696,7 @@ static void test41(void) #endif if (verbose) - printf("Run test41\n"); + printf("Run test37\n"); #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) SLJIT_ASSERT(sljit_has_cpu_feature(SLJIT_HAS_VIRTUAL_REGISTERS) == 0); @@ -4061,11 +3705,12 @@ static void test41(void) for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_R(i) >= SLJIT_R3 && SLJIT_R(i) <= SLJIT_R8) { - SLJIT_ASSERT(sljit_get_register_index(SLJIT_R(i)) == -1); + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) == -1); continue; } #endif - SLJIT_ASSERT(sljit_get_register_index(SLJIT_R(i)) >= 0 && sljit_get_register_index(SLJIT_R(i)) < 64); + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) >= 0 + && sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) < 64); } FAILED(!compiler, "cannot create compiler\n"); @@ -4076,82 +3721,82 @@ static void test41(void) /* lea SLJIT_RETURN_REG, [SLJIT_S0, SLJIT_S1] */ inst[0] = 0x48; inst[1] = 0x8d; - inst[2] = (sljit_u8)(0x04 | ((sljit_get_register_index(SLJIT_RETURN_REG) & 0x7) << 3)); - inst[3] = (sljit_u8)((sljit_get_register_index(SLJIT_S0) & 0x7) - | ((sljit_get_register_index(SLJIT_S1) & 0x7) << 3)); + inst[2] = (sljit_u8)(0x04 | ((sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) & 0x7) << 3)); + inst[3] = (sljit_u8)((sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) & 0x7) + | ((sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1) & 0x7) << 3)); sljit_emit_op_custom(compiler, inst, 4); #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) /* lea SLJIT_RETURN_REG, [SLJIT_S0, SLJIT_S1] */ inst[0] = 0x48; /* REX_W */ inst[1] = 0x8d; - reg = 
sljit_get_register_index(SLJIT_RETURN_REG); + reg = sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG); inst[2] = (sljit_u8)(0x04 | ((reg & 0x7) << 3)); if (reg > 7) inst[0] |= 0x04; /* REX_R */ - reg = sljit_get_register_index(SLJIT_S0); + reg = sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0); inst[3] = (sljit_u8)(reg & 0x7); if (reg > 7) inst[0] |= 0x01; /* REX_B */ - reg = sljit_get_register_index(SLJIT_S1); + reg = sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1); inst[3] = (sljit_u8)(inst[3] | ((reg & 0x7) << 3)); if (reg > 7) inst[0] |= 0x02; /* REX_X */ sljit_emit_op_custom(compiler, inst, 4); -#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* add rd, rn, rm */ - inst = 0xe0800000 | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 12) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 16) - | (sljit_u32)sljit_get_register_index(SLJIT_S1); + inst = 0xe0800000 | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) << 12) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 16) + | (sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1); sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) /* add rd, rn, rm */ - inst = 0xeb000000 | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 8) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 16) - | (sljit_u32)sljit_get_register_index(SLJIT_S1); + inst = 0xeb000000 | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) << 8) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 16) + | (sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1); sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #elif (defined 
SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) /* add rd, rn, rm */ - inst = 0x8b000000 | (sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 5) - | ((sljit_u32)sljit_get_register_index(SLJIT_S1) << 16); + inst = 0x8b000000 | (sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 5) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1) << 16); sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) /* add rD, rA, rB */ - inst = (31 << 26) | (266 << 1) | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 21) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 16) - | ((sljit_u32)sljit_get_register_index(SLJIT_S1) << 11); + inst = (31 << 26) | (266 << 1) | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) << 21) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 16) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1) << 11); sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) /* addu rd, rs, rt */ - inst = 33 | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 11) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 21) - | ((sljit_u32)sljit_get_register_index(SLJIT_S1) << 16); + inst = 33 | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) << 11) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 21) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1) << 16); sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #elif (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) /* daddu rd, rs, rt */ - inst = 45 | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 11) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 21) - | 
((sljit_u32)sljit_get_register_index(SLJIT_S1) << 16); + inst = 45 | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) << 11) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 21) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1) << 16); sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) /* add rd, rs1, rs2 */ - inst = 0x33 | (0 << 12) | (0 << 25) | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 7) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 15) - | ((sljit_u32)sljit_get_register_index(SLJIT_S1) << 20); - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - /* add rd, rs1, rs2 */ - inst = (0x2u << 30) | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 25) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 14) - | (sljit_u32)sljit_get_register_index(SLJIT_S1); + inst = 0x33 | (0 << 12) | (0 << 25) | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) << 7) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 15) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1) << 20); sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* agrk rd, rs1, rs2 */ inst = (0xb9e8u << 16) - | ((sljit_u32)sljit_get_register_index(SLJIT_RETURN_REG) << 4) - | ((sljit_u32)sljit_get_register_index(SLJIT_S0) << 12) - | (sljit_u32)sljit_get_register_index(SLJIT_S1); + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) << 4) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 12) + | (sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1); sljit_emit_op_custom(compiler, &inst, sizeof(inst)); +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + /* add.d rd, rs1, rs2 */ + inst 
= (0x21u << 15) | (sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_RETURN_REG) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S0) << 5) + | ((sljit_u32)sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_S1) << 10); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); #else inst = 0; sljit_emit_op_custom(compiler, &inst, 0); @@ -4163,116 +3808,17 @@ static void test41(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func2(32, -11) != 21, "test41 case 1 failed\n"); - FAILED(code.func2(1000, 234) != 1234, "test41 case 2 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(code.func2(SLJIT_W(0x20f0a04090c06070), SLJIT_W(0x020f0a04090c0607)) != SLJIT_W(0x22ffaa4499cc6677), "test41 case 3 failed\n"); + FAILED(code.func2(32, -11) != 21, "test37 case 1 failed\n"); + FAILED(code.func2(1000, 234) != 1234, "test37 case 2 failed\n"); +#if IS_64BIT + FAILED(code.func2(SLJIT_W(0x20f0a04090c06070), SLJIT_W(0x020f0a04090c0607)) != SLJIT_W(0x22ffaa4499cc6677), "test37 case 3 failed\n"); #endif sljit_free_code(code.code, NULL); - - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - buf[0] = 13.5; - buf[1] = -2.25; - buf[2] = 0.0; - - compiler = sljit_create_compiler(NULL, NULL); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 0, 1, 2, 0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - /* addsd x, xm */ - inst[0] = 0xf2; - inst[1] = 0x0f; - inst[2] = 0x58; - inst[3] = (sljit_u8)(0xc0 | (sljit_get_float_register_index(SLJIT_FR0) << 3) - | sljit_get_float_register_index(SLJIT_FR1)); - sljit_emit_op_custom(compiler, inst, 4); -#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - /* addsd x, xm */ - if (sljit_get_float_register_index(SLJIT_FR0) > 7 || sljit_get_float_register_index(SLJIT_FR1) > 
7) { - inst[0] = 0; - if (sljit_get_float_register_index(SLJIT_FR0) > 7) - inst[0] |= 0x04; /* REX_R */ - if (sljit_get_float_register_index(SLJIT_FR1) > 7) - inst[0] |= 0x01; /* REX_B */ - inst[1] = 0xf2; - inst[2] = 0x0f; - inst[3] = 0x58; - inst[4] = (sljit_u8)(0xc0 | ((sljit_get_float_register_index(SLJIT_FR0) & 0x7) << 3) - | (sljit_get_float_register_index(SLJIT_FR1) & 0x7)); - sljit_emit_op_custom(compiler, inst, 5); - } - else { - inst[0] = 0xf2; - inst[1] = 0x0f; - inst[2] = 0x58; - inst[3] = (sljit_u8)(0xc0 | (sljit_get_float_register_index(SLJIT_FR0) << 3) - | sljit_get_float_register_index(SLJIT_FR1)); - sljit_emit_op_custom(compiler, inst, 4); - } -#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) - /* vadd.f64 dd, dn, dm */ - inst = 0xee300b00 | (((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) >> 1) << 12) - | (((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) >> 1) << 16) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR1) >> 1); - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) - /* fadd rd, rn, rm */ - inst = 0x1e602800 | (sljit_u32)sljit_get_float_register_index(SLJIT_FR0) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 5) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR1) << 16); - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) - /* fadd frD, frA, frB */ - inst = (63u << 26) | (21u << 1) | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 21) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 16) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR1) << 11); - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) - /* add.d fd, fs, ft */ - inst = (17u << 26) | (17u << 21) | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 6) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) 
<< 11) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR1) << 16); - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) - /* fadd.d rd, rs1, rs2 */ - inst = (0x1u << 25) | (0x7u << 12) | (0x53u) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 7) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 15) - | (sljit_u32)sljit_get_float_register_index(SLJIT_FR1) << 20; - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - /* faddd rd, rs1, rs2 */ - inst = (0x2u << 30) | (0x34u << 19) | (0x42u << 5) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 25) - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 14) - | (sljit_u32)sljit_get_float_register_index(SLJIT_FR1); - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) - /* adbr r1, r2 */ - inst = 0xb31a0000 - | ((sljit_u32)sljit_get_float_register_index(SLJIT_FR0) << 4) - | (sljit_u32)sljit_get_float_register_index(SLJIT_FR1); - sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); -#endif - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - FAILED(buf[2] != 11.25, "test41 case 4 failed\n"); - - sljit_free_code(code.code, NULL); - } - successful_tests++; } -static void test42(void) +static void test38(void) { /* Test long multiply and division. 
*/ executable_code code; @@ -4281,13 +3827,13 @@ static void test42(void) sljit_sw buf[7 + 4 + 8 + 8]; if (verbose) - printf("Run test42\n"); + printf("Run test38\n"); FAILED(!compiler, "cannot create compiler\n"); for (i = 0; i < 7 + 4 + 8 + 8; i++) buf[i] = -1; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 5, 5, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 5, 5, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -0x1fb308a); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0xf50c873); @@ -4297,7 +3843,7 @@ static void test42(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, 0x5a4d0c4); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_IMM, 0x9a3b06d); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT /* buf[7-26] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-0x5dc4f897b8cd67f5)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x3f8b5c026cb088df)); @@ -4367,7 +3913,7 @@ static void test42(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 26 * sizeof(sljit_sw), SLJIT_R1, 0); -#else /* !SLJIT_64BIT_ARCHITECTURE */ +#else /* !IS_64BIT */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -0x58cd67f5); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x3cb088df); sljit_emit_op0(compiler, SLJIT_LMUL_UW); @@ -4431,7 +3977,7 @@ static void test42(void) sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_R1, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 26 * sizeof(sljit_sw), SLJIT_R1, 0); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif /* IS_64BIT */ /* buf[0-6] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R2, 0); @@ -4450,135 +3996,43 @@ static void 
test42(void) code.func1((sljit_sw)&buf); - FAILED(buf[0] != -0x1fb308a, "test42 case 1 failed\n"); - FAILED(buf[1] != 0xf50c873, "test42 case 2 failed\n"); - FAILED(buf[2] != 0x8a0475b, "test42 case 3 failed\n"); - FAILED(buf[3] != 0x9dc849b, "test42 case 4 failed\n"); - FAILED(buf[4] != -0x7c69a35, "test42 case 5 failed\n"); - FAILED(buf[5] != 0x5a4d0c4, "test42 case 6 failed\n"); - FAILED(buf[6] != 0x9a3b06d, "test42 case 7 failed\n"); + FAILED(buf[0] != -0x1fb308a, "test38 case 1 failed\n"); + FAILED(buf[1] != 0xf50c873, "test38 case 2 failed\n"); + FAILED(buf[2] != 0x8a0475b, "test38 case 3 failed\n"); + FAILED(buf[3] != 0x9dc849b, "test38 case 4 failed\n"); + FAILED(buf[4] != -0x7c69a35, "test38 case 5 failed\n"); + FAILED(buf[5] != 0x5a4d0c4, "test38 case 6 failed\n"); + FAILED(buf[6] != 0x9a3b06d, "test38 case 7 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(buf[7] != SLJIT_W(-4388959407985636971), "test42 case 8 failed\n"); - FAILED(buf[8] != SLJIT_W(2901680654366567099), "test42 case 9 failed\n"); - FAILED(buf[9] != SLJIT_W(-4388959407985636971), "test42 case 10 failed\n"); - FAILED(buf[10] != SLJIT_W(-1677173957268872740), "test42 case 11 failed\n"); - FAILED(buf[11] != SLJIT_W(2), "test42 case 12 failed\n"); - FAILED(buf[12] != SLJIT_W(2532236178951865933), "test42 case 13 failed\n"); - FAILED(buf[13] != SLJIT_W(-1), "test42 case 14 failed\n"); - FAILED(buf[14] != SLJIT_W(-2177944059851366166), "test42 case 15 failed\n"); -#else /* !SLJIT_64BIT_ARCHITECTURE */ - FAILED(buf[7] != -1587000939, "test42 case 8 failed\n"); - FAILED(buf[8] != 665003983, "test42 case 9 failed\n"); - FAILED(buf[9] != -1587000939, "test42 case 10 failed\n"); - FAILED(buf[10] != -353198352, "test42 case 11 failed\n"); - FAILED(buf[11] != 2, "test42 case 12 failed\n"); - FAILED(buf[12] != 768706125, "test42 case 13 failed\n"); - FAILED(buf[13] != -1, "test42 case 14 failed\n"); - FAILED(buf[14] != -471654166, "test42 case 15 failed\n"); 
-#endif /* SLJIT_64BIT_ARCHITECTURE */ + FAILED(buf[7] != WCONST(-4388959407985636971, -1587000939), "test38 case 8 failed\n"); + FAILED(buf[8] != WCONST(2901680654366567099, 665003983), "test38 case 9 failed\n"); + FAILED(buf[9] != WCONST(-4388959407985636971, -1587000939), "test38 case 10 failed\n"); + FAILED(buf[10] != WCONST(-1677173957268872740, -353198352), "test38 case 11 failed\n"); + FAILED(buf[11] != 2, "test38 case 12 failed\n"); + FAILED(buf[12] != WCONST(2532236178951865933, 768706125), "test38 case 13 failed\n"); + FAILED(buf[13] != -1, "test38 case 14 failed\n"); + FAILED(buf[14] != WCONST(-2177944059851366166, -471654166), "test38 case 15 failed\n"); - FAILED(buf[15] != 56, "test42 case 16 failed\n"); - FAILED(buf[16] != 58392872, "test42 case 17 failed\n"); - FAILED(buf[17] != -47, "test42 case 18 failed\n"); - FAILED(buf[18] != 35949148, "test42 case 19 failed\n"); + FAILED(buf[15] != 56, "test38 case 16 failed\n"); + FAILED(buf[16] != 58392872, "test38 case 17 failed\n"); + FAILED(buf[17] != -47, "test38 case 18 failed\n"); + FAILED(buf[18] != 35949148, "test38 case 19 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(buf[19] != SLJIT_W(0x3340bfc), "test42 case 20 failed\n"); - FAILED(buf[20] != SLJIT_W(0x3d4af2c543), "test42 case 21 failed\n"); - FAILED(buf[21] != SLJIT_W(-0xaf978), "test42 case 22 failed\n"); - FAILED(buf[22] != SLJIT_W(0xa64ae42b7d6), "test42 case 23 failed\n"); -#else /* !SLJIT_64BIT_ARCHITECTURE */ - FAILED(buf[19] != SLJIT_W(0xda5), "test42 case 20 failed\n"); - FAILED(buf[20] != SLJIT_W(0xb86d0), "test42 case 21 failed\n"); - FAILED(buf[21] != SLJIT_W(-0x6b6e), "test42 case 22 failed\n"); - FAILED(buf[22] != SLJIT_W(0xd357), "test42 case 23 failed\n"); -#endif /* SLJIT_64BIT_ARCHITECTURE */ + FAILED(buf[19] != WCONST(0x3340bfc, 0xda5), "test38 case 20 failed\n"); + FAILED(buf[20] != WCONST(0x3d4af2c543, 0xb86d0), "test38 case 21 failed\n"); + FAILED(buf[21] != WCONST(-0xaf978, 
-0x6b6e), "test38 case 22 failed\n"); + FAILED(buf[22] != WCONST(0xa64ae42b7d6, 0xd357), "test38 case 23 failed\n"); - FAILED(buf[23] != 0x0, "test42 case 24 failed\n"); - FAILED(buf[24] != (sljit_sw)0xf2906b14, "test42 case 25 failed\n"); - FAILED(buf[25] != -0x8, "test42 case 26 failed\n"); - FAILED(buf[26] != -0xa63c923, "test42 case 27 failed\n"); + FAILED(buf[23] != 0x0, "test38 case 24 failed\n"); + FAILED(buf[24] != (sljit_sw)0xf2906b14, "test38 case 25 failed\n"); + FAILED(buf[25] != -0x8, "test38 case 26 failed\n"); + FAILED(buf[26] != -0xa63c923, "test38 case 27 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test43(void) -{ - /* Test floating point compare. */ - executable_code code; - struct sljit_compiler* compiler; - struct sljit_jump* jump; - - union { - sljit_f64 value; - struct { - sljit_u32 value1; - sljit_u32 value2; - } u; - } dbuf[4]; - - if (verbose) - printf("Run test43\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test43 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - dbuf[0].value = 12.125; - /* a NaN */ - dbuf[1].u.value1 = 0x7fffffff; - dbuf[1].u.value2 = 0x7fffffff; - dbuf[2].value = -13.5; - dbuf[3].value = 12.125; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 1, 1, 3, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); - /* dbuf[0] < dbuf[2] -> -2 */ - jump = sljit_emit_fcmp(compiler, SLJIT_F_GREATER_EQUAL, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_F64_SHIFT); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, -2); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); - /* dbuf[0] and dbuf[1] is not NaN -> 5 */ - jump = sljit_emit_fcmp(compiler, SLJIT_UNORDERED, SLJIT_MEM0(), (sljit_sw)&dbuf[1], SLJIT_FR1, 0); - 
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 5); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 11); - /* dbuf[0] == dbuf[3] -> 11 */ - jump = sljit_emit_fcmp(compiler, SLJIT_F_EQUAL, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_FR2, 0); - - /* else -> -17 */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, -17); - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.func1((sljit_sw)&dbuf) != 11, "test43 case 1 failed\n"); - dbuf[3].value = 12; - FAILED(code.func1((sljit_sw)&dbuf) != -17, "test43 case 2 failed\n"); - dbuf[1].value = 0; - FAILED(code.func1((sljit_sw)&dbuf) != 5, "test43 case 3 failed\n"); - dbuf[2].value = 20; - FAILED(code.func1((sljit_sw)&dbuf) != -2, "test43 case 4 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test44(void) +static void test39(void) { /* Test mov. 
*/ executable_code code; @@ -4586,7 +4040,7 @@ static void test44(void) void *buf[5]; if (verbose) - printf("Run test44\n"); + printf("Run test39\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -4620,146 +4074,17 @@ static void test44(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)(buf + 2), "test44 case 1 failed\n"); - FAILED(buf[1] != buf + 2, "test44 case 2 failed\n"); - FAILED(buf[2] != buf + 3, "test44 case 3 failed\n"); - FAILED(buf[3] != buf + 4, "test44 case 4 failed\n"); - FAILED(buf[4] != buf + 2, "test44 case 5 failed\n"); + FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)(buf + 2), "test39 case 1 failed\n"); + FAILED(buf[1] != buf + 2, "test39 case 2 failed\n"); + FAILED(buf[2] != buf + 3, "test39 case 3 failed\n"); + FAILED(buf[3] != buf + 4, "test39 case 4 failed\n"); + FAILED(buf[4] != buf + 2, "test39 case 5 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test45(void) -{ - /* Test single precision floating point. 
*/ - - executable_code code; - struct sljit_compiler* compiler; - sljit_f32 buf[12]; - sljit_sw buf2[6]; - struct sljit_jump* jump; - - if (verbose) - printf("Run test45\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test45 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - buf[0] = 5.5; - buf[1] = -7.25; - buf[2] = 0; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = 0; - buf[7] = 8.75; - buf[8] = 0; - buf[9] = 16.5; - buf[10] = 0; - buf[11] = 0; - - buf2[0] = -1; - buf2[1] = -1; - buf2[2] = -1; - buf2[3] = -1; - buf2[4] = -1; - buf2[5] = -1; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 3, 2, 6, 0, 0); - - /* buf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f32), SLJIT_FR0, 0); - /* buf[3] */ - sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_FR1, 0, SLJIT_FR5, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f32), SLJIT_FR1, 0); - /* buf[4] */ - sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f32), SLJIT_FR5, 0); - /* buf[5] */ - sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f32), SLJIT_FR4, 0); - - /* buf[6] */ - sljit_emit_fop2(compiler, SLJIT_ADD_F32, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f32), SLJIT_FR0, 0); - /* buf[7] */ - sljit_emit_fop2(compiler, SLJIT_SUB_F32, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f32), SLJIT_FR5, 0); - /* buf[8] 
*/ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop2(compiler, SLJIT_MUL_F32, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f32), SLJIT_FR0, 0, SLJIT_FR0, 0); - /* buf[9] */ - sljit_emit_fop2(compiler, SLJIT_DIV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f32), SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f32), SLJIT_FR2, 0); - /* buf[10] */ - sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 0x3d0ac); - sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R0), 0x3d0ac); - /* buf[11] */ - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 0x3d0ac + sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R0), -0x3d0ac); - - /* buf2[0] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); - cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_F_EQUAL); - /* buf2[1] */ - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); - cond_set(compiler, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_F_LESS); - /* buf2[2] */ - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw), SLJIT_F_EQUAL); - /* buf2[3] */ - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_GREATER_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_F_GREATER_EQUAL); - - /* buf2[4] */ - jump = sljit_emit_fcmp(compiler, SLJIT_F_LESS_EQUAL | SLJIT_32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); - 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_IMM, 7); - sljit_set_label(jump, sljit_emit_label(compiler)); - - /* buf2[5] */ - jump = sljit_emit_fcmp(compiler, SLJIT_F_GREATER | SLJIT_32, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_FR2, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_IMM, 6); - sljit_set_label(jump, sljit_emit_label(compiler)); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func2((sljit_sw)&buf, (sljit_sw)&buf2); - FAILED(buf[2] != -5.5, "test45 case 1 failed\n"); - FAILED(buf[3] != 7.25, "test45 case 2 failed\n"); - FAILED(buf[4] != 7.25, "test45 case 3 failed\n"); - FAILED(buf[5] != -5.5, "test45 case 4 failed\n"); - FAILED(buf[6] != -1.75, "test45 case 5 failed\n"); - FAILED(buf[7] != 16.0, "test45 case 6 failed\n"); - FAILED(buf[8] != 30.25, "test45 case 7 failed\n"); - FAILED(buf[9] != 3, "test45 case 8 failed\n"); - FAILED(buf[10] != -5.5, "test45 case 9 failed\n"); - FAILED(buf[11] != 7.25, "test45 case 10 failed\n"); - FAILED(buf2[0] != 1, "test45 case 11 failed\n"); - FAILED(buf2[1] != 2, "test45 case 12 failed\n"); - FAILED(buf2[2] != 2, "test45 case 13 failed\n"); - FAILED(buf2[3] != 1, "test45 case 14 failed\n"); - FAILED(buf2[4] != 7, "test45 case 15 failed\n"); - FAILED(buf2[5] != -1, "test45 case 16 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test46(void) +static void test40(void) { /* Test sljit_emit_op_flags with 32 bit operations. 
*/ @@ -4770,7 +4095,7 @@ static void test46(void) sljit_s32 i; if (verbose) - printf("Run test46\n"); + printf("Run test40\n"); for (i = 0; i < 24; ++i) buf[i] = -17; @@ -4779,7 +4104,7 @@ static void test46(void) buf2[i] = -13; buf2[4] = -124; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 3, 3, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 3, 3, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -7); sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_R2, 0, SLJIT_IMM, 13); @@ -4851,43 +4176,43 @@ static void test46(void) sljit_free_compiler(compiler); code.func2((sljit_sw)&buf, (sljit_sw)&buf2); - FAILED(buf[0] != 0, "test46 case 1 failed\n"); - FAILED(buf[1] != -17, "test46 case 2 failed\n"); - FAILED(buf[2] != 1, "test46 case 3 failed\n"); - FAILED(buf[3] != -17, "test46 case 4 failed\n"); - FAILED(buf[4] != 1, "test46 case 5 failed\n"); - FAILED(buf[5] != -17, "test46 case 6 failed\n"); - FAILED(buf[6] != 1, "test46 case 7 failed\n"); - FAILED(buf[7] != -17, "test46 case 8 failed\n"); - FAILED(buf[8] != 0, "test46 case 9 failed\n"); - FAILED(buf[9] != -17, "test46 case 10 failed\n"); - FAILED(buf[10] != 1, "test46 case 11 failed\n"); - FAILED(buf[11] != -17, "test46 case 12 failed\n"); - FAILED(buf[12] != 1, "test46 case 13 failed\n"); - FAILED(buf[13] != -17, "test46 case 14 failed\n"); - FAILED(buf[14] != 1, "test46 case 15 failed\n"); - FAILED(buf[15] != -17, "test46 case 16 failed\n"); - FAILED(buf[16] != 0, "test46 case 17 failed\n"); - FAILED(buf[17] != -17, "test46 case 18 failed\n"); - FAILED(buf[18] != 0, "test46 case 19 failed\n"); - FAILED(buf[19] != -17, "test46 case 20 failed\n"); - FAILED(buf[20] != -18, "test46 case 21 failed\n"); - FAILED(buf[21] != -17, "test46 case 22 failed\n"); - FAILED(buf[22] != 38, "test46 case 23 failed\n"); - FAILED(buf[23] != -17, "test46 case 24 failed\n"); + FAILED(buf[0] != 0, "test40 case 1 failed\n"); + FAILED(buf[1] != -17, "test40 case 2 
failed\n"); + FAILED(buf[2] != 1, "test40 case 3 failed\n"); + FAILED(buf[3] != -17, "test40 case 4 failed\n"); + FAILED(buf[4] != 1, "test40 case 5 failed\n"); + FAILED(buf[5] != -17, "test40 case 6 failed\n"); + FAILED(buf[6] != 1, "test40 case 7 failed\n"); + FAILED(buf[7] != -17, "test40 case 8 failed\n"); + FAILED(buf[8] != 0, "test40 case 9 failed\n"); + FAILED(buf[9] != -17, "test40 case 10 failed\n"); + FAILED(buf[10] != 1, "test40 case 11 failed\n"); + FAILED(buf[11] != -17, "test40 case 12 failed\n"); + FAILED(buf[12] != 1, "test40 case 13 failed\n"); + FAILED(buf[13] != -17, "test40 case 14 failed\n"); + FAILED(buf[14] != 1, "test40 case 15 failed\n"); + FAILED(buf[15] != -17, "test40 case 16 failed\n"); + FAILED(buf[16] != 0, "test40 case 17 failed\n"); + FAILED(buf[17] != -17, "test40 case 18 failed\n"); + FAILED(buf[18] != 0, "test40 case 19 failed\n"); + FAILED(buf[19] != -17, "test40 case 20 failed\n"); + FAILED(buf[20] != -18, "test40 case 21 failed\n"); + FAILED(buf[21] != -17, "test40 case 22 failed\n"); + FAILED(buf[22] != 38, "test40 case 23 failed\n"); + FAILED(buf[23] != -17, "test40 case 24 failed\n"); - FAILED(buf2[0] != 0, "test46 case 25 failed\n"); - FAILED(buf2[1] != 1, "test46 case 26 failed\n"); - FAILED(buf2[2] != 0, "test46 case 27 failed\n"); - FAILED(buf2[3] != 1, "test46 case 28 failed\n"); - FAILED(buf2[4] != -123, "test46 case 29 failed\n"); - FAILED(buf2[5] != -14, "test46 case 30 failed\n"); + FAILED(buf2[0] != 0, "test40 case 25 failed\n"); + FAILED(buf2[1] != 1, "test40 case 26 failed\n"); + FAILED(buf2[2] != 0, "test40 case 27 failed\n"); + FAILED(buf2[3] != 1, "test40 case 28 failed\n"); + FAILED(buf2[4] != -123, "test40 case 29 failed\n"); + FAILED(buf2[5] != -14, "test40 case 30 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test47(void) +static void test41(void) { /* Test jump optimizations. 
*/ executable_code code; @@ -4895,7 +4220,7 @@ static void test47(void) sljit_sw buf[3]; if (verbose) - printf("Run test47\n"); + printf("Run test41\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; @@ -4910,13 +4235,13 @@ static void test47(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); /* buf[1] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xd37c10); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_set_target(sljit_emit_jump(compiler, SLJIT_LESS), SLJIT_W(0x112233445566)); #endif sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); /* buf[2] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x59b48e); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_set_target(sljit_emit_jump(compiler, SLJIT_LESS), SLJIT_W(0x1122334455667788)); #endif sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R0, 0); @@ -4926,386 +4251,16 @@ static void test47(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func1((sljit_sw)&buf) != 0x59b48e, "test47 case 1 failed\n"); - FAILED(buf[0] != 0x3a5c6f, "test47 case 2 failed\n"); - FAILED(buf[1] != 0xd37c10, "test47 case 3 failed\n"); - FAILED(buf[2] != 0x59b48e, "test47 case 4 failed\n"); + FAILED(code.func1((sljit_sw)&buf) != 0x59b48e, "test41 case 1 failed\n"); + FAILED(buf[0] != 0x3a5c6f, "test41 case 2 failed\n"); + FAILED(buf[1] != 0xd37c10, "test41 case 3 failed\n"); + FAILED(buf[2] != 0x59b48e, "test41 case 4 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test48(void) -{ - /* Test floating point conversions. 
*/ - executable_code code; - struct sljit_compiler* compiler; - int i; - sljit_f64 dbuf[10]; - sljit_f32 sbuf[10]; - sljit_sw wbuf[10]; - sljit_s32 ibuf[10]; - - if (verbose) - printf("Run test48\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test48 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - for (i = 0; i < 10; i++) { - dbuf[i] = 0.0; - sbuf[i] = 0.0; - wbuf[i] = 0; - ibuf[i] = 0; - } - - dbuf[0] = 123.5; - dbuf[1] = -367; - dbuf[2] = 917.75; - - sbuf[0] = 476.25; - sbuf[1] = -1689.75; - - wbuf[0] = 2345; - - ibuf[0] = 312; - ibuf[1] = -9324; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 3, 3, 6, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&sbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&wbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&ibuf); - - /* sbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3); - /* sbuf[3] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_F32_SHIFT, SLJIT_FR5, 0); - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S1), 0); - /* dbuf[3] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR4, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR2, 0, SLJIT_FR3, 0); - /* dbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * 
sizeof(sljit_f64), SLJIT_FR2, 0); - /* sbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR3, 0); - - /* wbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); - sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_F64_SHIFT); - /* wbuf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S1), 0); - /* wbuf[3] */ - sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_sw), SLJIT_FR5, 0); - sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_FR0, 0, SLJIT_FR5, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4); - /* wbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM2(SLJIT_S2, SLJIT_R1), SLJIT_WORD_SHIFT, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); - /* ibuf[2] */ - sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32), SLJIT_FR4, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F32, SLJIT_R0, 0, SLJIT_FR1, 0); - /* ibuf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 3 * sizeof(sljit_s32), SLJIT_R0, 0); - - /* dbuf[5] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S2), 0); - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_FR2, 0, SLJIT_IMM, -6213); - /* dbuf[6] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR2, 0); - /* dbuf[7] */ - sljit_emit_fop1(compiler, 
SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64), SLJIT_MEM0(), (sljit_sw)&ibuf[0]); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32)); - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_FR1, 0, SLJIT_R0, 0); - /* dbuf[8] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_FR1, 0); - /* dbuf[9] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM0(), (sljit_sw)(dbuf + 9), SLJIT_IMM, -77); - /* sbuf[5] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_f32), SLJIT_IMM, -123); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 7190); - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_FR3, 0, SLJIT_R0, 0); - /* sbuf[6] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_FR3, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 123); - sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_R2, 0, SLJIT_IMM, 123 * sizeof(sljit_s32)); - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 2); - /* sbuf[7] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_f32), SLJIT_FR1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 3812); - /* sbuf[8] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_F32_SHIFT, SLJIT_R1, 0); - /* sbuf[9] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM0(), (sljit_sw)(sbuf + 9), SLJIT_IMM, -79); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func0(); - FAILED(dbuf[3] != 476.25, "test48 case 1 failed\n"); - FAILED(dbuf[4] != 476.25, "test48 case 2 failed\n"); - FAILED(dbuf[5] != 2345.0, 
"test48 case 3 failed\n"); - FAILED(dbuf[6] != -6213.0, "test48 case 4 failed\n"); - FAILED(dbuf[7] != 312.0, "test48 case 5 failed\n"); - FAILED(dbuf[8] != -9324.0, "test48 case 6 failed\n"); - FAILED(dbuf[9] != -77.0, "test48 case 7 failed\n"); - - FAILED(sbuf[2] != 123.5, "test48 case 8 failed\n"); - FAILED(sbuf[3] != 123.5, "test48 case 9 failed\n"); - FAILED(sbuf[4] != 476.25, "test48 case 10 failed\n"); - FAILED(sbuf[5] != -123, "test48 case 11 failed\n"); - FAILED(sbuf[6] != 7190, "test48 case 12 failed\n"); - FAILED(sbuf[7] != 312, "test48 case 13 failed\n"); - FAILED(sbuf[8] != 3812, "test48 case 14 failed\n"); - FAILED(sbuf[9] != -79.0, "test48 case 15 failed\n"); - - FAILED(wbuf[1] != -367, "test48 case 16 failed\n"); - FAILED(wbuf[2] != 917, "test48 case 17 failed\n"); - FAILED(wbuf[3] != 476, "test48 case 18 failed\n"); - FAILED(wbuf[4] != -476, "test48 case 19 failed\n"); - - FAILED(ibuf[2] != -917, "test48 case 20 failed\n"); - FAILED(ibuf[3] != -1689, "test48 case 21 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test49(void) -{ - /* Test floating point conversions. 
*/ - executable_code code; - struct sljit_compiler* compiler; - int i; - sljit_f64 dbuf[10]; - sljit_f32 sbuf[9]; - sljit_sw wbuf[9]; - sljit_s32 ibuf[9]; - sljit_s32* dbuf_ptr = (sljit_s32*)dbuf; - sljit_s32* sbuf_ptr = (sljit_s32*)sbuf; - - if (verbose) - printf("Run test49\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test49 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - for (i = 0; i < 9; i++) { - dbuf_ptr[i << 1] = -1; - dbuf_ptr[(i << 1) + 1] = -1; - sbuf_ptr[i] = -1; - wbuf[i] = -1; - ibuf[i] = -1; - } - -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - dbuf[9] = (sljit_f64)SLJIT_W(0x1122334455); -#endif - dbuf[0] = 673.75; - sbuf[0] = -879.75; - wbuf[0] = 345; - ibuf[0] = -249; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 3, 3, 3, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&sbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&wbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&ibuf); - - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S1), 0); - /* sbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 0); - /* wbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 0); - /* wbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S1), 0); - /* ibuf[2] */ - sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32), SLJIT_MEM1(SLJIT_S0), 0); - /* ibuf[4] */ - 
sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F32, SLJIT_MEM1(SLJIT_R2), 4 * sizeof(sljit_s32), SLJIT_MEM1(SLJIT_S1), 0); - /* dbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S2), 0); - /* sbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S2), 0); - /* dbuf[6] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_R2), 0); - /* sbuf[6] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R2), 0); - -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64)); - /* wbuf[8] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 8 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_R0, 0, SLJIT_FR2, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_R0, 0); - sljit_emit_op2(compiler, SLJIT_AND32, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffff); - /* ibuf[8] */ - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), 8 * sizeof(sljit_s32), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x4455667788)); - /* dbuf[8] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_R0, 0); - /* dbuf[9] */ - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64), SLJIT_IMM, SLJIT_W(0x7766554433)); -#endif /* SLJIT_64BIT_ARCHITECTURE */ - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func0(); - - 
FAILED(dbuf_ptr[(1 * 2) + 0] != -1, "test49 case 1 failed\n"); - FAILED(dbuf_ptr[(1 * 2) + 1] != -1, "test49 case 2 failed\n"); - FAILED(dbuf[2] != -879.75, "test49 case 3 failed\n"); - FAILED(dbuf_ptr[(3 * 2) + 0] != -1, "test49 case 4 failed\n"); - FAILED(dbuf_ptr[(3 * 2) + 1] != -1, "test49 case 5 failed\n"); - FAILED(dbuf[4] != 345, "test49 case 6 failed\n"); - FAILED(dbuf_ptr[(5 * 2) + 0] != -1, "test49 case 7 failed\n"); - FAILED(dbuf_ptr[(5 * 2) + 1] != -1, "test49 case 8 failed\n"); - FAILED(dbuf[6] != -249, "test49 case 9 failed\n"); - FAILED(dbuf_ptr[(7 * 2) + 0] != -1, "test49 case 10 failed\n"); - FAILED(dbuf_ptr[(7 * 2) + 1] != -1, "test49 case 11 failed\n"); - - FAILED(sbuf_ptr[1] != -1, "test49 case 12 failed\n"); - FAILED(sbuf[2] != 673.75, "test49 case 13 failed\n"); - FAILED(sbuf_ptr[3] != -1, "test49 case 14 failed\n"); - FAILED(sbuf[4] != 345, "test49 case 15 failed\n"); - FAILED(sbuf_ptr[5] != -1, "test49 case 16 failed\n"); - FAILED(sbuf[6] != -249, "test49 case 17 failed\n"); - FAILED(sbuf_ptr[7] != -1, "test49 case 18 failed\n"); - - FAILED(wbuf[1] != -1, "test49 case 19 failed\n"); - FAILED(wbuf[2] != 673, "test49 case 20 failed\n"); - FAILED(wbuf[3] != -1, "test49 case 21 failed\n"); - FAILED(wbuf[4] != -879, "test49 case 22 failed\n"); - FAILED(wbuf[5] != -1, "test49 case 23 failed\n"); - - FAILED(ibuf[1] != -1, "test49 case 24 failed\n"); - FAILED(ibuf[2] != 673, "test49 case 25 failed\n"); - FAILED(ibuf[3] != -1, "test49 case 26 failed\n"); - FAILED(ibuf[4] != -879, "test49 case 27 failed\n"); - FAILED(ibuf[5] != -1, "test49 case 28 failed\n"); - -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(dbuf[8] != (sljit_f64)SLJIT_W(0x4455667788), "test49 case 29 failed\n"); - FAILED(dbuf[9] != (sljit_f64)SLJIT_W(0x66554433), "test49 case 30 failed\n"); - FAILED(wbuf[8] != SLJIT_W(0x1122334455), "test48 case 31 failed\n"); - FAILED(ibuf[8] == 0x4455, "test48 case 32 failed\n"); -#endif /* SLJIT_64BIT_ARCHITECTURE */ - 
- sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test50(void) -{ - /* Test stack and floating point operations. */ - executable_code code; - struct sljit_compiler* compiler; -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - sljit_uw size1, size2, size3; - int result; -#endif - sljit_f32 sbuf[7]; - - if (verbose) - printf("Run test50\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test50 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sbuf[0] = 245.5; - sbuf[1] = -100.25; - sbuf[2] = 713.75; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 3, 6, 0, 8 * sizeof(sljit_f32)); - - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 0); - /* sbuf[3] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32)); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); - sljit_emit_fop2(compiler, SLJIT_ADD_F32, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32)); - /* sbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32)); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32), SLJIT_IMM, 5934); - sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM1(SLJIT_SP), 3 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32)); - /* sbuf[5] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 3 * sizeof(sljit_f32)); - -#if !(defined SLJIT_CONFIG_X86 && 
SLJIT_CONFIG_X86) - size1 = compiler->size; -#endif - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f32)); -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - size2 = compiler->size; -#endif - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_FR2, 0); -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - size3 = compiler->size; -#endif - /* sbuf[6] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f32), SLJIT_FR5, 0); -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - result = (compiler->size - size3) == (size3 - size2) && (size3 - size2) == (size2 - size1); -#endif - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&sbuf); - - FAILED(sbuf[3] != 245.5, "test50 case 1 failed\n"); - FAILED(sbuf[4] != 145.25, "test50 case 2 failed\n"); - FAILED(sbuf[5] != 5934, "test50 case 3 failed\n"); - FAILED(sbuf[6] != 713.75, "test50 case 4 failed\n"); -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - FAILED(!result, "test50 case 5 failed\n"); -#endif - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test51(void) +static void test42(void) { /* Test all registers provided by the CPU. 
*/ executable_code code; @@ -5315,13 +4270,13 @@ static void test51(void) sljit_s32 i; if (verbose) - printf("Run test51\n"); + printf("Run test42\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 39; - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 32); @@ -5330,7 +4285,7 @@ static void test51(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); for (i = 2; i < SLJIT_NUMBER_OF_REGISTERS; i++) { - if (sljit_get_register_index(SLJIT_R(i)) >= 0) { + if (sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) >= 0) { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_R0, 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R(i)), 0); } else @@ -5339,7 +4294,7 @@ static void test51(void) sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 32); for (i = 2; i < SLJIT_NUMBER_OF_REGISTERS; i++) { - if (sljit_get_register_index(SLJIT_R(i)) >= 0) { + if (sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) >= 0) { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_R0, 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R(i)), 32); } else @@ -5347,7 +4302,7 @@ static void test51(void) } for (i = 2; i < SLJIT_NUMBER_OF_REGISTERS; i++) { - if (sljit_get_register_index(SLJIT_R(i)) >= 0) { + if (sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) >= 0) { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 32); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM2(SLJIT_R(i), SLJIT_R0), 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R(i)), 0); @@ -5367,7 +4322,7 @@ static void test51(void) code.func0(); - FAILED(buf[1] != (39 * 5 * 
(SLJIT_NUMBER_OF_REGISTERS - 2)), "test51 case 1 failed\n"); + FAILED(buf[1] != (39 * 5 * (SLJIT_NUMBER_OF_REGISTERS - 2)), "test42 case 1 failed\n"); sljit_free_code(code.code, NULL); @@ -5391,7 +4346,7 @@ static void test51(void) sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 35); sljit_emit_return_void(compiler); @@ -5400,7 +4355,7 @@ static void test51(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func0() != (SLJIT_NUMBER_OF_SAVED_REGISTERS * 17), "test51 case 2 failed\n"); + FAILED(code.func0() != (SLJIT_NUMBER_OF_SAVED_REGISTERS * 17), "test42 case 2 failed\n"); sljit_free_code(code.code, NULL); @@ -5426,7 +4381,7 @@ static void test51(void) sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 43); sljit_emit_return_void(compiler); @@ -5435,106 +4390,13 @@ static void test51(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code.func0() != (SLJIT_NUMBER_OF_SAVED_REGISTERS * 68), "test51 case 3 failed\n"); + FAILED(code.func0() != (SLJIT_NUMBER_OF_SAVED_REGISTERS * 68), "test42 case 3 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test52(void) -{ - /* Test all registers provided by the CPU. 
*/ - executable_code code; - struct sljit_compiler* compiler; - struct sljit_jump* jump; - sljit_f64 buf[3]; - sljit_s32 i; - - if (verbose) - printf("Run test52\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test52 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - buf[0] = 6.25; - buf[1] = 17.75; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 0, 1, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 0); - - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_S0), 0); - - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(VOID)); - /* SLJIT_FR0 contains the first value. */ - for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR(i), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); - - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 1, 0, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[1]); - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_R0), 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - FAILED(buf[2] != (SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS * 17.75 + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS * 6.25), "test52 case 1 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - buf[0] = -32.5; - buf[1] = -11.25; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 0, 1, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 0); - - for (i = 0; i < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_S0), 0); - for (i = 0; i < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FS(i), 0, SLJIT_MEM1(SLJIT_S0), 0); - - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(VOID)); - /* SLJIT_FR0 contains the first value. */ - for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR(i), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); - - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 1, 0, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[1]); - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_R0), 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - FAILED(buf[2] != (SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS * -11.25 + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS * -32.5), "test52 case 2 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test53(void) +static void test43(void) { /* Test addressing modes. 
*/ executable_code code; @@ -5553,7 +4415,7 @@ static void test53(void) buf[i] = 0; if (verbose) - printf("Run test53\n"); + printf("Run test43\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -5561,7 +4423,7 @@ static void test53(void) addr /= 3; for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++, addr++) { - if (sljit_get_register_index(SLJIT_R(i)) == -1) + if (sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) == -1) continue; /* buf_start[i * 3] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, (sljit_sw)addr); @@ -5585,14 +4447,14 @@ static void test53(void) code.func0(); for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) { - if (sljit_get_register_index(SLJIT_R(i)) == -1) + if (sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R(i)) == -1) continue; - FAILED(buf_start[i * 3] != 88 + i, "test78 case 1 failed\n"); + FAILED(buf_start[i * 3] != 88 + i, "test43 case 1 failed\n"); if (i != 0) { - FAILED(buf_start[i * 3 + 1] != 147 + i, "test78 case 2 failed\n"); + FAILED(buf_start[i * 3 + 1] != 147 + i, "test43 case 2 failed\n"); } - FAILED(buf_start[i * 3 + 2] != 191 + i, "test78 case 3 failed\n"); + FAILED(buf_start[i * 3 + 2] != 191 + i, "test43 case 3 failed\n"); } sljit_free_code(code.code, NULL); @@ -5600,19 +4462,14 @@ static void test53(void) successful_tests++; } -static void test54(void) +static void test44(void) { - /* Check cmov. */ + /* Test select operation. 
*/ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw large_num = SLJIT_W(0x1234567812345678); -#else - sljit_sw large_num = SLJIT_W(0x12345678); -#endif int i; - sljit_sw buf[19]; - sljit_s32 ibuf[4]; + sljit_sw buf[25]; + sljit_s32 ibuf[6]; union { sljit_f32 value; @@ -5624,80 +4481,123 @@ static void test54(void) sbuf[2].value = -14.75; if (verbose) - printf("Run test54\n"); + printf("Run test44\n"); FAILED(!compiler, "cannot create compiler\n"); - for (i = 0; i < 19; i++) + for (i = 0; i < 25; i++) buf[i] = 0; - for (i = 0; i < 4; i++) + for (i = 0; i < 6; i++) ibuf[i] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 5, 3, 3, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 5, 3, 3, 0, 2 * sizeof(sljit_sw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 17); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 34); sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R0, 0, SLJIT_IMM, -10); - sljit_emit_cmov(compiler, SLJIT_SIG_LESS, SLJIT_R0, SLJIT_R1, 0); + sljit_emit_select(compiler, SLJIT_SIG_LESS, SLJIT_R0, SLJIT_R1, 0, SLJIT_R0); /* buf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R0, 0, SLJIT_IMM, -10); - sljit_emit_cmov(compiler, SLJIT_SIG_GREATER, SLJIT_R0, SLJIT_R1, 0); + sljit_emit_select(compiler, SLJIT_SIG_GREATER, SLJIT_R0, SLJIT_R1, 0, SLJIT_R0); /* buf[1] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 24); - sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_IMM, 24); - sljit_emit_cmov(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, SLJIT_IMM, 66); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -67); + sljit_emit_op1(compiler, SLJIT_MOV, 
SLJIT_R1, 0, SLJIT_IMM, 81); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R0, 0, SLJIT_IMM, -10); + sljit_emit_select(compiler, SLJIT_SIG_LESS_EQUAL, SLJIT_R0, SLJIT_R0, 0, SLJIT_R1); /* buf[2] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_IMM, 78); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R0, 0, SLJIT_IMM, -66); + sljit_emit_select(compiler, SLJIT_SIG_GREATER, SLJIT_R0, SLJIT_R0, 0, SLJIT_R1); /* buf[3] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_IMM, large_num); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 24); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_IMM, 23); + sljit_emit_select(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_IMM, 66, SLJIT_R0); /* buf[4] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_IMM, 78); + sljit_emit_select(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R0); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_select(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, SLJIT_IMM, WCONST(0x1234567812345678, 0x12345678), SLJIT_R0); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R0, 0); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - SLJIT_ASSERT(sljit_get_register_index(SLJIT_R3) == -1 && sljit_get_register_index(SLJIT_R4) == -1); + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R3) == -1 && sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R4) == -1); #endif sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 7); 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -45); sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 8); - sljit_emit_cmov(compiler, SLJIT_OVERFLOW, SLJIT_R3, SLJIT_IMM, 35); - /* buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R3, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, large_num); - sljit_emit_op2u(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_IMM, large_num); - sljit_emit_cmov(compiler, SLJIT_OVERFLOW, SLJIT_R3, SLJIT_IMM, 35); - /* buf[6] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R3, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 71); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 13); - sljit_emit_op2(compiler, SLJIT_LSHR | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 8); - sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R3, SLJIT_R0, 0); + sljit_emit_select(compiler, SLJIT_OVERFLOW, SLJIT_R3, SLJIT_IMM, 35, SLJIT_R3); /* buf[7] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R3, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 12); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -29); - sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 8); - sljit_emit_cmov(compiler, SLJIT_NOT_OVERFLOW, SLJIT_R0, SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, WCONST(0x1010000000, 0x100000)); + sljit_emit_op2u(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_IMM, WCONST(0x1010000000, 0x100000)); + sljit_emit_select(compiler, SLJIT_OVERFLOW, SLJIT_R3, SLJIT_IMM, 35, SLJIT_R3); /* buf[8] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R3, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); - sljit_emit_op1(compiler, SLJIT_MOV, 
SLJIT_R3, 0, SLJIT_IMM, -12); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, 21); - sljit_emit_op2u(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_IMM, 8); - sljit_emit_cmov(compiler, SLJIT_NOT_EQUAL, SLJIT_R3, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 71); + sljit_emit_op2(compiler, SLJIT_LSHR | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_select(compiler, SLJIT_NOT_ZERO, SLJIT_R3, SLJIT_IMM, 13, SLJIT_R0); /* buf[9] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_R3, 0); - sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R3, SLJIT_R4, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 12); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -29); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_select(compiler, SLJIT_OVERFLOW, SLJIT_R0, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R3); /* buf[10] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_IMM, -12); + sljit_emit_op2u(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_IMM, 8); + sljit_emit_select(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, SLJIT_MEM1(SLJIT_R0), 11 * sizeof(sljit_sw), SLJIT_R1); + /* buf[11] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 99); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_IMM, -21); + 
sljit_emit_select(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_MEM1(SLJIT_R0), 12 * sizeof(sljit_sw), SLJIT_R1); + /* buf[12] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, 43); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 90); + sljit_emit_op2u(compiler, SLJIT_XOR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_select(compiler, SLJIT_ZERO, SLJIT_R1, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1); + /* buf[13] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, -62); + sljit_emit_select(compiler, SLJIT_NOT_ZERO, SLJIT_R2, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_R1); + /* buf[14] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 15); + sljit_emit_op2u(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), SLJIT_IMM, 38); + sljit_emit_select(compiler, SLJIT_CARRY, SLJIT_R0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_WORD_SHIFT, SLJIT_R0); + /* buf[15] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, WCONST(0x8800000000, 0x8800000) + 16 * sizeof(sljit_sw)); + sljit_emit_op2u(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_IMM, 77); + sljit_emit_select(compiler, 
SLJIT_CARRY, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), WCONST(-0x8800000000, -0x8800000), SLJIT_R0); + /* buf[16] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R0, 0); + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), 0); sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); @@ -5705,68 +4605,88 @@ static void test54(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - sljit_emit_cmov(compiler, SLJIT_F_EQUAL, SLJIT_R0, SLJIT_IMM, -45); - /* buf[11] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_GREATER, SLJIT_FR1, 0, SLJIT_FR2, 0); - sljit_emit_cmov(compiler, SLJIT_F_GREATER, SLJIT_R0, SLJIT_IMM, -45); - /* buf[12] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_GREATER_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - sljit_emit_cmov(compiler, SLJIT_F_GREATER_EQUAL, SLJIT_R0, SLJIT_IMM, 33); - /* buf[13] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_FR2, 0); - sljit_emit_cmov(compiler, SLJIT_F_LESS, SLJIT_R0, SLJIT_IMM, -70); - /* buf[14] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_LESS_EQUAL, SLJIT_FR2, 0, SLJIT_FR1, 0); - sljit_emit_cmov(compiler, SLJIT_F_LESS_EQUAL, SLJIT_R0, SLJIT_IMM, -60); - /* buf[15] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), 
SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_NOT_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); - sljit_emit_cmov(compiler, SLJIT_F_NOT_EQUAL, SLJIT_R0, SLJIT_IMM, 31); - /* buf[16] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 53); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_ORDERED, SLJIT_FR1, 0, SLJIT_FR0, 0); - sljit_emit_cmov(compiler, SLJIT_ORDERED, SLJIT_R0, SLJIT_IMM, 17); + sljit_emit_select(compiler, SLJIT_F_EQUAL, SLJIT_R0, SLJIT_IMM, -45, SLJIT_R0); /* buf[17] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 17 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_UNORDERED, SLJIT_FR1, 0, SLJIT_FR0, 0); - sljit_emit_cmov(compiler, SLJIT_UNORDERED, SLJIT_R0, SLJIT_IMM, 59); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_GREATER, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_select(compiler, SLJIT_F_GREATER, SLJIT_R0, SLJIT_IMM, -45, SLJIT_R0); /* buf[18] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_GREATER_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_select(compiler, SLJIT_F_GREATER_EQUAL, SLJIT_R0, SLJIT_IMM, 33, SLJIT_R0); + /* buf[19] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -70); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 8); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_select(compiler, SLJIT_F_LESS, SLJIT_R0, SLJIT_R0, 0, SLJIT_R1); + /* buf[20] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 20 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_LESS_EQUAL, SLJIT_FR2, 0, SLJIT_FR1, 0); + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -60); + sljit_emit_select(compiler, SLJIT_F_GREATER, SLJIT_R0, SLJIT_IMM, 8, SLJIT_R0); + /* buf[21] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 21 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_NOT_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_select(compiler, SLJIT_F_NOT_EQUAL, SLJIT_R0, SLJIT_IMM, 31, SLJIT_R0); + /* buf[22] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 22 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_sw), SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 53); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_ORDERED, SLJIT_FR1, 0, SLJIT_FR0, 0); + sljit_emit_select(compiler, SLJIT_ORDERED, SLJIT_R0, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_sw), SLJIT_R1); + /* buf[23] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_sw), SLJIT_IMM, 59); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_UNORDERED, SLJIT_FR0, 0, SLJIT_FR1, 0); + sljit_emit_select(compiler, SLJIT_UNORDERED, SLJIT_R0, SLJIT_MEM1(SLJIT_R0), 24 * sizeof(sljit_sw), SLJIT_R1); + /* buf[24] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_sw), SLJIT_R0, 0); } sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 177); sljit_emit_op2u(compiler, SLJIT_SUB32 | SLJIT_SET_LESS, SLJIT_R0, 0, SLJIT_IMM, 178); - sljit_emit_cmov(compiler, SLJIT_LESS | SLJIT_32, SLJIT_R0, SLJIT_IMM, 200); + sljit_emit_select(compiler, SLJIT_LESS | SLJIT_32, SLJIT_R0, SLJIT_IMM, 200, SLJIT_R0); /* ibuf[0] */ sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R0, 
0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 95); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R3, 0, SLJIT_IMM, 177); - sljit_emit_op2u(compiler, SLJIT_SUB32 | SLJIT_SET_LESS_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 95); - sljit_emit_cmov(compiler, SLJIT_LESS_EQUAL | SLJIT_32, SLJIT_R3, SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_s32) >> 1); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_IMM, 177); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 95); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_s32)); + sljit_emit_select(compiler, SLJIT_LESS_EQUAL | SLJIT_32, SLJIT_R0, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), 1, SLJIT_R1); /* ibuf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R3, 0, SLJIT_IMM, 56); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R4, 0, SLJIT_IMM, -63); sljit_emit_op2u(compiler, SLJIT_SUB32 | SLJIT_SET_SIG_LESS, SLJIT_R3, 0, SLJIT_R4, 0); - sljit_emit_cmov(compiler, SLJIT_SIG_LESS | SLJIT_32, SLJIT_R3, SLJIT_R4, 0); + sljit_emit_select(compiler, SLJIT_SIG_LESS | SLJIT_32, SLJIT_R3, SLJIT_R4, 0, SLJIT_R3); /* ibuf[2] */ sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_s32), SLJIT_R3, 0); - sljit_emit_op2u(compiler, SLJIT_SUB32 | SLJIT_SET_SIG_GREATER, SLJIT_R3, 0, SLJIT_R4, 0); - sljit_emit_cmov(compiler, SLJIT_SIG_GREATER | SLJIT_32, SLJIT_R3, SLJIT_R4, 0); + sljit_emit_op2u(compiler, SLJIT_SUB32 | SLJIT_SET_SIG_LESS, SLJIT_R3, 0, SLJIT_R4, 0); + sljit_emit_select(compiler, SLJIT_SIG_GREATER_EQUAL | SLJIT_32, SLJIT_R3, SLJIT_R4, 0, SLJIT_R3); /* ibuf[3] */ sljit_emit_op1(compiler, SLJIT_MOV32, 
SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32), SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_s32), SLJIT_IMM, 467); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 10); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 20); + sljit_emit_select(compiler, SLJIT_SIG_LESS | SLJIT_32, SLJIT_R2, SLJIT_MEM0(), (sljit_sw)(ibuf + 4), SLJIT_R2); + /* ibuf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_s32), SLJIT_R2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5 * sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_s32), SLJIT_IMM, -29); + sljit_emit_op2u(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_select(compiler, SLJIT_CARRY | SLJIT_32, SLJIT_R2, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), 0, SLJIT_R2); + /* ibuf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_s32), SLJIT_R2, 0); + sljit_emit_return_void(compiler); code.code = sljit_generate_code(compiler); @@ -5775,39 +4695,47 @@ static void test54(void) code.func3((sljit_sw)&buf, (sljit_sw)&ibuf, (sljit_sw)&sbuf); - FAILED(buf[0] != 17, "test54 case 1 failed\n"); - FAILED(buf[1] != 34, "test54 case 2 failed\n"); - FAILED(buf[2] != 24, "test54 case 3 failed\n"); - FAILED(buf[3] != 78, "test54 case 4 failed\n"); - FAILED(buf[4] != large_num, "test54 case 5 failed\n"); - FAILED(buf[5] != -45, "test54 case 6 failed\n"); - FAILED(buf[6] != 35, "test54 case 7 failed\n"); - FAILED(buf[7] != 71, "test54 case 8 failed\n"); - FAILED(buf[8] != -29, "test54 case 9 failed\n"); - FAILED(buf[9] != -12, "test54 case 10 failed\n"); - FAILED(buf[10] != 21, "test54 case 11 failed\n"); + FAILED(buf[0] != 17, "test44 case 1 failed\n"); + FAILED(buf[1] != 34, "test44 case 2 failed\n"); + FAILED(buf[2] != -67, "test44 case 3 failed\n"); + FAILED(buf[3] != 81, "test44 case 4 
failed\n"); + FAILED(buf[4] != 24, "test44 case 5 failed\n"); + FAILED(buf[5] != 78, "test44 case 6 failed\n"); + FAILED(buf[6] != WCONST(0x1234567812345678, 0x12345678), "test44 case 7 failed\n"); + FAILED(buf[7] != -45, "test44 case 8 failed\n"); + FAILED(buf[8] != 35, "test44 case 9 failed\n"); + FAILED(buf[9] != 71, "test44 case 10 failed\n"); + FAILED(buf[10] != -29, "test44 case 11 failed\n"); + FAILED(buf[11] != 16, "test44 case 12 failed\n"); + FAILED(buf[12] != -21, "test44 case 13 failed\n"); + FAILED(buf[13] != 90, "test44 case 14 failed\n"); + FAILED(buf[14] != -62, "test44 case 15 failed\n"); + FAILED(buf[15] != 38, "test44 case 16 failed\n"); + FAILED(buf[16] != 77, "test44 case 17 failed\n"); if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - FAILED(buf[11] != 16, "test54 case 12 failed\n"); - FAILED(buf[12] != -45, "test54 case 13 failed\n"); - FAILED(buf[13] != 33, "test54 case 14 failed\n"); - FAILED(buf[14] != 8, "test54 case 15 failed\n"); - FAILED(buf[15] != -60, "test54 case 16 failed\n"); - FAILED(buf[16] != 31, "test54 case 17 failed\n"); - FAILED(buf[17] != 53, "test54 case 18 failed\n"); - FAILED(buf[18] != 59, "test54 case 19 failed\n"); + FAILED(buf[17] != 16, "test44 case 18 failed\n"); + FAILED(buf[18] != -45, "test44 case 19 failed\n"); + FAILED(buf[19] != 33, "test44 case 20 failed\n"); + FAILED(buf[20] != 8, "test44 case 21 failed\n"); + FAILED(buf[21] != -60, "test44 case 22 failed\n"); + FAILED(buf[22] != 31, "test44 case 23 failed\n"); + FAILED(buf[23] != 53, "test44 case 24 failed\n"); + FAILED(buf[24] != 59, "test44 case 25 failed\n"); } - FAILED(ibuf[0] != 200, "test54 case 12 failed\n"); - FAILED(ibuf[1] != 95, "test54 case 13 failed\n"); - FAILED(ibuf[2] != 56, "test54 case 14 failed\n"); - FAILED(ibuf[3] != -63, "test54 case 15 failed\n"); + FAILED(ibuf[0] != 200, "test44 case 26 failed\n"); + FAILED(ibuf[1] != 177, "test44 case 27 failed\n"); + FAILED(ibuf[2] != 56, "test44 case 28 failed\n"); + FAILED(ibuf[3] != -63, "test44 
case 29 failed\n"); + FAILED(ibuf[4] != 467, "test44 case 30 failed\n"); + FAILED(ibuf[5] != -29, "test44 case 31 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test55(void) +static void test45(void) { /* Check value preservation. */ executable_code code; @@ -5816,13 +4744,13 @@ static void test55(void) sljit_s32 i; if (verbose) - printf("Run test55\n"); + printf("Run test45\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; buf[1] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, sizeof (sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, sizeof (sljit_sw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, 217); @@ -5862,14 +4790,14 @@ static void test55(void) code.func0(); - FAILED(buf[0] != (SLJIT_NUMBER_OF_REGISTERS - 2) * 118 + 217, "test55 case 1 failed\n"); - FAILED(buf[1] != (SLJIT_NUMBER_OF_REGISTERS - 1) * 146 + 217, "test55 case 2 failed\n"); + FAILED(buf[0] != (SLJIT_NUMBER_OF_REGISTERS - 2) * 118 + 217, "test45 case 1 failed\n"); + FAILED(buf[1] != (SLJIT_NUMBER_OF_REGISTERS - 1) * 146 + 217, "test45 case 2 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test56(void) +static void test46(void) { /* Check integer subtraction with negative immediate. 
*/ executable_code code; @@ -5878,14 +4806,14 @@ static void test56(void) sljit_s32 i; if (verbose) - printf("Run test56\n"); + printf("Run test46\n"); for (i = 0; i < 13; i++) buf[i] = 77; FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 1, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 1, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 90 << 12); sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); @@ -5940,25 +4868,25 @@ static void test56(void) code.func1((sljit_sw)&buf); - FAILED(buf[0] != (181 << 12), "test56 case 1 failed\n"); - FAILED(buf[1] != 1, "test56 case 2 failed\n"); - FAILED(buf[2] != (181 << 12), "test56 case 3 failed\n"); - FAILED(buf[3] != 1, "test56 case 4 failed\n"); - FAILED(buf[4] != 1, "test56 case 5 failed\n"); - FAILED(buf[5] != 1, "test56 case 6 failed\n"); - FAILED(buf[6] != 0, "test56 case 7 failed\n"); - FAILED(buf[7] != 0, "test56 case 8 failed\n"); - FAILED(buf[8] != 181, "test56 case 9 failed\n"); - FAILED(buf[9] != 1, "test56 case 10 failed\n"); - FAILED(buf[10] != 1, "test56 case 11 failed\n"); - FAILED(buf[11] != 1, "test56 case 12 failed\n"); - FAILED(buf[12] != 1, "test56 case 13 failed\n"); + FAILED(buf[0] != (181 << 12), "test46 case 1 failed\n"); + FAILED(buf[1] != 1, "test46 case 2 failed\n"); + FAILED(buf[2] != (181 << 12), "test46 case 3 failed\n"); + FAILED(buf[3] != 1, "test46 case 4 failed\n"); + FAILED(buf[4] != 1, "test46 case 5 failed\n"); + FAILED(buf[5] != 1, "test46 case 6 failed\n"); + FAILED(buf[6] != 0, "test46 case 7 failed\n"); + FAILED(buf[7] != 0, "test46 case 8 failed\n"); + FAILED(buf[8] != 181, "test46 case 9 failed\n"); + FAILED(buf[9] != 1, "test46 case 10 failed\n"); + FAILED(buf[10] != 1, "test46 case 11 failed\n"); + FAILED(buf[11] != 1, "test46 case 12 failed\n"); + FAILED(buf[12] != 1, "test46 case 13 failed\n"); sljit_free_code(code.code, NULL); 
successful_tests++; } -static void test57(void) +static void test47(void) { /* Check prefetch instructions. */ executable_code code; @@ -5968,11 +4896,11 @@ static void test57(void) int i; if (verbose) - printf("Run test57\n"); + printf("Run test47\n"); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 3, 1, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 3, 1, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); labels[0] = sljit_emit_label(compiler); @@ -5981,7 +4909,7 @@ static void test57(void) labels[1] = sljit_emit_label(compiler); sljit_emit_op_src(compiler, SLJIT_PREFETCH_L2, SLJIT_MEM0(), 0); labels[2] = sljit_emit_label(compiler); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if IS_64BIT sljit_emit_op_src(compiler, SLJIT_PREFETCH_L3, SLJIT_MEM1(SLJIT_R0), SLJIT_W(0x1122334455667788)); #else sljit_emit_op_src(compiler, SLJIT_PREFETCH_L3, SLJIT_MEM1(SLJIT_R0), 0x11223344); @@ -6003,306 +4931,22 @@ static void test57(void) code.func0(); if (sljit_has_cpu_feature(SLJIT_HAS_PREFETCH)) { - FAILED(addr[0] == addr[1], "test57 case 1 failed\n"); - FAILED(addr[1] == addr[2], "test57 case 2 failed\n"); - FAILED(addr[2] == addr[3], "test57 case 3 failed\n"); - FAILED(addr[3] == addr[4], "test57 case 4 failed\n"); - } - else { - FAILED(addr[0] != addr[1], "test57 case 1 failed\n"); - FAILED(addr[1] != addr[2], "test57 case 2 failed\n"); - FAILED(addr[2] != addr[3], "test57 case 3 failed\n"); - FAILED(addr[3] != addr[4], "test57 case 4 failed\n"); + FAILED(addr[0] == addr[1], "test47 case 1 failed\n"); + FAILED(addr[1] == addr[2], "test47 case 2 failed\n"); + FAILED(addr[2] == addr[3], "test47 case 3 failed\n"); + FAILED(addr[3] == addr[4], "test47 case 4 failed\n"); + } else { + FAILED(addr[0] != addr[1], "test47 case 1 failed\n"); + FAILED(addr[1] != addr[2], "test47 case 2 failed\n"); + FAILED(addr[2] != addr[3], "test47 case 3 failed\n"); + FAILED(addr[3] != 
addr[4], "test47 case 4 failed\n"); } sljit_free_code(code.code, NULL); successful_tests++; } -static sljit_f64 test58_f1(sljit_f32 a, sljit_f32 b, sljit_f64 c) -{ - return (sljit_f64)a + (sljit_f64)b + c; -} - -static sljit_f32 test58_f2(sljit_sw a, sljit_f64 b, sljit_f32 c) -{ - return (sljit_f32)((sljit_f64)a + b + (sljit_f64)c); -} - -static sljit_f64 test58_f3(sljit_sw a, sljit_f32 b, sljit_sw c) -{ - return (sljit_f64)a + (sljit_f64)b + (sljit_f64)c; -} - -static sljit_f64 test58_f4(sljit_f32 a, sljit_sw b) -{ - return (sljit_f64)a + (sljit_f64)b; -} - -static sljit_f32 test58_f5(sljit_f32 a, sljit_f64 b, sljit_s32 c) -{ - return (sljit_f32)((sljit_f64)a + b + (sljit_f64)c); -} - -static sljit_sw test58_f6(sljit_f64 a, sljit_sw b) -{ - return (sljit_sw)(a + (sljit_f64)b); -} - -static void test58(void) -{ - /* Check function calls with floating point arguments. */ - executable_code code; - struct sljit_compiler* compiler; - struct sljit_jump* jump = NULL; - sljit_f64 dbuf[7]; - sljit_f32 sbuf[7]; - sljit_sw wbuf[2]; - - if (verbose) - printf("Run test58\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test58 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - dbuf[0] = 5.25; - dbuf[1] = 0.0; - dbuf[2] = 2.5; - dbuf[3] = 0.0; - dbuf[4] = 0.0; - dbuf[5] = 0.0; - dbuf[6] = -18.0; - - sbuf[0] = 6.75; - sbuf[1] = -3.5; - sbuf[2] = 1.5; - sbuf[3] = 0.0; - sbuf[4] = 0.0; - - wbuf[0] = 0; - wbuf[1] = 0; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 3, 3, 4, 0, sizeof(sljit_sw)); - - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, F32, 
F32, F64), SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f1)); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, F32, F32, F64)); - sljit_set_target(jump, SLJIT_FUNC_UADDR(test58_f1)); - /* dbuf[3] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f2)); - sljit_get_local_base(compiler, SLJIT_R1, 0, -16); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F32, W, F64, F32), SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); - /* sbuf[3] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f32), SLJIT_FR0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -4); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 9); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, W, F32, W)); - sljit_set_target(jump, SLJIT_FUNC_UADDR(test58_f3)); - /* dbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_FR0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); - 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -6); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(F64, F32, W)); - sljit_set_target(jump, SLJIT_FUNC_UADDR(test58_f4)); - /* dbuf[5] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f5)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F32, F32, F64, 32), SLJIT_MEM1(SLJIT_SP), 0); - /* sbuf[4] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f6)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, F64, W), SLJIT_R0, 0); - /* wbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_R0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 319); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test58_f6)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, F64, W), SLJIT_R1, 0); - /* wbuf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_R0, 0); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func3((sljit_sw)&dbuf, (sljit_sw)&sbuf, (sljit_sw)&wbuf); - - FAILED(dbuf[1] != 8.5, "test58 case 1 failed\n"); - 
FAILED(dbuf[3] != 0.5, "test58 case 2 failed\n"); - FAILED(sbuf[3] != 17.75, "test58 case 3 failed\n"); - FAILED(dbuf[4] != 11.75, "test58 case 4 failed\n"); - FAILED(dbuf[5] != -9.5, "test58 case 5 failed\n"); - FAILED(sbuf[4] != 12, "test58 case 6 failed\n"); - FAILED(wbuf[0] != SLJIT_FUNC_ADDR(test58_f6) - 18, "test58 case 7 failed\n"); - FAILED(wbuf[1] != 301, "test58 case 8 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static sljit_sw test59_f1(sljit_sw a, sljit_s32 b, sljit_sw c, sljit_sw d) -{ - return (sljit_sw)(a + b + c + d - SLJIT_FUNC_ADDR(test59_f1)); -} - -static sljit_s32 test59_f2(sljit_f64 a, sljit_f32 b, sljit_f64 c, sljit_sw d) -{ - return (sljit_s32)(a + b + c + (sljit_f64)d); -} - -static sljit_f32 test59_f3(sljit_f32 a, sljit_s32 b, sljit_f64 c, sljit_sw d) -{ - return (sljit_f32)(a + (sljit_f64)b + c + (sljit_f64)d); -} - -static sljit_f32 test59_f4(sljit_f32 a, sljit_f64 b, sljit_f32 c, sljit_f64 d) -{ - return (sljit_f32)(a + b + c + (sljit_f64)d); -} - -static void test59(void) -{ - /* Check function calls with four arguments. 
*/ - executable_code code; - struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - struct sljit_jump* jump = NULL; - sljit_sw wbuf[5]; - sljit_f64 dbuf[3]; - sljit_f32 sbuf[4]; - - if (verbose) - printf("Run test59\n"); - - wbuf[0] = 0; - wbuf[1] = 0; - wbuf[2] = SLJIT_FUNC_ADDR(test59_f1); - wbuf[3] = 0; - wbuf[4] = 0; - - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - dbuf[0] = 5.125; - dbuf[1] = 6.125; - dbuf[2] = 4.25; - - sbuf[0] = 0.75; - sbuf[1] = -1.5; - sbuf[2] = 0.0; - sbuf[3] = 0.0; - } - - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 4, 3, 4, 0, sizeof(sljit_sw)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 33); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -20); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test59_f1)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -40); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, 32, W, W), SLJIT_R2, 0); - /* wbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test59_f1)); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -25); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 100); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -10); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, 32, W, W), SLJIT_R0, 0); - /* wbuf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 231); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test59_f1) - 100); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, 32, W, W), 
SLJIT_MEM2(SLJIT_R0, SLJIT_R2), SLJIT_WORD_SHIFT); - /* wbuf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); - - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -100); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(32, F64, F32, F64, W), SLJIT_IMM, SLJIT_FUNC_ADDR(test59_f2)); - sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R0, 0); - /* wbuf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 36); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 41); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS4(F32, F32, 32, F64, W)); - sljit_set_target(jump, SLJIT_FUNC_UADDR(test59_f3)); - /* sbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32), SLJIT_FR0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test59_f4)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(F32, F32, F64, F32, F64), SLJIT_R0, 0); - /* 
sbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_f32), SLJIT_FR0, 0); - } - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func3((sljit_sw)&wbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); - - FAILED(wbuf[0] != -27, "test59 case 1 failed\n"); - FAILED(wbuf[1] != 65, "test59 case 2 failed\n"); - FAILED(wbuf[3] != (sljit_sw)wbuf + 133, "test59 case 3 failed\n"); - - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - FAILED(wbuf[4] != -88, "test59 case 4 failed\n"); - FAILED(sbuf[2] != 79.75, "test59 case 5 failed\n"); - FAILED(sbuf[3] != 8.625, "test59 case 6 failed\n"); - } - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test60(void) +static void test48(void) { /* Test memory accesses with pre/post updates. */ executable_code code; @@ -6326,7 +4970,7 @@ static void test60(void) #endif if (verbose) - printf("Run test60\n"); + printf("Run test48\n"); for (i = 0; i < 18; i++) wbuf[i] = 0; @@ -6342,7 +4986,7 @@ static void test60(void) FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 4, 3, 4, 0, sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 4, 3, 4, 0, sizeof(sljit_sw)); supported[0] = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP, SLJIT_R1, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_sw)); if (supported[0] == SLJIT_SUCCESS) { @@ -6461,7 +5105,7 @@ static void test60(void) code.func3((sljit_sw)&wbuf, (sljit_sw)&bbuf, (sljit_sw)&ibuf); - FAILED(sizeof(expected) != sizeof(supported) / sizeof(sljit_s32), "test60 case 1 failed\n"); + FAILED(sizeof(expected) != sizeof(supported) / sizeof(sljit_s32), "test48 case 1 failed\n"); for (i = 0; i < sizeof(expected); i++) { if (expected[i]) { @@ -6471,196 +5115,38 @@ static void test60(void) } } else { if (supported[i] == SLJIT_SUCCESS) { - printf("test60 case %d should not be 
supported\n", i + 1); + printf("test48 case %d should not be supported\n", i + 1); return; } } } - FAILED(supported[0] == SLJIT_SUCCESS && wbuf[0] != -887766, "test60 case 2 failed\n"); - FAILED(supported[0] == SLJIT_SUCCESS && wbuf[1] != (sljit_sw)(wbuf + 2), "test60 case 3 failed\n"); - FAILED(supported[1] == SLJIT_SUCCESS && wbuf[3] != -13, "test60 case 4 failed\n"); - FAILED(supported[1] == SLJIT_SUCCESS && wbuf[4] != (sljit_sw)(bbuf), "test60 case 5 failed\n"); - FAILED(supported[2] == SLJIT_SUCCESS && wbuf[5] != -5678, "test60 case 6 failed\n"); - FAILED(supported[2] == SLJIT_SUCCESS && wbuf[6] != (sljit_sw)(ibuf), "test60 case 7 failed\n"); - FAILED(supported[3] == SLJIT_SUCCESS && ibuf[1] != -8765, "test60 case 8 failed\n"); - FAILED(supported[3] == SLJIT_SUCCESS && wbuf[7] != (sljit_sw)(ibuf + 1), "test60 case 9 failed\n"); - FAILED(supported[4] == SLJIT_SUCCESS && bbuf[0] != -121, "test60 case 10 failed\n"); - FAILED(supported[4] == SLJIT_SUCCESS && wbuf[8] != (sljit_sw)(bbuf) - 128 * (sljit_sw)sizeof(sljit_s8), "test60 case 11 failed\n"); - FAILED(supported[5] == SLJIT_SUCCESS && wbuf[9] != -881199, "test60 case 12 failed\n"); - FAILED(supported[5] == SLJIT_SUCCESS && wbuf[10] != (sljit_sw)(wbuf + 9), "test60 case 13 failed\n"); - FAILED(supported[6] == SLJIT_SUCCESS && wbuf[11] != -5678, "test60 case 14 failed\n"); - FAILED(supported[6] == SLJIT_SUCCESS && wbuf[12] != (sljit_sw)(ibuf), "test60 case 15 failed\n"); - FAILED(supported[7] == SLJIT_SUCCESS && ibuf[2] != -7890, "test60 case 16 failed\n"); - FAILED(supported[7] == SLJIT_SUCCESS && wbuf[13] != (sljit_sw)(ibuf + 2), "test60 case 17 failed\n"); - FAILED(supported[8] == SLJIT_SUCCESS && wbuf[14] != -887766, "test60 case 18 failed\n"); - FAILED(supported[8] == SLJIT_SUCCESS && wbuf[15] != (sljit_sw)(wbuf + 10), "test60 case 19 failed\n"); - FAILED(supported[9] == SLJIT_SUCCESS && wbuf[16] != -13, "test60 case 20 failed\n"); - FAILED(supported[9] == SLJIT_SUCCESS && wbuf[17] != (sljit_sw)(bbuf), 
"test60 case 21 failed\n"); + FAILED(supported[0] == SLJIT_SUCCESS && wbuf[0] != -887766, "test48 case 2 failed\n"); + FAILED(supported[0] == SLJIT_SUCCESS && wbuf[1] != (sljit_sw)(wbuf + 2), "test48 case 3 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && wbuf[3] != -13, "test48 case 4 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && wbuf[4] != (sljit_sw)(bbuf), "test48 case 5 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && wbuf[5] != -5678, "test48 case 6 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && wbuf[6] != (sljit_sw)(ibuf), "test48 case 7 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && ibuf[1] != -8765, "test48 case 8 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && wbuf[7] != (sljit_sw)(ibuf + 1), "test48 case 9 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && bbuf[0] != -121, "test48 case 10 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && wbuf[8] != (sljit_sw)(bbuf) - 128 * (sljit_sw)sizeof(sljit_s8), "test48 case 11 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && wbuf[9] != -881199, "test48 case 12 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && wbuf[10] != (sljit_sw)(wbuf + 9), "test48 case 13 failed\n"); + FAILED(supported[6] == SLJIT_SUCCESS && wbuf[11] != -5678, "test48 case 14 failed\n"); + FAILED(supported[6] == SLJIT_SUCCESS && wbuf[12] != (sljit_sw)(ibuf), "test48 case 15 failed\n"); + FAILED(supported[7] == SLJIT_SUCCESS && ibuf[2] != -7890, "test48 case 16 failed\n"); + FAILED(supported[7] == SLJIT_SUCCESS && wbuf[13] != (sljit_sw)(ibuf + 2), "test48 case 17 failed\n"); + FAILED(supported[8] == SLJIT_SUCCESS && wbuf[14] != -887766, "test48 case 18 failed\n"); + FAILED(supported[8] == SLJIT_SUCCESS && wbuf[15] != (sljit_sw)(wbuf + 10), "test48 case 19 failed\n"); + FAILED(supported[9] == SLJIT_SUCCESS && wbuf[16] != -13, "test48 case 20 failed\n"); + FAILED(supported[9] == SLJIT_SUCCESS && wbuf[17] != (sljit_sw)(bbuf), "test48 case 21 failed\n"); sljit_free_code(code.code, NULL); 
successful_tests++; } -static void test61(void) -{ - /* Test float memory accesses with pre/post updates. */ - executable_code code; - struct sljit_compiler* compiler; - sljit_u32 i; - sljit_s32 supported[6]; - sljit_sw wbuf[6]; - sljit_f64 dbuf[4]; - sljit_f32 sbuf[4]; -#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) - static sljit_u8 expected[6] = { 1, 1, 1, 1, 0, 0 }; -#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) - static sljit_u8 expected[6] = { 1, 0, 1, 0, 1, 1 }; -#else - static sljit_u8 expected[6] = { 0, 0, 0, 0, 0, 0 }; -#endif - - if (verbose) - printf("Run test61\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test61 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - for (i = 0; i < 6; i++) - wbuf[i] = 0; - - dbuf[0] = 66.725; - dbuf[1] = 0.0; - dbuf[2] = 0.0; - dbuf[3] = 0.0; - - sbuf[0] = 0.0; - sbuf[1] = -22.125; - sbuf[2] = 0.0; - sbuf[3] = 0.0; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 4, 3, 4, 0, sizeof(sljit_sw)); - - supported[0] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_f64)); - if (supported[0] == SLJIT_SUCCESS) { - /* dbuf[1] */ - sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 4 * sizeof(sljit_f64)); - sljit_emit_fmem_update(compiler, SLJIT_MOV_F64, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); - /* wbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); - } - - supported[1] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR2, SLJIT_MEM1(SLJIT_R0), -(sljit_sw)sizeof(sljit_f64)); - if (supported[1] == SLJIT_SUCCESS) { - /* dbuf[2] */ - sljit_emit_op2(compiler, 
SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR2, SLJIT_MEM1(SLJIT_R0), -(sljit_sw)sizeof(sljit_f64)); - /* wbuf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); - } - - supported[2] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE, SLJIT_FR1, SLJIT_MEM1(SLJIT_R2), -4 * (sljit_sw)sizeof(sljit_f32)); - if (supported[2] == SLJIT_SUCCESS) { - /* sbuf[0] */ - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S2, 0, SLJIT_IMM, 4 * sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); - sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE, SLJIT_FR1, SLJIT_MEM1(SLJIT_R2), -4 * (sljit_sw)sizeof(sljit_f32)); - /* wbuf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R2, 0); - } - - supported[3] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR1, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f32)); - if (supported[3] == SLJIT_SUCCESS) { - /* sbuf[2] */ - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S2, 0, SLJIT_IMM, sizeof(sljit_f32)); - sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_POST, SLJIT_FR1, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32), SLJIT_FR1, 0); - /* wbuf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R1, 0); - } - - supported[4] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); - if (supported[4] == SLJIT_SUCCESS) { - /* dbuf[3] */ - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 8 * 
sizeof(sljit_f64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -8 * (sljit_sw)sizeof(sljit_f64)); - sljit_emit_fmem_update(compiler, SLJIT_MOV_F64, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); - /* wbuf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0); - } - - supported[5] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE, SLJIT_FR2, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 0); - if (supported[5] == SLJIT_SUCCESS) { - /* sbuf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S2, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3 * sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); - sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE, SLJIT_FR2, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 0); - /* wbuf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); - } - - SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0) == SLJIT_ERR_UNSUPPORTED); - SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0) == SLJIT_ERR_UNSUPPORTED); - -#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) - /* TODO: at least for ARM (both V5 and V7) the range below needs further fixing */ - SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 256) == SLJIT_ERR_UNSUPPORTED); - SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -257) == SLJIT_ERR_UNSUPPORTED); -#endif - - sljit_emit_return_void(compiler); - - code.code = 
sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func3((sljit_sw)&wbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); - - FAILED(sizeof(expected) != sizeof(supported) / sizeof(sljit_s32), "test61 case 1 failed\n"); - - for (i = 0; i < sizeof(expected); i++) { - if (expected[i]) { - if (supported[i] != SLJIT_SUCCESS) { - printf("tast61 case %d should be supported\n", i + 1); - return; - } - } else { - if (supported[i] == SLJIT_SUCCESS) { - printf("test61 case %d should not be supported\n", i + 1); - return; - } - } - } - - FAILED(supported[0] == SLJIT_SUCCESS && dbuf[1] != 66.725, "test61 case 2 failed\n"); - FAILED(supported[0] == SLJIT_SUCCESS && wbuf[0] != (sljit_sw)(dbuf), "test61 case 3 failed\n"); - FAILED(supported[1] == SLJIT_SUCCESS && dbuf[2] != 66.725, "test61 case 4 failed\n"); - FAILED(supported[1] == SLJIT_SUCCESS && wbuf[1] != (sljit_sw)(dbuf + 1), "test61 case 5 failed\n"); - FAILED(supported[2] == SLJIT_SUCCESS && sbuf[0] != -22.125, "test61 case 6 failed\n"); - FAILED(supported[2] == SLJIT_SUCCESS && wbuf[2] != (sljit_sw)(sbuf), "test61 case 7 failed\n"); - FAILED(supported[3] == SLJIT_SUCCESS && sbuf[2] != -22.125, "test61 case 8 failed\n"); - FAILED(supported[3] == SLJIT_SUCCESS && wbuf[3] != (sljit_sw)(sbuf + 2), "test61 case 9 failed\n"); - FAILED(supported[4] == SLJIT_SUCCESS && dbuf[3] != 66.725, "test61 case 10 failed\n"); - FAILED(supported[4] == SLJIT_SUCCESS && wbuf[4] != (sljit_sw)(dbuf), "test61 case 11 failed\n"); - FAILED(supported[5] == SLJIT_SUCCESS && sbuf[3] != -22.125, "test61 case 12 failed\n"); - FAILED(supported[5] == SLJIT_SUCCESS && wbuf[5] != (sljit_sw)(sbuf + 3), "test61 case 13 failed\n"); - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test62(void) +static void test49(void) { /* Test fast calls flag preservation. 
*/ executable_code code1; @@ -6668,7 +5154,7 @@ static void test62(void) struct sljit_compiler* compiler; if (verbose) - printf("Run test62\n"); + printf("Run test49\n"); /* A */ compiler = sljit_create_compiler(NULL, NULL); @@ -6700,32 +5186,28 @@ static void test62(void) CHECK(compiler); sljit_free_compiler(compiler); - FAILED(code2.func1(88) != 0, "test62 case 1 failed\n"); - FAILED(code2.func1(42) != 1, "test62 case 2 failed\n"); - FAILED(code2.func1(0) != 2, "test62 case 3 failed\n"); + FAILED(code2.func1(88) != 0, "test49 case 1 failed\n"); + FAILED(code2.func1(42) != 1, "test49 case 2 failed\n"); + FAILED(code2.func1(0) != 2, "test49 case 3 failed\n"); sljit_free_code(code1.code, NULL); sljit_free_code(code2.code, NULL); successful_tests++; } -static void test63(void) +static void test50(void) { /* Test put label. */ executable_code code; struct sljit_label *label[2]; - struct sljit_put_label *put_label[5]; + struct sljit_jump *mov_addr[5]; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); sljit_uw addr[2]; sljit_uw buf[4]; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw offs = SLJIT_W(0x123456789012); -#else - sljit_sw offs = 0x12345678; -#endif + sljit_sw offs = WCONST(0x123456789012, 0x12345678); if (verbose) - printf("Run test63\n"); + printf("Run test50\n"); FAILED(!compiler, "cannot create compiler\n"); buf[0] = 0; @@ -6736,30 +5218,30 @@ static void test63(void) sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 3, 1, 0, 0, 2 * sizeof(sljit_sw)); /* buf[0-1] */ - put_label[0] = sljit_emit_put_label(compiler, SLJIT_R0, 0); + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); - put_label[1] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); + mov_addr[1] = sljit_emit_mov_addr(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_uw), 
SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); label[0] = sljit_emit_label(compiler); - sljit_set_put_label(put_label[0], label[0]); - sljit_set_put_label(put_label[1], label[0]); + sljit_set_label(mov_addr[0], label[0]); + sljit_set_label(mov_addr[1], label[0]); /* buf[2-3] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)(buf + 2) - offs); - put_label[2] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_R0), offs); + mov_addr[2] = sljit_emit_mov_addr(compiler, SLJIT_MEM1(SLJIT_R0), offs); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (offs + (sljit_sw)sizeof(sljit_uw)) >> 1); - put_label[3] = sljit_emit_put_label(compiler, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); + mov_addr[3] = sljit_emit_mov_addr(compiler, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); label[1] = sljit_emit_label(compiler); - sljit_set_put_label(put_label[2], label[1]); - sljit_set_put_label(put_label[3], label[1]); + sljit_set_label(mov_addr[2], label[1]); + sljit_set_label(mov_addr[3], label[1]); /* Return value */ - put_label[4] = sljit_emit_put_label(compiler, SLJIT_RETURN_REG, 0); - sljit_set_put_label(put_label[4], label[0]); + mov_addr[4] = sljit_emit_mov_addr(compiler, SLJIT_RETURN_REG, 0); + sljit_set_label(mov_addr[4], label[0]); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); code.code = sljit_generate_code(compiler); @@ -6770,45 +5252,38 @@ static void test63(void) sljit_free_compiler(compiler); - FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)addr[0], "test63 case 1 failed\n"); - FAILED(buf[0] != addr[0], "test63 case 2 failed\n"); - FAILED(buf[1] != addr[0], "test63 case 3 failed\n"); - FAILED(buf[2] != addr[1], "test63 case 4 failed\n"); - FAILED(buf[3] != addr[1], "test63 case 5 failed\n"); + FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)addr[0], "test50 case 1 failed\n"); + FAILED(buf[0] != addr[0], "test50 case 2 failed\n"); + FAILED(buf[1] != addr[0], "test50 case 3 failed\n"); + FAILED(buf[2] != addr[1], "test50 case 4 failed\n"); + 
FAILED(buf[3] != addr[1], "test50 case 5 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test64(void) +static void test51(void) { /* Test put label with absolute label addresses */ executable_code code; sljit_uw malloc_addr; - struct sljit_label label[6]; - struct sljit_put_label *put_label[2]; + struct sljit_jump *mov_addr[2]; struct sljit_compiler* compiler; - sljit_uw buf[7]; + sljit_uw buf[9]; sljit_s32 i; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) /* Must be even because it is also used for addressing. */ - sljit_sw offs1 = SLJIT_W(0x1234567812); - sljit_sw offs2 = SLJIT_W(0x123456781122); - sljit_sw offs3 = SLJIT_W(0x7fffffff7ff); - sljit_sw offs4 = SLJIT_W(0x1234567811223344); -#else /* !SLJIT_64BIT_ARCHITECTURE */ - sljit_sw offs1 = (sljit_sw)0x80000000; - sljit_sw offs2 = (sljit_sw)0xe0000000; - sljit_sw offs3 = (sljit_sw)0x87654321; - sljit_sw offs4 = (sljit_sw)0xffffffff; -#endif /* SLJIT_64BIT_ARCHITECTURE */ + sljit_uw offs1 = 0x7f1f; + sljit_uw offs2 = 0x7f1f2f3f; + sljit_uw offs3 = (sljit_uw)WCONST(0xfedcba9876, 0x80000000); + sljit_uw offs4 = (sljit_uw)WCONST(0x789abcdeff12, 0xefdfcfbf); + sljit_uw offs5 = (sljit_uw)WCONST(0x7fffffff7ff, 0x87654321); + sljit_uw offs6 = (sljit_uw)WCONST(0xfedcba9811223344, 0xffffffff); if (verbose) - printf("Run test64\n"); + printf("Run test51\n"); /* lock next allocation; see sljit_test_malloc_exec() */ -#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) - malloc_addr = (sljit_uw)SLJIT_MALLOC_EXEC(1024, NULL); + malloc_addr = (sljit_uw)SLJIT_MALLOC_EXEC(2048, NULL); if (!malloc_addr) { printf("Cannot allocate executable memory\n"); @@ -6817,98 +5292,87 @@ static void test64(void) compiler = sljit_create_compiler(NULL, (void*)malloc_addr); malloc_addr += (sljit_uw)SLJIT_EXEC_OFFSET((void*)malloc_addr); -#else /* SLJIT_CONFIG_UNSUPPORTED */ - malloc_addr = 0; - compiler = sljit_create_compiler(NULL, (void*)malloc_addr); -#endif /* 
!SLJIT_CONFIG_UNSUPPORTED */ - - label[0].addr = 0x1234; - label[0].size = (sljit_uw)0x1234 - malloc_addr; - - label[1].addr = 0x12345678; - label[1].size = (sljit_uw)0x12345678 - malloc_addr; - - label[2].addr = (sljit_uw)offs1; - label[2].size = (sljit_uw)offs1 - malloc_addr; - - label[3].addr = (sljit_uw)offs2; - label[3].size = (sljit_uw)offs2 - malloc_addr; - - label[4].addr = (sljit_uw)offs3; - label[4].size = (sljit_uw)offs3 - malloc_addr; - - label[5].addr = (sljit_uw)offs4; - label[5].size = (sljit_uw)offs4 - malloc_addr; - FAILED(!compiler, "cannot create compiler\n"); - for (i = 0; i < 6; i++) + for (i = 0; i < 9; i++) buf[i] = 0; - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 3, 1, 0, 0, 2 * sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 3, 3, 0, 0, 2 * sizeof(sljit_sw)); /* buf[0] */ - put_label[0] = sljit_emit_put_label(compiler, SLJIT_R0, 0); + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); /* buf[1] */ - put_label[1] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); + mov_addr[1] = sljit_emit_mov_addr(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_uw), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); - sljit_set_put_label(put_label[0], &label[0]); - sljit_set_put_label(put_label[1], &label[0]); + sljit_set_target(mov_addr[0], malloc_addr); + sljit_set_target(mov_addr[1], malloc_addr + 1); /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)(buf + 2) - offs1); - put_label[0] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_R0), offs1); + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_uw), SLJIT_S2, 0); /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (offs1 + (sljit_sw)sizeof(sljit_uw)) >> 1); - 
put_label[1] = sljit_emit_put_label(compiler, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); + mov_addr[1] = sljit_emit_mov_addr(compiler, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_uw), SLJIT_MEM1(SLJIT_SP), 0); - sljit_set_put_label(put_label[0], &label[1]); - sljit_set_put_label(put_label[1], &label[1]); + sljit_set_target(mov_addr[0], offs1); + sljit_set_target(mov_addr[1], offs1); /* buf[4] */ - put_label[0] = sljit_emit_put_label(compiler, SLJIT_R1, 0); - sljit_set_put_label(put_label[0], &label[2]); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_uw), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)(buf + 4) - (sljit_sw)offs1); + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_MEM1(SLJIT_R0), (sljit_sw)offs1); /* buf[5] */ - put_label[0] = sljit_emit_put_label(compiler, SLJIT_R2, 0); - sljit_set_put_label(put_label[0], &label[3]); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_uw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)(buf + 5) - 0x1234); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)0x1234 >> 1); + mov_addr[1] = sljit_emit_mov_addr(compiler, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); + + sljit_set_target(mov_addr[0], offs2); + sljit_set_target(mov_addr[1], offs2); /* buf[6] */ - put_label[0] = sljit_emit_put_label(compiler, SLJIT_R1, 0); - sljit_set_put_label(put_label[0], &label[4]); + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_R1, 0); + sljit_set_target(mov_addr[0], offs3); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_uw), SLJIT_R1, 0); /* buf[7] */ - put_label[0] = sljit_emit_put_label(compiler, SLJIT_RETURN_REG, 0); - sljit_set_put_label(put_label[0], &label[5]); + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_R2, 0); + sljit_set_target(mov_addr[0], offs4); + sljit_emit_op1(compiler, SLJIT_MOV, 
SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_uw), SLJIT_R2, 0); + + /* buf[8] */ + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_S1, 0); + sljit_set_target(mov_addr[0], offs5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_uw), SLJIT_S1, 0); + + mov_addr[0] = sljit_emit_mov_addr(compiler, SLJIT_RETURN_REG, 0); + sljit_set_target(mov_addr[0], offs6); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); code.code = sljit_generate_code(compiler); CHECK(compiler); sljit_free_compiler(compiler); - SLJIT_ASSERT(SLJIT_FUNC_UADDR(code.code) >= malloc_addr && SLJIT_FUNC_UADDR(code.code) <= malloc_addr + 8); - FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)label[5].addr, "test64 case 1 failed\n"); - FAILED(buf[0] != label[0].addr, "test64 case 2 failed\n"); - FAILED(buf[1] != label[0].addr, "test64 case 3 failed\n"); - FAILED(buf[2] != label[1].addr, "test64 case 4 failed\n"); - FAILED(buf[3] != label[1].addr, "test64 case 5 failed\n"); - FAILED(buf[4] != label[2].addr, "test64 case 6 failed\n"); - FAILED(buf[5] != label[3].addr, "test64 case 7 failed\n"); - FAILED(buf[6] != label[4].addr, "test64 case 8 failed\n"); + FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)offs6, "test51 case 1 failed\n"); + FAILED(buf[0] != malloc_addr, "test51 case 2 failed\n"); + FAILED(buf[1] != malloc_addr + 1, "test51 case 3 failed\n"); + FAILED(buf[2] != offs1, "test51 case 4 failed\n"); + FAILED(buf[3] != offs1, "test51 case 5 failed\n"); + FAILED(buf[4] != offs2, "test51 case 6 failed\n"); + FAILED(buf[5] != offs2, "test51 case 7 failed\n"); + FAILED(buf[6] != offs3, "test51 case 8 failed\n"); + FAILED(buf[7] != offs4, "test51 case 9 failed\n"); + FAILED(buf[8] != offs5, "test51 case 10 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test65(void) +static void test52(void) { /* Test jump tables. 
*/ executable_code code; @@ -6920,7 +5384,7 @@ static void test65(void) struct sljit_jump *jump; if (verbose) - printf("Run test65\n"); + printf("Run test52\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -6949,17 +5413,17 @@ static void test65(void) sljit_free_compiler(compiler); - FAILED(code.func2(64, 0) != -1, "test65 case 1 failed\n"); + FAILED(code.func2(64, 0) != -1, "test52 case 1 failed\n"); for (i = 0; i < 64; i++) { - FAILED(code.func2(i, i * 2) != i * 4, "test65 case 2 failed\n"); + FAILED(code.func2(i, i * 2) != i * 4, "test52 case 2 failed\n"); } sljit_free_code(code.code, NULL); successful_tests++; } -static void test66(void) +static void test53(void) { /* Test direct jumps (computed goto). */ executable_code code; @@ -6969,7 +5433,7 @@ static void test66(void) struct sljit_label *labels[64]; if (verbose) - printf("Run test66\n"); + printf("Run test53\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -6993,14 +5457,14 @@ static void test66(void) sljit_free_compiler(compiler); for (i = 0; i < 64; i++) { - FAILED(code.func2((sljit_sw)addr[i], i) != i * 3, "test66 case 1 failed\n"); + FAILED(code.func2((sljit_sw)addr[i], i) != i * 3, "test53 case 1 failed\n"); } sljit_free_code(code.code, NULL); successful_tests++; } -static void test67(void) +static void test54(void) { /* Test skipping returns from fast calls (return type is fast). */ executable_code code; @@ -7009,7 +5473,7 @@ static void test67(void) struct sljit_label *label; if (verbose) - printf("Run test67\n"); + printf("Run test54\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -7053,13 +5517,13 @@ static void test67(void) sljit_free_compiler(compiler); - FAILED(code.func0() != 3, "test67 case 1 failed\n"); + FAILED(code.func0() != 3, "test54 case 1 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test68(void) +static void test55(void) { /* Test skipping returns from fast calls (return type is normal). 
*/ executable_code code; @@ -7069,7 +5533,7 @@ static void test68(void) int i; if (verbose) - printf("Run test68\n"); + printf("Run test55\n"); for (i = 0; i < 6; i++) { compiler = sljit_create_compiler(NULL, NULL); @@ -7104,7 +5568,7 @@ static void test68(void) sljit_free_compiler(compiler); if (SLJIT_UNLIKELY(code.func0() != 4)) { - printf("test68 case %d failed\n", i + 1); + printf("test55 case %d failed\n", i + 1); return; } sljit_free_code(code.code, NULL); @@ -7113,7 +5577,7 @@ static void test68(void) successful_tests++; } -static void test69(void) +static void test56(void) { /* Test sljit_set_current_flags. */ executable_code code; @@ -7122,14 +5586,14 @@ static void test69(void) sljit_s32 i; if (verbose) - printf("Run test69\n"); + printf("Run test56\n"); for (i = 0; i < 8; i++) buf[i] = 4; FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 3, 1, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 1, 0, 0, 0); /* buf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)1 << ((sizeof (sljit_sw) * 8) - 2)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_R0, 0); @@ -7187,31 +5651,29 @@ static void test69(void) code.func1((sljit_sw)&buf); - FAILED(buf[0] != 1, "test69 case 1 failed\n"); - FAILED(buf[1] != 2, "test69 case 2 failed\n"); - FAILED(buf[2] != 1, "test69 case 3 failed\n"); - FAILED(buf[3] != 2, "test69 case 4 failed\n"); - FAILED(buf[4] != 1, "test69 case 5 failed\n"); - FAILED(buf[5] != 2, "test69 case 6 failed\n"); - FAILED(buf[6] != 1, "test69 case 7 failed\n"); - FAILED(buf[7] != 2, "test69 case 8 failed\n"); + FAILED(buf[0] != 1, "test56 case 1 failed\n"); + FAILED(buf[1] != 2, "test56 case 2 failed\n"); + FAILED(buf[2] != 1, "test56 case 3 failed\n"); + FAILED(buf[3] != 2, "test56 case 4 failed\n"); + FAILED(buf[4] != 1, "test56 case 5 failed\n"); + FAILED(buf[5] != 2, "test56 case 6 failed\n"); + FAILED(buf[6] != 1, "test56 case 7 failed\n"); + 
FAILED(buf[7] != 2, "test56 case 8 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test70(void) +static void test57(void) { /* Test argument passing to sljit_emit_enter. */ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); sljit_sw wbuf[2]; sljit_s32 ibuf[2]; - sljit_f64 dbuf[3]; - sljit_f32 fbuf[2]; if (verbose) - printf("Run test70\n"); + printf("Run test57\n"); wbuf[0] = 0; wbuf[1] = 0; @@ -7220,7 +5682,7 @@ static void test70(void) FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, 32, W, 32, W), 1, 4, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(32, W, 32, W), 1, 4, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&wbuf); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S1, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S3, 0); @@ -7233,19 +5695,19 @@ static void test70(void) CHECK(compiler); sljit_free_compiler(compiler); - code.test70_f1(-1478, 9476, 4928, -6832); + code.test57_f1(-1478, 9476, 4928, -6832); - FAILED(wbuf[0] != 9476, "test70 case 1 failed\n"); - FAILED(wbuf[1] != -6832, "test70 case 2 failed\n"); - FAILED(ibuf[0] != -1478, "test70 case 3 failed\n"); - FAILED(ibuf[1] != 4928, "test70 case 4 failed\n"); + FAILED(wbuf[0] != 9476, "test57 case 1 failed\n"); + FAILED(wbuf[1] != -6832, "test57 case 2 failed\n"); + FAILED(ibuf[0] != -1478, "test57 case 3 failed\n"); + FAILED(ibuf[1] != 4928, "test57 case 4 failed\n"); sljit_free_code(code.code, NULL); compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, 32, 32, W, W), 1, 4, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(32, 32, W, W), 1, 4, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&wbuf); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, 
SLJIT_S0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S1, 0); @@ -7258,677 +5720,35 @@ static void test70(void) CHECK(compiler); sljit_free_compiler(compiler); - code.test70_f2(4721, 7892, -3579, -4830); + code.test57_f2(4721, 7892, -3579, -4830); - FAILED(wbuf[0] != 4721, "test70 case 5 failed\n"); - FAILED(wbuf[1] != 7892, "test70 case 6 failed\n"); - FAILED(ibuf[0] != -3579, "test70 case 7 failed\n"); - FAILED(ibuf[1] != -4830, "test70 case 8 failed\n"); + FAILED(wbuf[0] != 4721, "test57 case 5 failed\n"); + FAILED(wbuf[1] != 7892, "test57 case 6 failed\n"); + FAILED(ibuf[0] != -3579, "test57 case 7 failed\n"); + FAILED(ibuf[1] != -4830, "test57 case 8 failed\n"); sljit_free_code(code.code, NULL); - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - wbuf[0] = 0; - ibuf[0] = 0; - dbuf[0] = 0; - fbuf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, 32, F32, W, F64), 2, 2, 2, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_S1, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR1, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&fbuf, SLJIT_FR0, 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f3(-6834, 674.5, 2789, -895.25); - - FAILED(wbuf[0] != 2789, "test70 case 9 failed\n"); - FAILED(ibuf[0] != -6834, "test70 case 10 failed\n"); - FAILED(dbuf[0] != -895.25, "test70 case 11 failed\n"); - FAILED(fbuf[0] != 674.5, "test70 case 12 failed\n"); - - ibuf[0] = 0; - dbuf[0] = 0; - fbuf[0] = 0; - fbuf[1] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, 
SLJIT_ARGS4(VOID, F32, F64, F32, 32), 1, 1, 3, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&fbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f32), SLJIT_FR2, 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f4(-4712.5, 5342.25, 2904.25, -4607); - - FAILED(ibuf[0] != -4607, "test70 case 13 failed\n"); - FAILED(dbuf[0] != 5342.25, "test70 case 14 failed\n"); - FAILED(fbuf[0] != -4712.5, "test70 case 15 failed\n"); - FAILED(fbuf[1] != 2904.25, "test70 case 16 failed\n"); - - ibuf[0] = 0; - dbuf[0] = 0; - fbuf[0] = 0; - fbuf[1] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F64, F32, 32, F32), 1, 1, 3, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&fbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR1, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f32), SLJIT_FR2, 0); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f5(3578.5, 4619.25, 6859, -1807.75); - - FAILED(ibuf[0] != 6859, "test70 case 17 failed\n"); - FAILED(dbuf[0] != 3578.5, "test70 case 18 failed\n"); - FAILED(fbuf[0] != 4619.25, "test70 case 19 failed\n"); - FAILED(fbuf[1] != -1807.75, "test70 case 20 
failed\n"); - - ibuf[0] = 0; - dbuf[0] = 0; - dbuf[1] = 0; - fbuf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F64, 32, F32, F64), SLJIT_NUMBER_OF_SCRATCH_REGISTERS + 2, 1, 3, 0, 33); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR2, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&fbuf, SLJIT_FR1, 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f6(2740.75, -2651, -7909.25, 3671.5); - - FAILED(ibuf[0] != -2651, "test70 case 21 failed\n"); - FAILED(dbuf[0] != 2740.75, "test70 case 22 failed\n"); - FAILED(dbuf[1] != 3671.5, "test70 case 23 failed\n"); - FAILED(fbuf[0] != -7909.25, "test70 case 24 failed\n"); - - wbuf[0] = 0; - ibuf[0] = 0; - ibuf[1] = 0; - fbuf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F32, 32, W, 32), 1, 3, 1, 0, 1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_S1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&ibuf); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_s32), SLJIT_S2, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&fbuf, SLJIT_FR0, 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f7(-5219.25, -4530, 7214, 
6741); - - FAILED(wbuf[0] != 7214, "test70 case 25 failed\n"); - FAILED(ibuf[0] != -4530, "test70 case 26 failed\n"); - FAILED(ibuf[1] != 6741, "test70 case 27 failed\n"); - FAILED(fbuf[0] != -5219.25, "test70 case 28 failed\n"); - - wbuf[0] = 0; - wbuf[1] = 0; - dbuf[0] = 0; - dbuf[1] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F64, F64, W, W), 1, 5, 2, 0, SLJIT_MAX_LOCAL_SIZE - 1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_S0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), SLJIT_MAX_LOCAL_SIZE - 2 * sizeof(sljit_f64), SLJIT_FR0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&wbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f8(-3749.75, 5280.5, 9134, -6506); - - FAILED(wbuf[0] != 9134, "test70 case 29 failed\n"); - FAILED(wbuf[1] != -6506, "test70 case 30 failed\n"); - FAILED(dbuf[0] != -3749.75, "test70 case 31 failed\n"); - FAILED(dbuf[1] != 5280.5, "test70 case 32 failed\n"); - - wbuf[0] = 0; - dbuf[0] = 0; - dbuf[1] = 0; - dbuf[2] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F64, F64, W, F64), 1, 1, 3, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_S0, 0); - 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR2, 0); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f9(-6049.25, 7301.5, 4610, -4312.75); - - FAILED(wbuf[0] != 4610, "test70 case 33 failed\n"); - FAILED(dbuf[0] != -6049.25, "test70 case 34 failed\n"); - FAILED(dbuf[1] != 7301.5, "test70 case 35 failed\n"); - FAILED(dbuf[2] != -4312.75, "test70 case 36 failed\n"); - - ibuf[0] = 0; - dbuf[0] = 0; - dbuf[1] = 0; - dbuf[2] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F64, F64, F64, 32), 1, 1, 3, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR2, 0); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test70_f10(4810.5, -9148.75, 8601.25, 6703); - - FAILED(ibuf[0] != 6703, "test70 case 37 failed\n"); - FAILED(dbuf[0] != 4810.5, "test70 case 38 failed\n"); - FAILED(dbuf[1] != -9148.75, "test70 case 39 failed\n"); - FAILED(dbuf[2] != 8601.25, "test70 case 40 failed\n"); - } - successful_tests++; } -static sljit_sw test71_f1(sljit_sw a) +static void test58(void) { - 
return a + 10000; -} - -static sljit_sw test71_f2(sljit_sw a, sljit_s32 b, sljit_s32 c, sljit_sw d) -{ - return a | b | c | d; -} - -static sljit_sw test71_f3(sljit_sw a, sljit_s32 b, sljit_s32 c, sljit_sw d) -{ - SLJIT_UNUSED_ARG(a); - return b | c | d; -} - -static sljit_sw test71_f4(void) -{ - return 7461932; -} - -static sljit_sw test71_f5(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_f64 d) -{ - if (a == 1345.5 && b == -8724.25 && c == 9034.75 && d == 6307.5) - return 8920567; - return 0; -} - -static sljit_sw test71_f6(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_sw d) -{ - if (a == 4061.25 && b == -3291.75 && c == 8703.5 && d == 1706) - return 5074526; - return 0; -} - -static void test71(void) -{ - /* Test tail calls. */ - executable_code code; - struct sljit_compiler* compiler; - struct sljit_jump *jump; - sljit_uw jump_addr; - sljit_sw executable_offset; - sljit_sw wbuf[1]; - sljit_f64 dbuf[4]; - - if (verbose) - printf("Run test71\n"); - - /* Next test. */ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 4, 4, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, -1); - sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(test71_f1)); - /* Should crash. 
*/ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.func1(7987) != 17987, "test71 case 1 failed\n"); - - /* Next test. */ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 1, 4, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, -1); - jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN, SLJIT_ARGS1(W, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - sljit_set_target(jump, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - - executable_offset = sljit_get_executable_offset(compiler); - jump_addr = sljit_get_jump_addr(jump); - sljit_free_compiler(compiler); - - sljit_set_jump_addr(jump_addr, SLJIT_FUNC_UADDR(test71_f1), executable_offset); - - FAILED(code.func1(3903) != 13903, "test71 case 2 failed\n"); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 4, 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test71_f2)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x28000000); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x00140000); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, 0x00002800); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0x00000041); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); - sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, 32, 32, W), SLJIT_MEM1(SLJIT_SP), 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.func0() != 0x28142841, "test71 case 3 failed\n"); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 4, 4, 0, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test71_f2)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)0x81000000); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x00480000); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, 0x00002100); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0x00000014); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); - sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, 32, 32, W), SLJIT_S3, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.func0() != (sljit_sw)0x81482114, "test71 case 4 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 4, 0, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test71_f3)); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x342); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, 0x451000); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0x21000000); - sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, 32, 32, W), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.func0() != 0x21451342, "test71 case 5 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. */ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 1, 0, 0, 0, 9); - sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS0(W), SLJIT_IMM, SLJIT_FUNC_ADDR(test71_f4)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.func0() != 7461932, "test71 case 6 failed\n"); - - sljit_free_code(code.code, NULL); - - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - /* Next test. 
*/ - - dbuf[0] = 9034.75; - dbuf[1] = 6307.5; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(W, F32, F64), 1, 1, 4, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR0, 0, SLJIT_FR0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R0), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64)); - sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, F64), SLJIT_IMM, SLJIT_FUNC_ADDR(test71_f5)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.test71_f1(1345.5, -8724.25) != 8920567, "test71 case 7 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. */ - - wbuf[0] = SLJIT_FUNC_ADDR(test71_f5); - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(W, F64, F64, F64, F64), 1, 0, 4, 0, 0); - sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, F64), SLJIT_MEM0(), (sljit_sw)wbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.test71_f2(1345.5, -8724.25, 9034.75, 6307.5) != 8920567, "test71 case 8 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(W, F64, F64, F64), 1, 0, 4, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); - jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - sljit_set_target(jump, SLJIT_FUNC_UADDR(test71_f6)); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.test71_f3(4061.25, -3291.75, 8703.5) != 5074526, "test71 case 9 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. */ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(W, F64, F64, F64), SLJIT_NUMBER_OF_SCRATCH_REGISTERS + 1, 0, 4, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); - jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - sljit_set_target(jump, SLJIT_FUNC_UADDR(test71_f6)); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.test71_f3(4061.25, -3291.75, 8703.5) != 5074526, "test71 case 10 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(W, F64, F64, F64), SLJIT_NUMBER_OF_SCRATCH_REGISTERS + 1, 1, 3, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); - jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - sljit_set_target(jump, SLJIT_FUNC_UADDR(test71_f6)); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - FAILED(code.test71_f3(4061.25, -3291.75, 8703.5) != 5074526, "test71 case 11 failed\n"); - - sljit_free_code(code.code, NULL); - } - - successful_tests++; -} - -static void test72(void) -{ - /* Test using all fpu registers. */ - executable_code code; - struct sljit_compiler* compiler; - sljit_f64 buf[SLJIT_NUMBER_OF_FLOAT_REGISTERS]; - sljit_f64 buf2[2]; - struct sljit_jump *jump; - sljit_s32 i; - - if (verbose) - printf("Run test72\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test72 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - buf2[0] = 7.75; - buf2[1] = -8.25; - - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - buf[i] = 0.0; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 1, 2, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); - for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FR0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS1(VOID, W)); - - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, 
SLJIT_MEM1(SLJIT_S0), i * (sljit_sw)sizeof(sljit_f64), SLJIT_FR(i), 0); - sljit_emit_return_void(compiler); - - /* Called function. */ - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FR0, 0); - - sljit_set_context(compiler, 0, SLJIT_ARGS1(VOID, P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func2((sljit_sw)buf, (sljit_sw)buf2); - - for (i = 0; i < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i++) { - FAILED(buf[i] != -8.25, "test72 case 1 failed\n"); - } - - for (i = SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) { - FAILED(buf[i] != 7.75, "test72 case 2 failed\n"); - } - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - if (SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS >= 3) { - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - buf2[0] = -6.25; - buf2[1] = 3.75; - - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - buf[i] = 0.0; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 1, 2, SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2, 1, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FS0, 0, SLJIT_MEM1(SLJIT_S1), 0); - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FS0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS1(VOID, W)); - - for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), i * (sljit_sw)sizeof(sljit_f64), SLJIT_FR(i), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) * (sljit_sw)sizeof(sljit_f64), SLJIT_FS0, 0); - sljit_emit_return_void(compiler); - - /* Called function. 
*/ - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, SLJIT_MAX_LOCAL_SIZE); - - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); - for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FR0, 0); - - sljit_set_context(compiler, 0, SLJIT_ARGS1(VOID, P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func2((sljit_sw)buf, (sljit_sw)buf2); - - for (i = 0; i < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i++) { - FAILED(buf[i] != 3.75, "test72 case 3 failed\n"); - } - - for (i = SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2; i++) { - FAILED(buf[i] != -6.25, "test72 case 4 failed\n"); - } - - FAILED(buf[SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2] != 0, "test72 case 5 failed\n"); - FAILED(buf[SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1] != -6.25, "test72 case 6 failed\n"); - - sljit_free_code(code.code, NULL); - } - successful_tests++; -} - -static void test73(void) -{ - /* Test pasing arguments in registers. */ + /* Test passing arguments in registers. */ executable_code code; struct sljit_compiler* compiler; sljit_sw wbuf[2]; sljit_s32 ibuf[2]; - sljit_f64 dbuf[3]; if (verbose) - printf("Run test73\n"); + printf("Run test58\n"); /* Next test. 
*/ compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, 32_R, W, W_R, 32), 3, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(32_R, W, W_R, 32), 3, 2, 0, 0, 0); /* wbuf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&wbuf); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_S0, 0); @@ -7945,12 +5765,12 @@ static void test73(void) CHECK(compiler); sljit_free_compiler(compiler); - code.test73_f1(3467, -6781, 5038, 6310); + code.test58_f1(3467, -6781, 5038, 6310); - FAILED(wbuf[0] != -6781, "test73 case 1 failed\n"); - FAILED(wbuf[1] != 5038, "test73 case 2 failed\n"); - FAILED(ibuf[0] != 3467, "test73 case 3 failed\n"); - FAILED(ibuf[1] != 6310, "test73 case 4 failed\n"); + FAILED(wbuf[0] != -6781, "test58 case 1 failed\n"); + FAILED(wbuf[1] != 5038, "test58 case 2 failed\n"); + FAILED(ibuf[0] != 3467, "test58 case 3 failed\n"); + FAILED(ibuf[1] != 6310, "test58 case 4 failed\n"); sljit_free_code(code.code, NULL); /* Next test. 
*/ @@ -7958,7 +5778,7 @@ static void test73(void) compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, 32, W_R, W, 32_R), 4, 2, 0, 0, 8192); + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(32, W_R, W, 32_R), 4, 2, 0, 0, 8192); /* wbuf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&wbuf); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_R1, 0); @@ -7975,12 +5795,12 @@ static void test73(void) CHECK(compiler); sljit_free_compiler(compiler); - code.test73_f1(-9723, 5208, 4761, 5084); + code.test58_f1(-9723, 5208, 4761, 5084); - FAILED(wbuf[0] != 5208, "test73 case 5 failed\n"); - FAILED(wbuf[1] != 4761, "test73 case 6 failed\n"); - FAILED(ibuf[0] != -9723, "test73 case 7 failed\n"); - FAILED(ibuf[1] != 5084, "test73 case 8 failed\n"); + FAILED(wbuf[0] != 5208, "test58 case 5 failed\n"); + FAILED(wbuf[1] != 4761, "test58 case 6 failed\n"); + FAILED(ibuf[0] != -9723, "test58 case 7 failed\n"); + FAILED(ibuf[1] != 5084, "test58 case 8 failed\n"); sljit_free_code(code.code, NULL); /* Next test. 
*/ @@ -7988,7 +5808,7 @@ static void test73(void) compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, 32_R, W_R, W_R, 32_R), 4, 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(32_R, W_R, W_R, 32_R), 4, 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); /* wbuf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&wbuf); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R1, 0); @@ -8005,12 +5825,12 @@ static void test73(void) CHECK(compiler); sljit_free_compiler(compiler); - code.test73_f1(5934, 6043, -8572, -3861); + code.test58_f1(5934, 6043, -8572, -3861); - FAILED(wbuf[0] != 6043, "test73 case 9 failed\n"); - FAILED(wbuf[1] != -8572, "test73 case 10 failed\n"); - FAILED(ibuf[0] != 5934, "test73 case 11 failed\n"); - FAILED(ibuf[1] != -3861, "test73 case 12 failed\n"); + FAILED(wbuf[0] != 6043, "test58 case 9 failed\n"); + FAILED(wbuf[1] != -8572, "test58 case 10 failed\n"); + FAILED(ibuf[0] != 5934, "test58 case 11 failed\n"); + FAILED(ibuf[1] != -3861, "test58 case 12 failed\n"); sljit_free_code(code.code, NULL); /* Next test. 
*/ @@ -8018,8 +5838,8 @@ static void test73(void) compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, W_R, W_R, 32_R, 32_R), 4, 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_set_context(compiler, 0, SLJIT_ARGS4(VOID, W_R, W_R, 32_R, 32_R), 4, 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(W_R, W_R, 32_R, 32_R), 4, 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_set_context(compiler, 0, SLJIT_ARGS4V(W_R, W_R, 32_R, 32_R), 4, 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); /* wbuf[0] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&wbuf); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); @@ -8036,79 +5856,18 @@ static void test73(void) CHECK(compiler); sljit_free_compiler(compiler); - code.test73_f2(6732, -5916, 2740, -3621); + code.test58_f2(6732, -5916, 2740, -3621); - FAILED(wbuf[0] != 6732, "test73 case 13 failed\n"); - FAILED(wbuf[1] != -5916, "test73 case 14 failed\n"); - FAILED(ibuf[0] != 2740, "test73 case 15 failed\n"); - FAILED(ibuf[1] != -3621, "test73 case 16 failed\n"); + FAILED(wbuf[0] != 6732, "test58 case 13 failed\n"); + FAILED(wbuf[1] != -5916, "test58 case 14 failed\n"); + FAILED(ibuf[0] != 2740, "test58 case 15 failed\n"); + FAILED(ibuf[1] != -3621, "test58 case 16 failed\n"); sljit_free_code(code.code, NULL); - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F64, F64, F64, W_R), 1, 0, 3, 0, SLJIT_MAX_LOCAL_SIZE); - /* wbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_R0, 0); - /* dbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR2, 0); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test73_f3(7390.25, -8045.5, 1390.75, 8201); - - FAILED(wbuf[0] != 8201, "test73 case 17 failed\n"); - FAILED(dbuf[0] != 7390.25, "test73 case 18 failed\n"); - FAILED(dbuf[1] != -8045.5, "test73 case 19 failed\n"); - FAILED(dbuf[2] != 1390.75, "test73 case 20 failed\n"); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS4(VOID, F64, F64, W, W_R), 2, 1, 2, 0, SLJIT_MAX_LOCAL_SIZE); - /* wbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&wbuf); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); - /* wbuf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_R1, 0); - /* dbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.test73_f4(4892.75, -3702.5, 4731, 8530); - - FAILED(wbuf[0] != 4731, "test73 case 21 failed\n"); - FAILED(wbuf[1] != 8530, "test73 case 22 failed\n"); - FAILED(dbuf[0] != 4892.75, "test73 case 23 failed\n"); - FAILED(dbuf[1] != -3702.5, "test73 case 24 failed\n"); - } - successful_tests++; } -static void test74(void) +static void test59(void) { /* Test carry flag. 
*/ executable_code code; @@ -8117,7 +5876,7 @@ static void test74(void) sljit_s32 i; if (verbose) - printf("Run test74\n"); + printf("Run test59\n"); for (i = 0; i < 15; i++) wbuf[i] = -1; @@ -8125,7 +5884,7 @@ static void test74(void) compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, W), 3, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(W), 3, 2, 0, 0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); sljit_emit_op2u(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_IMM, 1); @@ -8230,907 +5989,21 @@ static void test74(void) code.func1((sljit_sw)&wbuf); - FAILED(wbuf[0] != 1, "test74 case 1 failed\n"); - FAILED(wbuf[1] != 1, "test74 case 2 failed\n"); - FAILED(wbuf[2] != 2, "test74 case 3 failed\n"); - FAILED(wbuf[3] != 1, "test74 case 4 failed\n"); - FAILED(wbuf[4] != 2, "test74 case 5 failed\n"); - FAILED(wbuf[5] != 1, "test74 case 6 failed\n"); - FAILED(wbuf[6] != 1, "test74 case 7 failed\n"); - FAILED(wbuf[7] != 1, "test74 case 8 failed\n"); - FAILED(wbuf[8] != 2, "test74 case 9 failed\n"); - FAILED(wbuf[9] != 2, "test74 case 10 failed\n"); - FAILED(wbuf[10] != 1, "test74 case 11 failed\n"); - FAILED(wbuf[11] != 2, "test74 case 12 failed\n"); - FAILED(wbuf[12] != 1, "test74 case 13 failed\n"); - FAILED(wbuf[13] != 1, "test74 case 14 failed\n"); - FAILED(wbuf[14] != 1, "test74 case 15 failed\n"); - - successful_tests++; -} - -static void test75_set(struct sljit_compiler *compiler, sljit_s32 compare, sljit_s32 type, sljit_s32 left_fr, sljit_s32 right_fr) -{ - /* Testing both sljit_emit_op_flags and sljit_emit_jump. 
*/ - struct sljit_jump* jump1; - struct sljit_jump* jump2; - sljit_s32 is_ordered; - - if (sljit_cmp_info(type)) { - sljit_emit_fop1(compiler, compare | SLJIT_SET(type & 0xfe), left_fr, 0, right_fr, 0); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, type); - jump1 = sljit_emit_jump(compiler, type); - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2); - sljit_set_label(jump1, sljit_emit_label(compiler)); - } else { - if (type < SLJIT_UNORDERED_OR_EQUAL) { - is_ordered = (type & 0x1) ^ 0x1; - type += SLJIT_UNORDERED_OR_EQUAL - SLJIT_ORDERED_EQUAL; - } else { - is_ordered = type & 0x1; - type -= SLJIT_UNORDERED_OR_EQUAL - SLJIT_ORDERED_EQUAL; - } - SLJIT_ASSERT(sljit_cmp_info(type) && sljit_cmp_info(SLJIT_UNORDERED) && sljit_cmp_info(SLJIT_ORDERED)); - - sljit_emit_fop1(compiler, compare | SLJIT_SET(type & 0xfe), left_fr, 0, right_fr, 0); - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, type); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); - - if (is_ordered) { - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_ORDERED); - - jump1 = sljit_emit_jump(compiler, SLJIT_UNORDERED); - jump2 = sljit_emit_jump(compiler, type); - sljit_set_label(jump1, sljit_emit_label(compiler)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); - sljit_set_label(jump2, sljit_emit_label(compiler)); - - sljit_emit_op2(compiler, SLJIT_AND, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0); - } else { - sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_UNORDERED); - - jump1 = sljit_emit_jump(compiler, SLJIT_UNORDERED); - jump2 = sljit_emit_jump(compiler, type); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); - sljit_set_label(jump1, sljit_emit_label(compiler)); - sljit_set_label(jump2, sljit_emit_label(compiler)); - - sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0); - } - - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R2, 0); - } - - 
sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_s8)); -} - -static void test75(void) -{ - /* Test floating point comparison. */ - executable_code code; - struct sljit_compiler* compiler; - sljit_s8 bbuf[96]; - sljit_s32 i; - - union { - sljit_f64 value; - struct { - sljit_s32 value1; - sljit_s32 value2; - } u; - } dbuf[3]; - - union { - sljit_f32 value; - sljit_s32 value1; - } sbuf[3]; - - if (verbose) - printf("Run test75\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test75 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - dbuf[0].u.value1 = 0x7fffffff; - dbuf[0].u.value2 = 0x7fffffff; - dbuf[1].value = -13.0; - dbuf[2].value = 27.0; - - sbuf[0].value1 = 0x7fffffff; - sbuf[1].value = -13.0; - sbuf[2].value = 27.0; - - for (i = 0; i < 96; i++) - bbuf[i] = -3; - - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 3, 3, 6, 0, 0); - - i = SLJIT_CMP_F64; - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); - - while (1) { - /* bbuf[0] and bbuf[48] */ - test75_set(compiler, i, SLJIT_ORDERED_EQUAL, SLJIT_FR2, SLJIT_FR3); - /* bbuf[1] and bbuf[49] */ - test75_set(compiler, i, SLJIT_ORDERED_EQUAL, SLJIT_FR2, SLJIT_FR4); - /* bbuf[2] and bbuf[50] */ - test75_set(compiler, i, SLJIT_ORDERED_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[3] and bbuf[51] */ - test75_set(compiler, i, SLJIT_ORDERED_EQUAL, 
SLJIT_FR0, SLJIT_FR2); - - /* bbuf[4] and bbuf[52] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR2, SLJIT_FR3); - /* bbuf[5] and bbuf[53] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR2, SLJIT_FR4); - /* bbuf[6] and bbuf[54] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[7] and bbuf[55] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[8] and bbuf[56] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR2, SLJIT_FR3); - /* bbuf[9] and bbuf[57] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR2, SLJIT_FR4); - /* bbuf[10] and bbuf[58] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR0, SLJIT_FR1); - /* bbuf[11] and bbuf[59] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[12] and bbuf[60] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR2, SLJIT_FR4); - /* bbuf[13] and bbuf[61] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR4, SLJIT_FR2); - /* bbuf[14] and bbuf[62] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[15] and bbuf[63] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[16] and bbuf[64] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR2, SLJIT_FR4); - /* bbuf[17] and bbuf[65] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR4, SLJIT_FR2); - /* bbuf[18] and bbuf[66] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR0, SLJIT_FR1); - /* bbuf[19] and bbuf[67] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[20] and bbuf[68] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR2, SLJIT_FR4); - /* bbuf[21] and bbuf[69] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR4, SLJIT_FR2); - /* bbuf[22] and bbuf[70] */ - test75_set(compiler, i, 
SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[23] and bbuf[71] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[24] and bbuf[72] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR2, SLJIT_FR4); - /* bbuf[25] and bbuf[73] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR2, SLJIT_FR3); - /* bbuf[26] and bbuf[74] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[27] and bbuf[75] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[28] and bbuf[76] */ - test75_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR2, SLJIT_FR3); - /* bbuf[29] and bbuf[77] */ - test75_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR2, SLJIT_FR4); - /* bbuf[30] and bbuf[78] */ - test75_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[31] and bbuf[79] */ - test75_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[32] and bbuf[80] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR2, SLJIT_FR4); - /* bbuf[33] and bbuf[81] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR2, SLJIT_FR3); - /* bbuf[34] and bbuf[82] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR0, SLJIT_FR1); - /* bbuf[35] and bbuf[83] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[36] and bbuf[84] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR2, SLJIT_FR4); - /* bbuf[37] and bbuf[85] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR4, SLJIT_FR2); - /* bbuf[38] and bbuf[86] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[39] and bbuf[87] */ - test75_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[40] and bbuf[88] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR2, SLJIT_FR4); - /* bbuf[41] and bbuf[89] */ - 
test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR4, SLJIT_FR2); - /* bbuf[42] and bbuf[90] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR0, SLJIT_FR1); - /* bbuf[43] and bbuf[91] */ - test75_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR0, SLJIT_FR2); - - /* bbuf[44] and bbuf[92] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR2, SLJIT_FR3); - /* bbuf[45] and bbuf[93] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR4, SLJIT_FR2); - /* bbuf[46] and bbuf[94] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR0, SLJIT_FR1); - /* bbuf[47] and bbuf[95] */ - test75_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR0, SLJIT_FR2); - - if (i == SLJIT_CMP_F32) - break; - - i = SLJIT_CMP_F32; - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32)); - } - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func3((sljit_sw)&bbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); - - /* SLJIT_CMP_F64 */ - FAILED(bbuf[0] != 1, "test75 case 1 failed\n"); - FAILED(bbuf[1] != 2, "test75 case 2 failed\n"); - FAILED(bbuf[2] != 2, "test75 case 3 failed\n"); - FAILED(bbuf[3] != 2, "test75 case 4 failed\n"); - - FAILED(bbuf[4] != 2, "test75 case 5 failed\n"); - FAILED(bbuf[5] != 1, "test75 case 6 failed\n"); - FAILED(bbuf[6] != 1, "test75 case 7 failed\n"); - FAILED(bbuf[7] != 1, "test75 case 8 failed\n"); - - FAILED(bbuf[8] != 2, "test75 case 9 failed\n"); - FAILED(bbuf[9] != 1, "test75 case 10 failed\n"); - FAILED(bbuf[10] != 2, 
"test75 case 11 failed\n"); - FAILED(bbuf[11] != 2, "test75 case 12 failed\n"); - - FAILED(bbuf[12] != 2, "test75 case 13 failed\n"); - FAILED(bbuf[13] != 1, "test75 case 14 failed\n"); - FAILED(bbuf[14] != 1, "test75 case 15 failed\n"); - FAILED(bbuf[15] != 1, "test75 case 16 failed\n"); - - FAILED(bbuf[16] != 2, "test75 case 17 failed\n"); - FAILED(bbuf[17] != 1, "test75 case 18 failed\n"); - FAILED(bbuf[18] != 2, "test75 case 19 failed\n"); - FAILED(bbuf[19] != 2, "test75 case 20 failed\n"); - - FAILED(bbuf[20] != 1, "test75 case 21 failed\n"); - FAILED(bbuf[21] != 2, "test75 case 22 failed\n"); - FAILED(bbuf[22] != 1, "test75 case 23 failed\n"); - FAILED(bbuf[23] != 1, "test75 case 24 failed\n"); - - FAILED(bbuf[24] != 2, "test75 case 25 failed\n"); - FAILED(bbuf[25] != 1, "test75 case 26 failed\n"); - FAILED(bbuf[26] != 1, "test75 case 27 failed\n"); - FAILED(bbuf[27] != 1, "test75 case 28 failed\n"); - - FAILED(bbuf[28] != 2, "test75 case 29 failed\n"); - FAILED(bbuf[29] != 1, "test75 case 30 failed\n"); - FAILED(bbuf[30] != 2, "test75 case 31 failed\n"); - FAILED(bbuf[31] != 2, "test75 case 32 failed\n"); - - FAILED(bbuf[32] != 1, "test75 case 33 failed\n"); - FAILED(bbuf[33] != 2, "test75 case 34 failed\n"); - FAILED(bbuf[34] != 1, "test75 case 35 failed\n"); - FAILED(bbuf[35] != 1, "test75 case 36 failed\n"); - - FAILED(bbuf[36] != 2, "test75 case 37 failed\n"); - FAILED(bbuf[37] != 1, "test75 case 38 failed\n"); - FAILED(bbuf[38] != 2, "test75 case 39 failed\n"); - FAILED(bbuf[39] != 2, "test75 case 40 failed\n"); - - FAILED(bbuf[40] != 2, "test75 case 41 failed\n"); - FAILED(bbuf[41] != 1, "test75 case 42 failed\n"); - FAILED(bbuf[42] != 1, "test75 case 43 failed\n"); - FAILED(bbuf[43] != 1, "test75 case 44 failed\n"); - - FAILED(bbuf[44] != 1, "test75 case 45 failed\n"); - FAILED(bbuf[45] != 2, "test75 case 46 failed\n"); - FAILED(bbuf[46] != 2, "test75 case 47 failed\n"); - FAILED(bbuf[47] != 2, "test75 case 48 failed\n"); - - /* SLJIT_CMP_F32 */ - 
FAILED(bbuf[48] != 1, "test75 case 49 failed\n"); - FAILED(bbuf[49] != 2, "test75 case 50 failed\n"); - FAILED(bbuf[50] != 2, "test75 case 51 failed\n"); - FAILED(bbuf[51] != 2, "test75 case 52 failed\n"); - - FAILED(bbuf[52] != 2, "test75 case 53 failed\n"); - FAILED(bbuf[53] != 1, "test75 case 54 failed\n"); - FAILED(bbuf[54] != 1, "test75 case 55 failed\n"); - FAILED(bbuf[55] != 1, "test75 case 56 failed\n"); - - FAILED(bbuf[56] != 2, "test75 case 57 failed\n"); - FAILED(bbuf[57] != 1, "test75 case 58 failed\n"); - FAILED(bbuf[58] != 2, "test75 case 59 failed\n"); - FAILED(bbuf[59] != 2, "test75 case 60 failed\n"); - - FAILED(bbuf[60] != 2, "test75 case 61 failed\n"); - FAILED(bbuf[61] != 1, "test75 case 62 failed\n"); - FAILED(bbuf[62] != 1, "test75 case 63 failed\n"); - FAILED(bbuf[63] != 1, "test75 case 64 failed\n"); - - FAILED(bbuf[64] != 2, "test75 case 65 failed\n"); - FAILED(bbuf[65] != 1, "test75 case 66 failed\n"); - FAILED(bbuf[66] != 2, "test75 case 67 failed\n"); - FAILED(bbuf[67] != 2, "test75 case 68 failed\n"); - - FAILED(bbuf[68] != 1, "test75 case 69 failed\n"); - FAILED(bbuf[69] != 2, "test75 case 70 failed\n"); - FAILED(bbuf[70] != 1, "test75 case 71 failed\n"); - FAILED(bbuf[71] != 1, "test75 case 72 failed\n"); - - FAILED(bbuf[72] != 2, "test75 case 73 failed\n"); - FAILED(bbuf[73] != 1, "test75 case 74 failed\n"); - FAILED(bbuf[74] != 1, "test75 case 75 failed\n"); - FAILED(bbuf[75] != 1, "test75 case 76 failed\n"); - - FAILED(bbuf[76] != 2, "test75 case 77 failed\n"); - FAILED(bbuf[77] != 1, "test75 case 78 failed\n"); - FAILED(bbuf[78] != 2, "test75 case 79 failed\n"); - FAILED(bbuf[79] != 2, "test75 case 80 failed\n"); - - FAILED(bbuf[80] != 1, "test75 case 81 failed\n"); - FAILED(bbuf[81] != 2, "test75 case 82 failed\n"); - FAILED(bbuf[82] != 1, "test75 case 83 failed\n"); - FAILED(bbuf[83] != 1, "test75 case 84 failed\n"); - - FAILED(bbuf[84] != 2, "test75 case 85 failed\n"); - FAILED(bbuf[85] != 1, "test75 case 86 failed\n"); - 
FAILED(bbuf[86] != 2, "test75 case 87 failed\n"); - FAILED(bbuf[87] != 2, "test75 case 88 failed\n"); - - FAILED(bbuf[88] != 2, "test75 case 89 failed\n"); - FAILED(bbuf[89] != 1, "test75 case 90 failed\n"); - FAILED(bbuf[90] != 1, "test75 case 91 failed\n"); - FAILED(bbuf[91] != 1, "test75 case 92 failed\n"); - - FAILED(bbuf[92] != 1, "test75 case 93 failed\n"); - FAILED(bbuf[93] != 2, "test75 case 94 failed\n"); - FAILED(bbuf[94] != 2, "test75 case 95 failed\n"); - FAILED(bbuf[95] != 2, "test75 case 96 failed\n"); - - successful_tests++; -} - -static void test76(void) -{ - /* Test register argument and keep saved registers. */ - executable_code code; - struct sljit_compiler* compiler; - struct sljit_jump* jump; - sljit_sw buf[9]; - sljit_f64 dbuf[3]; - sljit_s32 i; - - if (verbose) - printf("Run test76\n"); - - for (i = 0; i < 9; i++) - buf[i] = -1; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 4, 2, 0, 0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 7945); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -9267); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 4309); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -8321); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 6803); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -5497); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(W, W, W, W, W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, 
sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 4, 2, 0, 0, 32); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[3-6] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R3, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 6028); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4982); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1289); - - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - - FAILED(buf[0] != 6028, "test76 case 1 failed\n"); - FAILED(buf[1] != 6803, "test76 case 2 failed\n"); - FAILED(buf[2] != -5497, "test76 case 3 failed\n"); - FAILED(buf[3] != 7945, "test76 case 4 failed\n"); - FAILED(buf[4] != -9267, "test76 case 5 failed\n"); - FAILED(buf[5] != 4309, "test76 case 6 failed\n"); - FAILED(buf[6] != -8321, "test76 case 7 failed\n"); - - /* Next test. 
*/ - - for (i = 0; i < 9; i++) - buf[i] = -1; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 4, 2, 0, 0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -2608); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4751); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 5740); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -9704); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -8749); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 9213); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(W, W, W, W, W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 6, 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_set_context(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 6, 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[3-7] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_R1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_sw), SLJIT_R2, 0); - sljit_emit_op1(compiler, 
SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_sw), SLJIT_R3, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -7351); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 3628); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 0); - - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - - FAILED(buf[0] != -7351, "test76 case 8 failed\n"); - FAILED(buf[1] != 3628, "test76 case 9 failed\n"); - FAILED(buf[2] != 9213, "test76 case 10 failed\n"); - FAILED(buf[3] != -8749, "test76 case 11 failed\n"); - FAILED(buf[4] != -2608, "test76 case 12 failed\n"); - FAILED(buf[5] != 4751, "test76 case 13 failed\n"); - FAILED(buf[6] != 5740, "test76 case 14 failed\n"); - FAILED(buf[7] != -9704, "test76 case 15 failed\n"); - FAILED(buf[8] != -1, "test76 case 16 failed\n"); - - /* Next test. */ - - for (i = 0; i < 9; i++) - buf[i] = -1; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 4, 2, 0, 0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8653); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 7245); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3610); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 4591); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -2865); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 2510); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(VOID, W, W, W, W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S1, 0); - sljit_emit_return_void(compiler); 
- - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[2-7] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_sw), SLJIT_S1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 5 * sizeof(sljit_sw), SLJIT_R1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 6 * sizeof(sljit_sw), SLJIT_R2, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 7 * sizeof(sljit_sw), SLJIT_R3, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 5789); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -9214); - - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - - FAILED(buf[0] != 5789, "test76 case 17 failed\n"); - FAILED(buf[1] != -9214, "test76 case 18 failed\n"); - FAILED(buf[2] != -2865, "test76 case 19 failed\n"); - FAILED(buf[3] != 2510, "test76 case 20 failed\n"); - FAILED(buf[4] != 8653, "test76 case 21 failed\n"); - FAILED(buf[5] != 7245, "test76 case 22 failed\n"); - FAILED(buf[6] != -3610, "test76 case 23 failed\n"); - FAILED(buf[7] != 4591, "test76 case 24 failed\n"); - FAILED(buf[8] != -1, "test76 case 25 failed\n"); - - /* Next test. 
*/ - - for (i = 0; i < 9; i++) - buf[i] = -1; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 2, 3, 0, 0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 6071); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -3817); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 9250); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); - /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S2, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS0(W), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 5 * sizeof(sljit_sw), SLJIT_S1, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -6278); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1467); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 7150 - 1467); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 8413); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4892); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -7513); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, 
SLJIT_IMM, -1); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, W, W, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 4, 2, 0, 0, 256); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[6] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_R1, 0); - /* buf[7] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R2, 0); - /* buf[8] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R3, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 6923); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - - FAILED(buf[0] != 6923, "test76 case 26 failed\n"); - FAILED(buf[1] != 4892, "test76 case 27 failed\n"); - FAILED(buf[2] != -7513, "test76 case 28 failed\n"); - FAILED(buf[3] != 9250, "test76 case 29 failed\n"); - FAILED(buf[4] != 6071, "test76 case 30 failed\n"); - FAILED(buf[5] != -3817, "test76 case 31 failed\n"); - FAILED(buf[6] != -6278, "test76 case 32 failed\n"); - FAILED(buf[7] != 7150, "test76 case 33 failed\n"); - FAILED(buf[8] != 8413, "test76 case 34 failed\n"); - - if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - /* Next test. 
*/ - - for (i = 0; i < 9; i++) - buf[i] = -1; - - dbuf[0] = 4061.25; - dbuf[1] = -3291.75; - dbuf[2] = 8703.5; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 2, 3, 3, 0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&dbuf); - /* dbuf[0] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_R1), 0); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f64)); - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_f64)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -8956); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 4381); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -5314); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(W, F64, F64, F64, W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); - /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S2, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS4(W, F64, F64, F64, W_R), 1, 3, 3, 0, SLJIT_MAX_LOCAL_SIZE); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_S0, 0); - /* 
buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_R0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&dbuf); - /* dbuf[0] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_FR2, 0); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64), SLJIT_FR1, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2784); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 1503); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); - - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - - FAILED(buf[0] != 2784, "test76 case 35 failed\n"); - FAILED(buf[1] != 1503, "test76 case 36 failed\n"); - FAILED(buf[2] != 4381, "test76 case 37 failed\n"); - FAILED(buf[3] != -5314, "test76 case 38 failed\n"); - FAILED(buf[4] != -8956, "test76 case 39 failed\n"); - FAILED(buf[5] != 1706, "test76 case 40 failed\n"); - FAILED(buf[6] != -1, "test76 case 41 failed\n"); - FAILED(dbuf[0] != 8703.5, "test76 case 42 failed\n"); - FAILED(dbuf[1] != 4061.25, "test76 case 43 failed\n"); - FAILED(dbuf[2] != -3291.75, "test76 case 44 failed\n"); - - /* Next test. 
*/ - - for (i = 0; i < 9; i++) - buf[i] = -1; - - dbuf[0] = 4061.25; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 3, 3, 1, 0, 0); - - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM0(), (sljit_sw)&dbuf); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8793); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -4027); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2910); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4619); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1502); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 5316); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(VOID, F64, W, W, W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S1, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_sw), SLJIT_S2, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS4(VOID, F64, W_R, W_R, W_R), 3, 3, 3, 0, SLJIT_MAX_LOCAL_SIZE); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_sw), SLJIT_S1, 0); - /* buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 5 * sizeof(sljit_sw), SLJIT_R0, 0); - /* buf[6] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 6 * sizeof(sljit_sw), SLJIT_R1, 0); - /* 
buf[7] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 7 * sizeof(sljit_sw), SLJIT_R2, 0); - - sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 7839); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -9215); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); - - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - - FAILED(buf[0] != 7839, "test76 case 45 failed\n"); - FAILED(buf[1] != -9215, "test76 case 46 failed\n"); - FAILED(buf[2] != 5316, "test76 case 47 failed\n"); - FAILED(buf[3] != 4619, "test76 case 48 failed\n"); - FAILED(buf[4] != -1502, "test76 case 49 failed\n"); - FAILED(buf[5] != 8793, "test76 case 50 failed\n"); - FAILED(buf[6] != -4027, "test76 case 51 failed\n"); - FAILED(buf[7] != 2910, "test76 case 52 failed\n"); - FAILED(buf[8] != -1, "test76 case 53 failed\n"); - FAILED(dbuf[0] != -4061.25, "test76 case 54 failed\n"); - - /* Next test. 
*/ - - for (i = 0; i < 9; i++) - buf[i] = -1; - - dbuf[0] = 4061.25; - dbuf[1] = -3291.75; - dbuf[2] = 8703.5; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 2, 3, 0, 0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 7869); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -5406); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 4951); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); - /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S2, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS0(W), 1, 3, 3, 0, SLJIT_MAX_LOCAL_SIZE); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 5 * sizeof(sljit_sw), SLJIT_S1, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - /* dbuf[0] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_R0), 0); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64)); - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64)); - 
- sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4713); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -2078); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS4(W, F64, F64, F64, W_R), 1, 0, 3, 0, 256); - - /* buf[6] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&buf[6], SLJIT_R0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); - /* dbuf[0] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR2, 0); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR0, 0); - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR1, 0); - - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 5074); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)&buf); - - FAILED(buf[0] != 5074, "test76 case 55 failed\n"); - FAILED(buf[1] != 4713, "test76 case 56 failed\n"); - FAILED(buf[2] != -2078, "test76 case 57 failed\n"); - FAILED(buf[3] != 4951, "test76 case 58 failed\n"); - FAILED(buf[4] != 7869, "test76 case 59 failed\n"); - FAILED(buf[5] != -5406, "test76 case 60 failed\n"); - FAILED(buf[6] != 1706, "test76 case 61 failed\n"); - FAILED(buf[7] != -1, "test76 case 62 failed\n"); - FAILED(dbuf[0] != 8703.5, "test76 case 63 failed\n"); - FAILED(dbuf[1] != 4061.25, "test76 case 64 failed\n"); - FAILED(dbuf[2] != -3291.75, "test76 case 65 failed\n"); - } + FAILED(wbuf[0] != 1, "test59 case 1 failed\n"); + FAILED(wbuf[1] != 1, 
"test59 case 2 failed\n"); + FAILED(wbuf[2] != 2, "test59 case 3 failed\n"); + FAILED(wbuf[3] != 1, "test59 case 4 failed\n"); + FAILED(wbuf[4] != 2, "test59 case 5 failed\n"); + FAILED(wbuf[5] != 1, "test59 case 6 failed\n"); + FAILED(wbuf[6] != 1, "test59 case 7 failed\n"); + FAILED(wbuf[7] != 1, "test59 case 8 failed\n"); + FAILED(wbuf[8] != 2, "test59 case 9 failed\n"); + FAILED(wbuf[9] != 2, "test59 case 10 failed\n"); + FAILED(wbuf[10] != 1, "test59 case 11 failed\n"); + FAILED(wbuf[11] != 2, "test59 case 12 failed\n"); + FAILED(wbuf[12] != 1, "test59 case 13 failed\n"); + FAILED(wbuf[13] != 1, "test59 case 14 failed\n"); + FAILED(wbuf[14] != 1, "test59 case 15 failed\n"); successful_tests++; } @@ -9158,7 +6031,7 @@ static int cmp_u8(const void *src1, sljit_sw offset, const void *src2, sljit_uw return 1; } -static void test77(void) +static void test60(void) { /* Test unaligned accesses. */ executable_code code; @@ -9175,7 +6048,7 @@ static void test77(void) SLJIT_ASSERT(((sljit_uw)bbuf & 0x7) == 0); if (verbose) - printf("Run test77\n"); + printf("Run test60\n"); for (i = 0; i < 13; i++) wbuf[i] = -3; @@ -9201,7 +6074,7 @@ static void test77(void) compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P, P), 2, 2, 0, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 2, 2, 0, 0, 0); sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_UNALIGNED, SLJIT_R0, SLJIT_MEM0(), (sljit_sw)bbuf); /* wbuf[1] */ @@ -9262,31 +6135,31 @@ static void test77(void) #endif /* SLJIT_UNALIGNED */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 18 >> 1); - sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_UNALIGNED_16, SLJIT_R0, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 1); + sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_ALIGNED_16, SLJIT_R0, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 1); /* wbuf[8] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R0, 
0); sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM0(), (sljit_sw)&ibuf); /* bbuf[18] */ - sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_16, SLJIT_R0, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 1); + sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_16, SLJIT_R0, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 1); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_UNALIGNED_16, SLJIT_R0, SLJIT_MEM0(), (sljit_sw)bbuf + 22); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_ALIGNED_16, SLJIT_R0, SLJIT_MEM0(), (sljit_sw)bbuf + 22); /* wbuf[9] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_R0, 0); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 0); /* bbuf[22] */ - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_16, SLJIT_R0, SLJIT_MEM0(), (sljit_sw)bbuf + 22); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_16, SLJIT_R0, SLJIT_MEM0(), (sljit_sw)bbuf + 22); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 128); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_UNALIGNED_32, SLJIT_R0, SLJIT_MEM1(SLJIT_R0), -128 + 32); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_ALIGNED_32, SLJIT_R0, SLJIT_MEM1(SLJIT_R0), -128 + 32); /* wbuf[10] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R0, 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 128); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 0); /* bbuf[32] */ - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_32, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), -128 + 32); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_32, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), -128 + 32); sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_UNALIGNED, SLJIT_R0, SLJIT_MEM1(SLJIT_S0), 0); /* wbuf[11] */ @@ -9302,29 +6175,25 @@ static void 
test77(void) code.func2((sljit_sw)&wbuf, (sljit_sw)bbuf); - FAILED(wbuf[1] != -73, "test77 case 1 failed\n"); - FAILED(wbuf[2] != (sljit_u8)-73, "test77 case 2 failed\n"); - FAILED(bbuf[1] != -73, "test77 case 3 failed\n"); - FAILED(wbuf[3] != -28531, "test77 case 4 failed\n"); - FAILED(wbuf[4] != (sljit_u16)-28531, "test77 case 5 failed\n"); - FAILED(cmp_u8(bbuf, 3, hbuf, sizeof(sljit_s16)) != 1, "test77 case 6 failed\n"); - FAILED(wbuf[5] != -38512, "test77 case 7 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(wbuf[6] != (sljit_u32)-38512, "test77 case 8 failed\n"); -#else - FAILED(wbuf[6] != -38512, "test77 case 8 failed\n"); -#endif - FAILED(cmp_u8(bbuf, 5, ibuf, sizeof(sljit_s32)) != 1, "test77 case 9 failed\n"); - FAILED(wbuf[7] != -46870, "test77 case 10 failed\n"); - FAILED(cmp_u8(bbuf, 9, wbuf, sizeof(sljit_sw)) != 1, "test77 case 11 failed\n"); - FAILED(wbuf[8] != -38512, "test77 case 12 failed\n"); - FAILED(cmp_u8(bbuf, 18, ibuf, sizeof(sljit_s32)) != 1, "test77 case 13 failed\n"); - FAILED(wbuf[9] != -46870, "test77 case 14 failed\n"); - FAILED(cmp_u8(bbuf, 22, wbuf, sizeof(sljit_sw)) != 1, "test77 case 15 failed\n"); - FAILED(wbuf[10] != -46870, "test77 case 16 failed\n"); - FAILED(cmp_u8(bbuf, 32, wbuf, sizeof(sljit_sw)) != 1, "test77 case 17 failed\n"); - FAILED(wbuf[11] != -62945, "test77 case 18 failed\n"); - FAILED(wbuf[12] != (sljit_sw)&wbuf, "test77 case 19 failed\n"); + FAILED(wbuf[1] != -73, "test60 case 1 failed\n"); + FAILED(wbuf[2] != (sljit_u8)-73, "test60 case 2 failed\n"); + FAILED(bbuf[1] != -73, "test60 case 3 failed\n"); + FAILED(wbuf[3] != -28531, "test60 case 4 failed\n"); + FAILED(wbuf[4] != (sljit_u16)-28531, "test60 case 5 failed\n"); + FAILED(cmp_u8(bbuf, 3, hbuf, sizeof(sljit_s16)) != 1, "test60 case 6 failed\n"); + FAILED(wbuf[5] != -38512, "test60 case 7 failed\n"); + FAILED(wbuf[6] != WCONST((sljit_u32)-38512, -38512), "test60 case 8 failed\n"); + FAILED(cmp_u8(bbuf, 5, ibuf, 
sizeof(sljit_s32)) != 1, "test60 case 9 failed\n"); + FAILED(wbuf[7] != -46870, "test60 case 10 failed\n"); + FAILED(cmp_u8(bbuf, 9, wbuf, sizeof(sljit_sw)) != 1, "test60 case 11 failed\n"); + FAILED(wbuf[8] != -38512, "test60 case 12 failed\n"); + FAILED(cmp_u8(bbuf, 18, ibuf, sizeof(sljit_s32)) != 1, "test60 case 13 failed\n"); + FAILED(wbuf[9] != -46870, "test60 case 14 failed\n"); + FAILED(cmp_u8(bbuf, 22, wbuf, sizeof(sljit_sw)) != 1, "test60 case 15 failed\n"); + FAILED(wbuf[10] != -46870, "test60 case 16 failed\n"); + FAILED(cmp_u8(bbuf, 32, wbuf, sizeof(sljit_sw)) != 1, "test60 case 17 failed\n"); + FAILED(wbuf[11] != -62945, "test60 case 18 failed\n"); + FAILED(wbuf[12] != (sljit_sw)&wbuf, "test60 case 19 failed\n"); sljit_free_code(code.code, NULL); @@ -9349,7 +6218,7 @@ static void test77(void) compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, 0, SLJIT_ARGS3(VOID, P, P, P), 1, 3, 1, 0, 0); + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 1, 3, 1, 0, 0); sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_UNALIGNED, SLJIT_FR0, SLJIT_MEM0(), (sljit_sw)bbuf + 1); /* sbuf[1] */ @@ -9376,35 +6245,35 @@ static void test77(void) sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 100000 + 5); #endif /* SLJIT_FPU_UNALIGNED */ - sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_UNALIGNED_16, SLJIT_FR0, SLJIT_MEM1(SLJIT_S2), 14); + sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_ALIGNED_16, SLJIT_FR0, SLJIT_MEM1(SLJIT_S2), 14); /* sbuf[2] */ sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_FR0, 0); sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); /* bbuf[14] */ - sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_16, SLJIT_FR0, SLJIT_MEM1(SLJIT_S2), 14); + sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE | 
SLJIT_MEM_ALIGNED_16, SLJIT_FR0, SLJIT_MEM1(SLJIT_S2), 14); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 18 >> 1); - sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_UNALIGNED_16, SLJIT_FR0, SLJIT_MEM2(SLJIT_S2, SLJIT_R0), 1); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_ALIGNED_16, SLJIT_FR0, SLJIT_MEM2(SLJIT_S2, SLJIT_R0), 1); /* dbuf[2] */ sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); /* bbuf[18] */ - sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_16, SLJIT_FR0, SLJIT_MEM2(SLJIT_S2, SLJIT_R0), 1); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_16, SLJIT_FR0, SLJIT_MEM2(SLJIT_S2, SLJIT_R0), 1); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S2, 0, SLJIT_IMM, 128); - sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_UNALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -128 + 28); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_ALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -128 + 28); /* dbuf[3] */ sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); /* bbuf[28] */ - sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -128 + 28); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -128 + 28); - sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_UNALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_ALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); /* dbuf[4] */ - sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64)); + sljit_emit_fmem(compiler, 
SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64)); sljit_emit_return_void(compiler); @@ -9414,17 +6283,17 @@ static void test77(void) code.func3((sljit_sw)&dbuf, (sljit_sw)&sbuf, (sljit_sw)bbuf); - FAILED(sbuf[1] != -8812.25, "test77 case 20 failed\n"); - FAILED(cmp_u8(bbuf, 1, sbuf, sizeof(sljit_f32)) != 1, "test77 case 21 failed\n"); - FAILED(dbuf[1] != 6897.75, "test77 case 22 failed\n"); - FAILED(cmp_u8(bbuf, 5, dbuf, sizeof(sljit_f64)) != 1, "test77 case 23 failed\n"); - FAILED(sbuf[2] != -8812.25, "test77 case 24 failed\n"); - FAILED(cmp_u8(bbuf, 14, sbuf, sizeof(sljit_f32)) != 1, "test77 case 25 failed\n"); - FAILED(dbuf[2] != 6897.75, "test77 case 26 failed\n"); - FAILED(cmp_u8(bbuf, 18, dbuf, sizeof(sljit_f64)) != 1, "test77 case 27 failed\n"); - FAILED(dbuf[3] != 6897.75, "test77 case 28 failed\n"); - FAILED(cmp_u8(bbuf, 28, dbuf, sizeof(sljit_f64)) != 1, "test77 case 29 failed\n"); - FAILED(dbuf[4] != -18046.5, "test77 case 30 failed\n"); + FAILED(sbuf[1] != -8812.25, "test60 case 20 failed\n"); + FAILED(cmp_u8(bbuf, 1, sbuf, sizeof(sljit_f32)) != 1, "test60 case 21 failed\n"); + FAILED(dbuf[1] != 6897.75, "test60 case 22 failed\n"); + FAILED(cmp_u8(bbuf, 5, dbuf, sizeof(sljit_f64)) != 1, "test60 case 23 failed\n"); + FAILED(sbuf[2] != -8812.25, "test60 case 24 failed\n"); + FAILED(cmp_u8(bbuf, 14, sbuf, sizeof(sljit_f32)) != 1, "test60 case 25 failed\n"); + FAILED(dbuf[2] != 6897.75, "test60 case 26 failed\n"); + FAILED(cmp_u8(bbuf, 18, dbuf, sizeof(sljit_f64)) != 1, "test60 case 27 failed\n"); + FAILED(dbuf[3] != 6897.75, "test60 case 28 failed\n"); + FAILED(cmp_u8(bbuf, 28, dbuf, sizeof(sljit_f64)) != 1, "test60 case 29 failed\n"); + FAILED(dbuf[4] != -18046.5, "test60 case 30 failed\n"); sljit_free_code(code.code, NULL); } @@ -9432,88 +6301,7 @@ static void test77(void) successful_tests++; } -static void test78(void) -{ - /* Test register register preservation in keep saveds mode. 
*/ - executable_code code; - struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - sljit_sw buf[6 + SLJIT_NUMBER_OF_REGISTERS]; - struct sljit_jump* jump; - sljit_s32 i; - - if (verbose) - printf("Run test78\n"); - - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), SLJIT_NUMBER_OF_REGISTERS - 3, 3, 0, 0, 0); - - for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS - 3; i++) - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 8469 + 1805 * i); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 3671); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 2418); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 1597); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(VOID, W, W, W, W)); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 6), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); - /* buf[3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 3 * sizeof(sljit_sw), SLJIT_S0, 0); - /* buf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_sw), SLJIT_S1, 0); - /* buf[5] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 5 * sizeof(sljit_sw), SLJIT_S2, 0); - - for (i = 1; i < SLJIT_NUMBER_OF_REGISTERS - 3; i++) - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), (6 + i) * (sljit_sw)sizeof(sljit_sw), SLJIT_R(i), 0); - - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS4(VOID, W_R, W_R, W_R, W_R), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_set_context(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS4(VOID, W_R, W_R, W_R, W_R), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); - - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 0), SLJIT_S0, 0); - /* buf[1] */ - 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 1), SLJIT_S1, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 2), SLJIT_S2, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 6501); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 7149); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 5732); - - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG | SLJIT_CALL_RETURN, SLJIT_ARGS0(VOID)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM0(), 0); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS0(VOID), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE / 2); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - code.func0(); - - FAILED(buf[0] != 3671, "test78 case 1 failed\n"); - FAILED(buf[1] != 2418, "test78 case 2 failed\n"); - FAILED(buf[2] != 1597, "test78 case 3 failed\n"); - FAILED(buf[3] != 6501, "test78 case 4 failed\n"); - FAILED(buf[4] != 7149, "test78 case 5 failed\n"); - FAILED(buf[5] != 5732, "test78 case 6 failed\n"); - - for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS - 3; i++) { - FAILED(buf[6 + i] != 8469 + 1805 * i, "test78 case 7 failed\n"); - } - - sljit_free_code(code.code, NULL); - successful_tests++; -} - -static void test79(void) +static void test61(void) { /* Test register pair movement. 
*/ executable_code code; @@ -9524,7 +6312,7 @@ static void test79(void) sljit_s32 i; if (verbose) - printf("Run test79\n"); + printf("Run test61\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -9543,7 +6331,7 @@ static void test79(void) copy_u8(bbuf, 34, buf + 2, sizeof(sljit_sw)); copy_u8(bbuf, 34 + sizeof(sljit_sw), buf + 3, sizeof(sljit_sw)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, W), 5, 5, 0, 0, 3 * sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(W), 5, 5, 0, 0, 3 * sizeof(sljit_sw)); sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_LOAD, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); /* buf[4] */ @@ -9624,10 +6412,10 @@ static void test79(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_IMM, -9035); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)bbuf + 18 - 0x7f0f); /* bbuf[18], buf[18] + sizeof(sljit_sw) */ - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_16, SLJIT_REG_PAIR(SLJIT_R4, SLJIT_S4), SLJIT_MEM1(SLJIT_R0), 0x7f0f); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_16, SLJIT_REG_PAIR(SLJIT_R4, SLJIT_S4), SLJIT_MEM1(SLJIT_R0), 0x7f0f); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)bbuf + 34 - 0xfff); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_LOAD | SLJIT_MEM_UNALIGNED_16, SLJIT_REG_PAIR(SLJIT_S1, SLJIT_R0), SLJIT_MEM1(SLJIT_S1), 0xfff); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_LOAD | SLJIT_MEM_ALIGNED_16, SLJIT_REG_PAIR(SLJIT_S1, SLJIT_R0), SLJIT_MEM1(SLJIT_S1), 0xfff); /* buf[26] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 26 * sizeof(sljit_sw), SLJIT_S1, 0); /* buf[27] */ @@ -9650,7 +6438,7 @@ static void test79(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)bbuf + 50 + 0xfff); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -9035); /* bbuf[50], buf[50] + sizeof(sljit_sw) */ - sljit_emit_mem(compiler, 
SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED_16, SLJIT_REG_PAIR(SLJIT_R2, SLJIT_R3), SLJIT_MEM1(SLJIT_R2), -0xfff); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_ALIGNED_16, SLJIT_REG_PAIR(SLJIT_R2, SLJIT_R3), SLJIT_MEM1(SLJIT_R2), -0xfff); sljit_emit_return_void(compiler); @@ -9660,65 +6448,68 @@ static void test79(void) code.func1((sljit_sw)buf); - FAILED(buf[3] != (sljit_sw)bbuf + 50 + 0xfff, "test79 case 1 failed\n"); - FAILED(buf[4] != 3724, "test79 case 1 failed\n"); - FAILED(buf[5] != -9035, "test79 case 2 failed\n"); - FAILED(buf[6] != 5814, "test79 case 3 failed\n"); - FAILED(buf[7] != 7201, "test79 case 4 failed\n"); - FAILED(buf[8] != -5836, "test79 case 5 failed\n"); - FAILED(buf[9] != 3724, "test79 case 6 failed\n"); - FAILED(buf[10] != -9035, "test79 case 7 failed\n"); - FAILED(buf[11] != buf[3], "test79 case 8 failed\n"); - FAILED(buf[12] != -8402, "test79 case 9 failed\n"); - FAILED(buf[13] != 6257, "test79 case 10 failed\n"); - FAILED(buf[14] != 6139, "test79 case 11 failed\n"); - FAILED(buf[15] != -7049, "test79 case 12 failed\n"); - FAILED(buf[16] != -5836, "test79 case 13 failed\n"); - FAILED(buf[17] != 3724, "test79 case 14 failed\n"); - FAILED(buf[18] != 3724, "test79 case 15 failed\n"); - FAILED(buf[19] != -9035, "test79 case 16 failed\n"); - FAILED(buf[20] != 3065, "test79 case 17 failed\n"); - FAILED(buf[21] != 7481, "test79 case 18 failed\n"); - FAILED(buf[22] != 3275, "test79 case 19 failed\n"); - FAILED(buf[23] != -8714, "test79 case 20 failed\n"); - FAILED(buf[24] != -5836, "test79 case 21 failed\n"); - FAILED(buf[25] != 3724, "test79 case 22 failed\n"); - FAILED(cmp_u8(bbuf, 18, buf + 1, sizeof(sljit_sw)) != 1, "test79 case 23 failed\n"); - FAILED(cmp_u8(bbuf, 18 + sizeof(sljit_sw), buf + 2, sizeof(sljit_sw)) != 1, "test79 case 24 failed\n"); - FAILED(buf[26] != -9035, "test79 case 25 failed\n"); - FAILED(buf[27] != buf[3], "test79 case 26 failed\n"); - FAILED(buf[28] != -9035, "test79 case 27 failed\n"); - 
FAILED(buf[29] != buf[3], "test79 case 28 failed\n"); - FAILED(buf[30] != -5836, "test79 case 29 failed\n"); - FAILED(buf[31] != 3724, "test79 case 30 failed\n"); - FAILED(cmp_u8(bbuf, 50, buf + 3, sizeof(sljit_sw)) != 1, "test79 case 31 failed\n"); - FAILED(cmp_u8(bbuf, 50 + sizeof(sljit_sw), buf + 2, sizeof(sljit_sw)) != 1, "test79 case 32 failed\n"); - FAILED(buf[32] != -1, "test79 case 33 failed\n"); + FAILED(buf[3] != (sljit_sw)bbuf + 50 + 0xfff, "test61 case 1 failed\n"); + FAILED(buf[4] != 3724, "test61 case 2 failed\n"); + FAILED(buf[5] != -9035, "test61 case 3 failed\n"); + FAILED(buf[6] != 5814, "test61 case 4 failed\n"); + FAILED(buf[7] != 7201, "test61 case 5 failed\n"); + FAILED(buf[8] != -5836, "test61 case 6 failed\n"); + FAILED(buf[9] != 3724, "test61 case 7 failed\n"); + FAILED(buf[10] != -9035, "test61 case 8 failed\n"); + FAILED(buf[11] != buf[3], "test61 case 9 failed\n"); + FAILED(buf[12] != -8402, "test61 case 10 failed\n"); + FAILED(buf[13] != 6257, "test61 case 11 failed\n"); + FAILED(buf[14] != 6139, "test61 case 12 failed\n"); + FAILED(buf[15] != -7049, "test61 case 13 failed\n"); + FAILED(buf[16] != -5836, "test61 case 14 failed\n"); + FAILED(buf[17] != 3724, "test61 case 15 failed\n"); + FAILED(buf[18] != 3724, "test61 case 16 failed\n"); + FAILED(buf[19] != -9035, "test61 case 17 failed\n"); + FAILED(buf[20] != 3065, "test61 case 18 failed\n"); + FAILED(buf[21] != 7481, "test61 case 19 failed\n"); + FAILED(buf[22] != 3275, "test61 case 20 failed\n"); + FAILED(buf[23] != -8714, "test61 case 21 failed\n"); + FAILED(buf[24] != -5836, "test61 case 22 failed\n"); + FAILED(buf[25] != 3724, "test61 case 23 failed\n"); + FAILED(cmp_u8(bbuf, 18, buf + 1, sizeof(sljit_sw)) != 1, "test61 case 24 failed\n"); + FAILED(cmp_u8(bbuf, 18 + sizeof(sljit_sw), buf + 2, sizeof(sljit_sw)) != 1, "test61 case 25 failed\n"); + FAILED(buf[26] != -9035, "test61 case 26 failed\n"); + FAILED(buf[27] != buf[3], "test61 case 27 failed\n"); + FAILED(buf[28] != -9035, 
"test61 case 28 failed\n"); + FAILED(buf[29] != buf[3], "test61 case 29 failed\n"); + FAILED(buf[30] != -5836, "test61 case 30 failed\n"); + FAILED(buf[31] != 3724, "test61 case 31 failed\n"); + FAILED(cmp_u8(bbuf, 50, buf + 3, sizeof(sljit_sw)) != 1, "test61 case 32 failed\n"); + FAILED(cmp_u8(bbuf, 50 + sizeof(sljit_sw), buf + 2, sizeof(sljit_sw)) != 1, "test61 case 33 failed\n"); + FAILED(buf[32] != -1, "test61 case 34 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test80(void) +static void test62(void) { /* Test masked shift. */ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - sljit_sw buf[8]; + sljit_sw buf[9]; sljit_s32 ibuf[8]; sljit_s32 i; if (verbose) - printf("Run test80\n"); + printf("Run test62\n"); FAILED(!compiler, "cannot create compiler\n"); - for (i = 0; i < 8; i++) + for (i = 0; i < 9; i++) buf[i] = -1; for (i = 0; i < 8; i++) ibuf[i] = -1; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + buf[7] = 3; + ibuf[6] = 4; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x1234); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 8 * sizeof(sljit_sw) + 4); @@ -9781,17 +6572,22 @@ static void test80(void) /* buf[6] */ sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_NOT_ZERO); + /* buf[7] */ + sljit_emit_op2(compiler, SLJIT_MSHL, SLJIT_MEM0(), (sljit_sw)&buf[7], SLJIT_IMM, 0xa, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw)); + /* ibuf[6] */ + sljit_emit_op2(compiler, SLJIT_MLSHR32, SLJIT_MEM0(), (sljit_sw)&ibuf[6], SLJIT_IMM, 0xa5f, SLJIT_MEM0(), (sljit_sw)&ibuf[6]); + #if (defined SLJIT_MASKED_SHIFT && SLJIT_MASKED_SHIFT) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 12344321); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (8 * sizeof(sljit_sw)) + 1); - /* 
buf[7] */ - sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R1, 0, SLJIT_R2, 0); + /* buf[8] */ + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R1, 0, SLJIT_R2, 0); #endif /* SLJIT_MASKED_SHIFT */ #if (defined SLJIT_MASKED_SHIFT32 && SLJIT_MASKED_SHIFT32) sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 24688643); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, (8 * sizeof(sljit_s32)) + 1); - /* ibuf[6] */ - sljit_emit_op2(compiler, SLJIT_LSHR32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_s32), SLJIT_R1, 0, SLJIT_R2, 0); + /* ibuf[7] */ + sljit_emit_op2(compiler, SLJIT_LSHR32, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_s32), SLJIT_R1, 0, SLJIT_R2, 0); #endif /* SLJIT_MASKED_SHIFT32 */ sljit_emit_return_void(compiler); @@ -9802,432 +6598,33 @@ static void test80(void) code.func2((sljit_sw)buf, (sljit_sw)ibuf); - FAILED(buf[0] != 0x12340, "test80 case 1 failed\n"); - FAILED(buf[1] != 0x1234, "test80 case 2 failed\n"); - FAILED(ibuf[0] != 0x567800, "test80 case 3 failed\n"); - FAILED(ibuf[1] != (sljit_sw)1 << 30, "test80 case 4 failed\n"); - FAILED(buf[2] != 1, "test80 case 5 failed\n"); - FAILED(buf[3] != ((sljit_uw)-1 >> 4), "test80 case 6 failed\n"); - FAILED(buf[4] != 1, "test80 case 7 failed\n"); - FAILED(ibuf[2] != 0x5678, "test80 case 8 failed\n"); - FAILED(ibuf[3] != 0x34567, "test80 case 9 failed\n"); - FAILED(buf[5] != -0x10, "test80 case 10 failed\n"); - FAILED(ibuf[4] != -0x80, "test80 case 11 failed\n"); - FAILED(ibuf[5] != 0, "test80 case 12 failed\n"); - FAILED(buf[6] != 0, "test80 case 13 failed\n"); + FAILED(buf[0] != 0x12340, "test62 case 1 failed\n"); + FAILED(buf[1] != 0x1234, "test62 case 2 failed\n"); + FAILED(ibuf[0] != 0x567800, "test62 case 3 failed\n"); + FAILED(ibuf[1] != (sljit_sw)1 << 30, "test62 case 4 failed\n"); + FAILED(buf[2] != 1, "test62 case 5 failed\n"); + FAILED(buf[3] != ((sljit_uw)-1 >> 4), "test62 case 6 failed\n"); + FAILED(buf[4] 
!= 1, "test62 case 7 failed\n"); + FAILED(ibuf[2] != 0x5678, "test62 case 8 failed\n"); + FAILED(ibuf[3] != 0x34567, "test62 case 9 failed\n"); + FAILED(buf[5] != -0x10, "test62 case 10 failed\n"); + FAILED(ibuf[4] != -0x80, "test62 case 11 failed\n"); + FAILED(ibuf[5] != 0, "test62 case 12 failed\n"); + FAILED(buf[6] != 0, "test62 case 13 failed\n"); + FAILED(buf[7] != 0x50, "test62 case 14 failed\n"); + FAILED(ibuf[6] != 0xa5, "test62 case 15 failed\n"); #if (defined SLJIT_MASKED_SHIFT && SLJIT_MASKED_SHIFT) - FAILED(buf[7] != 24688642, "test80 case 14 failed\n"); + FAILED(buf[8] != 24688642, "test62 case 16 failed\n"); #endif /* SLJIT_MASKED_SHIFT */ #if (defined SLJIT_MASKED_SHIFT32 && SLJIT_MASKED_SHIFT32) - FAILED(ibuf[6] != 12344321, "test80 case 15 failed\n"); + FAILED(ibuf[7] != 12344321, "test62 case 17 failed\n"); #endif /* SLJIT_MASKED_SHIFT32 */ sljit_free_code(code.code, NULL); successful_tests++; } -static void test81(void) -{ - /* Test return with floating point value. */ - executable_code code; - struct sljit_compiler* compiler; - struct sljit_jump* jump; - sljit_f64 dbuf[2]; - sljit_f32 sbuf[2]; - - if (verbose) - printf("Run test81\n"); - - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test81 skipped\n"); - successful_tests++; - return; - } - - /* Next test. */ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F64, W), 0, 1, 3, 0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_return(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - dbuf[0] = 35.125; - FAILED(code.test81_f2((sljit_sw)dbuf) != 35.125, "test81 case 1 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F32, W), 0, 1, 1, 0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_return(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - sbuf[0] = -9027.5; - FAILED(code.test81_f1((sljit_sw)sbuf) != -9027.5, "test81 case 2 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. */ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F32, W), 0, 1, 1, 0, sizeof(sljit_f32)); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_return(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - sbuf[0] = -6.75; - FAILED(code.test81_f1((sljit_sw)sbuf) != -6.75, "test81 case 3 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. */ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F64, W), 0, 1, 1, 0, 2 * sizeof(sljit_f64)); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_return(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f64)); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - dbuf[0] = 45.125; - FAILED(code.test81_f2((sljit_sw)dbuf) != 45.125, "test81 case 4 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 1, 0, 1, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)dbuf - 33); - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1(F64, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)dbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_RETURN_FREG, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS1(F64, W_R), 1, 0, 1, 0, 0); - sljit_emit_return(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 33); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - dbuf[0] = 2571.75; - dbuf[1] = 0; - code.func0(); - FAILED(dbuf[1] != 2571.75, "test81 case 5 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 1, 0, 1, 0, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(F32)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)sbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f32), SLJIT_RETURN_FREG, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS0(F32), 0, 0, 1, 0, 0); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0, SLJIT_MEM0(), (sljit_sw)sbuf); - sljit_emit_return(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - sbuf[0] = 6310.25; - sbuf[1] = 0; - code.func0(); - FAILED(sbuf[1] != 6310.25, "test81 case 6 failed\n"); - - sljit_free_code(code.code, NULL); - - successful_tests++; -} - -static void test82(void) -{ - /* Test return_to operation. */ - executable_code code, code2; - struct sljit_compiler* compiler; - struct sljit_jump* jump; - struct sljit_label* label; - sljit_s32 i; - sljit_sw buf[3]; - - if (verbose) - printf("Run test82\n"); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 2, 1, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -7602); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - label = sljit_emit_label(compiler); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); - sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W_R, W_R), 2, 0, 0, 0, 256); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 256 - sizeof(sljit_sw), SLJIT_IMM, -1); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 8945); - sljit_emit_return_to(compiler, SLJIT_MEM1(SLJIT_R1), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - - buf[0] = (sljit_sw)sljit_get_label_addr(label); - buf[1] = 0; - - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - FAILED(buf[0] != 8945, "test82 case 1 failed\n"); - FAILED(buf[1] != -7602, "test82 case 2 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - for (i = 0; i < 3; i++) { - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 2, 1, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 6032); - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1(W, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - label = sljit_emit_label(compiler); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_RETURN_REG, 0); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S0, 0); - sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS1(VOID, W_R), 2, i == 1 ? 
2 : 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_R0, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); - if (i == 2) - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 0); - else - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S(i), 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), SLJIT_MAX_LOCAL_SIZE - sizeof(sljit_sw), SLJIT_IMM, -1); - if (i != 0) - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -3890); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 7145); - if (i == 2) - sljit_emit_return_to(compiler, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_sw)); - else - sljit_emit_return_to(compiler, SLJIT_S(i), 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - - buf[0] = (sljit_sw)sljit_get_label_addr(label); - buf[1] = 0; - buf[2] = 0; - - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - FAILED(buf[0] != 7145, "test82 case 3 failed\n"); - FAILED(buf[1] != 6032, "test82 case 4 failed\n"); - if (i != 0) - FAILED(buf[2] != -3890, "test82 case 5 failed\n"); - - sljit_free_code(code.code, NULL); - } - - /* Next test. 
*/ - - for (i = 0; i < 3; i++) { - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P_R), 2, 1, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_R0, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS1(W, W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - label = sljit_emit_label(compiler); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)buf); - /* buf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); - sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, W_R), 2, 1, 0, 0, (i == 0) ? 0 : (i == 1) ? 512 : 32768); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 0x1000); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, -4502); - sljit_emit_return_to(compiler, SLJIT_MEM1(SLJIT_R1), -0x1000); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - - buf[0] = (sljit_sw)sljit_get_label_addr(label); - buf[1] = 0; - - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - FAILED(buf[0] != -4502, "test82 case 6 failed\n"); - FAILED(buf[1] != (sljit_sw)buf, "test82 case 7 failed\n"); - - sljit_free_code(code.code, NULL); - } - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - i = SLJIT_S2; -#else - i = SLJIT_S(SLJIT_NUMBER_OF_SAVED_REGISTERS - 1); -#endif - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, i, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(W)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - label = sljit_emit_label(compiler); - /* buf[2] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_S0, i), 0, SLJIT_RETURN_REG, 0); - sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 16); - for (i = 0; i < SLJIT_NUMBER_OF_SAVED_REGISTERS; i++) - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S(i), 0, SLJIT_IMM, -1); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, (sljit_sw)(buf + 3)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -3); - sljit_emit_return_to(compiler, SLJIT_MEM2(SLJIT_RETURN_REG, SLJIT_R1), SLJIT_WORD_SHIFT); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - - buf[0] = (sljit_sw)sljit_get_label_addr(label); - buf[1] = 0; - buf[2] = 0; - - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - FAILED(buf[2] != (sljit_sw)(buf + 3), "test82 case 8 failed\n"); - - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, P_R, P), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_S0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 586000); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 392); - sljit_emit_icall(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(W), SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); - label = sljit_emit_label(compiler); - /* buf[0] */ - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_S0, 0, SLJIT_S1, 0); - /* buf[1] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_RETURN_REG, 0); - sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - - buf[0] = (sljit_sw)sljit_get_label_addr(label); - - sljit_free_compiler(compiler); - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS0(VOID), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 16); - for (i = 2; i < SLJIT_NUMBER_OF_SAVED_REGISTERS; i++) - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S(i), 0, SLJIT_IMM, -1); - /* buf[2] */ - sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM0(), (sljit_sw)(buf + 2), SLJIT_S0, 0, SLJIT_S1, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 416000); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 931); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 2906); - sljit_emit_return_to(compiler, SLJIT_IMM, buf[0]); - - code2.code = sljit_generate_code(compiler); - CHECK(compiler); - sljit_free_compiler(compiler); - - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; - - 
code.func2(SLJIT_FUNC_ADDR(code2.func0), (sljit_sw)buf); - FAILED(buf[0] != 416931, "test82 case 9 failed\n"); - FAILED(buf[1] != 2906, "test82 case 10 failed\n"); - FAILED(buf[2] != 586392, "test82 case 11 failed\n"); - - sljit_free_code(code.code, NULL); - sljit_free_code(code2.code, NULL); - - successful_tests++; -} - -static void test83(void) +static void test63(void) { /* Test rotate. */ executable_code code; @@ -10242,7 +6639,7 @@ static void test83(void) #endif if (verbose) - printf("Run test83\n"); + printf("Run test63\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -10253,7 +6650,7 @@ static void test83(void) ibuf[0] = 8; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, WCONST(0x1234567812345678, 0x12345678)); sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 12); @@ -10363,33 +6760,33 @@ static void test83(void) code.func2((sljit_sw)buf, (sljit_sw)ibuf); - FAILED(buf[0] != WCONST(0x4567812345678123, 0x45678123), "test83 case 1 failed\n"); - FAILED(buf[1] != WCONST(0xfdb974130eca8643, 0xeca8643), "test83 case 2 failed\n"); - FAILED(buf[2] != WCONST(0xfedcba0987654321, 0x87654321), "test83 case 3 failed\n"); - FAILED(buf[3] != WCONST(0xcc3b2a190855e6f7, 0xc3b2a190), "test83 case 4 failed\n"); - FAILED(buf[4] != WCONST(0x9876543210abcdc0, 0x876543e0), "test83 case 5 failed\n"); - FAILED(buf[5] != WCONST(0x8123456781234567, 0x81234567), "test83 case 6 failed\n"); - FAILED(buf[6] != WCONST(0x4567812345678123, 0x45678123), "test83 case 7 failed\n"); - FAILED(buf[7] != WCONST(0x891a2b3c43b2a1a0, 0x8bb2a190), "test83 case 8 failed\n"); - FAILED(buf[8] != WCONST(0xfedcba0987654321, 0x87654321), "test83 case 9 failed\n"); - FAILED(buf[9] != WCONST(0xfedcba0987654321, 0x87654321), "test83 case 10 failed\n"); - FAILED(ibuf[0] != (sljit_s32)0x65432187, "test83 
case 11 failed\n"); - FAILED(buf[10] != -7834, "test83 case 12 failed\n"); - FAILED(ibuf[1] != (sljit_s32)0x57913bdf, "test83 case 13 failed\n"); - FAILED(ibuf[2] != (sljit_s32)0xabc89def, "test83 case 14 failed\n"); - FAILED(ibuf[3] != (sljit_s32)0xabc89def, "test83 case 15 failed\n"); - FAILED(buf[11] != -6512, "test83 case 16 failed\n"); - FAILED(ibuf[4] != (sljit_s32)0x1d950c86, "test83 case 17 failed\n"); - FAILED(ibuf[5] != (sljit_s32)0xffedccde, "test83 case 18 failed\n"); - FAILED(ibuf[6] != (sljit_s32)0x89abcdef, "test83 case 19 failed\n"); - FAILED(ibuf[7] != (sljit_s32)0x89abcde0, "test83 case 20 failed\n"); - FAILED(buf[12] != -2647, "test83 case 21 failed\n"); + FAILED(buf[0] != WCONST(0x4567812345678123, 0x45678123), "test63 case 1 failed\n"); + FAILED(buf[1] != WCONST(0xfdb974130eca8643, 0xeca8643), "test63 case 2 failed\n"); + FAILED(buf[2] != WCONST(0xfedcba0987654321, 0x87654321), "test63 case 3 failed\n"); + FAILED(buf[3] != WCONST(0xcc3b2a190855e6f7, 0xc3b2a190), "test63 case 4 failed\n"); + FAILED(buf[4] != WCONST(0x9876543210abcdc0, 0x876543e0), "test63 case 5 failed\n"); + FAILED(buf[5] != WCONST(0x8123456781234567, 0x81234567), "test63 case 6 failed\n"); + FAILED(buf[6] != WCONST(0x4567812345678123, 0x45678123), "test63 case 7 failed\n"); + FAILED(buf[7] != WCONST(0x891a2b3c43b2a1a0, 0x8bb2a190), "test63 case 8 failed\n"); + FAILED(buf[8] != WCONST(0xfedcba0987654321, 0x87654321), "test63 case 9 failed\n"); + FAILED(buf[9] != WCONST(0xfedcba0987654321, 0x87654321), "test63 case 10 failed\n"); + FAILED(ibuf[0] != (sljit_s32)0x65432187, "test63 case 11 failed\n"); + FAILED(buf[10] != -7834, "test63 case 12 failed\n"); + FAILED(ibuf[1] != (sljit_s32)0x57913bdf, "test63 case 13 failed\n"); + FAILED(ibuf[2] != (sljit_s32)0xabc89def, "test63 case 14 failed\n"); + FAILED(ibuf[3] != (sljit_s32)0xabc89def, "test63 case 15 failed\n"); + FAILED(buf[11] != -6512, "test63 case 16 failed\n"); + FAILED(ibuf[4] != (sljit_s32)0x1d950c86, "test63 case 17 
failed\n"); + FAILED(ibuf[5] != (sljit_s32)0xffedccde, "test63 case 18 failed\n"); + FAILED(ibuf[6] != (sljit_s32)0x89abcdef, "test63 case 19 failed\n"); + FAILED(ibuf[7] != (sljit_s32)0x89abcde0, "test63 case 20 failed\n"); + FAILED(buf[12] != -2647, "test63 case 21 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test84(void) +static void test64(void) { /* Test "shift into". */ executable_code code; @@ -10404,7 +6801,7 @@ static void test84(void) #endif if (verbose) - printf("Run test84\n"); + printf("Run test64\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -10413,7 +6810,7 @@ static void test84(void) for (i = 0; i < 10; i++) ibuf[i] = -1; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, WCONST(0x1234567812345678, 0x12345678)); sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 12); @@ -10589,45 +6986,45 @@ static void test84(void) code.func2((sljit_sw)buf, (sljit_sw)ibuf); - FAILED(buf[0] != WCONST(0x4567812345678123, 0x45678123), "test84 case 1 failed\n"); - FAILED(buf[1] != WCONST(0x4567812345678123, 0x45678123), "test84 case 2 failed\n"); - FAILED(ibuf[0] != 0x109abc32, "test84 case 3 failed\n"); - FAILED(ibuf[1] != 0x13579bdf, "test84 case 4 failed\n"); - FAILED(buf[2] != -8762, "test84 case 5 failed\n"); - FAILED(buf[3] != WCONST(0x4567812345678abc, 0x45678abc), "test84 case 6 failed\n"); - FAILED(buf[4] != WCONST(0xdb975557799bbddf, 0xdb975579), "test84 case 7 failed\n"); - FAILED(buf[5] != WCONST(0xfdb974130eca8642, 0xfdb97412), "test84 case 8 failed\n"); - FAILED(ibuf[2] != (sljit_s32)0xdb97413b, "test84 case 9 failed\n"); - FAILED(ibuf[3] != (sljit_s32)0xcd6543c9, "test84 case 10 failed\n"); - FAILED(ibuf[4] != (sljit_s32)0xaf95b95a, "test84 case 11 failed\n"); - FAILED(buf[6] != -6032, "test84 case 12 failed\n"); - 
FAILED(buf[7] != -9740, "test84 case 13 failed\n"); - FAILED(buf[8] != -5182, "test84 case 14 failed\n"); - FAILED(ibuf[5] != -4072, "test84 case 15 failed\n"); - FAILED(ibuf[6] != -2813, "test84 case 16 failed\n"); - FAILED(buf[9] != -3278, "test84 case 17 failed\n"); - FAILED(buf[10] != WCONST(0x34567890abcdef12, 0x34567812), "test84 case 18 failed\n"); - FAILED(buf[11] != WCONST(0xefba9876fedcba98, 0x78fedcba), "test84 case 19 failed\n"); - FAILED(buf[12] != WCONST(0x1234567890abcdef, 0x12345678), "test84 case 20 failed\n"); - FAILED(buf[13] != -4986, "test84 case 21 failed\n"); - FAILED(buf[14] != WCONST(0x2468acf1fdb97413, 0x24690ecb), "test84 case 22 failed\n"); - FAILED(buf[15] != WCONST(0x12345678fedcba09, 0x12348765), "test84 case 23 failed\n"); - FAILED(buf[16] != 0x30, "test84 case 24 failed\n"); - FAILED(buf[17] != WCONST(0x8d159e248d159e27, 0x8d159e27), "test84 case 25 failed\n"); - FAILED(ibuf[7] != (sljit_s32)0xbc23456e, "test84 case 26 failed\n"); - FAILED(ibuf[8] != (sljit_s32)0xeabc2345, "test84 case 27 failed\n"); + FAILED(buf[0] != WCONST(0x4567812345678123, 0x45678123), "test64 case 1 failed\n"); + FAILED(buf[1] != WCONST(0x4567812345678123, 0x45678123), "test64 case 2 failed\n"); + FAILED(ibuf[0] != 0x109abc32, "test64 case 3 failed\n"); + FAILED(ibuf[1] != 0x13579bdf, "test64 case 4 failed\n"); + FAILED(buf[2] != -8762, "test64 case 5 failed\n"); + FAILED(buf[3] != WCONST(0x4567812345678abc, 0x45678abc), "test64 case 6 failed\n"); + FAILED(buf[4] != WCONST(0xdb975557799bbddf, 0xdb975579), "test64 case 7 failed\n"); + FAILED(buf[5] != WCONST(0xfdb974130eca8642, 0xfdb97412), "test64 case 8 failed\n"); + FAILED(ibuf[2] != (sljit_s32)0xdb97413b, "test64 case 9 failed\n"); + FAILED(ibuf[3] != (sljit_s32)0xcd6543c9, "test64 case 10 failed\n"); + FAILED(ibuf[4] != (sljit_s32)0xaf95b95a, "test64 case 11 failed\n"); + FAILED(buf[6] != -6032, "test64 case 12 failed\n"); + FAILED(buf[7] != -9740, "test64 case 13 failed\n"); + FAILED(buf[8] != -5182, 
"test64 case 14 failed\n"); + FAILED(ibuf[5] != -4072, "test64 case 15 failed\n"); + FAILED(ibuf[6] != -2813, "test64 case 16 failed\n"); + FAILED(buf[9] != -3278, "test64 case 17 failed\n"); + FAILED(buf[10] != WCONST(0x34567890abcdef12, 0x34567812), "test64 case 18 failed\n"); + FAILED(buf[11] != WCONST(0xefba9876fedcba98, 0x78fedcba), "test64 case 19 failed\n"); + FAILED(buf[12] != WCONST(0x1234567890abcdef, 0x12345678), "test64 case 20 failed\n"); + FAILED(buf[13] != -4986, "test64 case 21 failed\n"); + FAILED(buf[14] != WCONST(0x2468acf1fdb97413, 0x24690ecb), "test64 case 22 failed\n"); + FAILED(buf[15] != WCONST(0x12345678fedcba09, 0x12348765), "test64 case 23 failed\n"); + FAILED(buf[16] != 0x30, "test64 case 24 failed\n"); + FAILED(buf[17] != WCONST(0x8d159e248d159e27, 0x8d159e27), "test64 case 25 failed\n"); + FAILED(ibuf[7] != (sljit_s32)0xbc23456e, "test64 case 26 failed\n"); + FAILED(ibuf[8] != (sljit_s32)0xeabc2345, "test64 case 27 failed\n"); #if (defined SLJIT_MASKED_SHIFT && SLJIT_MASKED_SHIFT) - FAILED(buf[18] != 24688643, "test84 case 28 failed\n"); + FAILED(buf[18] != 24688643, "test64 case 28 failed\n"); #endif /* SLJIT_MASKED_SHIFT */ #if (defined SLJIT_MASKED_SHIFT32 && SLJIT_MASKED_SHIFT32) - FAILED(ibuf[9] != (sljit_s32)-2135139327, "test84 case 29 failed\n"); + FAILED(ibuf[9] != (sljit_s32)-2135139327, "test64 case 29 failed\n"); #endif /* SLJIT_MASKED_SHIFT32 */ sljit_free_code(code.code, NULL); successful_tests++; } -static void test85(void) +static void test65(void) { /* Test count trailing zeroes. 
*/ executable_code code; @@ -10637,7 +7034,7 @@ static void test85(void) sljit_s32 i; if (verbose) - printf("Run test85\n"); + printf("Run test65\n"); FAILED(!compiler, "cannot create compiler\n"); @@ -10649,7 +7046,7 @@ static void test85(void) buf[2] = 0; ibuf[3] = 1; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x80); /* buf[0] */ @@ -10706,195 +7103,35 @@ static void test85(void) code.func2((sljit_sw)buf, (sljit_sw)ibuf); - FAILED(buf[0] != 7, "test85 case 1 failed\n"); - FAILED(buf[1] != 0, "test85 case 2 failed\n"); - FAILED(buf[2] != WCONST(64, 32), "test85 case 3 failed\n"); - FAILED(buf[3] != WCONST(61, 29), "test85 case 4 failed\n"); - FAILED(buf[4] != 11, "test85 case 5 failed\n"); - FAILED(ibuf[0] != 6, "test85 case 6 failed\n"); - FAILED(ibuf[1] != 32, "test85 case 7 failed\n"); - FAILED(ibuf[2] != 5, "test85 case 8 failed\n"); - FAILED(ibuf[3] != 0, "test85 case 9 failed\n"); + FAILED(buf[0] != 7, "test65 case 1 failed\n"); + FAILED(buf[1] != 0, "test65 case 2 failed\n"); + FAILED(buf[2] != WCONST(64, 32), "test65 case 3 failed\n"); + FAILED(buf[3] != WCONST(61, 29), "test65 case 4 failed\n"); + FAILED(buf[4] != 11, "test65 case 5 failed\n"); + FAILED(ibuf[0] != 6, "test65 case 6 failed\n"); + FAILED(ibuf[1] != 32, "test65 case 7 failed\n"); + FAILED(ibuf[2] != 5, "test65 case 8 failed\n"); + FAILED(ibuf[3] != 0, "test65 case 9 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test86(void) -{ - /* Test get return address. */ - executable_code code; - struct sljit_compiler* compiler; - struct sljit_jump *jump; - struct sljit_label *label; - sljit_uw return_addr = 0; - sljit_uw buf[1]; - - if (verbose) - printf("Run test86\n"); - - /* Next test. 
*/ - - buf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, W), 1, 1, 0, 0, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(W)); - label = sljit_emit_label(compiler); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 1, 0, 0, 0, 0); - sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_RETURN_REG, 0); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - return_addr = sljit_get_label_addr(label); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - - FAILED(buf[0] != return_addr, "test86 case 1 failed\n"); - sljit_free_code(code.code, NULL); - - /* Next test. */ - - buf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(VOID), 2, 0, 0, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -1); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(VOID, W, W)); - label = sljit_emit_label(compiler); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 1, SLJIT_NUMBER_OF_SAVED_REGISTERS - 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); - sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_MEM0(), (sljit_sw)buf); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - return_addr = sljit_get_label_addr(label); - sljit_free_compiler(compiler); - - code.func0(); - - FAILED(buf[0] != return_addr, "test86 case 2 failed\n"); - sljit_free_code(code.code, NULL); - - /* 
Next test. */ - - buf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, W), 1, 3, 0, 0, 0); - sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S2, 0, SLJIT_S0, 0, SLJIT_IMM, 16); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1(VOID, W)); - label = sljit_emit_label(compiler); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS1(VOID, W_R), 1, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, SLJIT_MAX_LOCAL_SIZE >> 1); - sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_MEM2(SLJIT_S2, SLJIT_R0), 1); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - return_addr = sljit_get_label_addr(label); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - - FAILED(buf[0] != return_addr, "test86 case 3 failed\n"); - sljit_free_code(code.code, NULL); - - /* Next test. 
*/ - - buf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, W_R), 1, 0, 0, 0, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1(VOID, W)); - label = sljit_emit_label(compiler); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS1(VOID, W_R), 1, SLJIT_NUMBER_OF_SAVED_REGISTERS >> 1, 0, 0, 64); - sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_MEM1(SLJIT_SP), 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_MEM1(SLJIT_SP), 0); - sljit_emit_return_void(compiler); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - return_addr = sljit_get_label_addr(label); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - - FAILED(buf[0] != return_addr, "test86 case 4 failed\n"); - sljit_free_code(code.code, NULL); - - if (sljit_has_cpu_feature(SLJIT_HAS_FPU) && SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0) { - /* Next test. 
*/ - - buf[0] = 0; - - compiler = sljit_create_compiler(NULL, NULL); - FAILED(!compiler, "cannot create compiler\n"); - - sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, W), 1, 1, 0, 0, 0); - jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(W)); - label = sljit_emit_label(compiler); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); - sljit_emit_return_void(compiler); - - sljit_set_label(jump, sljit_emit_label(compiler)); - sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 1, 3, 0, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 64); - sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_RETURN_REG, 0); - sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); - - code.code = sljit_generate_code(compiler); - CHECK(compiler); - return_addr = sljit_get_label_addr(label); - sljit_free_compiler(compiler); - - code.func1((sljit_sw)buf); - - FAILED(buf[0] != return_addr, "test86 case 5 failed\n"); - sljit_free_code(code.code, NULL); - } - - successful_tests++; -} - -static void test87(void) +static void test66(void) { /* Test reverse bytes. 
*/ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); - sljit_sw buf[5]; + sljit_sw buf[6]; sljit_s32 ibuf[5]; sljit_s32 i; if (verbose) - printf("Run test87\n"); + printf("Run test66\n"); FAILED(!compiler, "cannot create compiler\n"); - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) buf[i] = -1; for (i = 0; i < 5; i++) ibuf[i] = -1; @@ -10902,7 +7139,7 @@ static void test87(void) buf[3] = WCONST(0x8070605040302010, 0x40302010); ibuf[1] = (sljit_s32)0xffeeddcc; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, WCONST(0xf1e2d3c4b5a69788, 0xf1e2d3c4)); sljit_emit_op1(compiler, SLJIT_REV, SLJIT_R0, 0, SLJIT_R0, 0); /* buf[0] */ @@ -10926,6 +7163,11 @@ static void test87(void) /* buf[4] */ sljit_emit_op1(compiler, SLJIT_REV, SLJIT_MEM0(), (sljit_sw)&buf[4], SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 5 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, WCONST(0xaabbccddeeff0011, 0xaabbccdd)); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_REV, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, (sljit_s32)0xf1e2d3c4); sljit_emit_op1(compiler, SLJIT_REV32, SLJIT_R1, 0, SLJIT_R0, 0); /* ibuf[0] */ @@ -10958,119 +7200,134 @@ static void test87(void) code.func2((sljit_sw)buf, (sljit_sw)ibuf); - FAILED(buf[0] != WCONST(0x8897a6b5c4d3e2f1, 0xc4d3e2f1), "test87 case 1 failed\n"); - FAILED(buf[1] != WCONST(0x8899aabbccddeeff, 0xccddeeff), "test87 case 2 failed\n"); - FAILED(buf[2] != WCONST(0x0807060504030201, 0x04030201), "test87 case 3 failed\n"); - FAILED(buf[3] != WCONST(0x8070605040302010, 0x40302010), "test87 case 4 failed\n"); - FAILED(buf[4] != WCONST(0x8877665544332211, 0x44332211), "test87 case 5 failed\n"); - 
FAILED(ibuf[0] != (sljit_s32)0xc4d3e2f1, "test87 case 6 failed\n"); - FAILED(ibuf[1] != (sljit_s32)0xccddeeff, "test87 case 7 failed\n"); - FAILED(ibuf[2] != (sljit_s32)0x04030201, "test87 case 8 failed\n"); - FAILED(ibuf[3] != (sljit_s32)0x44332211, "test87 case 9 failed\n"); - FAILED(ibuf[4] != (sljit_s32)0xcbdcedfe, "test87 case 10 failed\n"); + FAILED(buf[0] != WCONST(0x8897a6b5c4d3e2f1, 0xc4d3e2f1), "test66 case 1 failed\n"); + FAILED(buf[1] != WCONST(0x8899aabbccddeeff, 0xccddeeff), "test66 case 2 failed\n"); + FAILED(buf[2] != WCONST(0x0807060504030201, 0x04030201), "test66 case 3 failed\n"); + FAILED(buf[3] != WCONST(0x8070605040302010, 0x40302010), "test66 case 4 failed\n"); + FAILED(buf[4] != WCONST(0x8877665544332211, 0x44332211), "test66 case 5 failed\n"); + FAILED(buf[5] != WCONST(0x1100ffeeddccbbaa, 0xddccbbaa), "test66 case 6 failed\n"); + FAILED(ibuf[0] != (sljit_s32)0xc4d3e2f1, "test66 case 7 failed\n"); + FAILED(ibuf[1] != (sljit_s32)0xccddeeff, "test66 case 8 failed\n"); + FAILED(ibuf[2] != (sljit_s32)0x04030201, "test66 case 9 failed\n"); + FAILED(ibuf[3] != (sljit_s32)0x44332211, "test66 case 10 failed\n"); + FAILED(ibuf[4] != (sljit_s32)0xcbdcedfe, "test66 case 11 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; } -static void test88(void) +static void test67(void) { - /* Test sljit_emit_fcopy. */ + /* Test reverse two bytes. 
*/ executable_code code; - struct sljit_compiler* compiler; - sljit_f64 dbuf[4]; - sljit_f32 sbuf[2]; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw wbuf[2]; - sljit_s32 ibuf[2]; -#else /* !SLJIT_64BIT_ARCHITECTURE */ - sljit_s32 ibuf[7]; -#endif /* SLJIT_64BIT_ARCHITECTURE */ + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[7]; + sljit_s32 ibuf[4]; + sljit_s16 hbuf[11]; + sljit_s32 i; if (verbose) - printf("Run test88\n"); + printf("Run test67\n"); - if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { - if (verbose) - printf("no fpu available, test88 skipped\n"); - successful_tests++; - return; - } - - compiler = sljit_create_compiler(NULL, NULL); FAILED(!compiler, "cannot create compiler\n"); - sbuf[0] = 12345.0; - sbuf[1] = -1.0; - ibuf[0] = -1; - ibuf[1] = (sljit_s32)0xc7543100; - dbuf[0] = 123456789012345.0; - dbuf[1] = -1.0; -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - wbuf[0] = -1; - wbuf[1] = (sljit_sw)0xc2fee0c29f50cb10; -#else /* !SLJIT_64BIT_ARCHITECTURE */ - ibuf[2] = -1; - ibuf[3] = -1; - ibuf[4] = -1; - ibuf[5] = (sljit_sw)0x9f50cb10; - ibuf[6] = (sljit_sw)0xc2fee0c2; -#endif /* SLJIT_64BIT_ARCHITECTURE */ + for (i = 0; i < 7; i++) + buf[i] = -1; + for (i = 0; i < 4; i++) + ibuf[i] = -1; + for (i = 0; i < 11; i++) + hbuf[i] = -1; - sljit_emit_enter(compiler, 0, SLJIT_ARGS2(VOID, W, W), 5, 5, 5, 0, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)ibuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 0); - sljit_emit_fcopy(compiler, SLJIT_COPY32_FROM_F32, SLJIT_FR2, SLJIT_R0); + hbuf[0] = (sljit_s16)0x8c9d; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(W, W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xeeabcd); + sljit_emit_op1(compiler, SLJIT_REV_U16, SLJIT_R0, 0, SLJIT_R0, 0); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 
0, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xeeabcd); + sljit_emit_op1(compiler, SLJIT_REV_S16, SLJIT_R1, 0, SLJIT_R2, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0xeeedcb); + sljit_emit_op1(compiler, SLJIT_REV_U16, SLJIT_R4, 0, SLJIT_R3, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R4, 0); + + /* hbuf[1] */ + sljit_emit_op1(compiler, SLJIT_REV_U16, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_s16), SLJIT_MEM1(SLJIT_S2), 0); + /* hbuf[2] */ + sljit_emit_op1(compiler, SLJIT_REV_S16, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_s16), SLJIT_MEM1(SLJIT_S2), 0); + + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, 0xedcb); + sljit_emit_op1(compiler, SLJIT_REV_U16, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_s16), SLJIT_IMM, 0xedcb); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); + sljit_emit_op1(compiler, SLJIT_REV_S16, SLJIT_R1, 0, SLJIT_MEM2(SLJIT_S2, SLJIT_R1), 1); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R2, 0, SLJIT_IMM, 0xd9c8); + /* hbuf[3] */ + sljit_emit_op1(compiler, SLJIT_REV_S16, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_s16), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R4, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_REV_S16, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s16), SLJIT_R4, 0); + /* hbuf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_s16), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s16)); + + 
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0xeeb1c2); + sljit_emit_op1(compiler, SLJIT_REV32_U16, SLJIT_R0, 0, SLJIT_R0, 0); /* ibuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R0, 0); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R3, 0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_s32)); - sljit_emit_fcopy(compiler, SLJIT_COPY32_TO_F32, SLJIT_FR4, SLJIT_R3); - /* sbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32), SLJIT_FR4, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, 0xeeb1c2); + sljit_emit_op1(compiler, SLJIT_REV32_S16, SLJIT_R1, 0, SLJIT_R2, 0); + /* ibuf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_R1, 0); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)wbuf); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fcopy(compiler, SLJIT_COPY_FROM_F64, SLJIT_FR1, SLJIT_S2); - /* wbuf[0] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R3, 0, SLJIT_IMM, 0xddbfae); + sljit_emit_op1(compiler, SLJIT_REV32_S16, SLJIT_R4, 0, SLJIT_R3, 0); + /* ibuf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_s32), SLJIT_R4, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); - sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR0, SLJIT_R3); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32_U16, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32), SLJIT_IMM, 0xbfae); + sljit_emit_op1(compiler, SLJIT_REV32_U16, SLJIT_R1, 0, SLJIT_MEM0(), (sljit_sw)(ibuf + 3)); + /* 
ibuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32), SLJIT_R1, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); - sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR3, SLJIT_R2); - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR3, 0); -#else /* !SLJIT_64BIT_ARCHITECTURE */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); - sljit_emit_fcopy(compiler, SLJIT_COPY_FROM_F64, SLJIT_FR1, SLJIT_REG_PAIR(SLJIT_S3, SLJIT_S2)); - /* ibuf[2-3] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S2, 0); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S3, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x880102); + sljit_emit_op1(compiler, SLJIT_REV32_U16, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), SLJIT_R1, 0); + /* hbuf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_S2), 5 * sizeof(sljit_s16), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32)); - sljit_emit_fcopy(compiler, SLJIT_COPY_FROM_F64, SLJIT_FR1, SLJIT_R2); - /* ibuf[4] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 4 * sizeof(sljit_sw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R3, 0, SLJIT_IMM, 0x880102); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 6 * sizeof(sljit_s16)); + /* hbuf[6] */ + sljit_emit_op1(compiler, SLJIT_REV32_S16, SLJIT_MEM2(SLJIT_S2, SLJIT_R1), 0, SLJIT_R3, 0); + /* hbuf[7] */ + hbuf[7] = -367; - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_MEM1(SLJIT_R1), 5 * sizeof(sljit_sw)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 6 * sizeof(sljit_sw)); - sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR0, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R3)); - /* dbuf[1] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), 
SLJIT_FR0, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); - sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR3, SLJIT_REG_PAIR(SLJIT_R2, SLJIT_R2)); - /* dbuf[2] */ - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR3, 0); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)0xc00c0000); - sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR3, SLJIT_R2); - sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR3, 0); -#endif /* SLJIT_64BIT_ARCHITECTURE */ +#if IS_64BIT + /* SLJIT_REV truncates memory store, source not sign extended 64bit */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)0xdeadbeef4444aa55); + sljit_emit_op1(compiler, SLJIT_REV_U16, SLJIT_R1, 0, SLJIT_R0, 0); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R1, 0); + /* hbuf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_S2), 8 * sizeof(sljit_u16), SLJIT_R1, 0); + /* hbuf[9] */ + sljit_emit_op1(compiler, SLJIT_REV_S16, SLJIT_MEM1(SLJIT_S2), 9 * sizeof(sljit_u16), SLJIT_R0, 0); + /* hbuf[10] */ + hbuf[10] = -42; + sljit_emit_op1(compiler, SLJIT_REV_S16, SLJIT_R0, 0, SLJIT_R0, 0); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R0, 0); +#endif /* IS_64BIT */ sljit_emit_return_void(compiler); @@ -11078,27 +7335,677 @@ static void test88(void) CHECK(compiler); sljit_free_compiler(compiler); - code.func2((sljit_sw)dbuf, (sljit_sw)sbuf); + code.func3((sljit_sw)buf, (sljit_sw)ibuf, (sljit_sw)hbuf); - FAILED(ibuf[0] != (sljit_s32)0x4640e400, "test88 case 1 failed\n"); - FAILED(sbuf[1] != -54321.0, "test88 case 2 failed\n"); -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - FAILED(wbuf[0] != (sljit_sw)0x42dc12218377de40, "test88 case 3 failed\n"); - FAILED(dbuf[1] != -543210987654321.0, "test88 case 4 
failed\n"); - FAILED(dbuf[2] != 0.0, "test88 case 5 failed\n"); -#else /* !SLJIT_64BIT_ARCHITECTURE */ - FAILED(ibuf[2] != (sljit_sw)0x8377de40, "test88 case 3 failed\n"); - FAILED(ibuf[3] != (sljit_sw)0x42dc1221, "test88 case 4 failed\n"); - FAILED(ibuf[4] != (sljit_sw)0x42dc1221, "test88 case 5 failed\n"); - FAILED(dbuf[1] != -543210987654321.0, "test88 case 6 failed\n"); - FAILED(dbuf[2] != 0.0, "test88 case 7 failed\n"); - FAILED(dbuf[3] != -3.5, "test88 case 8 failed\n"); -#endif /* SLJIT_64BIT_ARCHITECTURE */ + FAILED(buf[0] != 0xcdab, "test67 case 1 failed\n"); + FAILED(buf[1] != -0x3255, "test67 case 2 failed\n"); + FAILED(buf[2] != 0xcbed, "test67 case 3 failed\n"); + FAILED(hbuf[1] != (sljit_s16)0x9d8c, "test67 case 4 failed\n"); + FAILED(hbuf[2] != (sljit_s16)0x9d8c, "test67 case 5 failed\n"); + FAILED(buf[3] != 0xcbed, "test67 case 6 failed\n"); + FAILED(buf[4] != -0x3413, "test67 case 7 failed\n"); + FAILED(hbuf[3] != (sljit_s16)0xc8d9, "test67 case 8 failed\n"); + FAILED(hbuf[4] != (sljit_s16)0xc8d9, "test67 case 9 failed\n"); + FAILED(ibuf[0] != 0xc2b1, "test67 case 10 failed\n"); + FAILED(ibuf[1] != -0x3d4f, "test67 case 11 failed\n"); + FAILED(ibuf[2] != -0x5141, "test67 case 12 failed\n"); + FAILED(ibuf[3] != 0xaebf, "test67 case 13 failed\n"); + FAILED(hbuf[5] != (sljit_s16)0x0201, "test67 case 14 failed\n"); + FAILED(hbuf[6] != (sljit_s16)0x0201, "test67 case 15 failed\n"); + FAILED(hbuf[7] != -367, "test67 case 16 failed\n"); +#if IS_64BIT + FAILED(hbuf[8] != 0x55aa, "test67 case 17 failed\n"); + FAILED(buf[5] != hbuf[8], "test67 case 18 failed\n"); + FAILED(hbuf[9] != hbuf[8], "test67 case 19 failed\n"); + FAILED(hbuf[10] != -42, "test67 case 20 failed\n"); + FAILED(buf[6] != hbuf[9], "test67 case 21 failed\n"); +#endif /* IS_64BIT */ sljit_free_code(code.code, NULL); successful_tests++; } +static void test68(void) +{ + /* Test reverse four bytes. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[7]; + sljit_s32 ibuf[6]; + sljit_s32 i; + + if (verbose) + printf("Run test68\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 7; i++) + buf[i] = -1; + for (i = 0; i < 6; i++) + ibuf[i] = -1; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, WCONST(0xffffa1b2c3d4, 0xa1b2c3d4)); + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R0, 0, SLJIT_R0, 0); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + + /* Sign extend negative integer. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, WCONST(0xffffa1b2c3d4, 0xa1b2c3d4)); + sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_R1, 0, SLJIT_R2, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0); + + /* Sign extend positive integer. 
*/ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, WCONST(0xffff1a2b3c4d,0x1a2b3c4d)); + sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_R1, 0, SLJIT_R2, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_IMM, (sljit_s32)0xf9e8d7c6); + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32)); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_S2, 0, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 0); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_S2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, (sljit_s32)0xaabbccdd); + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, (sljit_s32)0xaabbccdd); + sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_R4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R4, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, WCONST(0xffff01020304, 0x01020304)); + /* ibuf[0] */ + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 1); + /* ibuf[1] */ + sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_MEM2(SLJIT_S1, SLJIT_S2), 2, SLJIT_S4, 0); + + 
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32), SLJIT_IMM, (sljit_s32)0xf0e0d0c0); + /* ibuf[2] */ + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_MEM0(), (sljit_sw)(ibuf + 2), SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32)); + + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S2, 0, SLJIT_S1, 0, SLJIT_IMM, WCONST(0x1234567890ab, 0x12345678) - 3 * sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_REV_U32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), SLJIT_MEM1(SLJIT_S2), WCONST(0x1234567890ab, 0x12345678)); + /* ibuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32)); + + /* SLJIT_REV memory store truncates and does not overflow */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, WCONST(0xffff8192a3b4, 0x8192a3b4)); + /* ibuf[4] */ + sljit_emit_op1(compiler, SLJIT_REV_S32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_s32), SLJIT_R4, 0); + /* ibuf[5] */ + ibuf[5] = 0x55555555; + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)buf, (sljit_sw)ibuf); + + FAILED(buf[0] != WCONST(0xd4c3b2a1, 0xd4c3b2a1), "test68 case 1 failed\n"); + FAILED(buf[1] != WCONST(-0x2b3c4d5f, 0xd4c3b2a1), "test68 case 2 failed\n"); + FAILED(buf[2] != 0x4d3c2b1a, "test68 case 3 failed\n"); + FAILED(buf[3] != WCONST(0xc6d7e8f9, 0xc6d7e8f9), "test68 case 4 failed\n"); + FAILED(buf[4] != WCONST(-0x39281707, 0xc6d7e8f9), "test68 case 5 failed\n"); + FAILED(buf[5] != WCONST(0xddccbbaa, 0xddccbbaa), "test68 case 6 failed\n"); + FAILED(buf[6] != WCONST(-0x22334456, 0xddccbbaa), "test68 case 7 failed\n"); + FAILED(ibuf[0] != 0x04030201, "test68 case 8 failed\n"); + FAILED(ibuf[1] != 0x04030201, "test68 case 9 failed\n"); + FAILED(ibuf[2] != (sljit_s32)0xc0d0e0f0, "test68 case 10 failed\n"); + FAILED(ibuf[3] != (sljit_s32)0xc0d0e0f0, "test68 case 11 failed\n"); + FAILED(ibuf[4] != 
(sljit_s32)0xb4a39281, "test68 case 12 failed\n"); + FAILED(ibuf[5] != 0x55555555, "test68 case 13 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test69(void) +{ + /* Test atomic load and store. */ + executable_code code; + struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *label; + struct sljit_jump *jump; + sljit_sw buf[45]; + sljit_s32 i; + + if (verbose) + printf("Run test69\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)) { + if (verbose) + printf("no fine-grained atomic available, test69 skipped\n"); + successful_tests++; + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 1; i < 45; i++) + buf[i] = WCONST(0x5555555555555555, 0x55555555); + + buf[0] = 4678; + *(sljit_u8*)(buf + 2) = 78; + *(sljit_u8*)(buf + 5) = 211; + *(sljit_u16*)(buf + 9) = 17897; + *(sljit_u16*)(buf + 12) = 57812; + *(sljit_u32*)(buf + 15) = 1234567890; + *(sljit_u32*)(buf + 17) = 987609876; + buf[20] = (sljit_sw)buf; + *(sljit_u8*)(buf + 22) = 192; + *(sljit_u16*)(buf + 25) = 6359; + ((sljit_u8*)(buf + 28))[1] = 105; + ((sljit_u8*)(buf + 30))[2] = 13; + ((sljit_u16*)(buf + 33))[1] = 14876; +#if IS_64BIT + ((sljit_u8*)(buf + 35))[7] = 0x88; + ((sljit_u16*)(buf + 37))[3] = 0x1337; + ((sljit_s32*)(buf + 39))[1] = -1; +#endif /* IS_64BIT */ + buf[44] = WCONST(0x1122334444332211, 0x11222211); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, -9856); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV, SLJIT_R1, SLJIT_S0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + /* buf[0] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV | 
SLJIT_SET_ATOMIC_STORED, SLJIT_R1, SLJIT_S0, SLJIT_R0); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R2, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_IMM, 203); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R2, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw)); + /* buf[2] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R1, SLJIT_R2); + jump = sljit_emit_jump(compiler, SLJIT_ATOMIC_STORED); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_JUMP), label); + sljit_set_label(jump, sljit_emit_label(compiler)); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_S2, 0); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + + label = sljit_emit_label(compiler); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw)); + sljit_emit_atomic_load(compiler, SLJIT_MOV32_U8, SLJIT_R0, SLJIT_R0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 97); + /* buf[5] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV32_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_S1, SLJIT_R0, SLJIT_S2); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV32, 
SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R1, 0); + /* buf[7] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R0, 0); + /* buf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_S1, 0); + + label = sljit_emit_label(compiler); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 9 * sizeof(sljit_sw)); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U16, SLJIT_S2, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S2, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 9 * sizeof(sljit_sw)); + /* buf[9] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R0, SLJIT_R1); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[10] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R2, 0); + /* buf[11] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 12 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_IMM, 41306); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV32_U16, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), (sljit_sw)(buf + 13)); + /* buf[12] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV32_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R1, SLJIT_R3); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[13] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R2, 0); + 
/* buf[14] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S0, 0, SLJIT_IMM, 15 * sizeof(sljit_sw)); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U32, SLJIT_R2, SLJIT_S2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 987654321); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S0, 0, SLJIT_IMM, 15 * sizeof(sljit_sw)); + /* buf[15] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U32 | SLJIT_SET_ATOMIC_STORED, SLJIT_R1, SLJIT_S1, SLJIT_S3); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[16] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 17 * sizeof(sljit_sw)); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV32, SLJIT_R0, SLJIT_R2); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -573621); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 678906789); + /* buf[17] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV32 | SLJIT_SET_ATOMIC_STORED, SLJIT_R1, SLJIT_R2, SLJIT_S2); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[18] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_sw), SLJIT_S1, 0); + /* buf[19] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 20 * sizeof(sljit_sw)); + label = sljit_emit_label(compiler); + 
sljit_emit_atomic_load(compiler, SLJIT_MOV_P, SLJIT_R0, SLJIT_R2); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R0, 0, SLJIT_R2, 0); + /* buf[20] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_P | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R2, SLJIT_R1); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[21] */ + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_MEM1(SLJIT_S0), 21 * sizeof(sljit_sw), SLJIT_S1, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 22 * sizeof(sljit_sw)); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R3, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 240); + /* buf[22] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R3, SLJIT_R1, SLJIT_R4); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[23] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_sw), SLJIT_R2, 0); + /* buf[24] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 25 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_IMM, 6359); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV, SLJIT_S3, SLJIT_R0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_S3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, 4059); + /* buf[25] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV | 
SLJIT_SET_ATOMIC_STORED, SLJIT_S3, SLJIT_R0, SLJIT_S4); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[26] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 26 * sizeof(sljit_sw), SLJIT_R1, 0); + /* buf[27] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 27 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 28 * sizeof(sljit_sw) + 1); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 204); + /* buf[28] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0); + jump = sljit_emit_jump(compiler, SLJIT_ATOMIC_STORED); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_JUMP), label); + sljit_set_label(jump, sljit_emit_label(compiler)); + /* buf[29] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 29 * sizeof(sljit_sw), SLJIT_S2, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 30 * sizeof(sljit_sw) + 2); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 240); + /* buf[30] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[31] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 31 * sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_ATOMIC_NOT_STORED); + /* buf[32] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 32 * sizeof(sljit_sw), SLJIT_R0, 0); + + 
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 33 * sizeof(sljit_sw) + 2); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U16, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 51403); + /* buf[33] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[34] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 34 * sizeof(sljit_sw), SLJIT_S1, 0); + +#if IS_64BIT + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 35 * sizeof(sljit_sw) + 7 * sizeof(sljit_u8)); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xffa5a5a542); + /* buf[35] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U8 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[36] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 36 * sizeof(sljit_sw), SLJIT_S1, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 37 * sizeof(sljit_sw) + 3 * sizeof(sljit_u16)); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U16, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xffa5a56942); + /* buf[37] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[38] */ + sljit_emit_op1(compiler, 
SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 38 * sizeof(sljit_sw), SLJIT_S1, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 39 * sizeof(sljit_sw) + sizeof(sljit_u32)); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U32, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xffffdeadbeef); + /* buf[39] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U32 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[40] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 40 * sizeof(sljit_sw), SLJIT_S1, 0); +#endif /* IS_64BIT */ + + /* buf[41] */ + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 41 * sizeof(sljit_sw), SLJIT_ATOMIC_STORED); + + /* abandoned atomic load (byte) */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 8); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 44 * sizeof(sljit_sw)); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R3, 0, SLJIT_R3, 0, SLJIT_IMM, 1); + jump = sljit_emit_jump(compiler, SLJIT_ZERO); + /* buf[44] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_U8, SLJIT_R2, SLJIT_R1, SLJIT_R0); + sljit_emit_mem(compiler, SLJIT_MOV_U8 | SLJIT_MEM_UNALIGNED, SLJIT_R4, SLJIT_MEM1(SLJIT_R1), 0); + sljit_set_label(sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R4, 0, SLJIT_R2, 0), label); + /* buf[43] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 43 * sizeof(sljit_sw), SLJIT_R2, 0); + /* buf[42] 
*/ + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 42 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED(buf[0] != -9856, "test69 case 1 failed\n"); + FAILED(buf[1] != 4678, "test69 case 2 failed\n"); + FAILED(*(sljit_u8*)(buf + 2) != 203, "test69 case 3 failed\n"); + FAILED(((sljit_u8*)(buf + 2))[1] != 0x55, "test69 case 4 failed\n"); + FAILED(buf[3] != 78, "test69 case 5 failed\n"); + FAILED(buf[4] != 203, "test69 case 6 failed\n"); + FAILED(*(sljit_u8*)(buf + 5) != 97, "test69 case 7 failed\n"); + FAILED(((sljit_u8*)(buf + 5))[1] != 0x55, "test69 case 8 failed\n"); + FAILED(*(sljit_u32*)(buf + 6) != 211, "test69 case 9 failed\n"); + FAILED(buf[7] != (sljit_sw)(buf + 5), "test69 case 10 failed\n"); + FAILED(buf[8] != 97, "test69 case 11 failed\n"); + FAILED(*(sljit_u16*)(buf + 9) != (sljit_u16)(sljit_sw)(buf + 9), "test69 case 12 failed\n"); + FAILED(((sljit_u8*)(buf + 9))[2] != 0x55, "test69 case 13 failed\n"); + FAILED(buf[10] != 17897, "test69 case 14 failed\n"); + FAILED(buf[11] != (sljit_sw)(buf + 9), "test69 case 15 failed\n"); + FAILED(*(sljit_u16*)(buf + 12) != 41306, "test69 case 16 failed\n"); + FAILED(((sljit_u8*)(buf + 12))[2] != 0x55, "test69 case 17 failed\n"); + FAILED(*(sljit_u32*)(buf + 13) != 57812, "test69 case 18 failed\n"); + FAILED(buf[14] != 41306, "test69 case 19 failed\n"); + FAILED(*(sljit_u32*)(buf + 15) != 987654321, "test69 case 20 failed\n"); +#if IS_64BIT + FAILED(((sljit_u8*)(buf + 15))[4] != 0x55, "test69 case 21 failed\n"); +#endif /* IS_64BIT */ + FAILED(buf[16] != 1234567890, "test69 case 22 failed\n"); + FAILED(*(sljit_u32*)(buf + 17) != 678906789, "test69 case 23 failed\n"); +#if IS_64BIT + FAILED(((sljit_u8*)(buf + 17))[4] != 0x55, "test69 case 24 failed\n"); +#endif /* IS_64BIT */ + 
FAILED(*(sljit_u32*)(buf + 18) != 987609876, "test69 case 25 failed\n"); +#if IS_64BIT + FAILED(((sljit_u8*)(buf + 18))[4] != 0x55, "test69 case 26 failed\n"); +#endif /* IS_64BIT */ + FAILED(buf[19] != -573621, "test69 case 27 failed\n"); + FAILED(buf[20] != (sljit_sw)(buf + 20), "test 92 case 28 failed\n"); + FAILED(buf[21] != (sljit_sw)buf, "test 92 case 29 failed\n"); + FAILED(*(sljit_u8*)(buf + 22) != 240, "test69 case 30 failed\n"); + FAILED(((sljit_u8*)(buf + 22))[1] != 0x55, "test69 case 31 failed\n"); + FAILED(buf[23] != 192, "test69 case 32 failed\n"); + FAILED(buf[24] != 240, "test69 case 33 failed\n"); + FAILED(buf[25] != 4059, "test69 case 34 failed\n"); + FAILED(buf[26] != 6359, "test69 case 35 failed\n"); + FAILED(buf[27] != (sljit_sw)(buf + 25), "test69 case 36 failed\n"); + FAILED(((sljit_u8*)(buf + 28))[0] != 0x55, "test69 case 37 failed\n"); + FAILED(((sljit_u8*)(buf + 28))[1] != 204, "test69 case 38 failed\n"); + FAILED(((sljit_u8*)(buf + 28))[2] != 0x55, "test69 case 39 failed\n"); + FAILED(buf[29] != 105, "test69 case 40 failed\n"); + FAILED(((sljit_u8*)(buf + 30))[1] != 0x55, "test69 case 41 failed\n"); + FAILED(((sljit_u8*)(buf + 30))[2] != 240, "test69 case 42 failed\n"); + FAILED(((sljit_u8*)(buf + 30))[3] != 0x55, "test69 case 43 failed\n"); + FAILED(buf[31] != 13, "test69 case 44 failed\n"); + FAILED(buf[32] != 0, "test69 case 45 failed\n"); + FAILED(((sljit_u16*)(buf + 33))[0] != 0x5555, "test69 case 46 failed\n"); + FAILED(((sljit_u16*)(buf + 33))[1] != 51403, "test69 case 47 failed\n"); + FAILED(buf[34] != 14876, "test69 case 48 failed\n"); +#if IS_64BIT + FAILED(((sljit_u8*)(buf + 35))[7] != 0x42, "test69 case 49 failed\n"); + FAILED(buf[36] != 0x88, "test69 case 50 failed\n"); + FAILED(((sljit_u16*)(buf + 37))[3] != 0x6942, "test69 case 51 failed\n"); + FAILED(buf[38] != 0x1337, "test69 case 52 failed\n"); + FAILED(((sljit_u32*)(buf + 39))[0] != 0x55555555, "test69 case 53 failed\n"); + FAILED(((sljit_u32*)(buf + 39))[1] != 
0xdeadbeef, "test69 case 54 failed\n"); + FAILED(buf[40] != 0xffffffff, "test69 case 55 failed\n"); +#endif /* IS_64BIT */ + FAILED(buf[41] != 1, "test69 case 56 failed\n"); + FAILED(!buf[42], "test69 case 57 failed\n"); + FAILED(buf[43] != 0x11, "test69 case 58 failed\n"); + FAILED(((sljit_u8*)(buf + 44))[1] != buf[43], "test69 case 59 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test70(void) +{ + /* Test accessing temporary registers. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_sw buf[SLJIT_NUMBER_OF_TEMPORARY_REGISTERS + SLJIT_NUMBER_OF_REGISTERS - 1]; + sljit_f64 fbuf[2 * (SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS + SLJIT_NUMBER_OF_FLOAT_REGISTERS)]; + sljit_s32 i, ctr; + + if (verbose) + printf("Run test70\n"); + + for (i = 0; i < SLJIT_NUMBER_OF_TEMPORARY_REGISTERS; i++) + buf[i] = -1; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), SLJIT_NUMBER_OF_REGISTERS - 1, 1, 0, 0, 0); + + ctr = 123; + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_R0, 0, SLJIT_IMM, ctr); + + /* Set registers, also works for virtual registers. */ + for (i = 1; i < SLJIT_NUMBER_OF_TEMPORARY_REGISTERS; i++) { + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_TMP_R(i)) >= 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_R(i), 0, SLJIT_IMM, ++ctr); + } + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS - 1; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, ++ctr); + + /* Save registers, temporaries first to avoid issues when virtual registers are copied. 
*/ + ctr = 0; + for (i = 0; i < SLJIT_NUMBER_OF_TEMPORARY_REGISTERS; i++, ctr += (sljit_s32)sizeof(sljit_sw)) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), ctr, SLJIT_TMP_R(i), 0); + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS - 1; i++, ctr += (sljit_s32)sizeof(sljit_sw)) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), ctr, SLJIT_R(i), 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + for (i = 0; i < (SLJIT_NUMBER_OF_TEMPORARY_REGISTERS + SLJIT_NUMBER_OF_REGISTERS - 1); i++) { + FAILED(buf[i] != (123 + i), "test70 case 1 failed\n"); + } + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + successful_tests++; + return; + } + + ctr = SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS + SLJIT_NUMBER_OF_FLOAT_REGISTERS; + for (i = 0; i < ctr; i++) { + fbuf[i] = 123.0 + i; + fbuf[ctr + i] = -1.0; + } + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_TMP_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + ctr = sizeof(sljit_f64); + for (i = 1; i < SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS; i++, ctr += (sljit_s32)(sizeof(sljit_f64))) { + SLJIT_ASSERT(sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_TMP_FR(i)) >= 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_TMP_FR(i), 0, SLJIT_MEM1(SLJIT_S0), ctr); + } + + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++, ctr += (sljit_s32)(sizeof(sljit_f64))) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_S0), ctr); + + for (i = 0; i < SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS; i++, ctr += (sljit_s32)(sizeof(sljit_f64))) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), ctr, SLJIT_TMP_FR(i), 0); + + for (i = 0; i < 
SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++, ctr += (sljit_s32)(sizeof(sljit_f64))) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), ctr, SLJIT_FR(i), 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)fbuf); + sljit_free_code(code.code, NULL); + + ctr = SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS + SLJIT_NUMBER_OF_FLOAT_REGISTERS; + for (i = 0; i < ctr; i++) { + FAILED(fbuf[ctr + i] != (123.0 + i), "test70 case 2 failed\n"); + } + + if (sljit_has_cpu_feature(SLJIT_HAS_F64_AS_F32_PAIR)) { + fbuf[0] = 123456789012.0; + fbuf[1] = -1.0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_TMP_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_TMP_FR0), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); +#else /* !SLJIT_LITTLE_ENDIAN */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_TMP_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_TMP_FR0), 0, SLJIT_MEM1(SLJIT_S0), 0); +#endif /* SLJIT_LITTLE_ENDIAN */ + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_TMP_FR0, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)fbuf); + sljit_free_code(code.code, NULL); + + FAILED(fbuf[1] != 123456789012.0, "test70 case 3 failed\n"); + } + + if (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) { + fbuf[0] = 123456789012.0; + fbuf[1] = 456789012345.0; + fbuf[2] = -1.0; + fbuf[3] = -1.0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot 
create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + + i = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_MEM_ALIGNED_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | i, SLJIT_TMP_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | i, SLJIT_TMP_FR0, SLJIT_MEM1(SLJIT_S0), 16); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)fbuf); + sljit_free_code(code.code, NULL); + + FAILED(fbuf[2] != 123456789012.0, "test70 case 4 failed\n"); + FAILED(fbuf[3] != 456789012345.0, "test70 case 5 failed\n"); + } + + successful_tests++; +} + +#include "sljitTestCall.h" +#include "sljitTestFloat.h" +#include "sljitTestSimd.h" +#include "sljitTestSerialize.h" + int sljit_test(int argc, char* argv[]) { sljit_s32 has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); @@ -11111,6 +8018,7 @@ int sljit_test(int argc, char* argv[]) #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) test_exec_allocator(); #endif + test_macros(); test1(); test2(); test3(); @@ -11181,37 +8089,90 @@ int sljit_test(int argc, char* argv[]) test68(); test69(); test70(); - test71(); - test72(); - test73(); - test74(); - test75(); - test76(); - test77(); - test78(); - test79(); - test80(); - test81(); - test82(); - test83(); - test84(); - test85(); - test86(); - test87(); - test88(); + + if (verbose) + printf("---- Call tests ----\n"); + + test_call1(); + test_call2(); + test_call3(); + test_call4(); + test_call5(); + test_call6(); + test_call7(); + test_call8(); + test_call9(); + test_call10(); + test_call11(); + test_call12(); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("---- Float tests ----\n"); + test_float1(); + test_float2(); + test_float3(); + test_float4(); + test_float5(); + test_float6(); + test_float7(); + 
test_float8(); + test_float9(); + test_float10(); + test_float11(); + test_float12(); + test_float13(); + test_float14(); + test_float15(); + test_float16(); + test_float17(); + test_float18(); + test_float19(); + test_float20(); + test_float21(); + test_float22(); + } else { + if (verbose) + printf("no fpu available, fpu tests are skipped\n"); + successful_tests += 22; + } + + if (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) { + if (verbose) + printf("---- SIMD tests ----\n"); + test_simd1(); + test_simd2(); + test_simd3(); + test_simd4(); + test_simd5(); + test_simd6(); + test_simd7(); + test_simd8(); + } else { + if (verbose) + printf("no simd available, simd tests are skipped\n"); + successful_tests += 8; + } + + if (verbose) + printf("---- Serialize tests ----\n"); + + test_serialize1(); + test_serialize2(); + test_serialize3(); #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) sljit_free_unused_memory_exec(); #endif -# define TEST_COUNT 88 +# define TEST_COUNT 115 printf("SLJIT tests: "); if (successful_tests == TEST_COUNT) - printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); + printf("all tests " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); else - printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are " COLOR_RED "FAILED" COLOR_DEFAULT " ", TEST_COUNT - successful_tests, (TEST_COUNT - successful_tests) * 100 / TEST_COUNT); - printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), sljit_has_cpu_feature(SLJIT_HAS_FPU) ? " (with fpu)" : " (without fpu)"); + printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests " COLOR_RED "FAILED" COLOR_DEFAULT " ", TEST_COUNT - successful_tests, (TEST_COUNT - successful_tests) * 100 / TEST_COUNT); + printf("on " COLOR_ARCH "%s" COLOR_DEFAULT " (%s)\n", sljit_get_platform_name(), sljit_has_cpu_feature(SLJIT_HAS_SIMD) ? "with simd" : (sljit_has_cpu_feature(SLJIT_HAS_FPU) ? 
"with fpu" : "basic")); return TEST_COUNT - successful_tests; diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestCall.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestCall.h new file mode 100755 index 0000000000..891284d53d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestCall.h @@ -0,0 +1,2031 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +static sljit_sw func(sljit_sw a, sljit_sw b, sljit_sw c) +{ + return a + b + c + 5; +} + +static sljit_sw func4(sljit_sw a, sljit_sw b, sljit_sw c, sljit_sw d) +{ + return func(a, b, c) + d; +} + +static void test_call1(void) +{ + /* Test function call. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump = NULL; + sljit_sw buf[9]; + sljit_sw res; + + if (verbose) + printf("Run test_call1\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + buf[8] = SLJIT_FUNC_ADDR(func); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 4, 2, 0, 0, 0); + + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 7); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(func)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); + + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -10); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); + jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_REWRITABLE_JUMP, SLJIT_ARGS3(W, W, W, W)); + sljit_set_target(jump, (sljit_uw)-1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 40); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), 
SLJIT_RETURN_REG, 0); + + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -60); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -30); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 10); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 100); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 110); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 120); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + /* buf[7] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -6); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(func4)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + /* buf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -10); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 6); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, W, W, W), SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_set_jump_addr(sljit_get_jump_addr(jump), SLJIT_FUNC_UADDR(func), sljit_get_executable_offset(compiler)); + sljit_free_compiler(compiler); + + res = code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(res != -15, "test_call1 case 1 failed\n"); + FAILED(buf[0] != 14, "test_call1 case 2 failed\n"); + FAILED(buf[1] != -8, "test_call1 case 3 failed\n"); + FAILED(buf[2] != SLJIT_FUNC_ADDR(func) + 42, "test_call1 case 4 failed\n"); + FAILED(buf[3] != SLJIT_FUNC_ADDR(func) - 85, "test_call1 case 5 failed\n"); + FAILED(buf[4] != SLJIT_FUNC_ADDR(func) + 31, "test_call1 case 6 failed\n"); + FAILED(buf[5] != 335, "test_call1 case 7 failed\n"); + FAILED(buf[6] != 11, "test_call1 case 8 failed\n"); + FAILED(buf[7] != 5, "test_call1 case 9 failed\n"); + FAILED(buf[8] != -15, "test_call1 case 10 failed\n"); + + successful_tests++; +} + +static void test_call2(void) +{ + /* Ackermann benchmark. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *entry; + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_jump *jump1; + struct sljit_jump *jump2; + sljit_sw res; + + if (verbose) + printf("Run test_call2\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + entry = sljit_emit_label(compiler); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2(W, W, W), 3, 2, 0, 0, 0); + /* If x == 0. */ + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_S0, 0, SLJIT_IMM, 0); + jump1 = sljit_emit_jump(compiler, SLJIT_EQUAL); + /* If y == 0. */ + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_S1, 0, SLJIT_IMM, 0); + jump2 = sljit_emit_jump(compiler, SLJIT_EQUAL); + + /* Ack(x,y-1). */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 1); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); + sljit_set_label(jump, entry); + + /* Returns with Ack(x-1, Ack(x,y-1)). */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_RETURN_REG, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); + sljit_set_label(jump, entry); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + /* Returns with y+1. 
*/ + label = sljit_emit_label(compiler); + sljit_set_label(jump1, label); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1, SLJIT_S1, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + /* Returns with Ack(x-1,1) */ + label = sljit_emit_label(compiler); + sljit_set_label(jump2, label); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); + sljit_set_label(jump, entry); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* For benchmarking. */ + /* FAILED(code.func2(3, 11) != 16381, "test_call2 case 1 failed\n"); */ + + res = code.func2(3, 3); + sljit_free_code(code.code, NULL); + + FAILED(res != 61, "test_call2 case 1 failed\n"); + + successful_tests++; +} + +static sljit_f64 test_call3_f1(sljit_f32 a, sljit_f32 b, sljit_f64 c) +{ + return (sljit_f64)a + (sljit_f64)b + c; +} + +static sljit_f32 test_call3_f2(sljit_sw a, sljit_f64 b, sljit_f32 c) +{ + return (sljit_f32)((sljit_f64)a + b + (sljit_f64)c); +} + +static sljit_f64 test_call3_f3(sljit_sw a, sljit_f32 b, sljit_sw c) +{ + return (sljit_f64)a + (sljit_f64)b + (sljit_f64)c; +} + +static sljit_f64 test_call3_f4(sljit_f32 a, sljit_sw b) +{ + return (sljit_f64)a + (sljit_f64)b; +} + +static sljit_f32 test_call3_f5(sljit_f32 a, sljit_f64 b, sljit_s32 c) +{ + return (sljit_f32)((sljit_f64)a + b + (sljit_f64)c); +} + +static sljit_sw test_call3_f6(sljit_f64 a, sljit_sw b) +{ + return (sljit_sw)(a + (sljit_f64)b); +} + +static void test_call3(void) +{ + /* Check function calls with floating point arguments. 
*/ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump* jump = NULL; + sljit_f64 dbuf[7]; + sljit_f32 sbuf[7]; + sljit_sw wbuf[2]; + + if (verbose) + printf("Run test_call3\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test_call3 skipped\n"); + successful_tests++; + return; + } + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + dbuf[0] = 5.25; + dbuf[1] = 0.0; + dbuf[2] = 2.5; + dbuf[3] = 0.0; + dbuf[4] = 0.0; + dbuf[5] = 0.0; + dbuf[6] = -18.0; + + sbuf[0] = 6.75f; + sbuf[1] = -3.5f; + sbuf[2] = 1.5f; + sbuf[3] = 0.0f; + sbuf[4] = 0.0f; + + wbuf[0] = 0; + wbuf[1] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 3, 3, 4, 0, sizeof(sljit_sw)); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, F32, F32, F64), SLJIT_IMM, SLJIT_FUNC_ADDR(test_call3_f1)); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, F32, F32, F64)); + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_call3_f1)); + /* dbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call3_f2)); + 
sljit_get_local_base(compiler, SLJIT_R1, 0, -16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F32, W, F64, F32), SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + /* sbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -4); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 9); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3(F64, W, F32, W)); + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_call3_f3)); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -6); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(F64, F32, W)); + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_call3_f4)); + /* dbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call3_f5)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(F32, F32, F64, 32), SLJIT_MEM1(SLJIT_SP), 0); + /* sbuf[4] */ + 
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call3_f6)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, F64, W), SLJIT_R0, 0); + /* wbuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_R0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 319); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call3_f6)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, F64, W), SLJIT_R1, 0); + /* wbuf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&dbuf, (sljit_sw)&sbuf, (sljit_sw)&wbuf); + sljit_free_code(code.code, NULL); + + FAILED(dbuf[1] != 8.5, "test_call3 case 1 failed\n"); + FAILED(dbuf[3] != 0.5, "test_call3 case 2 failed\n"); + FAILED(sbuf[3] != 17.75, "test_call3 case 3 failed\n"); + FAILED(dbuf[4] != 11.75, "test_call3 case 4 failed\n"); + FAILED(dbuf[5] != -9.5, "test_call3 case 5 failed\n"); + FAILED(sbuf[4] != 12, "test_call3 case 6 failed\n"); + FAILED(wbuf[0] != SLJIT_FUNC_ADDR(test_call3_f6) - 18, "test_call3 case 7 failed\n"); + FAILED(wbuf[1] != 301, "test_call3 case 8 failed\n"); + + successful_tests++; +} + +static sljit_sw test_call4_f1(sljit_sw a, sljit_s32 b, sljit_sw c, sljit_sw d) +{ + return (sljit_sw)(a + b + c + d - SLJIT_FUNC_ADDR(test_call4_f1)); +} + +static sljit_s32 test_call4_f2(sljit_f64 a, sljit_f32 b, sljit_f64 c, sljit_sw d) +{ + return (sljit_s32)(a + b + c + (sljit_f64)d); +} + +static sljit_f32 
test_call4_f3(sljit_f32 a, sljit_s32 b, sljit_f64 c, sljit_sw d) +{ + return (sljit_f32)(a + (sljit_f64)b + c + (sljit_f64)d); +} + +static sljit_f32 test_call4_f4(sljit_f32 a, sljit_f64 b, sljit_f32 c, sljit_f64 d) +{ + return (sljit_f32)(a + b + c + (sljit_f64)d); +} + +static void test_call4(void) +{ + /* Check function calls with four arguments. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump = NULL; + sljit_sw wbuf[5]; + sljit_f64 dbuf[3]; + sljit_f32 sbuf[4]; + + if (verbose) + printf("Run test_call4\n"); + + wbuf[0] = 0; + wbuf[1] = 0; + wbuf[2] = SLJIT_FUNC_ADDR(test_call4_f1); + wbuf[3] = 0; + wbuf[4] = 0; + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + dbuf[0] = 5.125; + dbuf[1] = 6.125; + dbuf[2] = 4.25; + + sbuf[0] = 0.75; + sbuf[1] = -1.5; + sbuf[2] = 0.0; + sbuf[3] = 0.0; + } + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 4, 3, 4, 0, sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 33); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -20); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call4_f1)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -40); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, 32, W, W), SLJIT_R2, 0); + /* wbuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call4_f1)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -25); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 100); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -10); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, 32, W, W), SLJIT_R0, 0); + /* wbuf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 231); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call4_f1) - 100); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, 32, W, W), SLJIT_MEM2(SLJIT_R0, SLJIT_R2), SLJIT_WORD_SHIFT); + /* wbuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -100); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(32, F64, F32, F64, W), SLJIT_IMM, SLJIT_FUNC_ADDR(test_call4_f2)); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R0, 0); + /* wbuf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 36); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 41); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS4(F32, F32, 32, F64, W)); + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_call4_f3)); + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call4_f4)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), 0); + 
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(F32, F32, F64, F32, F64), SLJIT_R0, 0); + /* sbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_f32), SLJIT_FR0, 0); + } + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&wbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); + sljit_free_code(code.code, NULL); + + FAILED(wbuf[0] != -27, "test_call4 case 1 failed\n"); + FAILED(wbuf[1] != 65, "test_call4 case 2 failed\n"); + FAILED(wbuf[3] != (sljit_sw)wbuf + 133, "test_call4 case 3 failed\n"); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + FAILED(wbuf[4] != -88, "test_call4 case 4 failed\n"); + FAILED(sbuf[2] != 79.75f, "test_call4 case 5 failed\n"); + FAILED(sbuf[3] != 8.625f, "test_call4 case 6 failed\n"); + } + + successful_tests++; +} + +static sljit_sw test_call5_f1(sljit_sw a) +{ + return a + 10000; +} + +static sljit_sw test_call5_f2(sljit_sw a, sljit_s32 b, sljit_s32 c, sljit_sw d) +{ + return a | b | c | d; +} + +static sljit_sw test_call5_f3(sljit_sw a, sljit_s32 b, sljit_s32 c, sljit_sw d) +{ + SLJIT_UNUSED_ARG(a); + return b | c | d; +} + +static sljit_sw test_call5_f4(void) +{ + return 7461932; +} + +static void test_call5(void) +{ + /* Test tail calls. */ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump *jump; + sljit_uw jump_addr; + sljit_sw executable_offset; + sljit_sw res; + + if (verbose) + printf("Run test_call5\n"); + + /* Next test.
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 4, 4, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, -1); + sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(test_call5_f1)); + /* Should crash. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.func1(7987); + sljit_free_code(code.code, NULL); + + FAILED(res != 17987, "test_call5 case 1 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 1, 4, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, -1); + jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN, SLJIT_ARGS1(W, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + sljit_set_target(jump, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + executable_offset = sljit_get_executable_offset(compiler); + jump_addr = sljit_get_jump_addr(jump); + sljit_free_compiler(compiler); + + sljit_set_jump_addr(jump_addr, SLJIT_FUNC_UADDR(test_call5_f1), executable_offset); + + res = code.func1(3903); + sljit_free_code(code.code, NULL); + + FAILED(res != 13903, "test_call5 case 2 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 4, 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call5_f2)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x28000000); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x00140000); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, 0x00002800); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0x00000041); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); + sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, 32, 32, W), SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(res != 0x28142841, "test_call5 case 3 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 4, 4, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call5_f2)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)0x81000000); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x00480000); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, 0x00002100); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0x00000014); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); + sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, 32, 32, W), SLJIT_S3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(res != (sljit_sw)0x81482114, "test_call5 case 4 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 4, 0, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_ADDR(test_call5_f3)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x342); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, 0x451000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0x21000000); + sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, 32, 32, W), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(res != 0x21451342, "test_call5 case 5 failed\n"); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 1, 0, 0, 0, 9); + sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS0(W), SLJIT_IMM, SLJIT_FUNC_ADDR(test_call5_f4)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(res != 7461932, "test_call5 case 6 failed\n"); + + successful_tests++; +} + +static sljit_sw test_call6_f5(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_f64 d) +{ + if (a == 1345.5 && b == -8724.25 && c == 9034.75 && d == 6307.5) + return 8920567; + return 0; +} + +static sljit_sw test_call6_f6(sljit_f64 a, sljit_f64 b, sljit_f64 c, sljit_sw d) +{ + if (a == 4061.25 && b == -3291.75 && c == 8703.5 && d == 1706) + return 5074526; + return 0; +} + +static void test_call6(void) +{ + /* Test tail calls. 
*/ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump *jump; + sljit_sw res; + sljit_sw wbuf[1]; + sljit_f64 dbuf[4]; + + if (verbose) + printf("Run test_call6\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test_call6 skipped\n"); + successful_tests++; + return; + } + + /* Next test. */ + + dbuf[0] = 9034.75; + dbuf[1] = 6307.5; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2(W, F32, F64), 1, 1, 4, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR0, 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64)); + sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, F64), SLJIT_IMM, SLJIT_FUNC_ADDR(test_call6_f5)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.test_call6_f1(1345.5f, -8724.25); + sljit_free_code(code.code, NULL); + + FAILED(res != 8920567, "test_call6 case 1 failed\n"); + + /* Next test. 
*/ + + wbuf[0] = SLJIT_FUNC_ADDR(test_call6_f5); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4(W, F64, F64, F64, F64), 1, 0, 4, 0, 0); + sljit_emit_icall(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, F64), SLJIT_MEM0(), (sljit_sw)wbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.test_call6_f2(1345.5, -8724.25, 9034.75, 6307.5); + sljit_free_code(code.code, NULL); + + FAILED(res != 8920567, "test_call6 case 2 failed\n"); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3(W, F64, F64, F64), 1, 0, 4, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); + jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_call6_f6)); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.test_call6_f3(4061.25, -3291.75, 8703.5); + sljit_free_code(code.code, NULL); + + FAILED(res != 5074526, "test_call6 case 3 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3(W, F64, F64, F64), SLJIT_NUMBER_OF_SCRATCH_REGISTERS + 1, 0, 4, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); + jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_call6_f6)); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.test_call6_f3(4061.25, -3291.75, 8703.5); + sljit_free_code(code.code, NULL); + + FAILED(res != 5074526, "test_call6 case 4 failed\n"); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3(W, F64, F64, F64), SLJIT_NUMBER_OF_SCRATCH_REGISTERS + 1, 1, 3, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); + jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_call6_f6)); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res = code.test_call6_f3(4061.25, -3291.75, 8703.5); + sljit_free_code(code.code, NULL); + + FAILED(res != 5074526, "test_call6 case 5 failed\n"); + + successful_tests++; +} + +static void test_call7(void) +{ + /* Test register argument and keep saved registers. 
*/ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump* jump; + sljit_sw buf[9]; + sljit_s32 i; + + if (verbose) + printf("Run test_call7\n"); + + for (i = 0; i < 9; i++) + buf[i] = -1; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 4, 2, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 7945); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -9267); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 4309); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -8321); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 6803); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -5497); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(W, W, W, W, W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 4, 2, 0, 0, 32); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[3-6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, 
SLJIT_R0, 0, SLJIT_IMM, 6028); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4982); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1289); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 6028, "test_call7 case 1 failed\n"); + FAILED(buf[1] != 6803, "test_call7 case 2 failed\n"); + FAILED(buf[2] != -5497, "test_call7 case 3 failed\n"); + FAILED(buf[3] != 7945, "test_call7 case 4 failed\n"); + FAILED(buf[4] != -9267, "test_call7 case 5 failed\n"); + FAILED(buf[5] != 4309, "test_call7 case 6 failed\n"); + FAILED(buf[6] != -8321, "test_call7 case 7 failed\n"); + + /* Next test. */ + + for (i = 0; i < 9; i++) + buf[i] = -1; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 4, 2, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -2608); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4751); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 5740); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -9704); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -8749); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 9213); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(W, W, W, W, W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, 
sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 6, 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_set_context(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 6, 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[3-7] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_sw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -7351); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 3628); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != -7351, "test_call7 case 8 failed\n"); + FAILED(buf[1] != 3628, "test_call7 case 9 failed\n"); + FAILED(buf[2] != 9213, "test_call7 case 10 failed\n"); + FAILED(buf[3] != -8749, "test_call7 case 11 failed\n"); + FAILED(buf[4] != -2608, "test_call7 case 12 failed\n"); + FAILED(buf[5] != 4751, "test_call7 case 13 failed\n"); + FAILED(buf[6] != 5740, "test_call7 case 14 failed\n"); + FAILED(buf[7] != -9704, "test_call7 case 15 failed\n"); + FAILED(buf[8] != -1, "test_call7 case 16 failed\n"); + + /* Next test. 
*/ + + for (i = 0; i < 9; i++) + buf[i] = -1; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 4, 2, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8653); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 7245); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3610); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 4591); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -2865); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 2510); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4V(W, W, W, W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[2-7] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 5 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 6 * sizeof(sljit_sw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 7 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 5789); + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -9214); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 5789, "test_call7 case 17 failed\n"); + FAILED(buf[1] != -9214, "test_call7 case 18 failed\n"); + FAILED(buf[2] != -2865, "test_call7 case 19 failed\n"); + FAILED(buf[3] != 2510, "test_call7 case 20 failed\n"); + FAILED(buf[4] != 8653, "test_call7 case 21 failed\n"); + FAILED(buf[5] != 7245, "test_call7 case 22 failed\n"); + FAILED(buf[6] != -3610, "test_call7 case 23 failed\n"); + FAILED(buf[7] != 4591, "test_call7 case 24 failed\n"); + FAILED(buf[8] != -1, "test_call7 case 25 failed\n"); + + /* Next test. */ + + for (i = 0; i < 9; i++) + buf[i] = -1; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 2, 3, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 6071); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -3817); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 9250); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S2, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), 
SLJIT_ARGS0(W), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 5 * sizeof(sljit_sw), SLJIT_S1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -6278); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1467); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 7150 - 1467); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 8413); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4892); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -7513); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, W, W, W, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS4(W, W_R, W_R, W_R, W_R), 4, 2, 0, 0, 256); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_R1, 0); + /* buf[7] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R2, 0); + /* buf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 6923); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); 
+ sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 6923, "test_call7 case 26 failed\n"); + FAILED(buf[1] != 4892, "test_call7 case 27 failed\n"); + FAILED(buf[2] != -7513, "test_call7 case 28 failed\n"); + FAILED(buf[3] != 9250, "test_call7 case 29 failed\n"); + FAILED(buf[4] != 6071, "test_call7 case 30 failed\n"); + FAILED(buf[5] != -3817, "test_call7 case 31 failed\n"); + FAILED(buf[6] != -6278, "test_call7 case 32 failed\n"); + FAILED(buf[7] != 7150, "test_call7 case 33 failed\n"); + FAILED(buf[8] != 8413, "test_call7 case 34 failed\n"); + + successful_tests++; +} + +static void test_call8(void) +{ + /* Test register arguments and kept saved registers combined with floating point arguments. */ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump* jump; + sljit_sw buf[9]; + sljit_f64 dbuf[3]; + sljit_s32 i; + + if (verbose) + printf("Run test_call8\n"); + + /* Fill all 9 elements of buf, matching the fill loop used by each sub-test. */ + for (i = 0; i < 9; i++) + buf[i] = -1; + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test_call8 skipped\n"); + successful_tests++; + return; + } + + /* Next test. 
*/ + + for (i = 0; i < 9; i++) + buf[i] = -1; + + dbuf[0] = 4061.25; + dbuf[1] = -3291.75; + dbuf[2] = 8703.5; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 2, 3, 3, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&dbuf); + /* dbuf[0] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_R1), 0); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f64)); + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_f64)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -8956); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 4381); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -5314); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4(W, F64, F64, F64, W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S2, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS4(W, F64, F64, F64, W_R), 1, 3, 3, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_S0, 0); + /* 
buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&dbuf); + /* dbuf[0] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_FR2, 0); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64), SLJIT_FR1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2784); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 1503); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 2784, "test_call8 case 1 failed\n"); + FAILED(buf[1] != 1503, "test_call8 case 2 failed\n"); + FAILED(buf[2] != 4381, "test_call8 case 3 failed\n"); + FAILED(buf[3] != -5314, "test_call8 case 4 failed\n"); + FAILED(buf[4] != -8956, "test_call8 case 5 failed\n"); + FAILED(buf[5] != 1706, "test_call8 case 6 failed\n"); + FAILED(buf[6] != -1, "test_call8 case 7 failed\n"); + FAILED(dbuf[0] != 8703.5, "test_call8 case 8 failed\n"); + FAILED(dbuf[1] != 4061.25, "test_call8 case 9 failed\n"); + FAILED(dbuf[2] != -3291.75, "test_call8 case 10 failed\n"); + + /* Next test. 
*/ + + for (i = 0; i < 9; i++) + buf[i] = -1; + + dbuf[0] = 4061.25; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 3, 3, 1, 0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM0(), (sljit_sw)&dbuf); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8793); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -4027); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2910); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4619); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1502); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 5316); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4V(F64, W, W, W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S1, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_sw), SLJIT_S2, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS4V(F64, W_R, W_R, W_R), 3, 3, 3, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_sw), SLJIT_S1, 0); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 5 * sizeof(sljit_sw), SLJIT_R0, 0); + /* buf[6] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 6 * sizeof(sljit_sw), SLJIT_R1, 0); + /* buf[7] */ + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 7 * sizeof(sljit_sw), SLJIT_R2, 0); + + sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 7839); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -9215); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 7839, "test_call8 case 11 failed\n"); + FAILED(buf[1] != -9215, "test_call8 case 12 failed\n"); + FAILED(buf[2] != 5316, "test_call8 case 13 failed\n"); + FAILED(buf[3] != 4619, "test_call8 case 14 failed\n"); + FAILED(buf[4] != -1502, "test_call8 case 15 failed\n"); + FAILED(buf[5] != 8793, "test_call8 case 16 failed\n"); + FAILED(buf[6] != -4027, "test_call8 case 17 failed\n"); + FAILED(buf[7] != 2910, "test_call8 case 18 failed\n"); + FAILED(buf[8] != -1, "test_call8 case 19 failed\n"); + FAILED(dbuf[0] != -4061.25, "test_call8 case 20 failed\n"); + + /* Next test. 
*/ + + for (i = 0; i < 9; i++) + buf[i] = -1; + + dbuf[0] = 4061.25; + dbuf[1] = -3291.75; + dbuf[2] = 8703.5; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 2, 3, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 7869); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -5406); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 4951); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S1, 0); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S2, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS0(W), 1, 3, 3, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 5 * sizeof(sljit_sw), SLJIT_S1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + /* dbuf[0] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_R0), 0); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64)); + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64)); + + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1706); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 4713); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -2078); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG | SLJIT_CALL_RETURN, SLJIT_ARGS4(W, F64, F64, F64, W)); + /* Load from address 0: presumably only reachable if the SLJIT_CALL_RETURN call above wrongly falls through. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS4(W, F64, F64, F64, W_R), 1, 0, 3, 0, 256); + + /* buf[6]: the word argument, read from R0 */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&buf[6], SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + /* dbuf[0] = FR2 (the three float arguments are written back rotated by one) */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR2, 0); + /* dbuf[1] = FR0 */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR0, 0); + /* dbuf[2] = FR1 */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR1, 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 5074); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + /* Case numbers continue from the previous sub-test, which ended at case 20. */ + FAILED(buf[0] != 5074, "test_call8 case 21 failed\n"); + FAILED(buf[1] != 4713, "test_call8 case 22 failed\n"); + FAILED(buf[2] != -2078, "test_call8 case 23 failed\n"); + FAILED(buf[3] != 4951, "test_call8 case 24 failed\n"); + FAILED(buf[4] != 7869, "test_call8 case 25 failed\n"); + FAILED(buf[5] != -5406, "test_call8 case 26 failed\n"); + FAILED(buf[6] != 1706, "test_call8 case 27 failed\n"); + FAILED(buf[7] != -1, "test_call8 case 28 failed\n"); + FAILED(dbuf[0] != 8703.5, "test_call8 case 29 failed\n"); + FAILED(dbuf[1] != 4061.25, "test_call8 case 30 failed\n"); + FAILED(dbuf[2] != -3291.75, "test_call8 case 31
failed\n"); + + successful_tests++; +} + +static void test_call9(void) +{ + /* Test register preservation in keep saved registers mode. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[6 + SLJIT_NUMBER_OF_REGISTERS]; + struct sljit_jump* jump; + sljit_s32 i; + + if (verbose) + printf("Run test_call9\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), SLJIT_NUMBER_OF_REGISTERS - 3, 3, 0, 0, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS - 3; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 8469 + 1805 * i); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 3671); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 2418); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 1597); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS4V(W, W, W, W)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 6), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf); + /* buf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 3 * sizeof(sljit_sw), SLJIT_S0, 0); + /* buf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_sw), SLJIT_S1, 0); + /* buf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 5 * sizeof(sljit_sw), SLJIT_S2, 0); + + for (i = 1; i < SLJIT_NUMBER_OF_REGISTERS - 3; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), (6 + i) * (sljit_sw)sizeof(sljit_sw), SLJIT_R(i), 0); + + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS4V(W_R, W_R, W_R, W_R), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_set_context(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS4V(W_R, W_R, W_R, W_R), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); + + /*
buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 0), SLJIT_S0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 1), SLJIT_S1, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 2), SLJIT_S2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 6501); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 7149); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 5732); + + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG | SLJIT_CALL_RETURN, SLJIT_ARGS0V()); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM0(), 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS0V(), 4, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE / 2); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 3671, "test_call9 case 1 failed\n"); + FAILED(buf[1] != 2418, "test_call9 case 2 failed\n"); + FAILED(buf[2] != 1597, "test_call9 case 3 failed\n"); + FAILED(buf[3] != 6501, "test_call9 case 4 failed\n"); + FAILED(buf[4] != 7149, "test_call9 case 5 failed\n"); + FAILED(buf[5] != 5732, "test_call9 case 6 failed\n"); + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS - 3; i++) { + FAILED(buf[6 + i] != 8469 + 1805 * i, "test_call9 case 7 failed\n"); + } + + successful_tests++; +} + +static void test_call10(void) +{ + /* Test return with floating point value. */ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump* jump; + sljit_f64 dbuf[2]; + sljit_f32 sbuf[2]; + + if (verbose) + printf("Run test_call10\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test_call10 skipped\n"); + successful_tests++; + return; + } + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F64, W), 0, 1, 3, 0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_return(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + dbuf[0] = 35.125; + dbuf[0] = code.test_call10_f2((sljit_sw)dbuf); + sljit_free_code(code.code, NULL); + + FAILED(dbuf[0] != 35.125, "test_call10 case 1 failed\n"); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F32, W), 0, 1, 1, 0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_return(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + sbuf[0] = -9027.5; + sbuf[0] = code.test_call10_f1((sljit_sw)sbuf); + sljit_free_code(code.code, NULL); + + FAILED(sbuf[0] != -9027.5, "test_call10 case 2 failed\n"); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F32, W), 0, 1, 1, 0, sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_return(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + sbuf[0] = -6.75; + sbuf[0] = code.test_call10_f1((sljit_sw)sbuf); + sljit_free_code(code.code, NULL); + + FAILED(sbuf[0] != -6.75, "test_call10 case 3 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(F64, W), 0, 1, 1, 0, 2 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_return(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f64)); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + dbuf[0] = 45.125; + dbuf[0] = code.test_call10_f2((sljit_sw)dbuf); + sljit_free_code(code.code, NULL); + + FAILED(dbuf[0] != 45.125, "test_call10 case 4 failed\n"); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 1, 0, 1, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)dbuf - 33); + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1(F64, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)dbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_RETURN_FREG, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS1(F64, W_R), 1, 0, 1, 0, 0); + sljit_emit_return(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 33); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + dbuf[0] = 2571.75; + dbuf[1] = 0; + code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(dbuf[1] != 2571.75, "test_call10 case 5 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 1, 0, 1, 0, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(F32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)sbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f32), SLJIT_RETURN_FREG, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS0(F32), 0, 0, 1, 0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0, SLJIT_MEM0(), (sljit_sw)sbuf); + sljit_emit_return(compiler, SLJIT_MOV_F32, SLJIT_RETURN_FREG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + sbuf[0] = 6310.25; + sbuf[1] = 0; + code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(sbuf[1] != 6310.25, "test_call10 case 6 failed\n"); + + successful_tests++; +} + +static void test_call11(void) +{ + /* Test return_to operation. */ + executable_code code, code2; + struct sljit_compiler* compiler; + struct sljit_jump* jump; + struct sljit_label* label; + sljit_s32 i; + sljit_sw buf[3]; + + if (verbose) + printf("Run test_call11\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 2, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -7602); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + label = sljit_emit_label(compiler); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); + sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W_R, W_R), 2, 0, 0, 0, 256); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 256 - sizeof(sljit_sw), SLJIT_IMM, -1); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 8945); + sljit_emit_return_to(compiler, SLJIT_MEM1(SLJIT_R1), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + buf[0] = (sljit_sw)sljit_get_label_addr(label); + buf[1] = 0; + + sljit_free_compiler(compiler); + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 8945, "test_call11 case 1 failed\n"); + FAILED(buf[1] != -7602, "test_call11 case 2 failed\n"); + + /* Next test. 
*/ + + for (i = 0; i < 3; i++) { + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 2, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 6032); + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1(W, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + label = sljit_emit_label(compiler); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_RETURN_REG, 0); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S0, 0); + sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(1), SLJIT_ARGS1V(W_R), 2, i == 1 ? 2 : 1, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_R0, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); + if (i == 2) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 0); + else + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S(i), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), SLJIT_MAX_LOCAL_SIZE - sizeof(sljit_sw), SLJIT_IMM, -1); + if (i != 0) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -3890); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 7145); + if (i == 2) + sljit_emit_return_to(compiler, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_sw)); + else + sljit_emit_return_to(compiler, SLJIT_S(i), 0); + + code.code = 
sljit_generate_code(compiler); + CHECK(compiler); + + buf[0] = (sljit_sw)sljit_get_label_addr(label); + buf[1] = 0; + buf[2] = 0; + + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != 7145, "test_call11 case 3 failed\n"); + FAILED(buf[1] != 6032, "test_call11 case 4 failed\n"); + if (i != 0) + FAILED(buf[2] != -3890, "test_call11 case 5 failed\n"); + } + + /* Next test. */ + + for (i = 0; i < 3; i++) { + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P_R), 2, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_R0, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS1(W, W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + label = sljit_emit_label(compiler); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)buf); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); + sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(W_R), 2, 1, 0, 0, (i == 0) ? 0 : (i == 1) ? 
512 : 32768); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 0x1000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, -4502); + sljit_emit_return_to(compiler, SLJIT_MEM1(SLJIT_R1), -0x1000); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + buf[0] = (sljit_sw)sljit_get_label_addr(label); + buf[1] = 0; + + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != -4502, "test_call11 case 6 failed\n"); + FAILED(buf[1] != (sljit_sw)buf, "test_call11 case 7 failed\n"); + } + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + i = SLJIT_S2; +#else + i = SLJIT_S(SLJIT_NUMBER_OF_SAVED_REGISTERS - 1); +#endif + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, i, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(W)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + label = sljit_emit_label(compiler); + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_S0, i), 0, SLJIT_RETURN_REG, 0); + sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 16); + for (i = 0; i < SLJIT_NUMBER_OF_SAVED_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S(i), 0, SLJIT_IMM, -1); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, (sljit_sw)(buf + 3)); + sljit_emit_op1(compiler, SLJIT_MOV, 
SLJIT_R1, 0, SLJIT_IMM, -3); + sljit_emit_return_to(compiler, SLJIT_MEM2(SLJIT_RETURN_REG, SLJIT_R1), SLJIT_WORD_SHIFT); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + buf[0] = (sljit_sw)sljit_get_label_addr(label); + buf[1] = 0; + buf[2] = 0; + + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[2] != (sljit_sw)(buf + 3), "test_call11 case 8 failed\n"); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P_R, P), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 586000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 392); + sljit_emit_icall(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS0(W), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + label = sljit_emit_label(compiler); + /* buf[0] */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_S0, 0, SLJIT_S1, 0); + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + buf[0] = (sljit_sw)sljit_get_label_addr(label); + + sljit_free_compiler(compiler); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS0V(), 2, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 16); + for (i = 2; i < SLJIT_NUMBER_OF_SAVED_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S(i), 0, SLJIT_IMM, -1); + /* buf[2] */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM0(), (sljit_sw)(buf + 2), SLJIT_S0, 0, SLJIT_S1, 
0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 416000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 931); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 2906); + sljit_emit_return_to(compiler, SLJIT_IMM, buf[0]); + + code2.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + + code.func2(SLJIT_FUNC_ADDR(code2.func0), (sljit_sw)buf); + sljit_free_code(code.code, NULL); + sljit_free_code(code2.code, NULL); + + FAILED(buf[0] != 416931, "test_call11 case 9 failed\n"); + FAILED(buf[1] != 2906, "test_call11 case 10 failed\n"); + FAILED(buf[2] != 586392, "test_call11 case 11 failed\n"); + + successful_tests++; +} + +static void test_call12(void) +{ + /* Test get return address. */ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump *jump; + struct sljit_label *label; + sljit_uw return_addr = 0; + sljit_uw buf[1]; + + if (verbose) + printf("Run test_call12\n"); + + /* Next test. 
*/ + + buf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(W), 1, 1, 0, 0, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(W)); + label = sljit_emit_label(compiler); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 1, 0, 0, 0, 0); + sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_RETURN_REG, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + return_addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != return_addr, "test_call12 case 1 failed\n"); + + /* Next test. */ + + buf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 2, 0, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -1); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS2V(W, W)); + label = sljit_emit_label(compiler); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 1, SLJIT_NUMBER_OF_SAVED_REGISTERS - 2, 0, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_MEM0(), (sljit_sw)buf); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + return_addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != return_addr, "test_call12 case 2 failed\n"); + + /* Next 
test. */ + + buf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(W), 1, 3, 0, 0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S2, 0, SLJIT_S0, 0, SLJIT_IMM, 16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1V(W)); + label = sljit_emit_label(compiler); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(3), SLJIT_ARGS1V(W_R), 1, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, SLJIT_MAX_LOCAL_SIZE >> 1); + sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_MEM2(SLJIT_S2, SLJIT_R0), 1); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + return_addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != return_addr, "test_call12 case 3 failed\n"); + + /* Next test. 
*/ + + buf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(W_R), 1, 0, 0, 0, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL_REG_ARG, SLJIT_ARGS1V(W)); + label = sljit_emit_label(compiler); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, SLJIT_ENTER_REG_ARG, SLJIT_ARGS1V(W_R), 1, SLJIT_NUMBER_OF_SAVED_REGISTERS >> 1, 0, 0, 64); + sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + return_addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != return_addr, "test_call12 case 4 failed\n"); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU) && SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0) { + /* Next test. 
*/ + + buf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(W), 1, 1, 0, 0, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0(W)); + label = sljit_emit_label(compiler); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0(W), 1, 3, 0, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 64); + sljit_emit_op_dst(compiler, SLJIT_GET_RETURN_ADDRESS, SLJIT_RETURN_REG, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + return_addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[0] != return_addr, "test_call12 case 5 failed\n"); + } + + successful_tests++; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestFloat.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestFloat.h new file mode 100755 index 0000000000..24093b840d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestFloat.h @@ -0,0 +1,2877 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static void test_float1(void) +{ + /* Test fpu monadic functions. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_f64 buf[7]; + sljit_sw buf2[6]; + + if (verbose) + printf("Run test_float1\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = 7.75; + buf[1] = -4.5; + buf[2] = 0.0; + buf[3] = 0.0; + buf[4] = 0.0; + buf[5] = 0.0; + buf[6] = 0.0; + + buf2[0] = 10; + buf2[1] = 10; + buf2[2] = 10; + buf2[3] = 10; + buf2[4] = 10; + buf2[5] = 10; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 3, 2, 6, 0, 0); + /* buf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&buf[2], SLJIT_MEM0(), (sljit_sw)&buf[1]); + /* buf[3] */ + sljit_emit_fop1(compiler, SLJIT_ABS_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + /* buf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM0(), (sljit_sw)&buf[0]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0); + sljit_emit_fop1(compiler, 
SLJIT_NEG_F64, SLJIT_FR2, 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_FR2, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&buf[4], SLJIT_FR3, 0); + /* buf[5] */ + sljit_emit_fop1(compiler, SLJIT_ABS_F64, SLJIT_FR4, 0, SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR4, 0); + /* buf[6] */ + sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR4, 0); + + /* buf2[0] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_F_GREATER); + /* buf2[1] */ + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR5, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_F_GREATER); + /* buf2[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_FR1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw), SLJIT_F_EQUAL); + /* buf2[3] */ + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_FR1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_F_LESS); + /* buf2[4] */ + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_F_EQUAL); + /* buf2[5] */ + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_NOT_EQUAL, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * 
sizeof(sljit_sw), SLJIT_F_NOT_EQUAL); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&buf2); + sljit_free_code(code.code, NULL); + + FAILED(buf[2] != -4.5, "test_float1 case 1 failed\n"); + FAILED(buf[3] != 4.5, "test_float1 case 2 failed\n"); + FAILED(buf[4] != -7.75, "test_float1 case 3 failed\n"); + FAILED(buf[5] != 4.5, "test_float1 case 4 failed\n"); + FAILED(buf[6] != -4.5, "test_float1 case 5 failed\n"); + + FAILED(buf2[0] != 1, "test_float1 case 6 failed\n"); + FAILED(buf2[1] != 0, "test_float1 case 7 failed\n"); + FAILED(buf2[2] != 1, "test_float1 case 8 failed\n"); + FAILED(buf2[3] != 0, "test_float1 case 9 failed\n"); + FAILED(buf2[4] != 0, "test_float1 case 10 failed\n"); + FAILED(buf2[5] != 1, "test_float1 case 11 failed\n"); + + successful_tests++; +} + +static void test_float2(void) +{ + /* Test fpu dyadic (two-operand) functions: ADD, SUB, MUL and DIV on f64 values, mixing register and memory operands. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_f64 buf[15]; + + if (verbose) + printf("Run test_float2\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = 7.25; + buf[1] = 3.5; + buf[2] = 1.75; + buf[3] = 0.0; + buf[4] = 0.0; + buf[5] = 0.0; + buf[6] = 0.0; + buf[7] = 0.0; + buf[8] = 0.0; + buf[9] = 0.0; + buf[10] = 0.0; + buf[11] = 0.0; + buf[12] = 8.0; + buf[13] = 4.0; + buf[14] = 0.0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 1, 6, 0, 0); + + /* ADD */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); + /* buf[3] */ + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 3, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0, SLJIT_MEM1(SLJIT_S0), 0); +
sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR1, 0, SLJIT_FR0, 0, SLJIT_FR1, 0); + /* buf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 4, SLJIT_FR0, 0); + /* buf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 5, SLJIT_FR1, 0); + + /* SUB */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); + /* buf[6] */ + sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 6, SLJIT_FR3, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_F64_SHIFT); + sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_FR2, 0, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); + sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_FR3, 0, SLJIT_FR2, 0, SLJIT_FR3, 0); + /* buf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 7, SLJIT_FR2, 0); + /* buf[8] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 8, SLJIT_FR3, 0); + + /* MUL */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + /* buf[9] */ + sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 9, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_F64_SHIFT, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_FR1, 0, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2, SLJIT_FR2, 0); + /* buf[10] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 10, SLJIT_FR1, 0); + /* buf[11] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 11, SLJIT_FR5, 0); + + /* DIV */ + 
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 13); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR4, 0, SLJIT_FR5, 0); + /* buf[12] */ + sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_FR5, 0, SLJIT_FR5, 0, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_FR4, 0, SLJIT_FR1, 0, SLJIT_FR4, 0); + /* buf[13] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 13, SLJIT_FR5, 0); + /* buf[14] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 14, SLJIT_FR4, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[3] != 10.75, "test_float2 case 1 failed\n"); + FAILED(buf[4] != 5.25, "test_float2 case 2 failed\n"); + FAILED(buf[5] != 7.0, "test_float2 case 3 failed\n"); + FAILED(buf[6] != 0.0, "test_float2 case 4 failed\n"); + FAILED(buf[7] != 5.5, "test_float2 case 5 failed\n"); + FAILED(buf[8] != 3.75, "test_float2 case 6 failed\n"); + FAILED(buf[9] != 24.5, "test_float2 case 7 failed\n"); + FAILED(buf[10] != 38.5, "test_float2 case 8 failed\n"); + FAILED(buf[11] != 9.625, "test_float2 case 9 failed\n"); + FAILED(buf[12] != 2.0, "test_float2 case 10 failed\n"); + FAILED(buf[13] != 2.0, "test_float2 case 11 failed\n"); + FAILED(buf[14] != 0.5, "test_float2 case 12 failed\n"); + + successful_tests++; +} + +static void test_float3(void) +{ + /* Floating point set flags. 
*/ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i; + + sljit_sw buf[16]; + union { + sljit_f64 value; + struct { + sljit_s32 value1; + sljit_s32 value2; + } u; + } dbuf[4]; + + if (verbose) + printf("Run test_float3\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 16; i++) + buf[i] = 5; + + /* Two NaNs */ + dbuf[0].u.value1 = 0x7fffffff; + dbuf[0].u.value2 = 0x7fffffff; + dbuf[1].u.value1 = 0x7fffffff; + dbuf[1].u.value2 = 0x7fffffff; + dbuf[2].value = -13.0; + dbuf[3].value = 27.0; + + SLJIT_ASSERT(sizeof(sljit_f64) == 8 && sizeof(sljit_s32) == 4 && sizeof(dbuf[0]) == 8); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 1, 2, 4, 0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + /* buf[0] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_UNORDERED); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + /* buf[1] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_ORDERED); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[2] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_UNORDERED); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[3] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_ORDERED); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[4] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 4 * 
sizeof(sljit_sw), SLJIT_F_LESS); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[5] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_F_GREATER_EQUAL); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[6] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_F_GREATER); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_LESS_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[7] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_F_LESS_EQUAL); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[8] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_F_EQUAL); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_NOT_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + /* buf[9] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_F_NOT_EQUAL); + + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR3, 0, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); + /* buf[10] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_UNORDERED); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_EQUAL, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); + /* buf[11] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_F_EQUAL); + + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); + /* buf[12] */ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_ORDERED); + + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_FR3, 0, SLJIT_FR2, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), 0); + /* buf[13] */ + 
cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_UNORDERED); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&dbuf); + + FAILED(buf[0] != 1, "test_float3 case 1 failed\n"); + FAILED(buf[1] != 2, "test_float3 case 2 failed\n"); + FAILED(buf[2] != 2, "test_float3 case 3 failed\n"); + FAILED(buf[3] != 1, "test_float3 case 4 failed\n"); + FAILED(buf[4] != 1, "test_float3 case 5 failed\n"); + FAILED(buf[5] != 2, "test_float3 case 6 failed\n"); + FAILED(buf[6] != 2, "test_float3 case 7 failed\n"); + FAILED(buf[7] != 1, "test_float3 case 8 failed\n"); + FAILED(buf[8] != 2, "test_float3 case 9 failed\n"); + FAILED(buf[9] != 1, "test_float3 case 10 failed\n"); + FAILED(buf[10] != 2, "test_float3 case 11 failed\n"); + FAILED(buf[11] != 1, "test_float3 case 12 failed\n"); + FAILED(buf[12] != 2, "test_float3 case 13 failed\n"); + FAILED(buf[13] != 1, "test_float3 case 14 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test_float4(void) +{ + /* Test inline assembly. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_f64 buf[3]; +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + sljit_u8 inst[16]; +#else + sljit_u32 inst; +#endif + + if (verbose) + printf("Run test_float4\n"); + + buf[0] = 13.5; + buf[1] = -2.25; + buf[2] = 0.0; + + compiler = sljit_create_compiler(NULL, NULL); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, 2, 0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + /* addsd x, xm */ + inst[0] = 0xf2; + inst[1] = 0x0f; + inst[2] = 0x58; + inst[3] = (sljit_u8)(0xc0 | (sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 3) + | sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1)); + sljit_emit_op_custom(compiler, inst, 4); +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* addsd x, xm */ + if (sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) > 7 || sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) > 7) { + inst[0] = 0; + if (sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) > 7) + inst[0] |= 0x04; /* REX_R */ + if (sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) > 7) + inst[0] |= 0x01; /* REX_B */ + inst[1] = 0xf2; + inst[2] = 0x0f; + inst[3] = 0x58; + inst[4] = (sljit_u8)(0xc0 | ((sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) & 0x7) << 3) + | (sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) & 0x7)); + sljit_emit_op_custom(compiler, inst, 5); + } else { + inst[0] = 0xf2; + inst[1] = 0x0f; + inst[2] = 0x58; + inst[3] = (sljit_u8)(0xc0 | (sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 3) + | sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1)); + sljit_emit_op_custom(compiler, inst, 4); + } +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + /* 
vadd.f64 dd, dn, dm */ + inst = 0xee300b00 | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 12) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 16) + | (sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + /* fadd rd, rn, rm */ + inst = 0x1e602800 | (sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 5) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) << 16); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + /* fadd frD, frA, frB */ + inst = (63u << 26) | (21u << 1) | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 21) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 16) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) << 11); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + /* add.d fd, fs, ft */ + inst = (17u << 26) | (17u << 21) | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 6) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 11) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) << 16); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + /* fadd.d rd, rs1, rs2 */ + inst = (0x1u << 25) | (0x7u << 12) | (0x53u) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 7) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 15) + | (sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) << 20; + sljit_emit_op_custom(compiler, &inst, 
sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + /* adbr r1, r2 */ + inst = 0xb31a0000 + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 4) + | (sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + /* fadd.d rd, rs1, rs2 */ + inst = (0x202u << 15) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0)) + | ((sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0) << 5) + | (sljit_u32)sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1) << 10; + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#endif + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[2] != 11.25, "test_float4 case 1 failed\n"); + + successful_tests++; +} + +static void test_float5(void) +{ + /* Test floating point compare. 
*/ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump* jump; + sljit_sw res[4]; + + union { + sljit_f64 value; + struct { + sljit_u32 value1; + sljit_u32 value2; + } u; + } dbuf[4]; + + if (verbose) + printf("Run test_float5\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + dbuf[0].value = 12.125; + /* a NaN */ + dbuf[1].u.value1 = 0x7fffffff; + dbuf[1].u.value2 = 0x7fffffff; + dbuf[2].value = -13.5; + dbuf[3].value = 12.125; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 1, 1, 3, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + /* dbuf[0] < dbuf[2] -> -2 */ + jump = sljit_emit_fcmp(compiler, SLJIT_F_GREATER_EQUAL, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_F64_SHIFT); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, -2); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + /* dbuf[0] and dbuf[1] is not NaN -> 5 */ + jump = sljit_emit_fcmp(compiler, SLJIT_UNORDERED, SLJIT_MEM0(), (sljit_sw)&dbuf[1], SLJIT_FR1, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 5); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 11); + /* dbuf[0] == dbuf[3] -> 11 */ + jump = sljit_emit_fcmp(compiler, SLJIT_F_EQUAL, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_FR2, 0); + + /* else -> -17 */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, -17); + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + res[0] = code.func1((sljit_sw)&dbuf); + dbuf[3].value = 12; + res[1] = code.func1((sljit_sw)&dbuf); + 
dbuf[1].value = 0; + res[2] = code.func1((sljit_sw)&dbuf); + dbuf[2].value = 20; + res[3] = code.func1((sljit_sw)&dbuf); + + sljit_free_code(code.code, NULL); + + FAILED(res[0] != 11, "test_float5 case 1 failed\n"); + FAILED(res[1] != -17, "test_float5 case 2 failed\n"); + FAILED(res[2] != 5, "test_float5 case 3 failed\n"); + FAILED(res[3] != -2, "test_float5 case 4 failed\n"); + + successful_tests++; +} + +static void test_float6(void) +{ + /* Test single precision floating point. */ + + executable_code code; + struct sljit_compiler* compiler; + sljit_f32 buf[12]; + sljit_sw buf2[6]; + struct sljit_jump* jump; + + if (verbose) + printf("Run test_float6\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = 5.5; + buf[1] = -7.25; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 8.75; + buf[8] = 0; + buf[9] = 16.5; + buf[10] = 0; + buf[11] = 0; + + buf2[0] = -1; + buf2[1] = -1; + buf2[2] = -1; + buf2[3] = -1; + buf2[4] = -1; + buf2[5] = -1; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 3, 2, 6, 0, 0); + + /* buf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f32), SLJIT_FR0, 0); + /* buf[3] */ + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_FR1, 0, SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f32), SLJIT_FR1, 0); + /* buf[4] */ + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f32), SLJIT_FR5, 0); + /* buf[5] */ + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f32), SLJIT_FR4, 0); + + /* buf[6] */ + sljit_emit_fop2(compiler, SLJIT_ADD_F32, SLJIT_FR0, 
0, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f32), SLJIT_FR0, 0); + /* buf[7] */ + sljit_emit_fop2(compiler, SLJIT_SUB_F32, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f32), SLJIT_FR5, 0); + /* buf[8] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop2(compiler, SLJIT_MUL_F32, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f32), SLJIT_FR0, 0, SLJIT_FR0, 0); + /* buf[9] */ + sljit_emit_fop2(compiler, SLJIT_DIV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f32), SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f32), SLJIT_FR2, 0); + /* buf[10] */ + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 0x3d0ac); + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R0), 0x3d0ac); + /* buf[11] */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 0x3d0ac + sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R0), -0x3d0ac); + + /* buf2[0] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_F_EQUAL); + /* buf2[1] */ + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_LESS, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_F_LESS); + /* buf2[2] */ + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw), SLJIT_F_EQUAL); + /* 
buf2[3] */ + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_F_GREATER_EQUAL, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_F_GREATER_EQUAL); + + /* buf2[4] */ + jump = sljit_emit_fcmp(compiler, SLJIT_F_LESS_EQUAL | SLJIT_32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_IMM, 7); + sljit_set_label(jump, sljit_emit_label(compiler)); + + /* buf2[5] */ + jump = sljit_emit_fcmp(compiler, SLJIT_F_GREATER | SLJIT_32, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_FR2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_IMM, 6); + sljit_set_label(jump, sljit_emit_label(compiler)); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&buf2); + sljit_free_code(code.code, NULL); + + FAILED(buf[2] != -5.5, "test_float6 case 1 failed\n"); + FAILED(buf[3] != 7.25, "test_float6 case 2 failed\n"); + FAILED(buf[4] != 7.25, "test_float6 case 3 failed\n"); + FAILED(buf[5] != -5.5, "test_float6 case 4 failed\n"); + FAILED(buf[6] != -1.75, "test_float6 case 5 failed\n"); + FAILED(buf[7] != 16.0, "test_float6 case 6 failed\n"); + FAILED(buf[8] != 30.25, "test_float6 case 7 failed\n"); + FAILED(buf[9] != 3, "test_float6 case 8 failed\n"); + FAILED(buf[10] != -5.5, "test_float6 case 9 failed\n"); + FAILED(buf[11] != 7.25, "test_float6 case 10 failed\n"); + FAILED(buf2[0] != 1, "test_float6 case 11 failed\n"); + FAILED(buf2[1] != 2, "test_float6 case 12 failed\n"); + FAILED(buf2[2] != 2, "test_float6 case 13 failed\n"); + FAILED(buf2[3] != 1, "test_float6 case 14 failed\n"); + FAILED(buf2[4] != 7, "test_float6 case 15 failed\n"); + FAILED(buf2[5] != -1, "test_float6 case 16 failed\n"); + + successful_tests++; +} + +static void test_float7(void) +{ + /* Test floating point conversions. 
*/ + executable_code code; + struct sljit_compiler* compiler; + int i; + sljit_f64 dbuf[10]; + sljit_f32 sbuf[10]; + sljit_sw wbuf[10]; + sljit_s32 ibuf[10]; + + if (verbose) + printf("Run test_float7\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 10; i++) { + dbuf[i] = 0.0; + sbuf[i] = 0.0; + wbuf[i] = 0; + ibuf[i] = 0; + } + + dbuf[0] = 123.5; + dbuf[1] = -367; + dbuf[2] = 917.75; + + sbuf[0] = 476.25; + sbuf[1] = -1689.75; + + wbuf[0] = 2345; + + ibuf[0] = 312; + ibuf[1] = -9324; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 3, 3, 6, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&sbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&wbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&ibuf); + + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3); + /* sbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_F32_SHIFT, SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S1), 0); + /* dbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR4, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR2, 0, SLJIT_FR3, 0); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_FR2, 0); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR3, 0); + + /* 
wbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_F64_SHIFT); + /* wbuf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S1), 0); + /* wbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_sw), SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_FR0, 0, SLJIT_FR5, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4); + /* wbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM2(SLJIT_S2, SLJIT_R1), SLJIT_WORD_SHIFT, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); + /* ibuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32), SLJIT_FR4, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F32, SLJIT_R0, 0, SLJIT_FR1, 0); + /* ibuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 3 * sizeof(sljit_s32), SLJIT_R0, 0); + + /* dbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_FR2, 0, SLJIT_IMM, -6213); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR2, 0); + /* dbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64), SLJIT_MEM0(), (sljit_sw)&ibuf[0]); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, 
SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32)); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_FR1, 0, SLJIT_R0, 0); + /* dbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_FR1, 0); + /* dbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM0(), (sljit_sw)(dbuf + 9), SLJIT_IMM, -77); + /* sbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_f32), SLJIT_IMM, -123); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 7190); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_FR3, 0, SLJIT_R0, 0); + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_FR3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 123); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_R2, 0, SLJIT_IMM, 123 * sizeof(sljit_s32)); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 2); + /* sbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_f32), SLJIT_FR1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 3812); + /* sbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_F32_SHIFT, SLJIT_R1, 0); + /* sbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM0(), (sljit_sw)(sbuf + 9), SLJIT_IMM, -79); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(dbuf[3] != 476.25, "test_float7 case 1 failed\n"); + FAILED(dbuf[4] != 476.25, "test_float7 case 2 failed\n"); + FAILED(dbuf[5] != 2345.0, "test_float7 case 3 failed\n"); + FAILED(dbuf[6] != -6213.0, "test_float7 case 4 failed\n"); + FAILED(dbuf[7] != 
312.0, "test_float7 case 5 failed\n"); + FAILED(dbuf[8] != -9324.0, "test_float7 case 6 failed\n"); + FAILED(dbuf[9] != -77.0, "test_float7 case 7 failed\n"); + + FAILED(sbuf[2] != 123.5, "test_float7 case 8 failed\n"); + FAILED(sbuf[3] != 123.5, "test_float7 case 9 failed\n"); + FAILED(sbuf[4] != 476.25, "test_float7 case 10 failed\n"); + FAILED(sbuf[5] != -123, "test_float7 case 11 failed\n"); + FAILED(sbuf[6] != 7190, "test_float7 case 12 failed\n"); + FAILED(sbuf[7] != 312, "test_float7 case 13 failed\n"); + FAILED(sbuf[8] != 3812, "test_float7 case 14 failed\n"); + FAILED(sbuf[9] != -79.0, "test_float7 case 15 failed\n"); + + FAILED(wbuf[1] != -367, "test_float7 case 16 failed\n"); + FAILED(wbuf[2] != 917, "test_float7 case 17 failed\n"); + FAILED(wbuf[3] != 476, "test_float7 case 18 failed\n"); + FAILED(wbuf[4] != -476, "test_float7 case 19 failed\n"); + + FAILED(ibuf[2] != -917, "test_float7 case 20 failed\n"); + FAILED(ibuf[3] != -1689, "test_float7 case 21 failed\n"); + + successful_tests++; +} + +static void test_float8(void) +{ + /* Test floating point conversions. 
*/ + executable_code code; + struct sljit_compiler* compiler; + int i; + sljit_f64 dbuf[10]; + sljit_f32 sbuf[9]; + sljit_sw wbuf[9]; + sljit_s32 ibuf[9]; + sljit_s32* dbuf_ptr = (sljit_s32*)dbuf; + sljit_s32* sbuf_ptr = (sljit_s32*)sbuf; + + if (verbose) + printf("Run test_float8\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 9; i++) { + dbuf_ptr[i << 1] = -1; + dbuf_ptr[(i << 1) + 1] = -1; + sbuf_ptr[i] = -1; + wbuf[i] = -1; + ibuf[i] = -1; + } + +#if IS_64BIT + dbuf[9] = (sljit_f64)SLJIT_W(0x1122334455); +#endif + dbuf[0] = 673.75; + sbuf[0] = -879.75; + wbuf[0] = 345; + ibuf[0] = -249; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 3, 3, 3, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&sbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&wbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&ibuf); + + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S1), 0); + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 0); + /* wbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 0); + /* wbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S1), 0); + /* ibuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32), SLJIT_MEM1(SLJIT_S0), 0); + /* ibuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F32, SLJIT_MEM1(SLJIT_R2), 4 * sizeof(sljit_s32), SLJIT_MEM1(SLJIT_S1), 0); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 4 * 
sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S2), 0); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S2), 0); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_R2), 0); + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R2), 0); + +#if IS_64BIT + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64)); + /* wbuf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 8 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_R0, 0, SLJIT_FR2, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_AND32, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffff); + /* ibuf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), 8 * sizeof(sljit_s32), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x4455667788)); + /* dbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_R0, 0); + /* dbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64), SLJIT_IMM, SLJIT_W(0x7766554433)); +#endif /* IS_64BIT */ + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func0(); + sljit_free_code(code.code, NULL); + + FAILED(dbuf_ptr[(1 * 2) + 0] != -1, "test_float8 case 1 failed\n"); + FAILED(dbuf_ptr[(1 * 2) + 1] != -1, "test_float8 case 2 failed\n"); + FAILED(dbuf[2] != -879.75, "test_float8 case 3 failed\n"); + FAILED(dbuf_ptr[(3 * 2) + 0] != -1, "test_float8 case 4 
failed\n"); + FAILED(dbuf_ptr[(3 * 2) + 1] != -1, "test_float8 case 5 failed\n"); + FAILED(dbuf[4] != 345, "test_float8 case 6 failed\n"); + FAILED(dbuf_ptr[(5 * 2) + 0] != -1, "test_float8 case 7 failed\n"); + FAILED(dbuf_ptr[(5 * 2) + 1] != -1, "test_float8 case 8 failed\n"); + FAILED(dbuf[6] != -249, "test_float8 case 9 failed\n"); + FAILED(dbuf_ptr[(7 * 2) + 0] != -1, "test_float8 case 10 failed\n"); + FAILED(dbuf_ptr[(7 * 2) + 1] != -1, "test_float8 case 11 failed\n"); + + FAILED(sbuf_ptr[1] != -1, "test_float8 case 12 failed\n"); + FAILED(sbuf[2] != 673.75, "test_float8 case 13 failed\n"); + FAILED(sbuf_ptr[3] != -1, "test_float8 case 14 failed\n"); + FAILED(sbuf[4] != 345, "test_float8 case 15 failed\n"); + FAILED(sbuf_ptr[5] != -1, "test_float8 case 16 failed\n"); + FAILED(sbuf[6] != -249, "test_float8 case 17 failed\n"); + FAILED(sbuf_ptr[7] != -1, "test_float8 case 18 failed\n"); + + FAILED(wbuf[1] != -1, "test_float8 case 19 failed\n"); + FAILED(wbuf[2] != 673, "test_float8 case 20 failed\n"); + FAILED(wbuf[3] != -1, "test_float8 case 21 failed\n"); + FAILED(wbuf[4] != -879, "test_float8 case 22 failed\n"); + FAILED(wbuf[5] != -1, "test_float8 case 23 failed\n"); + + FAILED(ibuf[1] != -1, "test_float8 case 24 failed\n"); + FAILED(ibuf[2] != 673, "test_float8 case 25 failed\n"); + FAILED(ibuf[3] != -1, "test_float8 case 26 failed\n"); + FAILED(ibuf[4] != -879, "test_float8 case 27 failed\n"); + FAILED(ibuf[5] != -1, "test_float8 case 28 failed\n"); + +#if IS_64BIT + FAILED(dbuf[8] != (sljit_f64)SLJIT_W(0x4455667788), "test_float8 case 29 failed\n"); + FAILED(dbuf[9] != (sljit_f64)SLJIT_W(0x66554433), "test_float8 case 30 failed\n"); + FAILED(wbuf[8] != SLJIT_W(0x1122334455), "test_float8 case 31 failed\n"); + FAILED(ibuf[8] == 0x4455, "test_float8 case 32 failed\n"); +#endif /* IS_64BIT */ + + successful_tests++; +} + +static void test_float9(void) +{ + /* Test stack and floating point operations. 
*/ + executable_code code; + struct sljit_compiler* compiler; +#if !IS_X86 + sljit_uw size1, size2, size3; + int result; +#endif + sljit_f32 sbuf[7]; + + if (verbose) + printf("Run test_float9\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sbuf[0] = 245.5; + sbuf[1] = -100.25; + sbuf[2] = 713.75; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 3, 6, 0, 8 * sizeof(sljit_f32)); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 0); + /* sbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop2(compiler, SLJIT_ADD_F32, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32)); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32), SLJIT_IMM, 5934); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM1(SLJIT_SP), 3 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32)); + /* sbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 3 * sizeof(sljit_f32)); + +#if !IS_X86 + size1 = compiler->size; +#endif + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f32)); +#if !IS_X86 + size2 = compiler->size; +#endif + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_FR2, 0); +#if !IS_X86 + size3 = compiler->size; +#endif + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, 
SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f32), SLJIT_FR5, 0); +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + result = (compiler->size - size3) == 2 && (size3 - size2) == 1 && (size2 - size1) == 2; +#elif !IS_X86 + result = (compiler->size - size3) == (size3 - size2) && (size3 - size2) == (size2 - size1); +#endif + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&sbuf); + sljit_free_code(code.code, NULL); + + FAILED(sbuf[3] != 245.5, "test_float9 case 1 failed\n"); + FAILED(sbuf[4] != 145.25, "test_float9 case 2 failed\n"); + FAILED(sbuf[5] != 5934, "test_float9 case 3 failed\n"); + FAILED(sbuf[6] != 713.75, "test_float9 case 4 failed\n"); +#if !IS_X86 + FAILED(!result, "test_float9 case 5 failed\n"); +#endif + + successful_tests++; +} + +static void test_float10(void) +{ + /* Test all registers provided by the CPU. */ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump* jump; + sljit_f64 buf[3]; + sljit_s32 i; + + if (verbose) + printf("Run test_float10\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 6.25; + buf[1] = 17.75; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_S0), 0); + + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0V()); + /* SLJIT_FR0 contains the first value. 
*/ + for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR(i), 0); + /* buf[2] receives the sum of all float registers as found after the call. */ sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_return_void(compiler); + + /* Called function: overwrites every float register with buf[1]. */ sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 1, 0, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[1]); + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_R0), 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + /* Expected sum: buf[1] (17.75) counted once per scratch float register plus buf[0] (6.25) once per saved float register. */ FAILED(buf[2] != (SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS * 17.75 + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS * 6.25), "test_float10 case 1 failed\n"); + + /* Next test: same scenario with new values; the caller now fills the registers through SLJIT_FR(i) and SLJIT_FS(i) in two separate loops. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = -32.5; + buf[1] = -11.25; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_S0), 0); + for (i = 0; i < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FS(i), 0, SLJIT_MEM1(SLJIT_S0), 0); + + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS0V()); + /* SLJIT_FR0 contains the first value. 
*/ + for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR(i), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_return_void(compiler); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS0V(), 1, 0, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[1]); + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_R0), 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[2] != (SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS * -11.25 + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS * -32.5), "test_float10 case 2 failed\n"); + + successful_tests++; +} + +static void test_float11(void) +{ + /* Test float memory accesses with pre/post updates. 
*/ /* supported[k] records the result of the k-th addressing-form probe below; expected[k] is the per-architecture answer that the loop at the end of this function compares it with. wbuf receives the updated base registers, dbuf/sbuf are the f64/f32 data areas. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_u32 i; + sljit_s32 supported[6]; + sljit_sw wbuf[6]; + sljit_f64 dbuf[4]; + sljit_f32 sbuf[4]; +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + static sljit_u8 expected[6] = { 1, 1, 1, 1, 0, 0 }; +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + static sljit_u8 expected[6] = { 1, 0, 1, 0, 1, 1 }; +#else + static sljit_u8 expected[6] = { 0, 0, 0, 0, 0, 0 }; +#endif + + if (verbose) + printf("Run test_float11\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 6; i++) + wbuf[i] = 0; + + dbuf[0] = 66.725; + dbuf[1] = 0.0; + dbuf[2] = 0.0; + dbuf[3] = 0.0; + + sbuf[0] = 0.0; + sbuf[1] = -22.125; + sbuf[2] = 0.0; + sbuf[3] = 0.0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 4, 3, 4, 0, sizeof(sljit_sw)); + + /* Each form is first probed with SLJIT_MEM_SUPP; the real access is emitted only when the probe returns SLJIT_SUCCESS. */ supported[0] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_f64)); + if (supported[0] == SLJIT_SUCCESS) { + /* dbuf[1]: R0 starts 4 doubles below S1, the access uses offset 4 * sizeof(sljit_f64), and the loaded value is copied to dbuf[1] */ + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 4 * sizeof(sljit_f64)); + sljit_emit_fmem_update(compiler, SLJIT_MOV_F64, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); + /* wbuf[0]: the base register after the update (checked against dbuf at the end) */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + } + + supported[1] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR2, SLJIT_MEM1(SLJIT_R0), -(sljit_sw)sizeof(sljit_f64)); + if (supported[1] == SLJIT_SUCCESS) { + /* dbuf[2]: store of the dbuf[0] value through R0 = S1 + 2 doubles, with a post update of minus one double */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR2, 
SLJIT_MEM1(SLJIT_R0), -(sljit_sw)sizeof(sljit_f64)); + /* wbuf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + } + + supported[2] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE, SLJIT_FR1, SLJIT_MEM1(SLJIT_R2), -4 * (sljit_sw)sizeof(sljit_f32)); + if (supported[2] == SLJIT_SUCCESS) { + /* sbuf[0] */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S2, 0, SLJIT_IMM, 4 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE, SLJIT_FR1, SLJIT_MEM1(SLJIT_R2), -4 * (sljit_sw)sizeof(sljit_f32)); + /* wbuf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R2, 0); + } + + supported[3] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR1, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f32)); + if (supported[3] == SLJIT_SUCCESS) { + /* sbuf[2] */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S2, 0, SLJIT_IMM, sizeof(sljit_f32)); + sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_POST, SLJIT_FR1, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32), SLJIT_FR1, 0); + /* wbuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[4] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + if (supported[4] == SLJIT_SUCCESS) { + /* dbuf[3] */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 8 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -8 * (sljit_sw)sizeof(sljit_f64)); + sljit_emit_fmem_update(compiler, SLJIT_MOV_F64, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + sljit_emit_fop1(compiler, 
SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + /* wbuf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[5] = sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE, SLJIT_FR2, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 0); + if (supported[5] == SLJIT_SUCCESS) { + /* sbuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE, SLJIT_FR2, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 0); + /* wbuf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); + } + + SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0) == SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0) == SLJIT_ERR_UNSUPPORTED); + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + /* TODO: at least for ARM (both V5 and V7) the range below needs further fixing */ + SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 256) == SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(sljit_emit_fmem_update(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -257) == SLJIT_ERR_UNSUPPORTED); +#endif + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&wbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); + sljit_free_code(code.code, NULL); + + FAILED(sizeof(expected) != sizeof(supported) / 
sizeof(sljit_s32), "test_float11 case 1 failed\n"); + + for (i = 0; i < sizeof(expected); i++) { + if (expected[i]) { + if (supported[i] != SLJIT_SUCCESS) { + printf("test_float11 case %d should be supported\n", i + 1); + return; + } + } else { + if (supported[i] == SLJIT_SUCCESS) { + printf("test_float11 case %d should not be supported\n", i + 1); + return; + } + } + } + + FAILED(supported[0] == SLJIT_SUCCESS && dbuf[1] != 66.725, "test_float11 case 2 failed\n"); + FAILED(supported[0] == SLJIT_SUCCESS && wbuf[0] != (sljit_sw)(dbuf), "test_float11 case 3 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && dbuf[2] != 66.725, "test_float11 case 4 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && wbuf[1] != (sljit_sw)(dbuf + 1), "test_float11 case 5 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && sbuf[0] != -22.125, "test_float11 case 6 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && wbuf[2] != (sljit_sw)(sbuf), "test_float11 case 7 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && sbuf[2] != -22.125, "test_float11 case 8 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && wbuf[3] != (sljit_sw)(sbuf + 2), "test_float11 case 9 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && dbuf[3] != 66.725, "test_float11 case 10 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && wbuf[4] != (sljit_sw)(dbuf), "test_float11 case 11 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && sbuf[3] != -22.125, "test_float11 case 12 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && wbuf[5] != (sljit_sw)(sbuf + 3), "test_float11 case 13 failed\n"); + + successful_tests++; +} + +static void test_float12(void) +{ + /* Test floating point argument passing to sljit_emit_enter. 
*/ /* For each signature, the stores and checks show integer arguments arriving in SLJIT_S0, SLJIT_S1, ... and floating point arguments in SLJIT_FR0, SLJIT_FR1, ..., each group numbered independently in declaration order. */ + executable_code code; + /* Not initialised here: a compiler is created (and freed) once per case below, so creating one in the declaration as well would leak it. */ struct sljit_compiler* compiler; + sljit_sw wbuf[2]; + sljit_s32 ibuf[2]; + sljit_f64 dbuf[3]; + sljit_f32 fbuf[2]; + + if (verbose) + printf("Run test_float12\n"); + + wbuf[0] = 0; + ibuf[0] = 0; + dbuf[0] = 0; + fbuf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + /* Cases 1-4: (s32, f32, word, f64) read back from S0, FR0, S1, FR1. */ sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(32, F32, W, F64), 2, 2, 2, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&fbuf, SLJIT_FR0, 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f1(-6834, 674.5f, 2789, -895.25); + sljit_free_code(code.code, NULL); + + FAILED(wbuf[0] != 2789, "test_float12 case 1 failed\n"); + FAILED(ibuf[0] != -6834, "test_float12 case 2 failed\n"); + FAILED(dbuf[0] != -895.25, "test_float12 case 3 failed\n"); + FAILED(fbuf[0] != 674.5f, "test_float12 case 4 failed\n"); + + ibuf[0] = 0; + dbuf[0] = 0; + fbuf[0] = 0; + fbuf[1] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + /* Cases 5-8: (f32, f64, f32, s32) read back from FR0, FR1, FR2, S0. */ sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F32, F64, F32, 32), 1, 1, 3, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&fbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f32), SLJIT_FR2, 0); + 
sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f2(-4712.5f, 5342.25, 2904.25f, -4607); + sljit_free_code(code.code, NULL); + + FAILED(ibuf[0] != -4607, "test_float12 case 5 failed\n"); + FAILED(dbuf[0] != 5342.25, "test_float12 case 6 failed\n"); + FAILED(fbuf[0] != -4712.5f, "test_float12 case 7 failed\n"); + FAILED(fbuf[1] != 2904.25f, "test_float12 case 8 failed\n"); + + ibuf[0] = 0; + dbuf[0] = 0; + fbuf[0] = 0; + fbuf[1] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F64, F32, 32, F32), 1, 1, 3, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&dbuf, SLJIT_FR0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&fbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f32), SLJIT_FR2, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f3(3578.5, 4619.25f, 6859, -1807.75f); + sljit_free_code(code.code, NULL); + + FAILED(ibuf[0] != 6859, "test_float12 case 9 failed\n"); + FAILED(dbuf[0] != 3578.5, "test_float12 case 10 failed\n"); + FAILED(fbuf[0] != 4619.25f, "test_float12 case 11 failed\n"); + FAILED(fbuf[1] != -1807.75f, "test_float12 case 12 failed\n"); + + ibuf[0] = 0; + dbuf[0] = 0; + dbuf[1] = 0; + fbuf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F64, 32, F32, F64), SLJIT_NUMBER_OF_SCRATCH_REGISTERS + 2, 1, 3, 0, 33); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, 
SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR2, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&fbuf, SLJIT_FR1, 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f4(2740.75, -2651, -7909.25, 3671.5); + sljit_free_code(code.code, NULL); + + FAILED(ibuf[0] != -2651, "test_float12 case 13 failed\n"); + FAILED(dbuf[0] != 2740.75, "test_float12 case 14 failed\n"); + FAILED(dbuf[1] != 3671.5, "test_float12 case 15 failed\n"); + FAILED(fbuf[0] != -7909.25, "test_float12 case 16 failed\n"); + + wbuf[0] = 0; + ibuf[0] = 0; + ibuf[1] = 0; + fbuf[0] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F32, 32, W, 32), 1, 3, 1, 0, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&ibuf); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_s32), SLJIT_S2, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&fbuf, SLJIT_FR0, 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f5(-5219.25f, -4530, 7214, 6741); + sljit_free_code(code.code, NULL); + + FAILED(wbuf[0] != 7214, "test_float12 case 17 failed\n"); + FAILED(ibuf[0] != -4530, "test_float12 case 18 failed\n"); + FAILED(ibuf[1] != 6741, "test_float12 case 19 failed\n"); + FAILED(fbuf[0] != -5219.25f, "test_float12 case 20 failed\n"); + + 
wbuf[0] = 0; + wbuf[1] = 0; + dbuf[0] = 0; + dbuf[1] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F64, F64, W, W), 1, 5, 2, 0, SLJIT_MAX_LOCAL_SIZE - 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_S0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), SLJIT_MAX_LOCAL_SIZE - 2 * sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&wbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f6(-3749.75, 5280.5, 9134, -6506); + sljit_free_code(code.code, NULL); + + FAILED(wbuf[0] != 9134, "test_float12 case 21 failed\n"); + FAILED(wbuf[1] != -6506, "test_float12 case 22 failed\n"); + FAILED(dbuf[0] != -3749.75, "test_float12 case 23 failed\n"); + FAILED(dbuf[1] != 5280.5, "test_float12 case 24 failed\n"); + + wbuf[0] = 0; + dbuf[0] = 0; + dbuf[1] = 0; + dbuf[2] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F64, F64, W, F64), 1, 1, 3, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, 
SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR2, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f7(-6049.25, 7301.5, 4610, -4312.75); + sljit_free_code(code.code, NULL); + + FAILED(wbuf[0] != 4610, "test_float12 case 25 failed\n"); + FAILED(dbuf[0] != -6049.25, "test_float12 case 26 failed\n"); + FAILED(dbuf[1] != 7301.5, "test_float12 case 27 failed\n"); + FAILED(dbuf[2] != -4312.75, "test_float12 case 28 failed\n"); + + ibuf[0] = 0; + dbuf[0] = 0; + dbuf[1] = 0; + dbuf[2] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F64, F64, F64, 32), 1, 1, 3, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&ibuf, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR2, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float12_f8(4810.5, -9148.75, 8601.25, 6703); + sljit_free_code(code.code, NULL); + + FAILED(ibuf[0] != 6703, "test_float12 case 29 failed\n"); + FAILED(dbuf[0] != 4810.5, "test_float12 case 30 failed\n"); + FAILED(dbuf[1] != -9148.75, "test_float12 case 31 failed\n"); + FAILED(dbuf[2] != 8601.25, "test_float12 case 32 failed\n"); + + successful_tests++; +} + +static void test_float13(void) +{ + /* Test using all fpu registers. 
*/ /* buf collects one f64 per float register after the call; buf2[0] is the value the caller loads into every register and buf2[1] the value the called function loads. The checks after the run expect the first SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS entries to hold buf2[1] and the remaining ones buf2[0]. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_f64 buf[SLJIT_NUMBER_OF_FLOAT_REGISTERS]; + sljit_f64 buf2[2]; + struct sljit_jump *jump; + sljit_s32 i; + + if (verbose) + printf("Run test_float13\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + buf2[0] = 7.75; + buf2[1] = -8.25; + + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + buf[i] = 0.0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 1, 2, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + /* Load the first f64 behind S1 into FR0, then copy it to every other float register. */ sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FR0, 0); + + /* Hand the S1 pointer to the called function as its only argument. */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS1V(W)); + + /* Dump every float register to the buffer behind S0 so the host code can inspect what the call left in them. */ for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), i * (sljit_sw)sizeof(sljit_f64), SLJIT_FR(i), 0); + sljit_emit_return_void(compiler); + + /* Called function. 
*/ + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FR0, 0); + + sljit_set_context(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)buf, (sljit_sw)buf2); + sljit_free_code(code.code, NULL); + + for (i = 0; i < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i++) { + FAILED(buf[i] != -8.25, "test_float13 case 1 failed\n"); + } + + for (i = SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) { + FAILED(buf[i] != 7.75, "test_float13 case 2 failed\n"); + } + + /* Next test. 
*/ + + if (SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS >= 3) { + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + buf2[0] = -6.25; + buf2[1] = 3.75; + + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + buf[i] = 0.0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 1, 2, SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2, 1, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FS0, 0, SLJIT_MEM1(SLJIT_S1), 0); + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FS0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS1V(W)); + + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), i * (sljit_sw)sizeof(sljit_f64), SLJIT_FR(i), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) * (sljit_sw)sizeof(sljit_f64), SLJIT_FS0, 0); + sljit_emit_return_void(compiler); + + /* Called function. 
*/ + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_FR0, 0); + + sljit_set_context(compiler, 0, SLJIT_ARGS1V(P), 0, 1, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, SLJIT_MAX_LOCAL_SIZE); + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)buf, (sljit_sw)buf2); + sljit_free_code(code.code, NULL); + + for (i = 0; i < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i++) { + FAILED(buf[i] != 3.75, "test_float13 case 3 failed\n"); + } + + for (i = SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2; i++) { + FAILED(buf[i] != -6.25, "test_float13 case 4 failed\n"); + } + + FAILED(buf[SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2] != 0, "test_float13 case 5 failed\n"); + FAILED(buf[SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1] != -6.25, "test_float13 case 6 failed\n"); + } + + successful_tests++; +} + +static void test_float14(void) +{ + /* Test passing arguments in registers. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_sw wbuf[2]; + sljit_f64 dbuf[3]; + + if (verbose) + printf("Run test_float14\n"); + + /* Next test. 
*/ /* First case: three F64 arguments are read from FR0..FR2 and the single integer argument, declared W_R, is read from SLJIT_R0. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F64, F64, F64, W_R), 1, 0, 3, 0, SLJIT_MAX_LOCAL_SIZE); + /* wbuf[0]: the W_R argument, saved before R0 is reused as the dbuf pointer */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&wbuf, SLJIT_R0, 0); + /* dbuf[0]: first F64 argument (FR0) */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); + /* dbuf[1]: second F64 argument (FR1) */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); + /* dbuf[2]: third F64 argument (FR2) */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_f64), SLJIT_FR2, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float14_f1(7390.25, -8045.5, 1390.75, 8201); + sljit_free_code(code.code, NULL); + + /* Each stored value must match the argument passed at the corresponding position above. */ FAILED(wbuf[0] != 8201, "test_float14 case 1 failed\n"); + FAILED(dbuf[0] != 7390.25, "test_float14 case 2 failed\n"); + FAILED(dbuf[1] != -8045.5, "test_float14 case 3 failed\n"); + FAILED(dbuf[2] != 1390.75, "test_float14 case 4 failed\n"); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS4V(F64, F64, W, W_R), 2, 1, 2, 0, SLJIT_MAX_LOCAL_SIZE); + /* wbuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&wbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_S0, 0); + /* wbuf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw), SLJIT_R1, 0); + /* dbuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_FR0, 0); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_FR1, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.test_float14_f2(4892.75, -3702.5, 4731, 8530); + sljit_free_code(code.code, NULL); + + FAILED(wbuf[0] != 4731, "test_float14 case 5 failed\n"); + FAILED(wbuf[1] != 8530, "test_float14 case 6 failed\n"); + FAILED(dbuf[0] != 4892.75, "test_float14 case 7 failed\n"); + FAILED(dbuf[1] != -3702.5, "test_float14 case 8 failed\n"); + + successful_tests++; +} + +static void test_float15_set(struct sljit_compiler *compiler, sljit_s32 compare, sljit_s32 type, sljit_s32 left_fr, sljit_s32 right_fr) +{ + /* Testing both sljit_emit_op_flags and sljit_emit_jump. 
*/ /* Emits one comparison of left_fr against right_fr and records its outcome as a single byte at S0, then advances S0 by one byte. R0 first receives the sljit_emit_op_flags result for `type`; the following ADD of 2 is skipped when the jump on the same `type` is taken. test_float15 expects every byte to be 1 or 2, which is consistent with a flag value of 1 when the jump is taken and 0 when it is not (assumed - confirm against the sljit_emit_op_flags documentation). */ + struct sljit_jump* jump; + + sljit_emit_fop1(compiler, compare | SLJIT_SET(type & 0xfe), left_fr, 0, right_fr, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, type); + jump = sljit_emit_jump(compiler, type); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_set_label(jump, sljit_emit_label(compiler)); + + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_s8)); +} + +static void test_float15(void) +{ + /* Test floating point comparison. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s8 bbuf[96]; + sljit_s32 i; + + /* The unions allow element 0 to be filled with a raw bit pattern instead of a float literal. */ union { + sljit_f64 value; + struct { + sljit_s32 value1; + sljit_s32 value2; + } u; + } dbuf[3]; + + union { + sljit_f32 value; + sljit_s32 value1; + } sbuf[3]; + + if (verbose) + printf("Run test_float15\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + /* 0x7fffffff in every 32 bit half: presumably a NaN, assuming IEEE 754 binary32/binary64 layouts - confirm. */ dbuf[0].u.value1 = 0x7fffffff; + dbuf[0].u.value2 = 0x7fffffff; + dbuf[1].value = -13.0; + dbuf[2].value = 27.0; + + sbuf[0].value1 = 0x7fffffff; + sbuf[1].value = -13.0; + sbuf[2].value = 27.0; + + /* -3 is neither of the values the checks accept (1 or 2), so a byte that is never written fails its check. */ for (i = 0; i < 96; i++) + bbuf[i] = -3; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 3, 3, 6, 0, 0); + + i = SLJIT_CMP_F64; + /* FR0 and FR1 both hold element 0, FR2 and FR3 both hold element 1, FR4 holds element 2 of the buffer behind S1. */ sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + + while (1) { + /* bbuf[0] and bbuf[48] */ + test_float15_set(compiler, i, SLJIT_ORDERED_EQUAL, SLJIT_FR2, SLJIT_FR3); + /* bbuf[1] and bbuf[49] */ + 
test_float15_set(compiler, i, SLJIT_ORDERED_EQUAL, SLJIT_FR2, SLJIT_FR4); + /* bbuf[2] and bbuf[50] */ + test_float15_set(compiler, i, SLJIT_ORDERED_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[3] and bbuf[51] */ + test_float15_set(compiler, i, SLJIT_ORDERED_EQUAL, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[4] and bbuf[52] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR2, SLJIT_FR3); + /* bbuf[5] and bbuf[53] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR2, SLJIT_FR4); + /* bbuf[6] and bbuf[54] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[7] and bbuf[55] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_NOT_EQUAL, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[8] and bbuf[56] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR2, SLJIT_FR3); + /* bbuf[9] and bbuf[57] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR2, SLJIT_FR4); + /* bbuf[10] and bbuf[58] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR0, SLJIT_FR1); + /* bbuf[11] and bbuf[59] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[12] and bbuf[60] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR2, SLJIT_FR4); + /* bbuf[13] and bbuf[61] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR4, SLJIT_FR2); + /* bbuf[14] and bbuf[62] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[15] and bbuf[63] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[16] and bbuf[64] */ + test_float15_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR2, SLJIT_FR4); + /* bbuf[17] and bbuf[65] */ + test_float15_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR4, SLJIT_FR2); + /* bbuf[18] and bbuf[66] */ + test_float15_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR0, SLJIT_FR1); + /* bbuf[19] and bbuf[67] */ + 
test_float15_set(compiler, i, SLJIT_ORDERED_GREATER, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[20] and bbuf[68] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR2, SLJIT_FR4); + /* bbuf[21] and bbuf[69] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR4, SLJIT_FR2); + /* bbuf[22] and bbuf[70] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[23] and bbuf[71] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS_EQUAL, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[24] and bbuf[72] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR2, SLJIT_FR4); + /* bbuf[25] and bbuf[73] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR2, SLJIT_FR3); + /* bbuf[26] and bbuf[74] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[27] and bbuf[75] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_EQUAL, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[28] and bbuf[76] */ + test_float15_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR2, SLJIT_FR3); + /* bbuf[29] and bbuf[77] */ + test_float15_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR2, SLJIT_FR4); + /* bbuf[30] and bbuf[78] */ + test_float15_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[31] and bbuf[79] */ + test_float15_set(compiler, i, SLJIT_ORDERED_NOT_EQUAL, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[32] and bbuf[80] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR2, SLJIT_FR4); + /* bbuf[33] and bbuf[81] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR2, SLJIT_FR3); + /* bbuf[34] and bbuf[82] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR0, SLJIT_FR1); + /* bbuf[35] and bbuf[83] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_LESS, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[36] and bbuf[84] */ + test_float15_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR2, SLJIT_FR4); + /* 
bbuf[37] and bbuf[85] */ + test_float15_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR4, SLJIT_FR2); + /* bbuf[38] and bbuf[86] */ + test_float15_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[39] and bbuf[87] */ + test_float15_set(compiler, i, SLJIT_ORDERED_GREATER_EQUAL, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[40] and bbuf[88] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR2, SLJIT_FR4); + /* bbuf[41] and bbuf[89] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR4, SLJIT_FR2); + /* bbuf[42] and bbuf[90] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR0, SLJIT_FR1); + /* bbuf[43] and bbuf[91] */ + test_float15_set(compiler, i, SLJIT_UNORDERED_OR_GREATER, SLJIT_FR0, SLJIT_FR2); + + /* bbuf[44] and bbuf[92] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR2, SLJIT_FR3); + /* bbuf[45] and bbuf[93] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR4, SLJIT_FR2); + /* bbuf[46] and bbuf[94] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR0, SLJIT_FR1); + /* bbuf[47] and bbuf[95] */ + test_float15_set(compiler, i, SLJIT_ORDERED_LESS_EQUAL, SLJIT_FR0, SLJIT_FR2); + + if (i == SLJIT_CMP_F32) + break; + + i = SLJIT_CMP_F32; + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32)); + } + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&bbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); + 
sljit_free_code(code.code, NULL); + + /* SLJIT_CMP_F64 */ + FAILED(bbuf[0] != 1, "test_float15 case 1 failed\n"); + FAILED(bbuf[1] != 2, "test_float15 case 2 failed\n"); + FAILED(bbuf[2] != 2, "test_float15 case 3 failed\n"); + FAILED(bbuf[3] != 2, "test_float15 case 4 failed\n"); + + FAILED(bbuf[4] != 2, "test_float15 case 5 failed\n"); + FAILED(bbuf[5] != 1, "test_float15 case 6 failed\n"); + FAILED(bbuf[6] != 1, "test_float15 case 7 failed\n"); + FAILED(bbuf[7] != 1, "test_float15 case 8 failed\n"); + + FAILED(bbuf[8] != 2, "test_float15 case 9 failed\n"); + FAILED(bbuf[9] != 1, "test_float15 case 10 failed\n"); + FAILED(bbuf[10] != 2, "test_float15 case 11 failed\n"); + FAILED(bbuf[11] != 2, "test_float15 case 12 failed\n"); + + FAILED(bbuf[12] != 2, "test_float15 case 13 failed\n"); + FAILED(bbuf[13] != 1, "test_float15 case 14 failed\n"); + FAILED(bbuf[14] != 1, "test_float15 case 15 failed\n"); + FAILED(bbuf[15] != 1, "test_float15 case 16 failed\n"); + + FAILED(bbuf[16] != 2, "test_float15 case 17 failed\n"); + FAILED(bbuf[17] != 1, "test_float15 case 18 failed\n"); + FAILED(bbuf[18] != 2, "test_float15 case 19 failed\n"); + FAILED(bbuf[19] != 2, "test_float15 case 20 failed\n"); + + FAILED(bbuf[20] != 1, "test_float15 case 21 failed\n"); + FAILED(bbuf[21] != 2, "test_float15 case 22 failed\n"); + FAILED(bbuf[22] != 1, "test_float15 case 23 failed\n"); + FAILED(bbuf[23] != 1, "test_float15 case 24 failed\n"); + + FAILED(bbuf[24] != 2, "test_float15 case 25 failed\n"); + FAILED(bbuf[25] != 1, "test_float15 case 26 failed\n"); + FAILED(bbuf[26] != 1, "test_float15 case 27 failed\n"); + FAILED(bbuf[27] != 1, "test_float15 case 28 failed\n"); + + FAILED(bbuf[28] != 2, "test_float15 case 29 failed\n"); + FAILED(bbuf[29] != 1, "test_float15 case 30 failed\n"); + FAILED(bbuf[30] != 2, "test_float15 case 31 failed\n"); + FAILED(bbuf[31] != 2, "test_float15 case 32 failed\n"); + + FAILED(bbuf[32] != 1, "test_float15 case 33 failed\n"); + FAILED(bbuf[33] != 2, 
"test_float15 case 34 failed\n"); + FAILED(bbuf[34] != 1, "test_float15 case 35 failed\n"); + FAILED(bbuf[35] != 1, "test_float15 case 36 failed\n"); + + FAILED(bbuf[36] != 2, "test_float15 case 37 failed\n"); + FAILED(bbuf[37] != 1, "test_float15 case 38 failed\n"); + FAILED(bbuf[38] != 2, "test_float15 case 39 failed\n"); + FAILED(bbuf[39] != 2, "test_float15 case 40 failed\n"); + + FAILED(bbuf[40] != 2, "test_float15 case 41 failed\n"); + FAILED(bbuf[41] != 1, "test_float15 case 42 failed\n"); + FAILED(bbuf[42] != 1, "test_float15 case 43 failed\n"); + FAILED(bbuf[43] != 1, "test_float15 case 44 failed\n"); + + FAILED(bbuf[44] != 1, "test_float15 case 45 failed\n"); + FAILED(bbuf[45] != 2, "test_float15 case 46 failed\n"); + FAILED(bbuf[46] != 2, "test_float15 case 47 failed\n"); + FAILED(bbuf[47] != 2, "test_float15 case 48 failed\n"); + + /* SLJIT_CMP_F32 */ + FAILED(bbuf[48] != 1, "test_float15 case 49 failed\n"); + FAILED(bbuf[49] != 2, "test_float15 case 50 failed\n"); + FAILED(bbuf[50] != 2, "test_float15 case 51 failed\n"); + FAILED(bbuf[51] != 2, "test_float15 case 52 failed\n"); + + FAILED(bbuf[52] != 2, "test_float15 case 53 failed\n"); + FAILED(bbuf[53] != 1, "test_float15 case 54 failed\n"); + FAILED(bbuf[54] != 1, "test_float15 case 55 failed\n"); + FAILED(bbuf[55] != 1, "test_float15 case 56 failed\n"); + + FAILED(bbuf[56] != 2, "test_float15 case 57 failed\n"); + FAILED(bbuf[57] != 1, "test_float15 case 58 failed\n"); + FAILED(bbuf[58] != 2, "test_float15 case 59 failed\n"); + FAILED(bbuf[59] != 2, "test_float15 case 60 failed\n"); + + FAILED(bbuf[60] != 2, "test_float15 case 61 failed\n"); + FAILED(bbuf[61] != 1, "test_float15 case 62 failed\n"); + FAILED(bbuf[62] != 1, "test_float15 case 63 failed\n"); + FAILED(bbuf[63] != 1, "test_float15 case 64 failed\n"); + + FAILED(bbuf[64] != 2, "test_float15 case 65 failed\n"); + FAILED(bbuf[65] != 1, "test_float15 case 66 failed\n"); + FAILED(bbuf[66] != 2, "test_float15 case 67 failed\n"); + 
FAILED(bbuf[67] != 2, "test_float15 case 68 failed\n"); + + FAILED(bbuf[68] != 1, "test_float15 case 69 failed\n"); + FAILED(bbuf[69] != 2, "test_float15 case 70 failed\n"); + FAILED(bbuf[70] != 1, "test_float15 case 71 failed\n"); + FAILED(bbuf[71] != 1, "test_float15 case 72 failed\n"); + + FAILED(bbuf[72] != 2, "test_float15 case 73 failed\n"); + FAILED(bbuf[73] != 1, "test_float15 case 74 failed\n"); + FAILED(bbuf[74] != 1, "test_float15 case 75 failed\n"); + FAILED(bbuf[75] != 1, "test_float15 case 76 failed\n"); + + FAILED(bbuf[76] != 2, "test_float15 case 77 failed\n"); + FAILED(bbuf[77] != 1, "test_float15 case 78 failed\n"); + FAILED(bbuf[78] != 2, "test_float15 case 79 failed\n"); + FAILED(bbuf[79] != 2, "test_float15 case 80 failed\n"); + + FAILED(bbuf[80] != 1, "test_float15 case 81 failed\n"); + FAILED(bbuf[81] != 2, "test_float15 case 82 failed\n"); + FAILED(bbuf[82] != 1, "test_float15 case 83 failed\n"); + FAILED(bbuf[83] != 1, "test_float15 case 84 failed\n"); + + FAILED(bbuf[84] != 2, "test_float15 case 85 failed\n"); + FAILED(bbuf[85] != 1, "test_float15 case 86 failed\n"); + FAILED(bbuf[86] != 2, "test_float15 case 87 failed\n"); + FAILED(bbuf[87] != 2, "test_float15 case 88 failed\n"); + + FAILED(bbuf[88] != 2, "test_float15 case 89 failed\n"); + FAILED(bbuf[89] != 1, "test_float15 case 90 failed\n"); + FAILED(bbuf[90] != 1, "test_float15 case 91 failed\n"); + FAILED(bbuf[91] != 1, "test_float15 case 92 failed\n"); + + FAILED(bbuf[92] != 1, "test_float15 case 93 failed\n"); + FAILED(bbuf[93] != 2, "test_float15 case 94 failed\n"); + FAILED(bbuf[94] != 2, "test_float15 case 95 failed\n"); + FAILED(bbuf[95] != 2, "test_float15 case 96 failed\n"); + + successful_tests++; +} + +static void test_float16(void) +{ + /* Test sljit_emit_fcopy. 
*/ + executable_code code; + struct sljit_compiler* compiler; + sljit_f64 dbuf[4]; + sljit_f32 sbuf[2]; +#if IS_64BIT + sljit_sw wbuf[2]; + sljit_s32 ibuf[2]; +#else /* !IS_64BIT */ + sljit_s32 ibuf[7]; +#endif /* IS_64BIT */ + + if (verbose) + printf("Run test_float16\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sbuf[0] = 12345.0; + sbuf[1] = -1.0; + ibuf[0] = -1; + ibuf[1] = (sljit_s32)0xc7543100; + dbuf[0] = 123456789012345.0; + dbuf[1] = -1.0; +#if IS_64BIT + wbuf[0] = -1; + wbuf[1] = (sljit_sw)0xc2fee0c29f50cb10; +#else /* !IS_64BIT */ + ibuf[2] = -1; + ibuf[3] = -1; + ibuf[4] = -1; + ibuf[5] = (sljit_sw)0x9f50cb10; + ibuf[6] = (sljit_sw)0xc2fee0c2; +#endif /* IS_64BIT */ + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 5, 5, 5, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)ibuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fcopy(compiler, SLJIT_COPY32_FROM_F32, SLJIT_FR2, SLJIT_R0); + /* ibuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R3, 0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_s32)); + sljit_emit_fcopy(compiler, SLJIT_COPY32_TO_F32, SLJIT_FR4, SLJIT_R3); + /* sbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32), SLJIT_FR4, 0); + +#if IS_64BIT + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)wbuf); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fcopy(compiler, SLJIT_COPY_FROM_F64, SLJIT_FR1, SLJIT_S2); + /* wbuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_S2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); + sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR0, SLJIT_R3); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, 
SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); + sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR3, SLJIT_R2); + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR3, 0); +#else /* !IS_64BIT */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fcopy(compiler, SLJIT_COPY_FROM_F64, SLJIT_FR1, SLJIT_REG_PAIR(SLJIT_S3, SLJIT_S2)); + /* ibuf[2-3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 3 * sizeof(sljit_sw), SLJIT_S3, 0); + + sljit_emit_fcopy(compiler, SLJIT_COPY_FROM_F64, SLJIT_FR1, SLJIT_R2); + /* ibuf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 4 * sizeof(sljit_sw), SLJIT_R2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_MEM1(SLJIT_R1), 5 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 6 * sizeof(sljit_sw)); + sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR0, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R3)); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); + sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR3, SLJIT_REG_PAIR(SLJIT_R2, SLJIT_R2)); + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)0xc00c0000); + sljit_emit_fcopy(compiler, SLJIT_COPY_TO_F64, SLJIT_FR3, SLJIT_R2); + /* dbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR3, 0); +#endif /* IS_64BIT */ + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + 
CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)dbuf, (sljit_sw)sbuf); + sljit_free_code(code.code, NULL); + + FAILED(ibuf[0] != (sljit_s32)0x4640e400, "test_float16 case 1 failed\n"); + FAILED(sbuf[1] != -54321.0, "test_float16 case 2 failed\n"); +#if IS_64BIT + FAILED(wbuf[0] != (sljit_sw)0x42dc12218377de40, "test_float16 case 3 failed\n"); + FAILED(dbuf[1] != -543210987654321.0, "test_float16 case 4 failed\n"); + FAILED(dbuf[2] != 0.0, "test_float16 case 5 failed\n"); +#else /* !IS_64BIT */ + FAILED(ibuf[2] != (sljit_sw)0x8377de40, "test_float16 case 3 failed\n"); + FAILED(ibuf[3] != (sljit_sw)0x42dc1221, "test_float16 case 4 failed\n"); + FAILED(ibuf[4] != (sljit_sw)0x42dc1221, "test_float16 case 5 failed\n"); + FAILED(dbuf[1] != -543210987654321.0, "test_float16 case 6 failed\n"); + FAILED(dbuf[2] != 0.0, "test_float16 case 7 failed\n"); + FAILED(dbuf[3] != -3.5, "test_float16 case 8 failed\n"); +#endif /* IS_64BIT */ + + successful_tests++; +} + +static void test_float17(void) +{ + /* Test fselect operation. 
*/ + executable_code code; + struct sljit_compiler* compiler; + sljit_f64 dbuf[10]; + sljit_f32 sbuf[10]; + sljit_s32 i; + + if (verbose) + printf("Run test_float17\n"); + + for (i = 4; i < 10; i++) + dbuf[i] = -1.0; + for (i = 4; i < 10; i++) + sbuf[i] = -1.0; + + dbuf[0] = 759.25; + dbuf[1] = -316.25; + dbuf[2] = 591.5; + dbuf[3] = -801.75; + + sbuf[0] = 630.5; + sbuf[1] = -912.75; + sbuf[2] = 264.25; + sbuf[3] = -407.5; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(W, W), 3, 3, 4, 0, 2 * sizeof(sljit_f64)); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op2u(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_fselect(compiler, SLJIT_CARRY, SLJIT_FR2, SLJIT_FR3, 0, SLJIT_FR2); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_FR2, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, (sljit_s32)0x80000000); + sljit_emit_op2u(compiler, SLJIT_ADD32 | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_fselect(compiler, SLJIT_OVERFLOW, SLJIT_FR2, SLJIT_FR2, 0, SLJIT_FR3); + /* dbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR2, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED, SLJIT_FR2, 0, SLJIT_FR3, 0); + sljit_emit_fselect(compiler, SLJIT_UNORDERED, SLJIT_FR3, SLJIT_MEM0(), (sljit_sw)(dbuf + 2), SLJIT_FR2); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 6 * 
sizeof(sljit_f64), SLJIT_FR3, 0); + sljit_emit_fselect(compiler, SLJIT_ORDERED, SLJIT_FR2, SLJIT_MEM0(), (sljit_sw)(dbuf + 2), SLJIT_FR2); + /* dbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64), SLJIT_FR2, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_F_GREATER, SLJIT_FR2, 0, SLJIT_FR2, 0); + sljit_emit_fselect(compiler, SLJIT_F_LESS_EQUAL, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f64), SLJIT_FR0); + /* dbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_fselect(compiler, SLJIT_F_LESS_EQUAL, SLJIT_FR1, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 3, SLJIT_FR1); + /* dbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64), SLJIT_FR1, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 10); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_IMM, 10); + sljit_emit_fselect(compiler, SLJIT_EQUAL | SLJIT_32, SLJIT_FR0, SLJIT_FR1, 0, SLJIT_FR2); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR0, 0); + sljit_emit_fselect(compiler, SLJIT_NOT_EQUAL | SLJIT_32, SLJIT_FR0, SLJIT_FR1, 0, SLJIT_FR2); + /* sbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, 
SLJIT_S1, 0, SLJIT_IMM, WCONST(0x1234000000, 0x123400) + 3 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_ORDERED_GREATER, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_fselect(compiler, SLJIT_ORDERED_GREATER | SLJIT_32, SLJIT_FR1, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_FR2); + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_FR1, 0); + sljit_emit_fselect(compiler, SLJIT_ORDERED_GREATER | SLJIT_32, SLJIT_FR2, SLJIT_MEM1(SLJIT_R1), WCONST(-0x1234000000, -0x123400), SLJIT_FR2); + /* sbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_f32), SLJIT_FR2, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -100); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R0, 0, SLJIT_IMM, 10); + sljit_emit_fselect(compiler, SLJIT_SIG_LESS | SLJIT_32, SLJIT_FR2, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_FR3); + /* sbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 8 * sizeof(sljit_f32), SLJIT_FR2, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S1, 0, SLJIT_IMM, -0x5678 + 2 * (sljit_s32)sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_ORDERED_EQUAL, SLJIT_FR3, 0, SLJIT_FR3, 0); + sljit_emit_fselect(compiler, SLJIT_ORDERED_EQUAL | SLJIT_32, SLJIT_FR3, SLJIT_MEM1(SLJIT_S2), 0x5678, SLJIT_FR3); + /* sbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 9 * sizeof(sljit_f32), SLJIT_FR3, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)dbuf, (sljit_sw)sbuf); + sljit_free_code(code.code, NULL); + + FAILED(dbuf[4] != -316.25, "test_float17 case 
1 failed\n"); + FAILED(dbuf[5] != 759.25, "test_float17 case 2 failed\n"); + FAILED(dbuf[6] != 759.25, "test_float17 case 3 failed\n"); + FAILED(dbuf[7] != 591.5, "test_float17 case 4 failed\n"); + FAILED(dbuf[8] != -801.75, "test_float17 case 5 failed\n"); + FAILED(dbuf[9] != -316.25, "test_float17 case 6 failed\n"); + FAILED(sbuf[4] != 630.5, "test_float17 case 7 failed\n"); + FAILED(sbuf[5] != -912.75, "test_float17 case 8 failed\n"); + FAILED(sbuf[6] != 264.25, "test_float17 case 9 failed\n"); + FAILED(sbuf[7] != -407.5, "test_float17 case 10 failed\n"); + FAILED(sbuf[8] != -912.75, "test_float17 case 11 failed\n"); + FAILED(sbuf[9] != 264.25, "test_float17 case 12 failed\n"); + + successful_tests++; +} + +static void test_float18(void) +{ + /* Floating point set immediate. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_f64 dbuf[6]; + sljit_f32 sbuf[5]; + sljit_s32 check_buf[2]; + sljit_s32 i; + + if (verbose) + printf("Run test_float18\n"); + + for (i = 0; i < 6; i++) + dbuf[i] = -1.0; + + for (i = 0; i < 5; i++) + sbuf[i] = -1.0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 2, 2, 4, 0, 0); + + sljit_emit_fset64(compiler, SLJIT_FR0, 0.0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_FR0, 0); + sljit_emit_fset64(compiler, SLJIT_FR1, -0.0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR1, 0); + sljit_emit_fset64(compiler, SLJIT_FR2, 1.0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR2, 0); + sljit_emit_fset64(compiler, SLJIT_FR2, -31.0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR2, 0); + sljit_emit_fset64(compiler, SLJIT_FR2, 545357837627392.0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_FR2, 0); + 
sljit_emit_fset64(compiler, SLJIT_FR0, 983752153845214.5); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fset32(compiler, SLJIT_FR0, 0.0f); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_FR0, 0); + sljit_emit_fset32(compiler, SLJIT_FR1, -0.0f); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32), SLJIT_FR1, 0); + sljit_emit_fset32(compiler, SLJIT_FR2, 1.0f); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_FR2, 0); + sljit_emit_fset32(compiler, SLJIT_FR2, 31.0f); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f32), SLJIT_FR2, 0); + sljit_emit_fset32(compiler, SLJIT_FR2, -811.5f); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR2, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&dbuf, (sljit_sw)&sbuf); + sljit_free_code(code.code, NULL); + + copy_u8(check_buf, 0, dbuf + 0, sizeof(sljit_f64)); + FAILED(check_buf[0] != 0, "test_float18 case 1 failed\n"); + FAILED(check_buf[1] != 0, "test_float18 case 2 failed\n"); + copy_u8(check_buf, 0, dbuf + 1, sizeof(sljit_f64)); +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + FAILED(check_buf[0] != 0, "test_float18 case 3 failed\n"); + FAILED(check_buf[1] != (sljit_s32)0x80000000, "test_float18 case 4 failed\n"); +#else /* !SLJIT_LITTLE_ENDIAN */ + FAILED(check_buf[1] != 0, "test_float18 case 3 failed\n"); + FAILED(check_buf[0] != (sljit_s32)0x80000000, "test_float18 case 4 failed\n"); +#endif /* SLJIT_LITTLE_ENDIAN */ + FAILED(dbuf[2] != 1.0, "test_float18 case 5 failed\n"); + FAILED(dbuf[3] != -31.0, "test_float18 case 6 failed\n"); + FAILED(dbuf[4] != 545357837627392.0, "test_float18 case 7 failed\n"); + FAILED(dbuf[5] != 983752153845214.5, 
"test_float18 case 8 failed\n"); + + copy_u8(check_buf, 0, sbuf + 0, sizeof(sljit_f32)); + FAILED(check_buf[0] != 0, "test_float18 case 9 failed\n"); + copy_u8(check_buf, 0, sbuf + 1, sizeof(sljit_f32)); + FAILED(check_buf[0] != (sljit_s32)0x80000000, "test_float18 case 10 failed\n"); + FAILED(sbuf[2] != 1.0, "test_float18 case 11 failed\n"); + FAILED(sbuf[3] != 31.0, "test_float18 case 12 failed\n"); + FAILED(sbuf[4] != -811.5, "test_float18 case 13 failed\n"); + + successful_tests++; +} + +static void test_float19(void) +{ + /* Floating point convert from unsigned. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_f64 dbuf[9]; + sljit_f32 sbuf[9]; + sljit_s32 i; + sljit_sw value1 = WCONST(0xfffffffffffff800, 0xffffff00); + sljit_sw value2 = WCONST(0x8000000000000801, 0x80000101); + + union { + sljit_f64 value; +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + struct { + sljit_u32 low; + sljit_u32 high; + } bin; +#else /* !SLJIT_LITTLE_ENDIAN */ + struct { + sljit_u32 high; + sljit_u32 low; + } bin; +#endif /* SLJIT_LITTLE_ENDIAN */ + } f64_check; + + union { + sljit_f32 value; + sljit_u32 bin; + } f32_check; + + if (verbose) + printf("Run test_float19\n"); + + for (i = 0; i < 9; i++) + dbuf[i] = -1.0; + + for (i = 0; i < 9; i++) + sbuf[i] = -1.0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 4, 4, 4, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x7fffffff); + /* dbuf[0] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_U32, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R1, 0); + /* sbuf[0] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_U32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_U32, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 3, SLJIT_IMM, (sljit_sw)0xfff00000); + /* sbuf[1] */ + 
sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_U32, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), 2, SLJIT_IMM, (sljit_sw)0xfff00000); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)0xffffff80); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_U32, SLJIT_FR1, 0, SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_U32, SLJIT_FR3, 0, SLJIT_R0, 0); + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR1, 0); + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_FR3, 0); + + /* dbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_UW, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_IMM, (sljit_sw)0xffffff00); + /* sbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_UW, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f32), SLJIT_IMM, (sljit_sw)0xffffff00); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, WCONST(0x7fff000000000000, 0x7fff0000)); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_UW, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_R3, 0); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_UW, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_R3, 0); + + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_UW, SLJIT_FR2, 0, SLJIT_MEM0(), (sljit_sw)&value1); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_UW, SLJIT_FR1, 0, SLJIT_MEM0(), (sljit_sw)&value1); + /* dbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR2, 0); + /* sbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_f32), SLJIT_FR1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, WCONST(0xaaaaaaaaaaaaaaaa, 0xaaaaaaaa)); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_UW, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_S3, 0); + /* sbuf[6] */ + sljit_emit_fop1(compiler, 
SLJIT_CONV_F32_FROM_UW, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_S3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&value2 + 64); + /* dbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_UW, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_R2), -64); + /* sbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_UW, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R2), -64); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, WCONST(0x8000000000000401, 0x80000001)); + /* dbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_UW, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_R2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, WCONST(0x8000008000000001, 0x80000081)); + /* sbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_UW, SLJIT_MEM1(SLJIT_S1), 8 * sizeof(sljit_f32), SLJIT_R2, 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&dbuf, (sljit_sw)&sbuf); + + f64_check.value = dbuf[0]; /* 0x7fffffff */ + FAILED(f64_check.bin.low != 0xffc00000 || f64_check.bin.high != 0x41dfffff, "test_float19 case 1 failed\n"); + f32_check.value = sbuf[0]; /* 0x7fffffff */ + FAILED(f32_check.bin != 0x4f000000, "test_float19 case 2 failed\n"); + f64_check.value = dbuf[1]; /* 0xfff00000 */ + FAILED(f64_check.bin.low != 0 || f64_check.bin.high != 0x41effe00, "test_float19 case 3 failed\n"); + f32_check.value = sbuf[1]; /* 0xfff00000 */ + FAILED(f32_check.bin != 0x4f7ff000, "test_float19 case 4 failed\n"); + f64_check.value = dbuf[2]; /* 0xffffff80 */ + FAILED(f64_check.bin.low != 0xf0000000 || f64_check.bin.high != 0x41efffff, "test_float19 case 5 failed\n"); + f32_check.value = sbuf[2]; /* 0xffffff80 */ + FAILED(f32_check.bin != 0x4f800000, "test_float19 case 6 failed\n"); + f64_check.value = dbuf[3]; /* 0xffffff00 */ + FAILED(f64_check.bin.low != 
0xe0000000 || f64_check.bin.high != 0x41efffff, "test_float19 case 7 failed\n"); + f32_check.value = sbuf[3]; /* 0xffffff00 */ + FAILED(f32_check.bin != 0x4f7fffff, "test_float19 case 8 failed\n"); +#if IS_64BIT + f64_check.value = dbuf[4]; /* 0x7fff000000000000 */ + FAILED(f64_check.bin.low != 0 || f64_check.bin.high != 0x43dfffc0, "test_float19 case 9 failed\n"); + f32_check.value = sbuf[4]; /* 0x7fff000000000000 */ + FAILED(f32_check.bin != 0x5efffe00, "test_float19 case 10 failed\n"); + f64_check.value = dbuf[5]; /* 0xfffffffffffff800 */ + FAILED(f64_check.bin.low != 0xffffffff || f64_check.bin.high != 0x43efffff, "test_float19 case 11 failed\n"); + f32_check.value = sbuf[5]; /* 0xfffffffffffff800 */ + FAILED(f32_check.bin != 0x5f800000, "test_float19 case 12 failed\n"); + f64_check.value = dbuf[6]; /* 0xffff000000000000 */ + FAILED(f64_check.bin.low != 0x55555555 || f64_check.bin.high != 0x43e55555, "test_float19 case 13 failed\n"); + f32_check.value = sbuf[6]; /* 0xffff000000000000 */ + FAILED(f32_check.bin != 0x5f2aaaab, "test_float19 case 14 failed\n"); + f64_check.value = dbuf[7]; /* 0x8000000000000801 */ + FAILED(f64_check.bin.low != 1 || f64_check.bin.high != 0x43e00000, "test_float19 case 15 failed\n"); + f32_check.value = sbuf[7]; /* 0x8000000000000801 */ + FAILED(f32_check.bin != 0x5f000000, "test_float19 case 16 failed\n"); + f64_check.value = dbuf[8]; /* 0x8000000000000401 */ + FAILED(f64_check.bin.low != 1 || f64_check.bin.high != 0x43e00000, "test_float19 case 17 failed\n"); + f32_check.value = sbuf[8]; /* 0x8000008000000001 */ + FAILED(f32_check.bin != 0x5f000001, "test_float19 case 18 failed\n"); +#else /* !IS_64BIT */ + f64_check.value = dbuf[4]; /* 0x7fff0000 */ + FAILED(f64_check.bin.low != 0 || f64_check.bin.high != 0x41dfffc0, "test_float19 case 9 failed\n"); + f32_check.value = sbuf[4]; /* 0x7fff0000 */ + FAILED(f32_check.bin != 0x4efffe00, "test_float19 case 10 failed\n"); + f64_check.value = dbuf[5]; /* 0xffffff00 */ + 
FAILED(f64_check.bin.low != 0xe0000000 || f64_check.bin.high != 0x41efffff, "test_float19 case 11 failed\n"); + f32_check.value = sbuf[5]; /* 0xffffff00 */ + FAILED(f32_check.bin != 0x4f7fffff, "test_float19 case 12 failed\n"); + f64_check.value = dbuf[6]; /* 0xaaaaaaaa */ + FAILED(f64_check.bin.low != 0x55400000 || f64_check.bin.high != 0x41e55555, "test_float19 case 13 failed\n"); + f32_check.value = sbuf[6]; /* 0xaaaaaaaa */ + FAILED(f32_check.bin != 0x4f2aaaab, "test_float19 case 14 failed\n"); + f64_check.value = dbuf[7]; /* 0x80000101 */ + FAILED(f64_check.bin.low != 0x20200000 || f64_check.bin.high != 0x41e00000, "test_float19 case 15 failed\n"); + f32_check.value = sbuf[7]; /* 0x80000101 */ + FAILED(f32_check.bin != 0x4f000001, "test_float19 case 16 failed\n"); + f64_check.value = dbuf[8]; /* 0x80000001 */ + FAILED(f64_check.bin.low != 0x00200000 || f64_check.bin.high != 0x41e00000, "test_float19 case 17 failed\n"); + f32_check.value = sbuf[8]; /* 0x80000081 */ + FAILED(f32_check.bin != 0x4f000001, "test_float19 case 18 failed\n"); +#endif /* IS_64BIT */ + + successful_tests++; +} + +static void test_float20(void) +{ + /* Test fpu copysign. 
*/ + executable_code code; + struct sljit_compiler* compiler; + int i; + + union { + sljit_f64 value; + struct { +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + sljit_u32 lo; + sljit_u32 hi; +#else /* !SLJIT_LITTLE_ENDIAN */ + sljit_u32 hi; + sljit_u32 lo; +#endif /* SLJIT_LITTLE_ENDIAN */ + } bits; + } dbuf[8]; + union { + sljit_f32 value; + sljit_u32 bits; + } sbuf[8]; + + if (verbose) + printf("Run test_float20\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 8; i++) + dbuf[i].value = 123.0; + + for (i = 0; i < 8; i++) + sbuf[i].value = 123.0f; + + dbuf[0].value = 1786.5; + dbuf[1].value = -8403.25; + dbuf[2].bits.lo = 0; + dbuf[2].bits.hi = 0x7fff0000; + dbuf[3].value = 9054; + + sbuf[0].value = 6371.75f; + sbuf[1].value = -2713.5f; + sbuf[2].bits = 0xfff00000; + sbuf[3].value = -5791.25f; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 2, 2, 6, 0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F64, SLJIT_FR0, SLJIT_FR0, 0, SLJIT_FR1, 0); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F32, SLJIT_FR3, SLJIT_FR4, 0, SLJIT_FR3, 0); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 8 * sizeof(sljit_f64)); + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F64, SLJIT_FR2, 
SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0, SLJIT_MEM1(SLJIT_R1), -7 * (sljit_sw)sizeof(sljit_f64)); + /* dbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR2, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F32, SLJIT_FR5, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 2, SLJIT_FR4, 0); + /* sbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_f32), SLJIT_FR5, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64)); + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F64, SLJIT_FR0, SLJIT_FR3, 0, SLJIT_FR2, 0); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f32)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 0x12345); + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F32, SLJIT_FR2, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R0), -0x12345 + (sljit_sw)sizeof(sljit_f32)); + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_FR2, 0); + + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F64, SLJIT_FR5, SLJIT_MEM0(), (sljit_sw)(dbuf + 1), SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); + /* dbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64), SLJIT_FR5, 0); + + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F32, SLJIT_FR4, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_MEM0(), (sljit_sw)(sbuf + 2)); + /* sbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_f32), SLJIT_FR4, 0); + + sljit_emit_return_void(compiler); + + code.code = 
sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&dbuf, (sljit_sw)&sbuf); + FAILED(dbuf[4].value != -1786.5, "test_float20 case 1 failed\n"); + FAILED(sbuf[4].value != 2713.5, "test_float20 case 2 failed\n"); + FAILED(dbuf[5].bits.lo != 0, "test_float20 case 3 failed\n"); + FAILED(dbuf[5].bits.hi != 0xffff0000, "test_float20 case 4 failed\n"); + FAILED(sbuf[5].bits != 0x7ff00000, "test_float20 case 5 failed\n"); + FAILED(dbuf[6].value != 9054, "test_float20 case 6 failed\n"); + FAILED(sbuf[6].value != -5791.25, "test_float20 case 7 failed\n"); + FAILED(dbuf[7].value != 8403.25, "test_float20 case 8 failed\n"); + FAILED(sbuf[7].value != -sbuf[0].value, "test_float20 case 9 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test_float21(void) +{ + /* Test f64 as f32 register pair access. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_f32 buf[10]; + sljit_sw num; + sljit_s32 i; + + if (verbose) + printf("Run test_float21\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_F64_AS_F32_PAIR)) { + if (verbose) + printf("f32 register pairs are not available, test_float21 skipped\n"); + successful_tests++; + return; + } + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = -45.25; + buf[1] = 33.5; + buf[2] = -104.75; + + for (i = 3; i < 10; i++) + buf[i] = -1.0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 2, 4, 2, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_FR0), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_F64_SECOND(SLJIT_FR0), 0, SLJIT_F64_SECOND(SLJIT_FR0), 0); + /* buf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f32), SLJIT_F64_SECOND(SLJIT_FR0), 0); + + sljit_emit_fop1(compiler, 
SLJIT_ABS_F32, SLJIT_F64_SECOND(SLJIT_FR0), 0, SLJIT_FR0, 0); + /* buf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f32), SLJIT_F64_SECOND(SLJIT_FR0), 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 89); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_F64_SECOND(SLJIT_FR1), 0, SLJIT_R0, 0); + /* buf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f32), SLJIT_F64_SECOND(SLJIT_FR1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_FR1), 0, SLJIT_FR1, 0); + /* num */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM0(), (sljit_sw)&num, SLJIT_F64_SECOND(SLJIT_FR1), 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FS1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_FS1), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop2(compiler, SLJIT_ADD_F32, SLJIT_F64_SECOND(SLJIT_FS1), 0, SLJIT_FS1, 0, SLJIT_F64_SECOND(SLJIT_FS1), 0); + /* buf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f32), SLJIT_F64_SECOND(SLJIT_FS1), 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_FR1), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop2r(compiler, SLJIT_COPYSIGN_F32, SLJIT_F64_SECOND(SLJIT_FR1), SLJIT_FR1, 0, SLJIT_F64_SECOND(SLJIT_FR1), 0); + /* buf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f32), SLJIT_F64_SECOND(SLJIT_FR1), 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_FS0), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fset32(compiler, SLJIT_F64_SECOND(SLJIT_FS0), -78.75f); + /* buf[8] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 8 * 
sizeof(sljit_f32), SLJIT_F64_SECOND(SLJIT_FS0), 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_F64_SECOND(SLJIT_FR3), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fcopy(compiler, SLJIT_COPY32_TO_F32, SLJIT_F64_SECOND(SLJIT_FR3), SLJIT_S1); + /* buf[9] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f32), SLJIT_F64_SECOND(SLJIT_FR3), 0); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + sljit_free_code(code.code, NULL); + + FAILED(buf[3] != -33.5, "test_float21 case 1 failed\n"); + FAILED(buf[4] != 45.25, "test_float21 case 2 failed\n"); + FAILED(buf[5] != 89.0, "test_float21 case 3 failed\n"); + FAILED(num != -104, "test_float21 case 4 failed\n"); + FAILED(buf[6] != -11.75, "test_float21 case 5 failed\n"); + FAILED(buf[7] != -33.5, "test_float21 case 6 failed\n"); + FAILED(buf[8] != -78.75, "test_float21 case 7 failed\n"); + FAILED(buf[9] != -45.25, "test_float21 case 8 failed\n"); + + successful_tests++; +} + +static void test_float22(void) +{ + /* Test float to int conversion corner cases. 
*/ + executable_code code; + struct sljit_compiler *compiler; + struct sljit_label *label; + int i; + + union { + sljit_f64 value_f64; + sljit_uw value_uw; + sljit_u32 value_u32; + + struct { +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + sljit_u32 lo; + sljit_u32 hi; +#else /* !SLJIT_LITTLE_ENDIAN */ + sljit_u32 hi; + sljit_u32 lo; +#endif /* SLJIT_LITTLE_ENDIAN */ + } bits; + } dbuf[32]; + union { + sljit_f32 value_f32; + sljit_u32 bits; + } sbuf[6]; + + const sljit_uw min_uw = (sljit_uw)1 << ((sizeof(sljit_uw) * 8) - 1); + const sljit_u32 min_u32 = (sljit_u32)1 << 31; + +#if SLJIT_CONV_MAX_FLOAT == SLJIT_CONV_RESULT_MIN_INT + const sljit_uw large_pos_uw = min_uw; + const sljit_u32 large_pos_u32 = min_u32; +#else + const sljit_uw large_pos_uw = min_uw - 1; + const sljit_u32 large_pos_u32 = min_u32 - 1; +#endif + +#if SLJIT_CONV_MIN_FLOAT == SLJIT_CONV_RESULT_MIN_INT + const sljit_uw large_neg_uw = min_uw; + const sljit_u32 large_neg_u32 = min_u32; +#else + const sljit_uw large_neg_uw = min_uw - 1; + const sljit_u32 large_neg_u32 = min_u32 - 1; +#endif + +#if SLJIT_CONV_NAN_FLOAT == SLJIT_CONV_RESULT_MIN_INT + const sljit_uw nan_uw = min_uw; + const sljit_u32 nan_u32 = min_u32; +#elif SLJIT_CONV_NAN_FLOAT == SLJIT_CONV_RESULT_MAX_INT + const sljit_uw nan_uw = min_uw - 1; + const sljit_u32 nan_u32 = min_u32 - 1; +#else + const sljit_uw nan_uw = 0; + const sljit_u32 nan_u32 = 0; +#endif + + if (verbose) + printf("Run test_float22\n"); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 31; i++) + dbuf[i].value_f64 = 123.0; + + /* Large positive integer */ + dbuf[0].bits.hi = (sljit_u32)0x7fe << 20; + dbuf[0].bits.lo = 0; + /* Large negative integer */ + dbuf[1].bits.hi = (sljit_u32)0xffe << 20; + dbuf[1].bits.lo = 0; + /* Positive infinity */ + dbuf[2].bits.hi = (sljit_u32)0x7ff << 20; + dbuf[2].bits.lo = 0; + /* Negative infinity */ + dbuf[3].bits.hi = (sljit_u32)0xfff << 20; + 
dbuf[3].bits.lo = 0; + /* Canonical NaN */ + dbuf[4].bits.hi = (sljit_u32)0xfff << 19; + dbuf[4].bits.lo = 0; + /* NaN */ + dbuf[5].bits.hi = (sljit_u32)0xfff << 20; + dbuf[5].bits.lo = 1; + + /* Large positive integer */ + sbuf[0].bits = (sljit_u32)0x7f000000; + /* Large negative integer */ + sbuf[1].bits = (sljit_u32)0xff000000; + /* Positive infinity */ + sbuf[2].bits = (sljit_u32)0x7f800000; + /* Negative infinity */ + sbuf[3].bits = (sljit_u32)0xff800000; + /* Canonical NaN */ + sbuf[4].bits = (sljit_u32)0x7fc00000; + /* NaN */ + sbuf[5].bits = (sljit_u32)0x7f800001; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, P), 2, 2, 2, 0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 6 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_R0, 0); + + label = sljit_emit_label(compiler); + /* dbuf[6 - 17] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_f64)); + sljit_set_label(sljit_emit_cmp(compiler, SLJIT_LESS, SLJIT_S0, 0, SLJIT_R1, 0), label); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 6 * sizeof(sljit_f32)); + + label = sljit_emit_label(compiler); + /* dbuf[18 - 29] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, sizeof(sljit_f32)); + 
sljit_set_label(sljit_emit_cmp(compiler, SLJIT_LESS, SLJIT_S1, 0, SLJIT_R1, 0), label); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&dbuf, (sljit_sw)&sbuf); + sljit_free_code(code.code, NULL); + + /* Large integer */ + FAILED(dbuf[6].value_uw != large_pos_uw, "test_float22 case 1 failed\n"); + FAILED(dbuf[7].value_u32 != large_pos_u32, "test_float22 case 2 failed\n"); + FAILED(dbuf[8].value_uw != large_neg_uw, "test_float22 case 3 failed\n"); + FAILED(dbuf[9].value_u32 != large_neg_u32, "test_float22 case 4 failed\n"); + /* Infinity */ + FAILED(dbuf[10].value_uw != large_pos_uw, "test_float22 case 5 failed\n"); + FAILED(dbuf[11].value_u32 != large_pos_u32, "test_float22 case 6 failed\n"); + FAILED(dbuf[12].value_uw != large_neg_uw, "test_float22 case 7 failed\n"); + FAILED(dbuf[13].value_u32 != large_neg_u32, "test_float22 case 8 failed\n"); + /* NaN */ + FAILED(dbuf[14].value_uw != nan_uw, "test_float22 case 9 failed\n"); + FAILED(dbuf[15].value_u32 != nan_u32, "test_float22 case 10 failed\n"); + FAILED(dbuf[16].value_uw != nan_uw, "test_float22 case 11 failed\n"); + FAILED(dbuf[17].value_u32 != nan_u32, "test_float22 case 12 failed\n"); + + /* Large integer */ + FAILED(dbuf[18].value_uw != large_pos_uw, "test_float22 case 13 failed\n"); + FAILED(dbuf[19].value_u32 != large_pos_u32, "test_float22 case 14 failed\n"); + FAILED(dbuf[20].value_uw != large_neg_uw, "test_float22 case 15 failed\n"); + FAILED(dbuf[21].value_u32 != large_neg_u32, "test_float22 case 16 failed\n"); + /* Infinity */ + FAILED(dbuf[22].value_uw != large_pos_uw, "test_float22 case 17 failed\n"); + FAILED(dbuf[23].value_u32 != large_pos_u32, "test_float22 case 18 failed\n"); + FAILED(dbuf[24].value_uw != large_neg_uw, "test_float22 case 19 failed\n"); + FAILED(dbuf[25].value_u32 != large_neg_u32, "test_float22 case 20 failed\n"); + /* NaN */ + FAILED(dbuf[26].value_uw != nan_uw, 
"test_float22 case 21 failed\n"); + FAILED(dbuf[27].value_u32 != nan_u32, "test_float22 case 22 failed\n"); + FAILED(dbuf[28].value_uw != nan_uw, "test_float22 case 23 failed\n"); + FAILED(dbuf[29].value_u32 != nan_u32, "test_float22 case 24 failed\n"); + + FAILED(dbuf[30].value_f64 != 123.0, "test_float22 case 25 failed\n"); + + successful_tests++; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestSerialize.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestSerialize.h new file mode 100755 index 0000000000..522e91c5be --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestSerialize.h @@ -0,0 +1,361 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static void test_serialize1(void) +{ + /* Test serializing large code. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *label; + struct sljit_jump *jump1; + struct sljit_jump *jump2; + struct sljit_jump *mov_addr; + sljit_sw executable_offset; + sljit_uw const_addr; + sljit_uw jump_addr; + sljit_uw label_addr; + sljit_sw buf[3]; + sljit_uw* serialized_buffer; + sljit_uw serialized_size; + sljit_s32 i; + + if (verbose) + printf("Run test_serialize1\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 2, 0, 0, 0); + + jump1 = sljit_emit_jump(compiler, SLJIT_JUMP); + label = sljit_emit_label(compiler); + jump2 = sljit_emit_jump(compiler, SLJIT_JUMP); + sljit_set_label(jump2, label); + label = sljit_emit_label(compiler); + sljit_set_label(jump1, label); + + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_R2, 0); + /* buf[0] */ + sljit_emit_const(compiler, SLJIT_MEM1(SLJIT_S0), 0, -1234); + + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, -1234); + + label = sljit_emit_label(compiler); + sljit_set_label(mov_addr, label); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 7); + for (i = 0; i < 4096; i++) + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, 
SLJIT_IMM, 3); + + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0); + + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_IMM, -56789); + jump1 = sljit_emit_jump(compiler, SLJIT_JUMP | SLJIT_REWRITABLE_JUMP); + label = sljit_emit_label(compiler); + sljit_set_label(jump1, label); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_IMM, 0); + label = sljit_emit_label(compiler); + + serialized_buffer = sljit_serialize_compiler(compiler, 0, &serialized_size); + FAILED(!serialized_buffer, "cannot serialize compiler\n"); + sljit_free_compiler(compiler); + + /* Continue code generation. */ + compiler = sljit_deserialize_compiler(serialized_buffer, serialized_size, 0, NULL, NULL); + SLJIT_FREE(serialized_buffer, NULL); + FAILED(!compiler, "cannot deserialize compiler\n"); + + jump1 = sljit_emit_jump(compiler, SLJIT_JUMP); + label = sljit_emit_label(compiler); + jump2 = sljit_emit_jump(compiler, SLJIT_JUMP); + sljit_set_label(jump2, label); + label = sljit_emit_label(compiler); + sljit_set_label(jump1, label); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + executable_offset = sljit_get_executable_offset(compiler); + const_addr = sljit_get_const_addr(sljit_get_first_const(compiler)); + jump1 = sljit_get_next_jump(sljit_get_next_jump(sljit_get_next_jump(sljit_get_first_jump(compiler)))); + SLJIT_ASSERT(!sljit_jump_is_mov_addr(jump1)); + jump_addr = sljit_get_jump_addr(jump1); + label = sljit_get_next_label(sljit_get_next_label(sljit_get_next_label(sljit_get_next_label(sljit_get_first_label(compiler))))); + label_addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + sljit_set_const(const_addr, 87654, executable_offset); + sljit_set_jump_addr(jump_addr, label_addr, 
executable_offset); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 87654, "test_serialize1 case 1 failed\n"); + FAILED(buf[1] != 7 + 4096 * 3, "test_serialize1 case 2 failed\n"); + FAILED(buf[2] != -56789, "test_serialize1 case 3 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test_serialize2(void) +{ + /* Test serializing jumps/labels. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *label; + struct sljit_jump *jump; + sljit_uw* serialized_buffer; + sljit_uw serialized_size; + sljit_sw buf[3]; + + if (verbose) + printf("Run test_serialize2\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS2V(P, W), 3, 3, 0, 0, 32); + sljit_emit_cmp(compiler, SLJIT_EQUAL, SLJIT_S1, 0, SLJIT_IMM, 37); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 0); + + sljit_emit_label(compiler); + /* buf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, -5678); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 16, SLJIT_IMM, -8765); + + sljit_emit_mov_addr(compiler, SLJIT_S2, 0); + sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_S2, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 0); + + serialized_buffer = sljit_serialize_compiler(compiler, 0, &serialized_size); + FAILED(!serialized_buffer, "cannot serialize compiler\n"); + sljit_free_compiler(compiler); + + /* Continue code generation. 
*/ + compiler = sljit_deserialize_compiler(serialized_buffer, serialized_size, 0, NULL, NULL); + SLJIT_FREE(serialized_buffer, NULL); + FAILED(!compiler, "cannot deserialize compiler\n"); + + label = sljit_emit_label(compiler); + SLJIT_ASSERT(sljit_get_label_index(label) == 1); + jump = sljit_get_first_jump(compiler); + SLJIT_ASSERT(!sljit_jump_is_mov_addr(jump)); + SLJIT_ASSERT(!sljit_jump_has_label(jump) && !sljit_jump_has_target(jump)); + sljit_set_label(jump, label); + + /* buf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 3456); + + label = sljit_get_first_label(compiler); + SLJIT_ASSERT(sljit_get_label_index(label) == 0); + jump = sljit_emit_jump(compiler, SLJIT_JUMP); + sljit_set_label(jump, label); + + sljit_emit_label(compiler); + + serialized_buffer = sljit_serialize_compiler(compiler, 0, &serialized_size); + FAILED(!serialized_buffer, "cannot serialize compiler\n"); + sljit_free_compiler(compiler); + + /* Continue code generation. 
*/ + compiler = sljit_deserialize_compiler(serialized_buffer, serialized_size, 0, NULL, NULL); + SLJIT_FREE(serialized_buffer, NULL); + FAILED(!compiler, "cannot deserialize compiler\n"); + + sljit_emit_return_void(compiler); + + jump = sljit_get_first_jump(compiler); + SLJIT_ASSERT(sljit_jump_has_label(jump) && !sljit_jump_has_target(jump)); + jump = sljit_get_next_jump(jump); + SLJIT_ASSERT(sljit_jump_is_mov_addr(jump)); + jump = sljit_get_next_jump(jump); + SLJIT_ASSERT(!sljit_jump_is_mov_addr(jump)); + SLJIT_ASSERT(!sljit_jump_has_label(jump) && !sljit_jump_has_target(jump)); + + label = sljit_emit_label(compiler); + sljit_set_label(jump, label); + + /* buf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_SP), 16); + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_S2, 0); + + label = sljit_get_first_label(compiler); + SLJIT_ASSERT(sljit_get_label_index(label) == 0); + label = sljit_get_next_label(label); + SLJIT_ASSERT(sljit_get_label_index(label) == 1); + label = sljit_get_next_label(label); + SLJIT_ASSERT(sljit_get_label_index(label) == 2); + jump = sljit_get_next_jump(sljit_get_first_jump(compiler)); + SLJIT_ASSERT(sljit_jump_is_mov_addr(jump)); + sljit_set_label(jump, label); + label = sljit_get_next_label(label); + SLJIT_ASSERT(sljit_get_label_index(label) == 3); + SLJIT_ASSERT(sljit_get_next_label(label) == NULL); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, 37); + FAILED(buf[0] != -5678, "test_serialize2 case 1 failed\n"); + FAILED(buf[1] != 3456, "test_serialize2 case 2 failed\n"); + FAILED(buf[2] != -8765, "test_serialize2 case 3 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test_serialize3_f1(sljit_sw a, sljit_sw b, sljit_sw c) +{ + sljit_sw* ptr = (sljit_sw*)c; + ptr[0] = a; + ptr[1] = b; +} + +static void test_serialize3(void) +{ + /* Test serializing consts/calls. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + sljit_sw executable_offset; + sljit_uw* serialized_buffer; + sljit_uw serialized_size; + sljit_sw buf[6]; + sljit_sw label_addr; + sljit_s32 i; + + if (verbose) + printf("Run test_serialize3\n"); + + FAILED(!compiler, "cannot create compiler\n"); + for (i = 0; i < 6 ; i++) + buf[i] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 3, 0, 0, 32); + + sljit_emit_mov_addr(compiler, SLJIT_R0, 0); + sljit_emit_const(compiler, SLJIT_R1, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S0, 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3V(W, W, W)); + /* buf[0], buf[1] */ + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_serialize3_f1)); + + serialized_buffer = sljit_serialize_compiler(compiler, 0, &serialized_size); + FAILED(!serialized_buffer, "cannot serialize compiler\n"); + sljit_free_compiler(compiler); + + /* Continue code generation. */ + compiler = sljit_deserialize_compiler(serialized_buffer, serialized_size, 0, NULL, NULL); + SLJIT_FREE(serialized_buffer, NULL); + FAILED(!compiler, "cannot deserialize compiler\n"); + + sljit_emit_mov_addr(compiler, SLJIT_R0, 0); + sljit_emit_const(compiler, SLJIT_R1, 0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3V(W, W, W)); + /* buf[2], buf[3] */ + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_serialize3_f1)); + + serialized_buffer = sljit_serialize_compiler(compiler, 0, &serialized_size); + FAILED(!serialized_buffer, "cannot serialize compiler\n"); + sljit_free_compiler(compiler); + + /* Continue code generation. 
*/ + compiler = sljit_deserialize_compiler(serialized_buffer, serialized_size, 0, NULL, NULL); + SLJIT_FREE(serialized_buffer, NULL); + FAILED(!compiler, "cannot deserialize compiler\n"); + + sljit_emit_mov_addr(compiler, SLJIT_R0, 0); + sljit_emit_const(compiler, SLJIT_R1, 0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 4 * sizeof(sljit_sw)); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARGS3V(W, W, W)); + /* buf[4], buf[5] */ + sljit_set_target(jump, SLJIT_FUNC_UADDR(test_serialize3_f1)); + + sljit_emit_return_void(compiler); + SLJIT_ASSERT(sljit_get_first_label(compiler) == NULL); + label = sljit_emit_label(compiler); + SLJIT_ASSERT(sljit_get_label_index(label) == 0); + + jump = sljit_get_first_jump(compiler); + SLJIT_ASSERT(sljit_jump_is_mov_addr(jump)); + sljit_set_label(jump, label); + jump = sljit_get_next_jump(jump); + SLJIT_ASSERT(!sljit_jump_is_mov_addr(jump)); + SLJIT_ASSERT(sljit_jump_has_target(jump) && sljit_jump_get_target(jump) == SLJIT_FUNC_UADDR(test_serialize3_f1)); + jump = sljit_get_next_jump(jump); + SLJIT_ASSERT(sljit_jump_is_mov_addr(jump)); + sljit_set_label(jump, label); + jump = sljit_get_next_jump(jump); + SLJIT_ASSERT(!sljit_jump_is_mov_addr(jump)); + SLJIT_ASSERT(sljit_jump_has_target(jump) && sljit_jump_get_target(jump) == SLJIT_FUNC_UADDR(test_serialize3_f1)); + jump = sljit_get_next_jump(jump); + SLJIT_ASSERT(sljit_jump_is_mov_addr(jump)); + sljit_set_label(jump, label); + jump = sljit_get_next_jump(jump); + SLJIT_ASSERT(sljit_jump_has_target(jump) && sljit_jump_get_target(jump) == SLJIT_FUNC_UADDR(test_serialize3_f1)); + SLJIT_ASSERT(sljit_get_next_jump(jump) == NULL); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + executable_offset = sljit_get_executable_offset(compiler); + + const_ = sljit_get_first_const(compiler); + sljit_set_const(sljit_get_const_addr(const_), 0x5678, executable_offset); + const_ = sljit_get_next_const(const_); + 
sljit_set_const(sljit_get_const_addr(const_), -0x9876, executable_offset); + const_ = sljit_get_next_const(const_); + sljit_set_const(sljit_get_const_addr(const_), 0x2345, executable_offset); + SLJIT_ASSERT(sljit_get_next_const(const_) == NULL); + + label_addr = (sljit_sw)sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != label_addr, "test_serialize3 case 1 failed\n"); + FAILED(buf[1] != 0x5678, "test_serialize3 case 2 failed\n"); + FAILED(buf[2] != label_addr, "test_serialize3 case 3 failed\n"); + FAILED(buf[3] != -0x9876, "test_serialize3 case 4 failed\n"); + FAILED(buf[4] != label_addr, "test_serialize3 case 5 failed\n"); + FAILED(buf[5] != 0x2345, "test_serialize3 case 6 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestSimd.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestSimd.h new file mode 100755 index 0000000000..d71f15066d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTestSimd.h @@ -0,0 +1,2333 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static void simd_set(sljit_u8* buf, sljit_u8 start, sljit_s32 length) +{ + do { + *buf++ = start; + start = (sljit_u8)(start + 103); + + if (start == 0xaa) + start = 0xab; + } while (--length != 0); +} + +static sljit_s32 check_simd_mov(sljit_u8* buf, sljit_u8 start, sljit_s32 length) +{ + if (buf[-1] != 0xaa || buf[length] != 0xaa) + return 0; + + do { + if (*buf++ != start) + return 0; + + start = (sljit_u8)(start + 103); + + if (start == 0xaa) + start = 0xab; + } while (--length != 0); + + return 1; +} + +static void test_simd1(void) +{ + /* Test simd data transfer. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + sljit_u8 supported[1]; + sljit_u8* buf; + sljit_u8 data[63 + 880]; + sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS0 : SLJIT_FR5; + + if (verbose) + printf("Run test_simd1\n"); + + /* Buffer is 64 byte aligned. 
*/ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 880; i++) + buf[i] = 0xaa; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + simd_set(buf + 0, 81, 16); + simd_set(buf + 65, 213, 16); + simd_set(buf + 104, 33, 16); + simd_set(buf + 160, 140, 16); + simd_set(buf + 210, 7, 16); + simd_set(buf + 256, 239, 16); + simd_set(buf + 312, 176, 16); + simd_set(buf + 368, 88, 8); + simd_set(buf + 393, 197, 8); + simd_set(buf + 416, 58, 16); + simd_set(buf + 432, 203, 16); + simd_set(buf + 496, 105, 16); + simd_set(buf + 560, 19, 16); + simd_set(buf + 616, 202, 8); + simd_set(buf + 648, 123, 8); + simd_set(buf + 704, 85, 32); + simd_set(buf + 801, 215, 32); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 2, 2, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0, 64); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_128; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + /* buf[32] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 65); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 82 >> 1); + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_UNALIGNED; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0); + /* buf[82] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 1); + + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 70001); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 70001); + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_R0), 70001 + 104); + /* buf[136] */ + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_R1), 136 - 70001); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_128; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM0(), (sljit_sw)(buf + 160)); + /* buf[192] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM0(), (sljit_sw)(buf + 192)); + + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1001); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 1001); + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_R0), 1001 + 210); + /* buf[230] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_R1), 230 - 1001); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 256 >> 3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 288 >> 3); + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_MEM_ALIGNED_128; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 3); + /* buf[288] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 3); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_MEM_ALIGNED_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 312); + /* buf[344] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 344); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 368); + /* buf[384] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 384); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, 
SLJIT_IMM, 393); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 402); + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_MEM_UNALIGNED; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0); + /* buf[402] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 0); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_MEM_ALIGNED_128; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 416); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 432); + /* buf[464] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 464); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 496); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 480); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_FR3, 0); + /* buf[528] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 528); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 560); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 544); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_FR0, 0); + /* buf[592] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 592); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 616); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 608); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_FR5, 0); + /* buf[632] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, 
SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 632); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 648); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 640); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, fs0, 0); + /* buf[664] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 664); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_256; + supported[0] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 704) != SLJIT_ERR_UNSUPPORTED; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32, SLJIT_FR2, fs0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 384); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM2(SLJIT_R1, SLJIT_S1), 1); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16; + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 801 - 32); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 32); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); + sljit_get_local_base(compiler, SLJIT_R1, 0, 128); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_R1), -128); + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_MEM_ALIGNED_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM0(), (sljit_sw)(buf + 834)); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(!check_simd_mov(buf + 32, 81, 16), "test_simd1 case 1 failed\n"); + FAILED(!check_simd_mov(buf + 82, 213, 16), "test_simd1 case 2 failed\n"); + 
FAILED(!check_simd_mov(buf + 136, 33, 16), "test_simd1 case 3 failed\n"); + FAILED(!check_simd_mov(buf + 192, 140, 16), "test_simd1 case 4 failed\n"); + FAILED(!check_simd_mov(buf + 230, 7, 16), "test_simd1 case 5 failed\n"); + FAILED(!check_simd_mov(buf + 288, 239, 16), "test_simd1 case 6 failed\n"); + FAILED(!check_simd_mov(buf + 344, 176, 16), "test_simd1 case 7 failed\n"); +#if IS_ARM + FAILED(!check_simd_mov(buf + 384, 88, 8), "test_simd1 case 8 failed\n"); + FAILED(!check_simd_mov(buf + 402, 197, 8), "test_simd1 case 9 failed\n"); +#endif /* IS_ARM */ + FAILED(!check_simd_mov(buf + 464, sljit_has_cpu_feature(SLJIT_SIMD_REGS_ARE_PAIRS) ? 203 : 58, 16), "test_simd1 case 10 failed\n"); + FAILED(!check_simd_mov(buf + 528, 105, 16), "test_simd1 case 11 failed\n"); + FAILED(!check_simd_mov(buf + 592, 19, 16), "test_simd1 case 12 failed\n"); +#if IS_ARM + FAILED(!check_simd_mov(buf + 632, 202, 8), "test_simd1 case 13 failed\n"); + FAILED(!check_simd_mov(buf + 664, 123, 8), "test_simd1 case 14 failed\n"); +#endif /* IS_ARM */ + + if (supported[0]) { + FAILED(!check_simd_mov(buf + 768, 85, 32), "test_simd1 case 15 failed\n"); + FAILED(!check_simd_mov(buf + 834, 215, 32), "test_simd1 case 16 failed\n"); + } + + successful_tests++; +} + +static sljit_s32 check_simd_lane_mov(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 is_odd) +{ + sljit_s32 count = (length / elem_size) >> 1; + sljit_s32 value = 180 + length - elem_size; + sljit_s32 i; + + if (!is_odd) + value -= elem_size; + + do { + if (is_odd) { + for (i = 0; i < elem_size; i++) + if (*buf++ != 0xaa) + return 0; + } + + for (i = 0; i < elem_size; i++) + if (*buf++ != value++) + return 0; + + if (!is_odd) { + for (i = 0; i < elem_size; i++) + if (*buf++ != 0xaa) + return 0; + } + + value -= 3 * elem_size; + } while (--count != 0); + + return 1; +} + +static void test_simd2(void) +{ + /* Test simd lane data transfer. 
*/ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + sljit_u8 supported[1]; + sljit_u8* buf; + sljit_u8 data[63 + 576]; + sljit_f64 tmp[1]; + sljit_u32 f32_result = 0; + sljit_sw result[6]; + sljit_s32 result32[5]; + sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS0 : SLJIT_FR5; + + if (verbose) + printf("Run test_simd2\n"); + + /* Buffer is 64 byte aligned. */ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 64; i++) + buf[i] = (sljit_u8)(180 + i); + + for (i = 64; i < 576; i++) + buf[i] = 0xaa; + + for (i = 0; i < 6; i++) + result[i] = 0; + + for (i = 0; i < 5; i++) + result32[i] = 0; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 4, 4, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0, 16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)tmp - 100000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)tmp + 1000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 100000 / 2); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 14, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 0, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 12, SLJIT_MEM1(SLJIT_SP), 0); + sljit_get_local_base(compiler, SLJIT_R2, 0, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 2, SLJIT_MEM1(SLJIT_R2), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 10, SLJIT_MEM0(), (sljit_sw)tmp); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, 
SLJIT_FR2, 4, SLJIT_MEM0(), (sljit_sw)tmp); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 8, SLJIT_R3, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 6, SLJIT_R3, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 6, SLJIT_MEM1(SLJIT_R0), 100000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 8, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 4, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 10, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 2, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 12, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 0, SLJIT_S2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 14, SLJIT_S2, 0); + /* buf[128] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 128); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 1, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, 15, SLJIT_IMM, 181 + 0xffff00); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 183 + 0xffff00); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, 13, SLJIT_R2, 0); + for (i = 5; i < 16; i += 2) { + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | SLJIT_32 | type, SLJIT_FR0, i, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_32 | type, SLJIT_FR5, 16 - i, SLJIT_R2, 0); + } + /* buf[144] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 144); + + type = 
SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 6, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 0, SLJIT_R2, 0); + sljit_get_local_base(compiler, SLJIT_R2, 0, 4); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 4, SLJIT_MEM1(SLJIT_R2), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 2, SLJIT_MEM1(SLJIT_SP), 4); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 2, SLJIT_MEM0(), (sljit_sw)tmp); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 4, SLJIT_MEM0(), (sljit_sw)tmp); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 0, SLJIT_S3, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 6, SLJIT_S3, 0); + /* buf[160] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 160); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 7, SLJIT_MEM1(SLJIT_R0), 100000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, 1, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 5, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, 3, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 3, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, 5, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 1, SLJIT_S2, 0); + 
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, 7, SLJIT_S2, 0); + /* buf[176] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 176); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, 2, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 0, SLJIT_R2, 0); + sljit_get_local_base(compiler, SLJIT_R2, 0, 8); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_R2), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 2, SLJIT_MEM1(SLJIT_SP), 8); + /* buf[192] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 192); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, 3, SLJIT_S3, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, 1, SLJIT_S3, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, 1, SLJIT_MEM1(SLJIT_R0), 100000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, 3, SLJIT_MEM1(SLJIT_R1), -1000); + /* buf[208] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 208); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 0, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, 0, SLJIT_R2, 0); + /* buf[224] */ 
+ sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 224); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 1, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 1, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1); + /* buf[240] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 240); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 2, SLJIT_FR1, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, 0, SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&f32_result, SLJIT_FR1, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 0, SLJIT_FR0, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, 2, SLJIT_FR0, 0); + /* buf[256] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 256); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 3, SLJIT_FR2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, 1, SLJIT_FR2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 1, SLJIT_FR4, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, 3, SLJIT_FR4, 0); + /* buf[272] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 272); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, 
SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 2, SLJIT_MEM1(SLJIT_SP), 4); + sljit_get_local_base(compiler, SLJIT_R2, 0, 4); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_R2), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_R0), 100000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 2, SLJIT_MEM1(SLJIT_R1), -1000); + /* buf[288] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 288); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 3, SLJIT_MEM1(SLJIT_R1), -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 1, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 1, SLJIT_MEM0(), (sljit_sw)tmp); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 3, SLJIT_MEM0(), (sljit_sw)tmp); + /* buf[304] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 304); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 0, SLJIT_FR4, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 0, SLJIT_FR4, 0); + /* buf[320] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 320); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 64); + 
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 1, SLJIT_FR2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, 1, SLJIT_FR2, 0); + /* buf[336] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 336); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 64); + sljit_get_local_base(compiler, SLJIT_R2, 0, 8); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_R2), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_SP), 8); + /* buf[352] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 352); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 1, SLJIT_MEM1(SLJIT_R0), 100000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 1, SLJIT_MEM1(SLJIT_R1), -1000); + /* buf[368] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 368); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)result); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 64); + type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_lane_mov(compiler, type, SLJIT_FR1, 6, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, -1); + sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_FR1, 13, SLJIT_S3, 0); + /* result[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_R0, 0); + /* result[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw), SLJIT_S3, 0); + + type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -1); 
+ sljit_emit_simd_lane_mov(compiler, type, SLJIT_FR1, 5, SLJIT_R3, 0); + sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_FR1, 7, SLJIT_R1, 0); + /* result[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 2, SLJIT_R3, 0); + /* result[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 3, SLJIT_R1, 0); + + type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_lane_mov(compiler, type, SLJIT_FR1, 2, SLJIT_S3, 0); + sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_FR1, 3, SLJIT_R0, 0); + /* result[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 4, SLJIT_S3, 0); + /* result[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 5, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)result32); + type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_32; + sljit_emit_simd_lane_mov(compiler, type, SLJIT_FR1, 0, SLJIT_R3, 0); + sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_FR1, 3, SLJIT_S2, 0); + /* result32[0] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_R3, 0); + /* result32[1] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32), SLJIT_S2, 0); + + type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_32; + sljit_emit_simd_lane_mov(compiler, type, SLJIT_FR1, 0, SLJIT_R1, 0); + sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_FR1, 3, SLJIT_S3, 0); + /* result32[2] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32) * 2, SLJIT_R1, 0); + /* result32[3] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32) * 3, SLJIT_S3, 0); + + type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_32; + 
sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_FR1, 0, SLJIT_R0, 0); + /* result32[4] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32) * 4, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)tmp - 100000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)tmp + 1000); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 64); + supported[0] = sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 30, SLJIT_MEM1(SLJIT_R1), -1000) != SLJIT_ERR_UNSUPPORTED; + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_R1), -1000); + + for (i = 2; i < 32; i += 2) { + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 30 - i, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, i, SLJIT_R2, 0); + } + /* buf[384] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 384); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, 1, SLJIT_MEM1(SLJIT_SP), 8); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 15, SLJIT_MEM1(SLJIT_SP), 8); + + for (i = 3; i < 16; i += 2) { + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, i, SLJIT_R2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, 16 - i, SLJIT_R2, 0); + } + /* buf[416] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 416); + + type 
= SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, 6, SLJIT_MEM1(SLJIT_R0), 100000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, 0, SLJIT_MEM0(), (sljit_sw)tmp); + + for (i = 2; i < 8; i += 2) { + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, 6 - i, SLJIT_S1, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, i, SLJIT_S1, 0); + } + /* buf[448] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 448); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 1, SLJIT_MEM1(SLJIT_R0), 100000); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, 3, SLJIT_MEM2(SLJIT_R1, SLJIT_S1), 0); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, 3, SLJIT_S1, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, 1, SLJIT_S1, 0); + /* buf[480] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 480); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 1, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, 
SLJIT_FR4, 7, SLJIT_MEM1(SLJIT_SP), 0); + + for (i = 3; i < 8; i += 2) { + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, i, SLJIT_FR2, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, 8 - i, SLJIT_FR2, 0); + } + /* buf[512] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 512); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 2, SLJIT_MEM0(), (sljit_sw)tmp); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_R0), 100000); + + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, 0, SLJIT_FR0, 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, 2, SLJIT_FR0, 0); + /* buf[544] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 544); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(!check_simd_lane_mov(buf + 128, 16, 1, 0), "test_simd2 case 1 failed\n"); + FAILED(!check_simd_lane_mov(buf + 144, 16, 1, 1), "test_simd2 case 2 failed\n"); + FAILED(!check_simd_lane_mov(buf + 160, 16, 2, 0), "test_simd2 case 3 failed\n"); + FAILED(!check_simd_lane_mov(buf + 176, 16, 2, 1), "test_simd2 case 4 failed\n"); + FAILED(!check_simd_lane_mov(buf + 192, 16, 4, 0), "test_simd2 case 5 failed\n"); + FAILED(!check_simd_lane_mov(buf + 208, 16, 4, 1), "test_simd2 case 6 failed\n"); +#if IS_64BIT + FAILED(!check_simd_lane_mov(buf + 224, 16, 8, 0), "test_simd2 case 7 failed\n"); + FAILED(!check_simd_lane_mov(buf + 240, 16, 8, 1), 
"test_simd2 case 8 failed\n"); +#endif /* IS_64BIT */ + FAILED(!check_simd_lane_mov(buf + 256, 16, 4, 0), "test_simd2 case 9 failed\n"); + FAILED(!check_simd_lane_mov(buf + 272, 16, 4, 1), "test_simd2 case 10 failed\n"); + FAILED(!check_simd_lane_mov(buf + 288, 16, 4, 0), "test_simd2 case 11 failed\n"); + FAILED(!check_simd_lane_mov(buf + 304, 16, 4, 1), "test_simd2 case 12 failed\n"); + FAILED(f32_result != LITTLE_BIG(0xbfbebdbc, 0xbcbdbebf), "test_simd2 case 13 failed\n"); + FAILED(!check_simd_lane_mov(buf + 320, 16, 8, 0), "test_simd2 case 14 failed\n"); + FAILED(!check_simd_lane_mov(buf + 336, 16, 8, 1), "test_simd2 case 15 failed\n"); + FAILED(!check_simd_lane_mov(buf + 352, 16, 8, 0), "test_simd2 case 16 failed\n"); + FAILED(!check_simd_lane_mov(buf + 368, 16, 8, 1), "test_simd2 case 17 failed\n"); + FAILED(result[0] != 186, "test_simd2 case 18 failed\n"); + FAILED(result[1] != -63, "test_simd2 case 19 failed\n"); + FAILED(result[2] != LITTLE_BIG(49086, 48831), "test_simd2 case 20 failed\n"); + FAILED(result[3] != LITTLE_BIG(-15422, -15677), "test_simd2 case 21 failed\n"); + FAILED(result[4] != LITTLE_BIG(WCONST(3216948668, -1078018628), WCONST(3166551743, -1128415553)), "test_simd2 case 22 failed\n"); + FAILED(result[5] != LITTLE_BIG(-1010646592, -1061043517), "test_simd2 case 23 failed\n"); + FAILED(result32[0] != 180, "test_simd2 case 24 failed\n"); + FAILED(result32[1] != -73, "test_simd2 case 25 failed\n"); + FAILED(result32[2] != LITTLE_BIG(46516, 46261), "test_simd2 case 26 failed\n"); + FAILED(result32[3] != LITTLE_BIG(-17478, -17733), "test_simd2 case 27 failed\n"); + FAILED(result32[4] != LITTLE_BIG(-1212762700, -1263159625), "test_simd2 case 28 failed\n"); + + if (supported[0]) { + FAILED(!check_simd_lane_mov(buf + 384, 32, 1, 0), "test_simd2 case 29 failed\n"); + FAILED(!check_simd_lane_mov(buf + 416, 32, 2, 1), "test_simd2 case 30 failed\n"); + FAILED(!check_simd_lane_mov(buf + 448, 32, 4, 0), "test_simd2 case 31 failed\n"); +#if IS_64BIT + 
FAILED(!check_simd_lane_mov(buf + 480, 32, 8, 1), "test_simd2 case 32 failed\n"); +#endif /* IS_64BIT */ + FAILED(!check_simd_lane_mov(buf + 512, 32, 4, 1), "test_simd2 case 33 failed\n"); + FAILED(!check_simd_lane_mov(buf + 544, 32, 8, 0), "test_simd2 case 34 failed\n"); + } + + successful_tests++; +} + +static sljit_s32 check_simd_replicate(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 value) +{ + sljit_s32 count = length / elem_size; + sljit_s32 i; + + do { + for (i = 0; i < elem_size; i++) + if (*buf++ != value++) + return 0; + + value -= elem_size; + } while (--count != 0); + + return 1; +} + +static sljit_s32 check_simd_replicate_u32(sljit_u8* buf, sljit_s32 length, sljit_u32 value) +{ + sljit_s32 count = length / 4; + sljit_u32 start_value = value; + sljit_s32 i; + + do { + for (i = 0; i < 4; i++) { + if (*buf++ != (value & 0xff)) + return 0; + value >>= 8; + } + + value = start_value; + } while (--count != 0); + + return 1; +} + +static void test_simd3(void) +{ + /* Test simd replicate scalar to all lanes. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + sljit_u8 supported[1]; + sljit_u8* buf; + sljit_u8 data[63 + 768]; + sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS0 : SLJIT_FR5; + + if (verbose) + printf("Run test_simd3\n"); + + /* Buffer is 64 byte aligned. */ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 32; i++) + buf[i] = (sljit_u8)(200 + i); + + for (i = 32; i < 768; i++) + buf[i] = 0xaa; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 4, 4, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 
2 : 0, 16); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xffff00 + 78); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR0, SLJIT_R2, 0); + /* buf[48] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 48); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR3, SLJIT_IMM, 0xffff00 + 253); + /* buf[64] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_SP), 3, SLJIT_IMM, 42); + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_MEM1(SLJIT_SP), 3); + /* buf[80] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 80); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 15); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR5, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0); + /* buf[96] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 96); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 24); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR3, SLJIT_R1, 0); + /* buf[112] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 112); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_MEM0(), (sljit_sw)(buf 
+ 10)); + /* buf[128] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 128); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 + 20); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -10000); + /* buf[144] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 144); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_S3, 0, SLJIT_MEM1(SLJIT_S0), 28); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR1, SLJIT_S3, 0); + /* buf[160] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 160); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_MEM1(SLJIT_SP), 4, SLJIT_MEM1(SLJIT_S0), 12); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR5, SLJIT_MEM1(SLJIT_SP), 4); + /* buf[176] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 176); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 - 24); + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_MEM1(SLJIT_R2), 100000); + /* buf[192] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 192); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 8); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR2, SLJIT_S1, 0); + /* buf[208] */ + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 208); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 3); + /* buf[224] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 224); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_MEM0(), (sljit_sw)buf); + /* buf[240] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 240); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 4); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR1, SLJIT_FR2, 0); + /* buf[256] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 256); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 20); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR4, SLJIT_FR4, 0); + /* buf[272] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 272); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), 4, SLJIT_MEM1(SLJIT_S0), 12); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 4); + /* buf[288] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 288); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, 
SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_FR0, 0); + /* buf[304] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 304); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR5, SLJIT_FR5, 0); + /* buf[320] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 320); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 + 8); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR1, SLJIT_MEM1(SLJIT_R2), -10000); + /* buf[336] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 336); + + /* Test constant values. */ + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_replicate(compiler, type, SLJIT_FR0, SLJIT_IMM, WCONST(0xff00123456, 0x123456)); + /* buf[352] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 352); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_replicate(compiler, type, SLJIT_FR1, SLJIT_IMM, 0xff0000); + /* buf[368] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 368); + + sljit_emit_simd_replicate(compiler, type, SLJIT_FR2, SLJIT_IMM, 0x1ffff); + /* buf[384] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 384); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_replicate(compiler, type, SLJIT_FR3, SLJIT_IMM, 0); + /* buf[400] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 400); + + /* Test ARM constant values. 
*/ + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_IMM, 0xff0034); + /* buf[416] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 416); + + sljit_emit_simd_replicate(compiler, type, SLJIT_FR5, SLJIT_IMM, 0xff45ff); + /* buf[432] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 432); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_replicate(compiler, type, SLJIT_FR0, SLJIT_IMM, 0xb3); + /* buf[448] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 448); + + sljit_emit_simd_replicate(compiler, type, SLJIT_FR1, SLJIT_IMM, (sljit_sw)0xffff46ff); + /* buf[464] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 464); + + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_IMM, 0x4c0000); + /* buf[480] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 480); + + sljit_emit_simd_replicate(compiler, type, SLJIT_FR3, SLJIT_IMM, 0x71ffffff); + /* buf[496] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 496); + + sljit_emit_simd_replicate(compiler, type, SLJIT_FR4, SLJIT_IMM, 0x9eff); + /* buf[512] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 512); + + sljit_emit_simd_replicate(compiler, type, SLJIT_FR5, SLJIT_IMM, (sljit_sw)0xff070000); + /* buf[528] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 528); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8; + supported[0] = sljit_emit_simd_replicate(compiler, type, SLJIT_FR2, SLJIT_IMM, 0xffff00 + 181) != SLJIT_ERR_UNSUPPORTED; + /* buf[544] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 544); + + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_IMM, 0xffff00); + /* 
buf[576] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 576); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16; + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 30); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR1, SLJIT_R1, 0); + /* buf[608] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 608); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_replicate(compiler, type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 4); + /* buf[640] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 640); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64; + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 4); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_S1), 2); + /* buf[672] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 672); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 20); + sljit_emit_simd_replicate(compiler, type, SLJIT_FR1, SLJIT_FR2, 0); + /* buf[704] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 704); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_replicate(compiler, type, fs0, SLJIT_MEM0(), (sljit_sw)(buf + 8)); + /* buf[736] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 736); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(!check_simd_replicate(buf + 48, 16, 1, 78), "test_simd3 case 1 failed\n"); + FAILED(!check_simd_replicate(buf + 64, 16, 1, 253), 
"test_simd3 case 2 failed\n"); + FAILED(!check_simd_replicate(buf + 80, 16, 1, 42), "test_simd3 case 3 failed\n"); + FAILED(!check_simd_replicate(buf + 96, 16, 1, 215), "test_simd3 case 4 failed\n"); + FAILED(!check_simd_replicate(buf + 112, 16, 2, 224), "test_simd3 case 5 failed\n"); + FAILED(!check_simd_replicate(buf + 128, 16, 2, 210), "test_simd3 case 6 failed\n"); + FAILED(!check_simd_replicate(buf + 144, 16, 2, 220), "test_simd3 case 7 failed\n"); + FAILED(!check_simd_replicate(buf + 160, 16, 4, 228), "test_simd3 case 8 failed\n"); + FAILED(!check_simd_replicate(buf + 176, 16, 4, 212), "test_simd3 case 9 failed\n"); + FAILED(!check_simd_replicate(buf + 192, 16, 4, 224), "test_simd3 case 10 failed\n"); +#if IS_64BIT + FAILED(!check_simd_replicate(buf + 208, 16, 8, 208), "test_simd3 case 11 failed\n"); + FAILED(!check_simd_replicate(buf + 224, 16, 8, 224), "test_simd3 case 12 failed\n"); + FAILED(!check_simd_replicate(buf + 240, 16, 8, 200), "test_simd3 case 13 failed\n"); +#endif /* IS_64BIT */ + FAILED(!check_simd_replicate(buf + 256, 16, 4, 204), "test_simd3 case 14 failed\n"); + FAILED(!check_simd_replicate(buf + 272, 16, 4, 220), "test_simd3 case 15 failed\n"); + FAILED(!check_simd_replicate(buf + 288, 16, 4, 212), "test_simd3 case 16 failed\n"); + FAILED(!check_simd_replicate(buf + 304, 16, 8, 216), "test_simd3 case 17 failed\n"); + FAILED(!check_simd_replicate(buf + 320, 16, 8, 200), "test_simd3 case 18 failed\n"); + FAILED(!check_simd_replicate(buf + 336, 16, 8, 208), "test_simd3 case 19 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 352, 16, LITTLE_BIG(0x123456, 0x56341200)), "test_simd3 case 20 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 368, 16, 0), "test_simd3 case 21 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 384, 16, 0xffffffff), "test_simd3 case 22 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 400, 16, 0), "test_simd3 case 23 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 416, 16, LITTLE_BIG(0x340034, 
0x34003400)), "test_simd3 case 24 failed\n"); + /* NOTE(review): in every LITTLE_BIG pair below the second constant is the first one with its bytes reversed; presumably the macro selects one by target endianness so that the byte-wise check matches - confirm against its definition. */ FAILED(!check_simd_replicate_u32(buf + 432, 16, LITTLE_BIG(0x45ff45ff, 0xff45ff45)), "test_simd3 case 25 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 448, 16, LITTLE_BIG(0xb3, 0xb3000000)), "test_simd3 case 26 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 464, 16, LITTLE_BIG(0xffff46ff, 0xff46ffff)), "test_simd3 case 27 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 480, 16, LITTLE_BIG(0x4c0000, 0x4c00)), "test_simd3 case 28 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 496, 16, LITTLE_BIG(0x71ffffff, 0xffffff71)), "test_simd3 case 29 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 512, 16, LITTLE_BIG(0x9eff, 0xff9e0000)), "test_simd3 case 30 failed\n"); + FAILED(!check_simd_replicate_u32(buf + 528, 16, LITTLE_BIG(0xff070000, 0x07ff)), "test_simd3 case 31 failed\n"); + + /* The 256 bit results are only checked when the first SLJIT_SIMD_REG_256 replicate call did not return SLJIT_ERR_UNSUPPORTED. */ if (supported[0]) { + FAILED(!check_simd_replicate(buf + 544, 32, 1, 181), "test_simd3 case 32 failed\n"); + FAILED(!check_simd_replicate(buf + 576, 32, 1, 0), "test_simd3 case 33 failed\n"); + FAILED(!check_simd_replicate(buf + 608, 32, 2, 230), "test_simd3 case 34 failed\n"); + FAILED(!check_simd_replicate(buf + 640, 32, 4, 204), "test_simd3 case 35 failed\n"); +#if IS_64BIT + FAILED(!check_simd_replicate(buf + 672, 32, 8, 216), "test_simd3 case 36 failed\n"); +#endif /* IS_64BIT */ + FAILED(!check_simd_replicate(buf + 704, 32, 4, 220), "test_simd3 case 37 failed\n"); + FAILED(!check_simd_replicate(buf + 736, 32, 8, 208), "test_simd3 case 38 failed\n"); + } + + successful_tests++; +} + +static void test_simd4(void) +{ + /* Test simd replicate lane to all lanes. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + /* supported[0] is assigned from the first SLJIT_SIMD_REG_256 lane replicate call further down and gates checks 27-42. */ sljit_u8 supported[1]; + sljit_u8* buf; + /* 992 bytes of test area plus 63 spare bytes so that buf can be rounded up to a 64 byte boundary. */ sljit_u8 data[63 + 992]; + /* Falls back to SLJIT_FR5 when SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS is 0. */ sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS0 : SLJIT_FR5; + + if (verbose) + printf("Run test_simd4\n"); + + /* Buffer is 64 byte aligned. 
*/ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 32; i++) + buf[i] = (sljit_u8)(100 + i); + + for (i = 32; i < 992; i++) + buf[i] = 0xaa; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 4, 4, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0, 16); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR0, 0); + /* buf[48] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 48); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR2, SLJIT_FR1, 12); + /* buf[64] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR3, SLJIT_FR5, 6); + /* buf[80] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 80); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR3, SLJIT_FR3, 9); + /* buf[96] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 96); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, fs0, 
SLJIT_FR0, 10); + /* buf[112] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 112); + /* buf[128] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 128); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR0, 0); + /* buf[144] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 144); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, SLJIT_FR1, 3); + /* buf[160] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 160); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR4, 5); + /* buf[176] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 176); + /* buf[192] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 192); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR3, SLJIT_FR3, 0); + /* buf[208] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 208); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, SLJIT_FR1, 2); + /* buf[224] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 224); + + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR5, SLJIT_FR2, 3); + /* buf[240] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 240); + /* buf[256] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 256); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, SLJIT_FR1, 0); + /* buf[272] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 272); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR0, 1); + /* buf[288] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 288); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, fs0, SLJIT_FR3, 1); + /* buf[304] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 304); + /* buf[320] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 320); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR2, SLJIT_FR2, 0); + /* buf[336] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 336); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 16); + 
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR0, 3); + /* buf[352] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 352); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR5, SLJIT_FR0, 1); + /* buf[368] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 368); + /* buf[384] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 384); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR4, SLJIT_FR4, 0); + /* buf[400] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 400); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, SLJIT_FR1, 1); + /* buf[416] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 416); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 16); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR2, 1); + /* buf[432] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 432); + /* buf[448] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 448); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + supported[0] = 
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR2, SLJIT_FR2, 0) != SLJIT_ERR_UNSUPPORTED; + /* buf[480] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 480); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR4, 13); + /* buf[512] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 512); + + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, SLJIT_FR4, 6); + /* buf[544] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 544); + + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR4, SLJIT_FR4, 28); + /* buf[576] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 576); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR2, SLJIT_FR1, 0); + /* buf[608] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 608); + + sljit_emit_simd_lane_replicate(compiler, type, fs0, SLJIT_FR1, 2); + /* buf[640] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 640); + + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, SLJIT_FR1, 13); + /* buf[672] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 672); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR5, 0); + /* buf[704] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 704); + + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR5, SLJIT_FR5, 5); + /* 
buf[736] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 736); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, fs0, SLJIT_FR0, 0); + /* buf[768] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 768); + + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR0, SLJIT_FR0, 1); + /* buf[800] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 800); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR2, SLJIT_FR1, 0); + /* buf[832] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 832); + + sljit_emit_simd_lane_replicate(compiler, type, fs0, SLJIT_FR1, 1); + /* buf[864] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 864); + + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, SLJIT_FR1, 4); + /* buf[896] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 896); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_replicate(compiler, type, SLJIT_FR1, fs0, 0); + /* buf[928] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 928); + + sljit_emit_simd_lane_replicate(compiler, type, fs0, fs0, 2); + /* buf[960] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 960); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + 
code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(!check_simd_replicate(buf + 48, 16, 1, 100), "test_simd4 case 1 failed\n"); + FAILED(!check_simd_replicate(buf + 64, 16, 1, 128), "test_simd4 case 2 failed\n"); + FAILED(!check_simd_replicate(buf + 80, 16, 1, 106), "test_simd4 case 3 failed\n"); + FAILED(!check_simd_replicate(buf + 96, 16, 1, 125), "test_simd4 case 4 failed\n"); + FAILED(!check_simd_replicate(buf + 112, 16, 1, 110), "test_simd4 case 5 failed\n"); + FAILED(!check_simd_replicate(buf + 128, 16, 16, 100), "test_simd4 case 6 failed\n"); + FAILED(!check_simd_replicate(buf + 144, 16, 2, 100), "test_simd4 case 7 failed\n"); + FAILED(!check_simd_replicate(buf + 160, 16, 2, 122), "test_simd4 case 8 failed\n"); + FAILED(!check_simd_replicate(buf + 176, 16, 2, 126), "test_simd4 case 9 failed\n"); + FAILED(!check_simd_replicate(buf + 192, 16, 16, 116), "test_simd4 case 10 failed\n"); + FAILED(!check_simd_replicate(buf + 208, 16, 4, 100), "test_simd4 case 11 failed\n"); + FAILED(!check_simd_replicate(buf + 224, 16, 4, 124), "test_simd4 case 12 failed\n"); + FAILED(!check_simd_replicate(buf + 240, 16, 4, 112), "test_simd4 case 13 failed\n"); + FAILED(!check_simd_replicate(buf + 256, 16, 16, 100), "test_simd4 case 14 failed\n"); + FAILED(!check_simd_replicate(buf + 272, 16, 8, 116), "test_simd4 case 15 failed\n"); + FAILED(!check_simd_replicate(buf + 288, 16, 8, 108), "test_simd4 case 16 failed\n"); + FAILED(!check_simd_replicate(buf + 304, 16, 8, 124), "test_simd4 case 17 failed\n"); + FAILED(!check_simd_replicate(buf + 320, 16, 16, 116), "test_simd4 case 18 failed\n"); + FAILED(!check_simd_replicate(buf + 336, 16, 4, 100), "test_simd4 case 19 failed\n"); + FAILED(!check_simd_replicate(buf + 352, 16, 4, 128), "test_simd4 case 20 failed\n"); + FAILED(!check_simd_replicate(buf + 368, 16, 4, 104), "test_simd4 case 21 failed\n"); + FAILED(!check_simd_replicate(buf + 384, 16, 16, 100), "test_simd4 case 22 failed\n"); + 
FAILED(!check_simd_replicate(buf + 400, 16, 8, 116), "test_simd4 case 23 failed\n"); + FAILED(!check_simd_replicate(buf + 416, 16, 8, 108), "test_simd4 case 24 failed\n"); + FAILED(!check_simd_replicate(buf + 432, 16, 8, 124), "test_simd4 case 25 failed\n"); + FAILED(!check_simd_replicate(buf + 448, 16, 16, 116), "test_simd4 case 26 failed\n"); + + /* The 256 bit results are only checked when the first SLJIT_SIMD_REG_256 lane replicate call did not return SLJIT_ERR_UNSUPPORTED. */ if (supported[0]) { + FAILED(!check_simd_replicate(buf + 480, 32, 1, 100), "test_simd4 case 27 failed\n"); + FAILED(!check_simd_replicate(buf + 512, 32, 1, 113), "test_simd4 case 28 failed\n"); + FAILED(!check_simd_replicate(buf + 544, 32, 1, 106), "test_simd4 case 29 failed\n"); + FAILED(!check_simd_replicate(buf + 576, 32, 1, 128), "test_simd4 case 30 failed\n"); + FAILED(!check_simd_replicate(buf + 608, 32, 2, 100), "test_simd4 case 31 failed\n"); + FAILED(!check_simd_replicate(buf + 640, 32, 2, 104), "test_simd4 case 32 failed\n"); + FAILED(!check_simd_replicate(buf + 672, 32, 2, 126), "test_simd4 case 33 failed\n"); + FAILED(!check_simd_replicate(buf + 704, 32, 4, 100), "test_simd4 case 34 failed\n"); + FAILED(!check_simd_replicate(buf + 736, 32, 4, 120), "test_simd4 case 35 failed\n"); + FAILED(!check_simd_replicate(buf + 768, 32, 8, 100), "test_simd4 case 36 failed\n"); + FAILED(!check_simd_replicate(buf + 800, 32, 8, 108), "test_simd4 case 37 failed\n"); + FAILED(!check_simd_replicate(buf + 832, 32, 4, 100), "test_simd4 case 38 failed\n"); + FAILED(!check_simd_replicate(buf + 864, 32, 4, 104), "test_simd4 case 39 failed\n"); + FAILED(!check_simd_replicate(buf + 896, 32, 4, 116), "test_simd4 case 40 failed\n"); + FAILED(!check_simd_replicate(buf + 928, 32, 8, 100), "test_simd4 case 41 failed\n"); + FAILED(!check_simd_replicate(buf + 960, 32, 8, 116), "test_simd4 case 42 failed\n"); + } + + successful_tests++; +} + +/* Checks a single element surrounded by zeroes: returns 1 when the first start bytes of buf are 0, the next elem_size bytes are value, value + 1, ..., value + elem_size - 1, and all remaining bytes up to length are 0; returns 0 at the first mismatching byte. start is a byte offset into buf. */ static sljit_s32 check_simd_lane_mov_zero(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 start, sljit_s32 value) +{ + sljit_s32 i; + + /* Bytes in front of the element must be zero. */ for (i = 0; i < start; i++) + if (*buf++ != 0) + 
return 0; + + /* The element itself: consecutive byte values starting at value. */ for (i = 0; i < elem_size; i++) + if (*buf++ != value++) + return 0; + + /* Everything after the element, up to length, must be zero as well. */ for (i = start + elem_size; i < length; i++) + if (*buf++ != 0) + return 0; + + return 1; +} + +static void test_simd5(void) +{ + /* Test simd zero register before move to lane. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + sljit_u8 supported[1]; + sljit_u8* buf; + sljit_u8 data[63 + 672]; + sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS0 : SLJIT_FR5; + + if (verbose) + printf("Run test_simd5\n"); + + /* Buffer is 64 byte aligned. */ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 64; i++) + buf[i] = (sljit_u8)(100 + i); + + for (i = 64; i < 672; i++) + buf[i] = 0xaa; + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 4, 4, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0, 16); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 100000); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 10000); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xffff00 + 85); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR0, 0, SLJIT_R2, 0); + /* buf[64] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 64); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, fs0, 0, SLJIT_IMM, 0xffff00 + 18); + /* buf[80] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 80); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, 
SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_SP), 10, SLJIT_IMM, 170); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_FR5, 5, SLJIT_MEM1(SLJIT_SP), 10); + /* buf[96] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 96); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, LITTLE_BIG(0x789a6d6c, 0x789a6c6d)); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR1, 0, SLJIT_S2, 0); + /* buf[112] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 112); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_FR4, 0, SLJIT_IMM, LITTLE_BIG(0xff8382, 0xff8283)); + /* buf[128] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 128); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, fs0, 3, SLJIT_MEM1(SLJIT_R0), 100004); + /* buf[144] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 144); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_S0), 4); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_FR2, 0, SLJIT_R2, 0); + /* buf[160] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 160); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR5, 0, 
SLJIT_IMM, LITTLE_BIG(0x29282726, 0x26272829)); + /* buf[176] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 176); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 3); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 2); + /* buf[192] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 192); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR3, 3, SLJIT_MEM1(SLJIT_R1), -10000 + 8); + /* buf[208] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 208); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, fs0, 0, SLJIT_S2, 0); + /* buf[224] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 224); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR4, 0, + SLJIT_IMM, LITTLE_BIG(WCONST(0xe3e2e1e0dfdedddc, 0), WCONST(0xdcdddedfe0e1e2e3, 0))); + /* buf[240] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 240); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 8); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR3, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 0); + /* buf[256] */ + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 256); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR1, 1, SLJIT_MEM1(SLJIT_R0), 100000); + /* buf[272] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 272); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 12); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR0, 0, SLJIT_FR0, 0); + /* buf[288] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 288); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 4); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR2, 0, SLJIT_FR5, 0); + /* buf[304] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 304); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 1); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 3); + /* buf[320] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 320); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR4, 1, SLJIT_FR4, 0); + /* buf[336] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 
336); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 8); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR2, 0, SLJIT_FR2, 0); + /* buf[352] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 352); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR3, 0, SLJIT_FR4, 0); + /* buf[368] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 368); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR1, 0, SLJIT_MEM0(), (sljit_sw)(buf + 8)); + /* buf[384] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 384); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR2, 1, SLJIT_FR2, 0); + /* buf[400] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 400); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 215); + supported[0] = sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR0, 0, SLJIT_R2, 0) != SLJIT_ERR_UNSUPPORTED; + /* buf[416] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, 
SLJIT_MEM1(SLJIT_S0), 416); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, fs0, 17, SLJIT_IMM, 78); + /* buf[448] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 448); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 0xff3433); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR1, 4, SLJIT_S1, 0); + /* buf[480] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 480); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR5, 5, SLJIT_MEM1(SLJIT_S0), 60); + /* buf[512] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 512); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR4, 3, SLJIT_MEM0(), (sljit_sw)buf + 32); + /* buf[544] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 544); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 48); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR2, 3, SLJIT_FR2, 0); + /* buf[576] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 576); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S0), 8); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, 
SLJIT_FR3, 6, SLJIT_FR3, 0); + /* buf[608] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 608); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), 8, SLJIT_MEM1(SLJIT_S0), 40); + sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_FR0, 3, SLJIT_MEM1(SLJIT_SP), 8); + /* buf[640] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 640); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(!check_simd_lane_mov_zero(buf + 64, 16, 1, 0, 85), "test_simd5 case 1 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 80, 16, 1, 0, 18), "test_simd5 case 2 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 96, 16, 1, 5, 170), "test_simd5 case 3 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 112, 16, 2, 0, 108), "test_simd5 case 4 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 128, 16, 2, 0, 130), "test_simd5 case 5 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 144, 16, 2, 6, 104), "test_simd5 case 6 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 160, 16, 4, 0, 104), "test_simd5 case 7 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 176, 16, 4, 0, 38), "test_simd5 case 8 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 192, 16, 4, 0, 112), "test_simd5 case 9 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 208, 16, 4, 12, 108), "test_simd5 case 10 failed\n"); +#if IS_64BIT + FAILED(!check_simd_lane_mov_zero(buf + 224, 16, 8, 0, 100), "test_simd5 case 11 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 240, 16, 8, 0, 220), "test_simd5 case 12 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 256, 
16, 8, 0, 108), "test_simd5 case 13 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 272, 16, 8, 8, 100), "test_simd5 case 14 failed\n"); +#endif /* IS_64BIT */ + FAILED(!check_simd_lane_mov_zero(buf + 288, 16, 4, 0, 112), "test_simd5 case 15 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 304, 16, 4, 0, 104), "test_simd5 case 16 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 320, 16, 4, 0, 108), "test_simd5 case 17 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 336, 16, 4, 4, 100), "test_simd5 case 18 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 352, 16, 8, 0, 108), "test_simd5 case 19 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 368, 16, 8, 0, 100), "test_simd5 case 20 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 384, 16, 8, 0, 108), "test_simd5 case 21 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 400, 16, 8, 8, 100), "test_simd5 case 22 failed\n"); + + if (supported[0]) { + FAILED(!check_simd_lane_mov_zero(buf + 416, 32, 1, 0, 215), "test_simd5 case 23 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 448, 32, 1, 17, 78), "test_simd5 case 24 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 480, 32, 2, 8, 51), "test_simd5 case 25 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 512, 32, 4, 20, 160), "test_simd5 case 26 failed\n"); +#if IS_64BIT + FAILED(!check_simd_lane_mov_zero(buf + 544, 32, 8, 24, 132), "test_simd5 case 27 failed\n"); +#endif /* IS_64BIT */ + FAILED(!check_simd_lane_mov_zero(buf + 576, 32, 4, 12, 148), "test_simd5 case 28 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 608, 32, 4, 24, 108), "test_simd5 case 29 failed\n"); + FAILED(!check_simd_lane_mov_zero(buf + 640, 32, 8, 24, 140), "test_simd5 case 30 failed\n"); + } + + successful_tests++; +} + +static void init_simd_extend(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 is_float, sljit_s32 data) +{ + sljit_u8* end = buf + length; + + do { + if (elem_size == 1) + *buf = (sljit_u8)data; + else if (elem_size 
== 2) + *(sljit_u16*)buf = (sljit_u16)data; + else if (!is_float) + *(sljit_u32*)buf = (sljit_u32)data; + else + *(sljit_f32*)buf = (sljit_f32)data; + + buf += elem_size; + data++; + } while (buf < end); +} + +static sljit_s32 check_simd_extend_unsigned(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_u32 mask) +{ + sljit_s32 data; + sljit_u8* end = buf + length; + + if (elem_size == 2) + data = -(length >> 2); + else if (elem_size == 4) + data = -(length >> 3); + else + data = -(length >> 4); + + do { + if (elem_size == 2) { + if (*(sljit_u16*)buf != ((sljit_u16)data & mask)) + return 0; + } else if (elem_size == 4) { + if (*(sljit_u32*)buf != ((sljit_u32)data & mask)) + return 0; + } else { +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + if (*(sljit_u32*)buf != ((sljit_u32)data & mask) || *(sljit_u32*)(buf + 4) != 0) + return 0; +#else /* !SLJIT_LITTLE_ENDIAN */ + if (*(sljit_u32*)(buf + 4) != ((sljit_u32)data & mask) || *(sljit_u32*)buf != 0) + return 0; +#endif /* SLJIT_LITTLE_ENDIAN */ + } + + buf += elem_size; + data++; + } while (buf < end); + + return 1; +} + +static sljit_s32 check_simd_extend_signed(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 is_float) +{ + sljit_s32 data; + sljit_u8* end = buf + length; + + if (elem_size == 2) + data = -(length >> 2); + else if (elem_size == 4) + data = -(length >> 3); + else if (!is_float) + data = -(length >> 4); + else + data = 1000; + + do { + if (elem_size == 2) { + if (*(sljit_s16*)buf != data) + return 0; + } else if (elem_size == 4) { + if (*(sljit_s32*)buf != data) + return 0; + } else if (!is_float) { +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + if (*(sljit_s32*)buf != data) + return 0; + if (*(sljit_s32*)(buf + 4) != (data >> 31)) + return 0; +#else /* !SLJIT_LITTLE_ENDIAN */ + if (*(sljit_s32*)(buf + 4) != data) + return 0; + if (*(sljit_s32*)buf != (data >> 31)) + return 0; +#endif /* SLJIT_LITTLE_ENDIAN */ + } else { + if (*(sljit_f64*)buf != 
(sljit_f64)data) + return 0; + } + + buf += elem_size; + data++; + } while (buf < end); + + return 1; +} + +static void test_simd6(void) +{ + /* Test simd extension operation. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + sljit_u8 supported[1]; + sljit_u8* buf; + sljit_u8 data[63 + 1088]; + sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS0 : SLJIT_FR5; + + if (verbose) + printf("Run test_simd6\n"); + + /* Buffer is 64 byte aligned. */ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 1088; i++) + buf[i] = 0xaa; + + init_simd_extend(buf + 0, 16, 1, 0, -8); + init_simd_extend(buf + 32, 16, 2, 0, -4); + init_simd_extend(buf + 64, 16, 4, 0, -2); + init_simd_extend(buf + 96, 16, 4, 1, 1000); + init_simd_extend(buf + 128, 8, 1, 0, -4); + init_simd_extend(buf + 160, 8, 2, 0, -2); + init_simd_extend(buf + 192, 8, 4, 0, -1); + init_simd_extend(buf + 224, 8, 4, 1, 1000); + init_simd_extend(buf + 256, 4, 1, 0, -2); + init_simd_extend(buf + 288, 4, 2, 0, -1); + init_simd_extend(buf + 320, 2, 1, 0, -1); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 4, 4, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 
2 : 0, 32); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 128); + sljit_emit_simd_extend(compiler, type, SLJIT_FR2, SLJIT_FR0, 0); + /* buf[352] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 352); + + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR1, SLJIT_FR0, 0); + /* buf[368] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 368); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 128); + sljit_emit_simd_extend(compiler, type, SLJIT_FR0, SLJIT_MEM1(SLJIT_R1), 0); + /* buf[384] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 384); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 128); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, fs0, SLJIT_MEM1(SLJIT_SP), 0); + /* buf[400] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 400); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 160); + sljit_emit_simd_extend(compiler, type, SLJIT_FR4, SLJIT_FR4, 0); + /* buf[416] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 416); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 160); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR0, SLJIT_FR4, 0); + /* buf[432] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 432); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 40); + sljit_emit_simd_extend(compiler, 
type, SLJIT_FR1, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 2); + /* buf[448] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 448); + + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, fs0, SLJIT_MEM0(), (sljit_sw)(buf + 160)); + /* buf[464] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 464); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_EXTEND_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 192); + sljit_emit_simd_extend(compiler, type, SLJIT_FR0, SLJIT_FR2, 0); + /* buf[480] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 480); + + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR3, SLJIT_FR2, 0); + /* buf[496] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 496); + + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 - 192); + sljit_emit_simd_extend(compiler, type, SLJIT_FR2, SLJIT_MEM1(SLJIT_R0), 10000); + /* buf[512] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 512); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 + 192); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, fs0, SLJIT_MEM1(SLJIT_R0), -100000); + /* buf[528] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 528); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_EXTEND_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 224); + sljit_emit_simd_extend(compiler, type, fs0, fs0, 0); + /* buf[544] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 544); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 224); + 
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_simd_extend(compiler, type, SLJIT_FR3, SLJIT_MEM1(SLJIT_SP), 0); + /* buf[560] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 560); + + sljit_emit_simd_extend(compiler, type, SLJIT_FR5, SLJIT_FR1, 0); + /* buf[576] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR5, SLJIT_MEM1(SLJIT_S0), 576); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 256); + sljit_emit_simd_extend(compiler, type, SLJIT_FR0, SLJIT_FR2, 0); + /* buf[592] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 592); + + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR2, SLJIT_FR2, 0); + /* buf[608] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 608); + + sljit_emit_simd_extend(compiler, type, fs0, SLJIT_MEM1(SLJIT_S0), 256); + /* buf[624] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 624); + + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR4, SLJIT_MEM0(), (sljit_sw)(buf + 256)); + /* buf[640] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 640); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 320); + sljit_emit_simd_extend(compiler, type, SLJIT_FR0, fs0, 0); + /* buf[656] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 656); + + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR0, fs0, 0); + /* buf[672] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 672); + + 
sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), 320); + sljit_emit_simd_extend(compiler, type, SLJIT_FR3, SLJIT_MEM1(SLJIT_SP), 0); + /* buf[688] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 688); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 320); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR3, SLJIT_MEM2(SLJIT_S0, SLJIT_S2), 0); + /* buf[704] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 704); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 288); + sljit_emit_simd_extend(compiler, type, SLJIT_FR2, SLJIT_FR0, 0); + /* buf[720] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 720); + + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR0, SLJIT_FR0, 0); + /* buf[736] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 736); + + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 - 288); + sljit_emit_simd_extend(compiler, type, fs0, SLJIT_MEM1(SLJIT_R2), 100000); + /* buf[752] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 752); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 + 288); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR1, SLJIT_MEM1(SLJIT_S2), -10000); + /* buf[768] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 768); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 256); + sljit_emit_simd_extend(compiler, type, fs0, SLJIT_FR1, 0); + /* buf[784] */ + 
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 784); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 256); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR2, SLJIT_MEM1(SLJIT_R2), 0); + /* buf[792] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 792); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_32; + sljit_emit_simd_extend(compiler, type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 320); + /* buf[800] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 800); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 320); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR2, fs0, 0); + /* buf[808] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 808); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 288); + sljit_emit_simd_extend(compiler, type, SLJIT_FR2, SLJIT_FR1, 0); + /* buf[816] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 816); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 288); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, fs0, SLJIT_MEM2(SLJIT_S1, SLJIT_S0), 0); + /* buf[824] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 824); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + supported[0] = sljit_emit_simd_extend(compiler, type, SLJIT_FR4, SLJIT_FR1, 0) != SLJIT_ERR_UNSUPPORTED; + /* buf[832] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 832); + + type = SLJIT_SIMD_REG_256 
| SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_32; + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 128); + /* buf[864] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 864); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_64; + sljit_emit_simd_extend(compiler, type, fs0, SLJIT_MEM0(), (sljit_sw)(buf + 256)); + /* buf[896] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 896); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_32; + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 16); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 1); + /* buf[928] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 928); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_64; + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 + 160); + sljit_emit_simd_extend(compiler, type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S1), -100000); + /* buf[960] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 960); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_EXTEND_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_FR0, fs0, 0); + /* buf[992] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 992); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_EXTEND_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 96); + sljit_emit_simd_extend(compiler, type, SLJIT_FR2, SLJIT_FR2, 0); + /* buf[1024] */ + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 1024); + + sljit_emit_simd_extend(compiler, type, SLJIT_FR4, SLJIT_MEM0(), (sljit_sw)(buf + 96)); + /* buf[1056] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 1056); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(!check_simd_extend_unsigned(buf + 352, 16, 2, 0xff), "test_simd6 case 1 failed\n"); + FAILED(!check_simd_extend_signed(buf + 368, 16, 2, 0), "test_simd6 case 2 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 384, 16, 2, 0xff), "test_simd6 case 3 failed\n"); + FAILED(!check_simd_extend_signed(buf + 400, 16, 2, 0), "test_simd6 case 4 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 416, 16, 4, 0xffff), "test_simd6 case 5 failed\n"); + FAILED(!check_simd_extend_signed(buf + 432, 16, 4, 0), "test_simd6 case 6 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 448, 16, 4, 0xffff), "test_simd6 case 7 failed\n"); + FAILED(!check_simd_extend_signed(buf + 464, 16, 4, 0), "test_simd6 case 8 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 480, 16, 8, 0xffffffff), "test_simd6 case 9 failed\n"); + FAILED(!check_simd_extend_signed(buf + 496, 16, 8, 0), "test_simd6 case 10 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 512, 16, 8, 0xffffffff), "test_simd6 case 11 failed\n"); + FAILED(!check_simd_extend_signed(buf + 528, 16, 8, 0), "test_simd6 case 12 failed\n"); + FAILED(!check_simd_extend_signed(buf + 544, 16, 8, 1), "test_simd6 case 13 failed\n"); + FAILED(!check_simd_extend_signed(buf + 560, 16, 8, 1), "test_simd6 case 14 failed\n"); + FAILED(!check_simd_extend_signed(buf + 576, 16, 8, 1), "test_simd6 case 15 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 592, 16, 4, 0xff), "test_simd6 case 16 failed\n"); + FAILED(!check_simd_extend_signed(buf + 608, 16, 4, 0), 
"test_simd6 case 17 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 624, 16, 4, 0xff), "test_simd6 case 18 failed\n"); + FAILED(!check_simd_extend_signed(buf + 640, 16, 4, 0), "test_simd6 case 19 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 656, 16, 8, 0xff), "test_simd6 case 20 failed\n"); + FAILED(!check_simd_extend_signed(buf + 672, 16, 8, 0), "test_simd6 case 21 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 688, 16, 8, 0xff), "test_simd6 case 22 failed\n"); + FAILED(!check_simd_extend_signed(buf + 704, 16, 8, 0), "test_simd6 case 23 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 720, 16, 8, 0xffff), "test_simd6 case 24 failed\n"); + FAILED(!check_simd_extend_signed(buf + 736, 16, 8, 0), "test_simd6 case 25 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 752, 16, 8, 0xffff), "test_simd6 case 26 failed\n"); + FAILED(!check_simd_extend_signed(buf + 768, 16, 8, 0), "test_simd6 case 27 failed\n"); + +#if IS_ARM + FAILED(!check_simd_extend_unsigned(buf + 784, 8, 2, 0xff), "test_simd6 case 28 failed\n"); + FAILED(!check_simd_extend_signed(buf + 792, 8, 2, 0), "test_simd6 case 29 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 800, 8, 4, 0xff), "test_simd6 case 30 failed\n"); + FAILED(!check_simd_extend_signed(buf + 808, 8, 4, 0), "test_simd6 case 31 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 816, 8, 4, 0xffff), "test_simd6 case 32 failed\n"); + FAILED(!check_simd_extend_signed(buf + 824, 8, 4, 0), "test_simd6 case 33 failed\n"); +#endif /* IS_ARM */ + + if (supported[0]) { + FAILED(!check_simd_extend_unsigned(buf + 832, 32, 2, 0xff), "test_simd6 case 34 failed\n"); + FAILED(!check_simd_extend_signed(buf + 864, 32, 4, 0), "test_simd6 case 35 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 896, 32, 8, 0xff), "test_simd6 case 36 failed\n"); + FAILED(!check_simd_extend_signed(buf + 928, 32, 4, 0), "test_simd6 case 37 failed\n"); + FAILED(!check_simd_extend_unsigned(buf + 960, 32, 8, 0xffff), "test_simd6 
case 38 failed\n"); + FAILED(!check_simd_extend_signed(buf + 992, 32, 8, 0), "test_simd6 case 39 failed\n"); + FAILED(!check_simd_extend_signed(buf + 1024, 32, 8, 1), "test_simd6 case 40 failed\n"); + FAILED(!check_simd_extend_signed(buf + 1056, 32, 8, 1), "test_simd6 case 41 failed\n"); + } + + successful_tests++; +} + +static void init_simd_sign(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_u32 data) +{ +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + sljit_u8* end = buf + length; + + do { + if (elem_size == 1) + *buf = (sljit_u8)(((data & 0x1) << 7) + 0x7f); + else if (elem_size == 2) + *(sljit_u16*)buf = (sljit_u16)(((data & 0x1) << 15) + 0x7fff); + else if (elem_size == 4) + *(sljit_u32*)buf = (sljit_u32)(((data & 0x1) << 31) + 0x7fffffff); + else { + *(sljit_u32*)buf = 0xffffffff; + *(sljit_u32*)(buf + 4) = (sljit_u32)(((data & 0x1) << 31) + 0x7fffffff); + } + + data >>= 1; + buf += elem_size; + } while (buf < end); +#else /* !SLJIT_LITTLE_ENDIAN */ + sljit_u8* current = buf + length - elem_size; + + do { + if (elem_size == 1) + *current = (sljit_u8)(((data & 0x1) << 7) + 0x7f); + else if (elem_size == 2) + *(sljit_u16*)current = (sljit_u16)(((data & 0x1) << 15) + 0x7fff); + else if (elem_size == 4) + *(sljit_u32*)current = (sljit_u32)(((data & 0x1) << 31) + 0x7fffffff); + else { + *(sljit_u32*)(current + 4) = 0xffffffff; + *(sljit_u32*)current = (sljit_u32)(((data & 0x1) << 31) + 0x7fffffff); + } + + data >>= 1; + current -= elem_size; + } while (current >= buf); +#endif /* SLJIT_LITTLE_ENDIAN */ +} + +static void test_simd7(void) +{ + /* Test simd sign extraction operation. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + sljit_u8 supported[1]; + sljit_u8* buf; + sljit_u8 data[63 + 288]; + sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS0 : SLJIT_FR5; + sljit_uw resw[9]; + sljit_u32 res32[7]; + + if (verbose) + printf("Run test_simd7\n"); + + /* Buffer is 64 byte aligned. 
*/ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 9; i++) + resw[i] = (sljit_uw)-1; + for (i = 0; i < 7; i++) + res32[i] = (sljit_u32)-1; + + init_simd_sign(buf + 0, 16, 1, 0x8fa3); + init_simd_sign(buf + 16, 16, 1, 0x34d5); + init_simd_sign(buf + 32, 16, 2, 0xa6); + init_simd_sign(buf + 48, 16, 2, 0x5e); + init_simd_sign(buf + 64, 16, 4, 0xd); + init_simd_sign(buf + 80, 16, 4, 0x5); + init_simd_sign(buf + 96, 16, 8, 0x2); + init_simd_sign(buf + 112, 16, 8, 0x1); + + init_simd_sign(buf + 128, 8, 1, 0x45); + init_simd_sign(buf + 136, 8, 2, 0x9); + init_simd_sign(buf + 144, 8, 4, 0x1); + + init_simd_sign(buf + 160, 32, 1, 0x51e83b71); + init_simd_sign(buf + 192, 32, 2, 0xc90d); + init_simd_sign(buf + 224, 32, 4, 0xa5); + init_simd_sign(buf + 256, 32, 8, 0x9); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 4, 4, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 
2 : 0, 16); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_R0, 0); + /* resw[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R0, 0); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 16); + /* resw[1] */ + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_uw)); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_R2, 0); + /* resw[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_uw), SLJIT_R2, 0); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 48); + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_FR4, SLJIT_MEM1(SLJIT_SP), 4); + /* res32[0] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_MEM1(SLJIT_SP), 4); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 64); + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, fs0, SLJIT_R1, 0); + /* res32[1] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_u32), SLJIT_R1, 0); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 80); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4); + /* res32[2] */ + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_FR1, SLJIT_MEM2(SLJIT_S2, SLJIT_R1), 1); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 96); + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_S3, 0); + /* resw[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_uw), SLJIT_S3, 0); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 112); + /* resw[4] */ + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM0(), (sljit_sw)(resw + 4)); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 128); + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_R0, 0); + /* resw[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_uw), SLJIT_R0, 0); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 136); + /* res32[4] */ + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_FR0, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_u32)); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 144); + /* resw[6] */ + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_uw)); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8; + supported[0] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 160) != SLJIT_ERR_UNSUPPORTED; + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_FR2, SLJIT_R2, 0); + /* res32[5] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S2), 5 * sizeof(sljit_u32), SLJIT_R2, 0); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 192); + /* resw[7] */ + sljit_emit_simd_sign(compiler, 
SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_uw)); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32; + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_uw)); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 224); + /* resw[8] */ + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 3); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 256); + /* res32[6] */ + sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_FR0, SLJIT_MEM0(), (sljit_sw)(res32 + 6)); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)buf, (sljit_sw)resw, (sljit_sw)res32); + sljit_free_code(code.code, NULL); + + FAILED(resw[0] != 0x8fa3, "test_simd7 case 1 failed\n"); + FAILED(resw[1] != 0x34d5, "test_simd7 case 2 failed\n"); + FAILED(resw[2] != 0xa6, "test_simd7 case 3 failed\n"); + FAILED(res32[0] != 0x5e, "test_simd7 case 4 failed\n"); + FAILED(res32[1] != 0xd, "test_simd7 case 5 failed\n"); + FAILED(res32[2] != 0x5, "test_simd7 case 6 failed\n"); + FAILED(res32[3] != (sljit_u32)-1, "test_simd7 case 7 failed\n"); + FAILED(resw[3] != 0x2, "test_simd7 case 8 failed\n"); + FAILED(resw[4] != 0x1, "test_simd7 case 9 failed\n"); +#if IS_ARM + FAILED(resw[5] != 0x45, "test_simd7 case 10 failed\n"); + FAILED(res32[4] != 0x9, "test_simd7 case 11 failed\n"); + FAILED(resw[6] != 0x1, "test_simd7 case 12 failed\n"); +#endif /* IS_ARM */ + + if (supported[0]) { + FAILED(res32[5] != 0x51e83b71, "test_simd7 case 13 failed\n"); + FAILED(resw[7] != 0xc90d, "test_simd7 case 14 failed\n"); + FAILED(resw[8] != 0xa5, "test_simd7 case 15 failed\n"); + FAILED(res32[6] != 0x9, 
"test_simd7 case 16 failed\n"); + } + + successful_tests++; +} + +static void init_simd_u32(sljit_u8* buf, sljit_s32 length, sljit_u32 data) +{ + sljit_u32* current = (sljit_u32*)buf; + sljit_u32* end = (sljit_u32*)(buf + length); + + while (current < end) + *current++ = data; +} + +static sljit_s32 check_simd_u32(sljit_u8* buf, sljit_s32 length, sljit_u32 data) +{ + sljit_u32* current = (sljit_u32*)buf; + sljit_u32* end = (sljit_u32*)(buf + length); + + while (current < end) { + if (*current++ != data) + return 0; + } + + return 1; +} + +static void test_simd8(void) +{ + /* Test simd binary logical operation. */ + executable_code code; + struct sljit_compiler* compiler; + sljit_s32 i, type; + sljit_u8 supported[1]; + sljit_u8* buf; + sljit_u8 data[63 + 1024]; + sljit_s32 fs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_FS1 : SLJIT_FR5; + + if (verbose) + printf("Run test_simd8\n"); + + /* Buffer is 64 byte aligned. */ + buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63); + + for (i = 0; i < 1024; i++) + buf[i] = 0xaa; + + init_simd_u32(buf, 32, 0x00ff00ff); + init_simd_u32(buf + 32, 32, 0x0000ffff); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARGS3V(P, P, P), 4, 4, 6, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 
2 : 0, 16); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR2); + /* buf[64] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 64); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, SLJIT_FR2, SLJIT_FR0, SLJIT_FR2); + /* buf[80] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 80); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, SLJIT_FR4, fs0, SLJIT_FR2); + /* buf[96] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 96); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_FR1, SLJIT_FR2, SLJIT_FR0); + /* buf[112] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 112); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, fs0, SLJIT_FR0, fs0); + /* buf[128] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 128); + + type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, SLJIT_FR2, SLJIT_FR4, SLJIT_FR0); + /* buf[144] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 144); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_32; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_FR4, SLJIT_FR0, SLJIT_FR4); + /* buf[160] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR4, SLJIT_MEM1(SLJIT_S0), 160); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, fs0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, SLJIT_FR0, SLJIT_FR2, fs0); + /* buf[168] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 168); + + type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_64; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, fs0, SLJIT_FR0, SLJIT_FR2); + /* buf[176] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, 
fs0, SLJIT_MEM1(SLJIT_S0), 176); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8; + supported[0] = sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type | SLJIT_SIMD_TEST, SLJIT_FR0, SLJIT_FR0, SLJIT_FR2) != SLJIT_ERR_UNSUPPORTED; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR2); + /* buf[192] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 192); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_256; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR2, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, fs0, SLJIT_FR0, SLJIT_FR2); + /* buf[224] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, fs0, SLJIT_MEM1(SLJIT_S0), 224); + + type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT; + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR1, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 32); + sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, SLJIT_FR3, SLJIT_FR1, SLJIT_FR3); + /* buf[256] */ + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_FR3, SLJIT_MEM1(SLJIT_S0), 256); + + sljit_emit_return_void(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)buf); + sljit_free_code(code.code, NULL); + + FAILED(!check_simd_u32(buf + 64, 16, 0x000000ff), "test_simd8 case 1 failed\n"); + FAILED(!check_simd_u32(buf + 80, 16, 0x00ffffff), "test_simd8 case 2 failed\n"); + FAILED(!check_simd_u32(buf + 96, 16, 0x00ffff00), "test_simd8 case 3 failed\n"); + 
FAILED(!check_simd_u32(buf + 112, 16, 0x000000ff), "test_simd8 case 4 failed\n"); + FAILED(!check_simd_u32(buf + 128, 16, 0x00ffffff), "test_simd8 case 5 failed\n"); + FAILED(!check_simd_u32(buf + 144, 16, 0x00ffff00), "test_simd8 case 6 failed\n"); + +#if IS_ARM + FAILED(!check_simd_u32(buf + 160, 8, 0x000000ff), "test_simd8 case 7 failed\n"); + FAILED(!check_simd_u32(buf + 168, 8, 0x00ffffff), "test_simd8 case 8 failed\n"); + FAILED(!check_simd_u32(buf + 176, 8, 0x00ffff00), "test_simd8 case 9 failed\n"); +#endif /* IS_ARM */ + + if (supported[0]) { + FAILED(!check_simd_u32(buf + 192, 32, 0x000000ff), "test_simd8 case 10 failed\n"); + FAILED(!check_simd_u32(buf + 224, 32, 0x00ffffff), "test_simd8 case 11 failed\n"); + FAILED(!check_simd_u32(buf + 256, 32, 0x00ffff00), "test_simd8 case 12 failed\n"); + } + + successful_tests++; +} diff --git a/waterbox/ares64/ares/thirdparty/sljitAllocator.cpp b/waterbox/ares64/ares/thirdparty/sljitAllocator.cpp old mode 100644 new mode 100755 index 3e5b7e338c..0d8146d6f9 --- a/waterbox/ares64/ares/thirdparty/sljitAllocator.cpp +++ b/waterbox/ares64/ares/thirdparty/sljitAllocator.cpp @@ -5,5 +5,5 @@ auto sljit_nall_malloc_exec(sljit_uw size, void* exec_allocator_data) -> void* { auto allocator = (nall::bump_allocator*)exec_allocator_data; - return allocator->acquire(size); + return allocator->tryAcquire(size, false); } diff --git a/waterbox/ares64/ares/thirdparty/sljitConfigPost.h b/waterbox/ares64/ares/thirdparty/sljitConfigPost.h old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/sljitConfigPre.h b/waterbox/ares64/ares/thirdparty/sljitConfigPre.h old mode 100644 new mode 100755 diff --git a/waterbox/ares64/ares/thirdparty/xxhash.h b/waterbox/ares64/ares/thirdparty/xxhash.h old mode 100644 new mode 100755