diff --git a/Assets/dll/ares64.wbx.gz b/Assets/dll/ares64.wbx.gz new file mode 100644 index 0000000000..30e9e36f93 Binary files /dev/null and b/Assets/dll/ares64.wbx.gz differ diff --git a/src/BizHawk.Client.Common/config/Config.cs b/src/BizHawk.Client.Common/config/Config.cs index 983611bbb6..69bfc2ec37 100644 --- a/src/BizHawk.Client.Common/config/Config.cs +++ b/src/BizHawk.Client.Common/config/Config.cs @@ -26,6 +26,8 @@ namespace BizHawk.Client.Common new[] { CoreNames.QuickNes, CoreNames.NesHawk, CoreNames.SubNesHawk }), (new[] { VSystemID.Raw.SNES }, new[] { CoreNames.Faust, CoreNames.Snes9X, CoreNames.Bsnes, CoreNames.Bsnes115 }), + (new[] { VSystemID.Raw.N64 }, + new[] { CoreNames.Mupen64Plus, CoreNames.Ares64, }), (new[] { VSystemID.Raw.SGB }, new[] { CoreNames.Gambatte, CoreNames.Bsnes, CoreNames.Bsnes115}), (new[] { VSystemID.Raw.GB, VSystemID.Raw.GBC }, @@ -316,6 +318,7 @@ namespace BizHawk.Client.Common { [VSystemID.Raw.NES] = CoreNames.QuickNes, [VSystemID.Raw.SNES] = CoreNames.Snes9X, + [VSystemID.Raw.N64] = CoreNames.Mupen64Plus, [VSystemID.Raw.GB] = CoreNames.Gambatte, [VSystemID.Raw.GBC] = CoreNames.Gambatte, [VSystemID.Raw.GBL] = CoreNames.GambatteLink, diff --git a/src/BizHawk.Client.EmuHawk/MainForm.cs b/src/BizHawk.Client.EmuHawk/MainForm.cs index 31d6da6da6..b8371a288b 100644 --- a/src/BizHawk.Client.EmuHawk/MainForm.cs +++ b/src/BizHawk.Client.EmuHawk/MainForm.cs @@ -1980,7 +1980,7 @@ namespace BizHawk.Client.EmuHawk case VSystemID.Raw.INTV: IntvSubMenu.Visible = true; break; - case VSystemID.Raw.N64: + case VSystemID.Raw.N64 when Emulator is N64: N64SubMenu.Visible = true; break; case VSystemID.Raw.NES: diff --git a/src/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj b/src/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj index bcd273f1b1..e0e273ba5e 100644 --- a/src/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj +++ b/src/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj @@ -49,6 +49,7 @@ + diff --git 
a/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/Ares64.ISettable.cs b/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/Ares64.ISettable.cs new file mode 100644 index 0000000000..647283a62c --- /dev/null +++ b/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/Ares64.ISettable.cs @@ -0,0 +1,54 @@ +using System.ComponentModel; + +using BizHawk.Common; +using BizHawk.Emulation.Common; + +namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64 +{ + public partial class Ares64 : ISettable + { + private Ares64SyncSettings _syncSettings; + + public object GetSettings() => null; + + public Ares64SyncSettings GetSyncSettings() => _syncSettings.Clone(); + + public PutSettingsDirtyBits PutSettings(object o) => PutSettingsDirtyBits.None; + + public PutSettingsDirtyBits PutSyncSettings(Ares64SyncSettings o) + { + var ret = Ares64SyncSettings.NeedsReboot(_syncSettings, o); + _syncSettings = o; + return ret ? PutSettingsDirtyBits.RebootCore : PutSettingsDirtyBits.None; + } + + public class Ares64SyncSettings + { + [DisplayName("Player 1 Controller")] + [Description("")] + [DefaultValue(LibAres64.ControllerType.Mempak)] + public LibAres64.ControllerType P1Controller { get; set; } + + [DisplayName("Player 2 Controller")] + [Description("")] + [DefaultValue(LibAres64.ControllerType.Unplugged)] + public LibAres64.ControllerType P2Controller { get; set; } + + [DisplayName("Player 3 Controller")] + [Description("")] + [DefaultValue(LibAres64.ControllerType.Unplugged)] + public LibAres64.ControllerType P3Controller { get; set; } + + [DisplayName("Player 4 Controller")] + [Description("")] + [DefaultValue(LibAres64.ControllerType.Unplugged)] + public LibAres64.ControllerType P4Controller { get; set; } + + public Ares64SyncSettings() => SettingsUtil.SetDefaultValues(this); + + public Ares64SyncSettings Clone() => MemberwiseClone() as Ares64SyncSettings; + + public static bool NeedsReboot(Ares64SyncSettings x, Ares64SyncSettings y) => !DeepEquality.DeepEquals(x, y); + } + } 
+} diff --git a/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/Ares64.cs b/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/Ares64.cs new file mode 100644 index 0000000000..6444280fb7 --- /dev/null +++ b/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/Ares64.cs @@ -0,0 +1,210 @@ +using System; +using System.IO; + +using BizHawk.Common; +using BizHawk.Emulation.Common; +using BizHawk.Emulation.Cores.Properties; +using BizHawk.Emulation.Cores.Waterbox; + +namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64 +{ + [PortedCore(CoreNames.Ares64, "ares team, Near", "v126", "https://ares-emulator.github.io/", isReleased: false)] + [ServiceNotApplicable(new[] { typeof(IDriveLight), })] + public partial class Ares64 : WaterboxCore, IRegionable + { + private readonly LibAres64 _core; + + [CoreConstructor(VSystemID.Raw.N64)] + public Ares64(CoreLoadParameters lp) + : base(lp.Comm, new Configuration + { + DefaultWidth = 640, + DefaultHeight = 480, + MaxWidth = 640, + MaxHeight = 576, + MaxSamples = 2048, + DefaultFpsNumerator = 60000, + DefaultFpsDenominator = 1001, + SystemId = VSystemID.Raw.N64, + }) + { + _syncSettings = lp.SyncSettings ?? 
new(); + + ControllerSettings = new[] + { + _syncSettings.P1Controller, + _syncSettings.P2Controller, + _syncSettings.P3Controller, + _syncSettings.P4Controller, + }; + + N64Controller = CreateControllerDefinition(ControllerSettings); + + _core = PreInit(new WaterboxOptions + { + Filename = "ares64.wbx", + SbrkHeapSizeKB = 2 * 1024, + SealedHeapSizeKB = 4, + InvisibleHeapSizeKB = 6 * 1024, + PlainHeapSizeKB = 4, + MmapHeapSizeKB = 512 * 1024, + SkipCoreConsistencyCheck = CoreComm.CorePreferences.HasFlag(CoreComm.CorePreferencesFlags.WaterboxCoreConsistencyCheck), + SkipMemoryConsistencyCheck = CoreComm.CorePreferences.HasFlag(CoreComm.CorePreferencesFlags.WaterboxMemoryConsistencyCheck), + }); + + var rom = lp.Roms[0].RomData; + + Region = rom[0x3E] switch + { + 0x44 or 0x46 or 0x49 or 0x50 or 0x53 or 0x55 or 0x58 or 0x59 => DisplayType.PAL, + _ => DisplayType.NTSC, + }; + + var pal = Region == DisplayType.PAL; + + if (pal) + { + VsyncNumerator = 50; + VsyncDenominator = 1; + } + + var pif = Util.DecompressGzipFile(new MemoryStream(pal ? Resources.PIF_PAL_ROM.Value : Resources.PIF_NTSC_ROM.Value)); + + _exe.AddReadonlyFile(pif, pal ? "pif.pal.rom" : "pif.ntsc.rom"); + _exe.AddReadonlyFile(rom, "program.rom"); + + if (!_core.Init(ControllerSettings, pal)) + { + throw new InvalidOperationException("Init returned false!"); + } + + _exe.RemoveReadonlyFile(pal ? 
"pif.pal.rom" : "pif.ntsc.rom"); + _exe.RemoveReadonlyFile("program.rom"); + + PostInit(); + DeterministicEmulation = true; + } + + public DisplayType Region { get; } + + public override ControllerDefinition ControllerDefinition => N64Controller; + + private ControllerDefinition N64Controller { get; } + + public LibAres64.ControllerType[] ControllerSettings { get; } + + private static ControllerDefinition CreateControllerDefinition(LibAres64.ControllerType[] controllerSettings) + { + var ret = new ControllerDefinition("Nintendo 64 Controller"); + for (int i = 0; i < 4; i++) + { + if (controllerSettings[i] != LibAres64.ControllerType.Unplugged) + { + ret.BoolButtons.Add($"P{i + 1} DPad U"); + ret.BoolButtons.Add($"P{i + 1} DPad D"); + ret.BoolButtons.Add($"P{i + 1} DPad L"); + ret.BoolButtons.Add($"P{i + 1} DPad R"); + ret.BoolButtons.Add($"P{i + 1} Start"); + ret.BoolButtons.Add($"P{i + 1} Z"); + ret.BoolButtons.Add($"P{i + 1} B"); + ret.BoolButtons.Add($"P{i + 1} A"); + ret.BoolButtons.Add($"P{i + 1} C Up"); + ret.BoolButtons.Add($"P{i + 1} C Down"); + ret.BoolButtons.Add($"P{i + 1} C Left"); + ret.BoolButtons.Add($"P{i + 1} C Right"); + ret.BoolButtons.Add($"P{i + 1} L"); + ret.BoolButtons.Add($"P{i + 1} R"); + ret.AddXYPair($"P{i + 1} {{0}} Axis", AxisPairOrientation.RightAndDown, (-32768).RangeTo(32767), 0); + if (controllerSettings[i] == LibAres64.ControllerType.Rumblepak) + { + ret.HapticsChannels.Add($"P{i + 1} Rumble Pak"); + } + } + } + ret.BoolButtons.Add("Reset"); + ret.BoolButtons.Add("Power"); + return ret.MakeImmutable(); + } + + private static LibAres64.Buttons GetButtons(IController controller, int num) + { + LibAres64.Buttons ret = 0; + + if (controller.IsPressed($"P{num} DPad U")) + ret |= LibAres64.Buttons.UP; + if (controller.IsPressed($"P{num} DPad D")) + ret |= LibAres64.Buttons.DOWN; + if (controller.IsPressed($"P{num} DPad L")) + ret |= LibAres64.Buttons.LEFT; + if (controller.IsPressed($"P{num} DPad R")) + ret |= LibAres64.Buttons.RIGHT; + 
if (controller.IsPressed($"P{num} B")) + ret |= LibAres64.Buttons.B; + if (controller.IsPressed($"P{num} A")) + ret |= LibAres64.Buttons.A; + if (controller.IsPressed($"P{num} C Up")) + ret |= LibAres64.Buttons.C_UP; + if (controller.IsPressed($"P{num} C Down")) + ret |= LibAres64.Buttons.C_DOWN; + if (controller.IsPressed($"P{num} C Left")) + ret |= LibAres64.Buttons.C_LEFT; + if (controller.IsPressed($"P{num} C Right")) + ret |= LibAres64.Buttons.C_RIGHT; + if (controller.IsPressed($"P{num} L")) + ret |= LibAres64.Buttons.L; + if (controller.IsPressed($"P{num} R")) + ret |= LibAres64.Buttons.R; + if (controller.IsPressed($"P{num} Z")) + ret |= LibAres64.Buttons.Z; + if (controller.IsPressed($"P{num} Start")) + ret |= LibAres64.Buttons.START; + + return ret; + } + + protected override LibWaterboxCore.FrameInfo FrameAdvancePrep(IController controller, bool render, bool rendersound) + { + for (int i = 0; i < 4; i++) + { + if (ControllerSettings[i] == LibAres64.ControllerType.Rumblepak) + { + controller.SetHapticChannelStrength($"P{i + 1} Rumble Pak", _core.GetRumbleStatus(i) ? int.MaxValue : 0); + } + } + + return new LibAres64.FrameInfo + { + P1Buttons = GetButtons(controller, 1), + P1XAxis = (short)controller.AxisValue("P1 X Axis"), + P1YAxis = (short)controller.AxisValue("P1 Y Axis"), + + P2Buttons = GetButtons(controller, 2), + P2XAxis = (short)controller.AxisValue("P2 X Axis"), + P2YAxis = (short)controller.AxisValue("P2 Y Axis"), + + P3Buttons = GetButtons(controller, 3), + P3XAxis = (short)controller.AxisValue("P3 X Axis"), + P3YAxis = (short)controller.AxisValue("P3 Y Axis"), + + P4Buttons = GetButtons(controller, 4), + P4XAxis = (short)controller.AxisValue("P4 X Axis"), + P4YAxis = (short)controller.AxisValue("P4 Y Axis"), + + Reset = controller.IsPressed("Reset"), + Power = controller.IsPressed("Power"), + }; + } + + protected override void FrameAdvancePost() + { + if (BufferWidth == 0) + { + BufferWidth = BufferHeight == 239 ? 
320 : 640; + } + } + + public override int VirtualWidth => 640; + + public override int VirtualHeight => 480; + } +} diff --git a/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/LibAres64.cs b/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/LibAres64.cs new file mode 100644 index 0000000000..1e85054655 --- /dev/null +++ b/src/BizHawk.Emulation.Cores/Consoles/Nintendo/Ares64/LibAres64.cs @@ -0,0 +1,67 @@ +using System; +using System.Runtime.InteropServices; + +using BizHawk.BizInvoke; +using BizHawk.Emulation.Cores.Waterbox; + +namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64 +{ + public abstract class LibAres64 : LibWaterboxCore + { + [Flags] + public enum Buttons : uint + { + UP = 1 << 0, + DOWN = 1 << 1, + LEFT = 1 << 2, + RIGHT = 1 << 3, + B = 1 << 4, + A = 1 << 5, + C_UP = 1 << 6, + C_DOWN = 1 << 7, + C_LEFT = 1 << 8, + C_RIGHT = 1 << 9, + L = 1 << 10, + R = 1 << 11, + Z = 1 << 12, + START = 1 << 13, + } + + public enum ControllerType : uint + { + Unplugged, + Standard, + Mempak, + Rumblepak, + } + + [StructLayout(LayoutKind.Sequential)] + public new class FrameInfo : LibWaterboxCore.FrameInfo + { + public Buttons P1Buttons; + public short P1XAxis; + public short P1YAxis; + + public Buttons P2Buttons; + public short P2XAxis; + public short P2YAxis; + + public Buttons P3Buttons; + public short P3XAxis; + public short P3YAxis; + + public Buttons P4Buttons; + public short P4XAxis; + public short P4YAxis; + + public bool Reset; + public bool Power; + } + + [BizImport(CC)] + public abstract bool Init(ControllerType[] controllerSettings, bool pal); + + [BizImport(CC)] + public abstract bool GetRumbleStatus(int num); + } +} diff --git a/src/BizHawk.Emulation.Cores/CoreNames.cs b/src/BizHawk.Emulation.Cores/CoreNames.cs index 7bee7bfb53..747822d36f 100644 --- a/src/BizHawk.Emulation.Cores/CoreNames.cs +++ b/src/BizHawk.Emulation.Cores/CoreNames.cs @@ -10,6 +10,7 @@ namespace BizHawk.Emulation.Cores public static class CoreNames { public const string 
A7800Hawk = "A7800Hawk"; + public const string Ares64 = "Ares64"; public const string Atari2600Hawk = "Atari2600Hawk"; public const string Bsnes = "BSNES"; public const string Bsnes115 = "BSNESv115+"; diff --git a/src/BizHawk.Emulation.Cores/Properties/Resources.cs b/src/BizHawk.Emulation.Cores/Properties/Resources.cs index cd5a5ca24b..7b2aebc343 100644 --- a/src/BizHawk.Emulation.Cores/Properties/Resources.cs +++ b/src/BizHawk.Emulation.Cores/Properties/Resources.cs @@ -24,5 +24,7 @@ namespace BizHawk.Emulation.Cores.Properties { internal static readonly Lazy ZX_plus2_rom = new Lazy(() => ReadEmbeddedByteArray("plus2.rom.gz")); internal static readonly Lazy ZX_plus2a_rom = new Lazy(() => ReadEmbeddedByteArray("plus2a.rom.gz")); internal static readonly Lazy TMDS = new Lazy(() => ReadEmbeddedByteArray("tmds.zip.gz")); + internal static readonly Lazy PIF_PAL_ROM = new Lazy(() => ReadEmbeddedByteArray("pif.pal.rom.gz")); + internal static readonly Lazy PIF_NTSC_ROM = new Lazy(() => ReadEmbeddedByteArray("pif.ntsc.rom.gz")); } } diff --git a/src/BizHawk.Emulation.Cores/Resources/pif.ntsc.rom.gz b/src/BizHawk.Emulation.Cores/Resources/pif.ntsc.rom.gz new file mode 100644 index 0000000000..060b9119b2 Binary files /dev/null and b/src/BizHawk.Emulation.Cores/Resources/pif.ntsc.rom.gz differ diff --git a/src/BizHawk.Emulation.Cores/Resources/pif.pal.rom.gz b/src/BizHawk.Emulation.Cores/Resources/pif.pal.rom.gz new file mode 100644 index 0000000000..fe0c1e554a Binary files /dev/null and b/src/BizHawk.Emulation.Cores/Resources/pif.pal.rom.gz differ diff --git a/src/BizHawk.Emulation.Cores/vpads_schemata/N64Schema.cs b/src/BizHawk.Emulation.Cores/vpads_schemata/N64Schema.cs index f0eef1a5c7..0168e2a7d2 100644 --- a/src/BizHawk.Emulation.Cores/vpads_schemata/N64Schema.cs +++ b/src/BizHawk.Emulation.Cores/vpads_schemata/N64Schema.cs @@ -4,6 +4,7 @@ using System.Drawing; using BizHawk.Common; using BizHawk.Emulation.Common; +using 
BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64; using BizHawk.Emulation.Cores.Nintendo.N64; namespace BizHawk.Emulation.Cores @@ -14,17 +15,33 @@ namespace BizHawk.Emulation.Cores { public IEnumerable GetPadSchemas(IEmulator core, Action showMessageBox) { - var ss = ((N64)core).GetSyncSettings(); - for (var i = 0; i < 4; i++) + if (core is N64 n64) { - if (ss.Controllers[i].IsConnected) + var ss = n64.GetSyncSettings(); + for (var i = 0; i < 4; i++) { - yield return StandardController(i + 1); + if (ss.Controllers[i].IsConnected) + { + yield return StandardController(i + 1, MupenRange); + } + } + } + else if (core is Ares64 ares64) + { + for (var i = 0; i < 4; i++) + { + if (ares64.ControllerSettings[i] != LibAres64.ControllerType.Unplugged) + { + yield return StandardController(i + 1, AresRange); + } } } } - private static PadSchema StandardController(int controller) + private static readonly Func MupenRange = (bool isY) => new((-128).RangeTo(127), 0, false); + private static readonly Func AresRange = (bool isY) => new((-32768).RangeTo(32767), 0, isY); + + private static PadSchema StandardController(int controller, Func makeRange) { return new PadSchema { @@ -47,8 +64,8 @@ namespace BizHawk.Emulation.Cores new ButtonSchema(194, 221, controller, "C Right") { Icon = VGamepadButtonImage.YellowArrE }, new AnalogSchema(6, 14, $"P{controller} X Axis") { - Spec = new AxisSpec((-128).RangeTo(127), 0), - SecondarySpec = new AxisSpec((-128).RangeTo(127), 0) + Spec = makeRange(false), + SecondarySpec = makeRange(true) } } }; diff --git a/waterbox/ares64/0001-Add-__divmodti4-to-match-libgcc.patch b/waterbox/ares64/0001-Add-__divmodti4-to-match-libgcc.patch new file mode 100644 index 0000000000..3e3145a70d --- /dev/null +++ b/waterbox/ares64/0001-Add-__divmodti4-to-match-libgcc.patch @@ -0,0 +1,182 @@ +From c9af34027bc9cb852a4e5e96154a7bd89531a6de Mon Sep 17 00:00:00 2001 +From: Craig Topper +Date: Wed, 16 Sep 2020 21:56:01 -0700 +Subject: [PATCH] Add __divmodti4 to match 
libgcc. + +gcc has used this on x86-64 since at least version 7. + +Reviewed By: MaskRay + +Differential Revision: https://reviews.llvm.org/D80506 +--- + compiler-rt/lib/builtins/CMakeLists.txt | 1 + + compiler-rt/lib/builtins/README.txt | 2 + + compiler-rt/lib/builtins/divmodti4.c | 32 +++++++ + .../test/builtins/Unit/divmodti4_test.c | 91 +++++++++++++++++++ + 4 files changed, 126 insertions(+) + create mode 100644 compiler-rt/lib/builtins/divmodti4.c + create mode 100644 compiler-rt/test/builtins/Unit/divmodti4_test.c + +diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt +index 8dbe15364ab8..3c50df179764 100644 +--- a/compiler-rt/lib/builtins/CMakeLists.txt ++++ b/compiler-rt/lib/builtins/CMakeLists.txt +@@ -71,6 +71,7 @@ set(GENERIC_SOURCES + divdi3.c + divmoddi4.c + divmodsi4.c ++ divmodti4.c + divsc3.c + divsf3.c + divsi3.c +diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt +index f9e1bc805092..d66d725e7ab5 100644 +--- a/compiler-rt/lib/builtins/README.txt ++++ b/compiler-rt/lib/builtins/README.txt +@@ -87,6 +87,8 @@ du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b u + tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned + su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned + si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed ++di_int __divmoddi4(di_int a, di_int b, di_int* rem); // a / b, *rem = a % b signed ++ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem); // a / b, *rem = a % b signed + + + +diff --git a/compiler-rt/lib/builtins/divmodti4.c b/compiler-rt/lib/builtins/divmodti4.c +new file mode 100644 +index 000000000000..b243ba4ef853 +--- /dev/null ++++ b/compiler-rt/lib/builtins/divmodti4.c +@@ -0,0 +1,32 @@ ++//===-- divmodti4.c - Implement __divmodti4 -------------------------------===// ++// ++// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements __divmodti4 for the compiler_rt library. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "int_lib.h" ++ ++#ifdef CRT_HAS_128BIT ++ ++// Returns: a / b, *rem = a % b ++ ++COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int *rem) { ++ const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; ++ ti_int s_a = a >> bits_in_tword_m1; // s_a = a < 0 ? -1 : 0 ++ ti_int s_b = b >> bits_in_tword_m1; // s_b = b < 0 ? -1 : 0 ++ a = (a ^ s_a) - s_a; // negate if s_a == -1 ++ b = (b ^ s_b) - s_b; // negate if s_b == -1 ++ s_b ^= s_a; // sign of quotient ++ tu_int r; ++ ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1 ++ *rem = (r ^ s_a) - s_a; // negate if s_a == -1 ++ return q; ++} ++ ++#endif // CRT_HAS_128BIT +diff --git a/compiler-rt/test/builtins/Unit/divmodti4_test.c b/compiler-rt/test/builtins/Unit/divmodti4_test.c +new file mode 100644 +index 000000000000..a9f70dcf1c1e +--- /dev/null ++++ b/compiler-rt/test/builtins/Unit/divmodti4_test.c +@@ -0,0 +1,91 @@ ++// RUN: %clang_builtins %s %librt -o %t && %run %t ++// REQUIRES: librt_has_divmodti4 ++// REQUIRES: int128 ++//===-- divmodti4_test.c - Test __divmodti4 -------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file tests __divmodti4 for the compiler_rt library. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "int_lib.h" ++#include ++ ++#ifdef CRT_HAS_128BIT ++ ++// Effects: if rem != 0, *rem = a % b ++// Returns: a / b ++ ++COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem); ++ ++int test__divmodti4(ti_int a, ti_int b, ti_int expected_q, ti_int expected_r) { ++ ti_int r; ++ ti_int q = __divmodti4(a, b, &r); ++ if (q != expected_q || r != expected_r) ++ { ++ utwords at; ++ at.all = a; ++ utwords bt; ++ bt.all = b; ++ utwords expected_qt; ++ expected_qt.all = expected_q; ++ utwords expected_rt; ++ expected_rt.all = expected_r; ++ utwords qt; ++ qt.all = q; ++ utwords rt; ++ rt.all = r; ++ printf("error in __divmodti4: 0x%.16llX%.16llX / 0x%.16llX%.16llX = " ++ "0x%.16llX%.16llX, R = 0x%.16llX%.16llX, expected 0x%.16llX%.16llX, " ++ "0x%.16llX%.16llX\n", ++ at.s.high, at.s.low, bt.s.high, bt.s.low, qt.s.high, qt.s.low, ++ rt.s.high, rt.s.low, expected_qt.s.high, expected_qt.s.low, ++ expected_rt.s.high, expected_rt.s.low); ++ } ++ return !(q == expected_q && r == expected_r); ++} ++ ++char assumption_1[sizeof(ti_int) == 2*sizeof(di_int)] = {0}; ++ ++tu_int tests[][4] = ++{ ++{ (ti_int) 0, (ti_int) 1, (ti_int) 0, (ti_int) 0 }, ++{ (ti_int) 0, (ti_int)-1, (ti_int) 0, (ti_int) 0 }, ++{ (ti_int) 2, (ti_int) 1, (ti_int) 2, (ti_int) 0 }, ++{ (ti_int) 2, (ti_int)-1, (ti_int)-2, (ti_int) 0 }, ++{ (ti_int)-2, (ti_int) 1, (ti_int)-2, (ti_int) 0 }, ++{ (ti_int)-2, (ti_int)-1, (ti_int) 2, (ti_int) 0 }, ++{ (ti_int) 5, (ti_int) 3, (ti_int) 1, (ti_int) 2 }, ++{ (ti_int) 5, (ti_int)-3, (ti_int)-1, (ti_int) 2 }, ++{ (ti_int)-5, (ti_int) 3, (ti_int)-1, (ti_int)-2 }, ++{ (ti_int)-5, (ti_int)-3, (ti_int) 1, (ti_int)-2 }, ++{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, ++{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, ++{ 
(ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-2, (ti_int)0x4000000000000000LL << 64 | 0, (ti_int)0x0LL }, ++{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 2, (ti_int)0xC000000000000000LL << 64 | 0, (ti_int)0x0LL }, ++{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-3, (ti_int)0x2AAAAAAAAAAAAAAALL << 64 | 0xAAAAAAAAAAAAAAAALL, (ti_int)-2 }, ++{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 3, (ti_int)0xD555555555555555LL << 64 | 0x5555555555555556LL, (ti_int)-2 }, ++}; ++ ++#endif ++ ++int main() ++{ ++#ifdef CRT_HAS_128BIT ++ const unsigned N = sizeof(tests) / sizeof(tests[0]); ++ unsigned i; ++ for (i = 0; i < N; ++i) ++ if (test__divmodti4(tests[i][0], tests[i][1], tests[i][2], tests[i][3])) ++ return 1; ++ ++ ++#else ++ printf("skipped\n"); ++#endif ++ return 0; ++} +-- +2.25.1 + diff --git a/waterbox/ares64/BizInterface.cpp b/waterbox/ares64/BizInterface.cpp new file mode 100644 index 0000000000..7276e66724 --- /dev/null +++ b/waterbox/ares64/BizInterface.cpp @@ -0,0 +1,578 @@ +#include + +#include +#include + +#define EXPORT extern "C" ECL_EXPORT + +typedef enum +{ + Unplugged, + Standard, + Mempak, + Rumblepak, +} ControllerType; + +typedef enum +{ + UP = 1 << 0, + DOWN = 1 << 1, + LEFT = 1 << 2, + RIGHT = 1 << 3, + B = 1 << 4, + A = 1 << 5, + C_UP = 1 << 6, + C_DOWN = 1 << 7, + C_LEFT = 1 << 8, + C_RIGHT = 1 << 9, + L = 1 << 10, + R = 1 << 11, + Z = 1 << 12, + START = 1 << 13, +} Buttons_t; + +struct BizPlatform : ares::Platform +{ + auto attach(ares::Node::Object) -> void override; + auto pak(ares::Node::Object) -> ares::VFS::Pak override; + auto video(ares::Node::Video::Screen, const u32*, u32, u32, u32) -> void override; + auto input(ares::Node::Input::Input) -> void override; + + ares::VFS::Pak bizpak = new vfs::directory; + ares::Node::Audio::Stream stream = nullptr; + u32* videobuf = nullptr; + u32 pitch = 0; + u32 width = 0; + u32 height = 0; + bool newframe = false; + void (*inputcb)() = nullptr; + bool lagged = true; +}; + +auto 
BizPlatform::attach(ares::Node::Object node) -> void +{ + if (auto stream = node->cast()) + { + stream->setResamplerFrequency(44100); + this->stream = stream; + } +} + +auto BizPlatform::pak(ares::Node::Object) -> ares::VFS::Pak +{ + return bizpak; +} + +auto BizPlatform::video(ares::Node::Video::Screen screen, const u32* data, u32 pitch, u32 width, u32 height) -> void +{ + videobuf = (u32*)data; + this->pitch = pitch >> 2; + this->width = width; + this->height = height; + newframe = true; +} + +auto BizPlatform::input(ares::Node::Input::Input node) -> void +{ + if (auto input = node->cast()) + { + if (input->name() == "Start") + { + lagged = false; + if (inputcb) inputcb(); + } + } +}; + +static ares::Node::System root; +static BizPlatform platform; + +static inline void HackeryDoo() +{ + root->run(); + root->run(); + platform.newframe = false; + f64 buf[2]; + while (platform.stream->pending()) platform.stream->read(buf); +} + +typedef enum +{ + NONE, + EEPROM512, + EEPROM2KB, + SRAM32KB, + SRAM96KB, + FLASH128KB, +} SaveType; + +static inline SaveType DetectSaveType(u8* rom) +{ + string id; + id.append((char)rom[0x3B]); + id.append((char)rom[0x3C]); + id.append((char)rom[0x3D]); + + char region_code = rom[0x3E]; + u8 revision = rom[0x3F]; + + SaveType ret = NONE; + if (id == "NTW") ret = EEPROM512; + if (id == "NHF") ret = EEPROM512; + if (id == "NOS") ret = EEPROM512; + if (id == "NTC") ret = EEPROM512; + if (id == "NER") ret = EEPROM512; + if (id == "NAG") ret = EEPROM512; + if (id == "NAB") ret = EEPROM512; + if (id == "NS3") ret = EEPROM512; + if (id == "NTN") ret = EEPROM512; + if (id == "NBN") ret = EEPROM512; + if (id == "NBK") ret = EEPROM512; + if (id == "NFH") ret = EEPROM512; + if (id == "NMU") ret = EEPROM512; + if (id == "NBC") ret = EEPROM512; + if (id == "NBH") ret = EEPROM512; + if (id == "NHA") ret = EEPROM512; + if (id == "NBM") ret = EEPROM512; + if (id == "NBV") ret = EEPROM512; + if (id == "NBD") ret = EEPROM512; + if (id == "NCT") ret = 
EEPROM512; + if (id == "NCH") ret = EEPROM512; + if (id == "NCG") ret = EEPROM512; + if (id == "NP2") ret = EEPROM512; + if (id == "NXO") ret = EEPROM512; + if (id == "NCU") ret = EEPROM512; + if (id == "NCX") ret = EEPROM512; + if (id == "NDY") ret = EEPROM512; + if (id == "NDQ") ret = EEPROM512; + if (id == "NDR") ret = EEPROM512; + if (id == "NN6") ret = EEPROM512; + if (id == "NDU") ret = EEPROM512; + if (id == "NJM") ret = EEPROM512; + if (id == "NFW") ret = EEPROM512; + if (id == "NF2") ret = EEPROM512; + if (id == "NKA") ret = EEPROM512; + if (id == "NFG") ret = EEPROM512; + if (id == "NGL") ret = EEPROM512; + if (id == "NGV") ret = EEPROM512; + if (id == "NGE") ret = EEPROM512; + if (id == "NHP") ret = EEPROM512; + if (id == "NPG") ret = EEPROM512; + if (id == "NIJ") ret = EEPROM512; + if (id == "NIC") ret = EEPROM512; + if (id == "NFY") ret = EEPROM512; + if (id == "NKI") ret = EEPROM512; + if (id == "NLL") ret = EEPROM512; + if (id == "NLR") ret = EEPROM512; + if (id == "NKT") ret = EEPROM512; + if (id == "CLB") ret = EEPROM512; + if (id == "NLB") ret = EEPROM512; + if (id == "NMW") ret = EEPROM512; + if (id == "NML") ret = EEPROM512; + if (id == "NTM") ret = EEPROM512; + if (id == "NMI") ret = EEPROM512; + if (id == "NMG") ret = EEPROM512; + if (id == "NMO") ret = EEPROM512; + if (id == "NMS") ret = EEPROM512; + if (id == "NMR") ret = EEPROM512; + if (id == "NCR") ret = EEPROM512; + if (id == "NEA") ret = EEPROM512; + if (id == "NPW") ret = EEPROM512; + if (id == "NPM") ret = EEPROM512; + if (id == "NPY") ret = EEPROM512; + if (id == "NPT") ret = EEPROM512; + if (id == "NRA") ret = EEPROM512; + if (id == "NWQ") ret = EEPROM512; + if (id == "NSU") ret = EEPROM512; + if (id == "NSN") ret = EEPROM512; + if (id == "NK2") ret = EEPROM512; + if (id == "NSV") ret = EEPROM512; + if (id == "NFX") ret = EEPROM512; + if (id == "NFP") ret = EEPROM512; + if (id == "NS6") ret = EEPROM512; + if (id == "NNA") ret = EEPROM512; + if (id == "NRS") ret = EEPROM512; + if (id 
== "NSW") ret = EEPROM512; + if (id == "NSC") ret = EEPROM512; + if (id == "NSA") ret = EEPROM512; + if (id == "NB6") ret = EEPROM512; + if (id == "NSM") ret = EEPROM512; + if (id == "NSS") ret = EEPROM512; + if (id == "NTX") ret = EEPROM512; + if (id == "NT6") ret = EEPROM512; + if (id == "NTP") ret = EEPROM512; + if (id == "NTJ") ret = EEPROM512; + if (id == "NRC") ret = EEPROM512; + if (id == "NTR") ret = EEPROM512; + if (id == "NTB") ret = EEPROM512; + if (id == "NGU") ret = EEPROM512; + if (id == "NIR") ret = EEPROM512; + if (id == "NVL") ret = EEPROM512; + if (id == "NVY") ret = EEPROM512; + if (id == "NWR") ret = EEPROM512; + if (id == "NWC") ret = EEPROM512; + if (id == "NAD") ret = EEPROM512; + if (id == "NWU") ret = EEPROM512; + if (id == "NYK") ret = EEPROM512; + if (id == "NMZ") ret = EEPROM512; + if (id == "NDK" && region_code == 'J') ret = EEPROM512; + if (id == "NWT" && region_code == 'J') ret = EEPROM512; + + if (id == "NB7") ret = EEPROM2KB; + if (id == "NGT") ret = EEPROM2KB; + if (id == "NFU") ret = EEPROM2KB; + if (id == "NCW") ret = EEPROM2KB; + if (id == "NCZ") ret = EEPROM2KB; + if (id == "ND6") ret = EEPROM2KB; + if (id == "NDO") ret = EEPROM2KB; + if (id == "ND2") ret = EEPROM2KB; + if (id == "N3D") ret = EEPROM2KB; + if (id == "NMX") ret = EEPROM2KB; + if (id == "NGC") ret = EEPROM2KB; + if (id == "NIM") ret = EEPROM2KB; + if (id == "NK4") ret = EEPROM2KB; + if (id == "NNB") ret = EEPROM2KB; + if (id == "NMV") ret = EEPROM2KB; + if (id == "NM8") ret = EEPROM2KB; + if (id == "NEV") ret = EEPROM2KB; + if (id == "NPP") ret = EEPROM2KB; + if (id == "NUB") ret = EEPROM2KB; + if (id == "NPD") ret = EEPROM2KB; + if (id == "NRZ") ret = EEPROM2KB; + if (id == "NR7") ret = EEPROM2KB; + if (id == "NEP") ret = EEPROM2KB; + if (id == "NYS") ret = EEPROM2KB; + if (id == "ND3" && region_code == 'J') ret = EEPROM2KB; + if (id == "ND4" && region_code == 'J') ret = EEPROM2KB; + + if (id == "NTE") ret = SRAM32KB; + if (id == "NVB") ret = SRAM32KB; + if (id 
== "CFZ") ret = SRAM32KB; + if (id == "NFZ") ret = SRAM32KB; + if (id == "NSI") ret = SRAM32KB; + if (id == "NG6") ret = SRAM32KB; + if (id == "N3H") ret = SRAM32KB; + if (id == "NGP") ret = SRAM32KB; + if (id == "NYW") ret = SRAM32KB; + if (id == "NHY") ret = SRAM32KB; + if (id == "NIB") ret = SRAM32KB; + if (id == "NPS") ret = SRAM32KB; + if (id == "NPA") ret = SRAM32KB; + if (id == "NP4") ret = SRAM32KB; + if (id == "NJ5") ret = SRAM32KB; + if (id == "NP6") ret = SRAM32KB; + if (id == "NPE") ret = SRAM32KB; + if (id == "NJG") ret = SRAM32KB; + if (id == "CZL") ret = SRAM32KB; + if (id == "NZL") ret = SRAM32KB; + if (id == "NKG") ret = SRAM32KB; + if (id == "NMF") ret = SRAM32KB; + if (id == "NRI") ret = SRAM32KB; + if (id == "NUT") ret = SRAM32KB; + if (id == "NUM") ret = SRAM32KB; + if (id == "NOB") ret = SRAM32KB; + if (id == "CPS") ret = SRAM32KB; + if (id == "NB5") ret = SRAM32KB; + if (id == "NRE") ret = SRAM32KB; + if (id == "NAL") ret = SRAM32KB; + if (id == "NT3") ret = SRAM32KB; + if (id == "NS4") ret = SRAM32KB; + if (id == "NA2") ret = SRAM32KB; + if (id == "NVP") ret = SRAM32KB; + if (id == "NWL") ret = SRAM32KB; + if (id == "NW2") ret = SRAM32KB; + if (id == "NWX") ret = SRAM32KB; + if (id == "NK4" && region_code == 'J' && revision < 2) ret = SRAM32KB; + + if (id == "CDZ") ret = SRAM96KB; + + if (id == "NCC") ret = FLASH128KB; + if (id == "NDA") ret = FLASH128KB; + if (id == "NAF") ret = FLASH128KB; + if (id == "NJF") ret = FLASH128KB; + if (id == "NKJ") ret = FLASH128KB; + if (id == "NZS") ret = FLASH128KB; + if (id == "NM6") ret = FLASH128KB; + if (id == "NCK") ret = FLASH128KB; + if (id == "NMQ") ret = FLASH128KB; + if (id == "NPN") ret = FLASH128KB; + if (id == "NPF") ret = FLASH128KB; + if (id == "NPO") ret = FLASH128KB; + if (id == "CP2") ret = FLASH128KB; + if (id == "NP3") ret = FLASH128KB; + if (id == "NRH") ret = FLASH128KB; + if (id == "NSQ") ret = FLASH128KB; + if (id == "NT9") ret = FLASH128KB; + if (id == "NW4") ret = FLASH128KB; + if 
(id == "NDP") ret = FLASH128KB; + + return ret; +} + +EXPORT bool Init(ControllerType* controllers, bool pal) +{ + FILE* f; + array_view* data; + u32 len; + string name; + + name = pal ? "pif.pal.rom" : "pif.ntsc.rom"; + f = fopen(name, "rb"); + fseek(f, 0, SEEK_END); + len = ftell(f); + data = new array_view(new u8[len], len); + fseek(f, 0, SEEK_SET); + fread((void*)data->data(), 1, len, f); + fclose(f); + platform.bizpak->append(name, *data); + + name = "program.rom"; + f = fopen(name, "rb"); + fseek(f, 0, SEEK_END); + len = ftell(f); + data = new array_view(new u8[len], len); + fseek(f, 0, SEEK_SET); + fread((void*)data->data(), 1, len, f); + fclose(f); + platform.bizpak->append(name, *data); + + string region = pal ? "PAL" : "NTSC"; + platform.bizpak->setAttribute("region", region); + + string cic = pal ? "CIC-NUS-7101" : "CIC-NUS-6102"; + u32 crc32 = Hash::CRC32({&((u8*)data->data())[0x40], 0x9C0}).value(); + if (crc32 == 0x1DEB51A9) cic = pal ? "CIC-NUS-7102" : "CIC-NUS-6101"; + if (crc32 == 0xC08E5BD6) cic = pal ? "CIC-NUS-7101" : "CIC-NUS-6102"; + if (crc32 == 0x03B8376A) cic = pal ? "CIC-NUS-7103" : "CIC-NUS-6103"; + if (crc32 == 0xCF7F41DC) cic = pal ? "CIC-NUS-7105" : "CIC-NUS-6105"; + if (crc32 == 0xD1059C6A) cic = pal ? 
"CIC-NUS-7106" : "CIC-NUS-6106"; + platform.bizpak->setAttribute("cic", cic); + + SaveType save = DetectSaveType((u8*)data->data()); + if (save != NONE) + { + switch (save) + { + case EEPROM512: len = 512; name = "save.eeprom"; break; + case EEPROM2KB: len = 2 * 1024; name = "save.eeprom"; break; + case SRAM32KB: len = 32 * 1024; name = "save.ram"; break; + case SRAM96KB: len = 96 * 1024; name = "save.ram"; break; + case FLASH128KB: len = 128 * 1024; name = "save.flash"; break; + default: return false; + } + data = new array_view(new u8[len], len); + memset((void*)data->data(), 0xFF, len); + platform.bizpak->append(name, *data); + } + + ares::platform = &platform; + + if (!ares::Nintendo64::load(root, {"[Nintendo] Nintendo 64 (", region, ")"})) + { + return false; + } + + if (auto port = root->find("Cartridge Slot")) + { + port->allocate(); + port->connect(); + } + else + { + return false; + } + + for (int i = 0; i < 4; i++) + { + if (auto port = root->find({"Controller Port ", 1 + i})) + { + if (controllers[i] == Unplugged) continue; + + auto peripheral = port->allocate("Gamepad"); + port->connect(); + + string name; + switch (controllers[i]) + { + case Mempak: name = "Controller Pak"; break; + case Rumblepak: name = "Rumble Pak"; break; + default: continue; + } + + if (auto port = peripheral->find("Pak")) + { + port->allocate(name); + port->connect(); + } + else + { + return false; + } + } + else + { + return false; + } + } + + root->power(false); + HackeryDoo(); + return true; +} + +EXPORT bool GetRumbleStatus(u32 num) +{ + ares::Nintendo64::Gamepad* c = nullptr; + switch (num) + { + case 0: c = (ares::Nintendo64::Gamepad*)ares::Nintendo64::controllerPort1.device.data(); break; + case 1: c = (ares::Nintendo64::Gamepad*)ares::Nintendo64::controllerPort2.device.data(); break; + case 2: c = (ares::Nintendo64::Gamepad*)ares::Nintendo64::controllerPort3.device.data(); break; + case 3: c = (ares::Nintendo64::Gamepad*)ares::Nintendo64::controllerPort4.device.data(); 
break; + } + return c ? c->motor->enable() : false; +} + +#define MAYBE_ADD_MEMORY_DOMAIN(mem, name, flags) do { \ + if (ares::Nintendo64::mem.data) \ + { \ + m[i].Data = ares::Nintendo64::mem.data; \ + m[i].Name = name; \ + m[i].Size = ares::Nintendo64::mem.size; \ + m[i].Flags = flags | MEMORYAREA_FLAGS_YUGEENDIAN | MEMORYAREA_FLAGS_WORDSIZE4 | MEMORYAREA_FLAGS_WRITABLE; \ + i++; \ + } \ +} while (0) \ + +#define MAYBE_ADD_MEMPAK_DOMAIN(num) do { \ + if (auto c = (ares::Nintendo64::Gamepad*)ares::Nintendo64::controllerPort##num.device.data()) \ + { \ + if (c->ram.data) \ + { \ + m[i].Data = c->ram.data; \ + m[i].Name = "MEMPAK " #num; \ + m[i].Size = c->ram.size; \ + m[i].Flags = MEMORYAREA_FLAGS_ONEFILLED | MEMORYAREA_FLAGS_SAVERAMMABLE | MEMORYAREA_FLAGS_YUGEENDIAN | MEMORYAREA_FLAGS_WORDSIZE4 | MEMORYAREA_FLAGS_WRITABLE; \ + i++; \ + } \ + } \ +} while (0) \ + +EXPORT void GetMemoryAreas(MemoryArea *m) +{ + int i = 0; + MAYBE_ADD_MEMORY_DOMAIN(rdram.ram, "RDRAM", MEMORYAREA_FLAGS_PRIMARY); + MAYBE_ADD_MEMORY_DOMAIN(cartridge.rom, "ROM", 0); + MAYBE_ADD_MEMORY_DOMAIN(pi.rom, "PI ROM", 0); + MAYBE_ADD_MEMORY_DOMAIN(pi.ram, "PI RAM", 0); + MAYBE_ADD_MEMORY_DOMAIN(rsp.dmem, "RSP DMEM", 0); + MAYBE_ADD_MEMORY_DOMAIN(rsp.imem, "RSP IMEM", 0); + MAYBE_ADD_MEMORY_DOMAIN(cartridge.ram, "SRAM", MEMORYAREA_FLAGS_ONEFILLED | MEMORYAREA_FLAGS_SAVERAMMABLE); + MAYBE_ADD_MEMORY_DOMAIN(cartridge.eeprom, "EEPROM", MEMORYAREA_FLAGS_ONEFILLED | MEMORYAREA_FLAGS_SAVERAMMABLE); + MAYBE_ADD_MEMORY_DOMAIN(cartridge.flash, "FLASH", MEMORYAREA_FLAGS_ONEFILLED | MEMORYAREA_FLAGS_SAVERAMMABLE); + MAYBE_ADD_MEMPAK_DOMAIN(1); + MAYBE_ADD_MEMPAK_DOMAIN(2); + MAYBE_ADD_MEMPAK_DOMAIN(3); + MAYBE_ADD_MEMPAK_DOMAIN(4); +} + +// fixme: this mismatches the c# side due to some re-ordering c# is doing for some reason +struct MyFrameInfo : public FrameInfo +{ + Buttons_t P1Buttons; + Buttons_t P2Buttons; + Buttons_t P3Buttons; + Buttons_t P4Buttons; + + s16 P1XAxis; + s16 P1YAxis; + + s16 P2XAxis; 
+ s16 P2YAxis; + + s16 P3XAxis; + s16 P3YAxis; + + s16 P4XAxis; + s16 P4YAxis; + + bool Reset; + bool Power; +}; + +#define UPDATE_CONTROLLER(NUM) do { \ + if (auto c = (ares::Nintendo64::Gamepad*)ares::Nintendo64::controllerPort##NUM.device.data()) \ + { \ + c->x->setValue(f->P##NUM##XAxis); \ + c->y->setValue(f->P##NUM##YAxis); \ + c->up->setValue(f->P##NUM##Buttons & UP); \ + c->down->setValue(f->P##NUM##Buttons & DOWN); \ + c->left->setValue(f->P##NUM##Buttons & LEFT); \ + c->right->setValue(f->P##NUM##Buttons & RIGHT); \ + c->b->setValue(f->P##NUM##Buttons & B); \ + c->a->setValue(f->P##NUM##Buttons & A); \ + c->cameraUp->setValue(f->P##NUM##Buttons & C_UP); \ + c->cameraDown->setValue(f->P##NUM##Buttons & C_DOWN); \ + c->cameraLeft->setValue(f->P##NUM##Buttons & C_LEFT); \ + c->cameraRight->setValue(f->P##NUM##Buttons & C_RIGHT); \ + c->l->setValue(f->P##NUM##Buttons & L); \ + c->r->setValue(f->P##NUM##Buttons & R); \ + c->z->setValue(f->P##NUM##Buttons & Z); \ + c->start->setValue(f->P##NUM##Buttons & START); \ + } \ +} while (0) + +EXPORT void FrameAdvance(MyFrameInfo* f) +{ + if (f->Power) + { + root->power(false); + HackeryDoo(); + } + else if (f->Reset) + { + root->power(true); + HackeryDoo(); + } + + UPDATE_CONTROLLER(1); + UPDATE_CONTROLLER(2); + UPDATE_CONTROLLER(3); + UPDATE_CONTROLLER(4); + + platform.lagged = true; + + root->run(); + + f->Width = platform.width; + f->Height = platform.height; + if (platform.newframe) + { + u32* src = platform.videobuf; + u32* dst = f->VideoBuffer; + for (int i = 0; i < f->Height; i++) + { + memcpy(dst, src, f->Width * 4); + dst += f->Width; + src += platform.pitch; + } + platform.newframe = false; + } + + s16* soundbuf = f->SoundBuffer; + while (platform.stream->pending()) + { + f64 buf[2]; + platform.stream->read(buf); + *soundbuf++ = (s16)std::clamp(buf[0] * 32768, -32768.0, 32767.0); + *soundbuf++ = (s16)std::clamp(buf[1] * 32768, -32768.0, 32767.0); + f->Samples++; + } + + f->Lagged = platform.lagged; +} + 
+EXPORT void SetInputCallback(void (*callback)()) +{ + platform.inputcb = callback; +} diff --git a/waterbox/ares64/Makefile b/waterbox/ares64/Makefile new file mode 100644 index 0000000000..4fe8261897 --- /dev/null +++ b/waterbox/ares64/Makefile @@ -0,0 +1,57 @@ +NEED_LIBCO := 1 + +ARES_PATH = $(ROOT_DIR)/ares/ares +MAME_PATH = $(ROOT_DIR)/ares/thirdparty/mame + +CXXFLAGS := -std=c++17 -msse4.2 \ + -I../libco -I.$(ROOT_DIR)/ares/ -I.$(ROOT_DIR)/ares/thirdparty/ -I.$(ARES_PATH) \ + -Werror=int-to-pointer-cast -Wno-unused-but-set-variable \ + -Wno-parentheses -Wno-reorder -Wno-unused-variable \ + -Wno-sign-compare -Wno-switch -Wno-unused-local-typedefs \ + -fno-strict-aliasing -fwrapv -fno-operator-names \ + -I.$(MAME_PATH)/devices -I.$(MAME_PATH)/emu \ + -I.$(MAME_PATH)/lib/util -I.$(MAME_PATH)/mame \ + -I.$(MAME_PATH)/osd -DMAME_RDP -DLSB_FIRST -DPTR64 -DSDLMAME_EMSCRIPTEN + +TARGET = ares64.wbx + +SRCS_PROCESSORS = \ + $(ARES_PATH)/component/processor/sm5k/sm5k.cpp + +SRCS_ARES = \ + $(ARES_PATH)/ares/ares.cpp \ + $(ARES_PATH)/ares/memory/fixed-allocator.cpp + +SRCS_N64 = \ + $(ARES_PATH)/n64/memory/memory.cpp \ + $(ARES_PATH)/n64/system/system.cpp \ + $(ARES_PATH)/n64/cartridge/cartridge.cpp \ + $(ARES_PATH)/n64/controller/controller.cpp \ + $(ARES_PATH)/n64/dd/dd.cpp \ + $(ARES_PATH)/n64/sp/sp.cpp \ + $(ARES_PATH)/n64/dp/dp.cpp \ + $(ARES_PATH)/n64/mi/mi.cpp \ + $(ARES_PATH)/n64/vi/vi.cpp \ + $(ARES_PATH)/n64/ai/ai.cpp \ + $(ARES_PATH)/n64/pi/pi.cpp \ + $(ARES_PATH)/n64/ri/ri.cpp \ + $(ARES_PATH)/n64/si/si.cpp \ + $(ARES_PATH)/n64/rdram/rdram.cpp \ + $(ARES_PATH)/n64/cpu/cpu.cpp \ + $(ARES_PATH)/n64/rdp/rdp.cpp \ + $(ARES_PATH)/n64/rsp/rsp.cpp + +SRCS_MAME = \ + $(MAME_PATH)/emu/emucore.cpp \ + $(MAME_PATH)/lib/util/delegate.cpp \ + $(MAME_PATH)/lib/util/strformat.cpp \ + $(MAME_PATH)/mame/video/n64.cpp \ + $(MAME_PATH)/mame/video/pin64.cpp \ + $(MAME_PATH)/mame/video/rdpblend.cpp \ + $(MAME_PATH)/mame/video/rdptpipe.cpp \ + $(MAME_PATH)/osd/osdcore.cpp \ + 
$(MAME_PATH)/osd/osdsync.cpp + +SRCS = $(SRCS_PROCESSORS) $(SRCS_ARES) $(SRCS_N64) $(SRCS_MAME) BizInterface.cpp + +include ../common.mak diff --git a/waterbox/ares64/ares/LICENSE b/waterbox/ares64/ares/LICENSE new file mode 100644 index 0000000000..b13e2e81a1 --- /dev/null +++ b/waterbox/ares64/ares/LICENSE @@ -0,0 +1,78 @@ +---------------------------------------------------------------------- +ares + +Copyright (c) 2004-2021 ares team, Near et al + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +---------------------------------------------------------------------- + +---------------------------------------------------------------------- +Stack-less Just-In-Time compiler + +Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +---------------------------------------------------------------------- + +---------------------------------------------------------------------- +MAME +Copyright (c) 1997-2021 MAMEdev and contributors + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +---------------------------------------------------------------------- diff --git a/waterbox/ares64/ares/ares/ares/ares.cpp b/waterbox/ares64/ares/ares/ares/ares.cpp new file mode 100644 index 0000000000..46d6c7bbef --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/ares.cpp @@ -0,0 +1,11 @@ +#include +#include +#include +#include + +namespace ares { + +Platform* platform = nullptr; +bool _runAhead = false; + +} diff --git a/waterbox/ares64/ares/ares/ares/ares.hpp b/waterbox/ares64/ares/ares/ares/ares.hpp new file mode 100644 index 0000000000..e09e2d706a --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/ares.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace nall; + +namespace ares { + static const string Name = "ares"; + static const string Version = "126"; + static const string Copyright = "ares team, Near"; + static const string License = "ISC"; + static const string LicenseURI = "https://opensource.org/licenses/ISC"; + static const string Website = "ares-emulator.github.io"; + static const string WebsiteURI = "https://ares-emulator.github.io"; + + //incremented only when serialization format changes + 
static const u32 SerializerSignature = 0x31545342; //"BST1" (little-endian) + static const string SerializerVersion = "125"; + + namespace VFS { + using Pak = shared_pointer; + using File = shared_pointer; + } + + namespace Video { + static constexpr bool Threaded = false; + } + + namespace Constants { + namespace Colorburst { + static constexpr f64 NTSC = 315.0 / 88.0 * 1'000'000.0; + static constexpr f64 PAL = 283.75 * 15'625.0 + 25.0; + } + } + + extern bool _runAhead; + inline auto runAhead() -> bool { return _runAhead; } + inline auto setRunAhead(bool runAhead) -> void { _runAhead = runAhead; } +} + +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/waterbox/ares64/ares/ares/ares/debug/debug.cpp b/waterbox/ares64/ares/ares/ares/debug/debug.cpp new file mode 100644 index 0000000000..2b115b774b --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/debug/debug.cpp @@ -0,0 +1,45 @@ +namespace ares { + +Debug _debug; + +auto Debug::reset() -> void { + _totalNotices = 0; + _unhandledNotices.reset(); + _unimplementedNotices.reset(); + _unusualNotices.reset(); + _unverifiedNotices.reset(); +} + +auto Debug::_unhandled(const string& text) -> void { + if(_unhandledNotices.find(text)) return; + if(_totalNotices++ > 256) return; + _unhandledNotices.append(text); + + print(terminal::color::yellow("[unhandled] "), text, "\n"); +} + +auto Debug::_unimplemented(const string& text) -> void { + if(_unimplementedNotices.find(text)) return; + if(_totalNotices++ > 256) return; + _unimplementedNotices.append(text); + + print(terminal::color::magenta("[unimplemented] "), text, "\n"); +} + +auto Debug::_unusual(const string& text) -> void { + if(_unusualNotices.find(text)) return; + if(_totalNotices++ > 256) return; + _unusualNotices.append(text); + + print(terminal::color::cyan("[unusual] "), text, "\n"); +} + +auto Debug::_unverified(const string& text) -> void { + if(_unverifiedNotices.find(text)) return; + if(_totalNotices++ > 256) 
return; + _unverifiedNotices.append(text); + + print(terminal::color::gray("[unverified] "), text, "\n"); +} + +} diff --git a/waterbox/ares64/ares/ares/ares/debug/debug.hpp b/waterbox/ares64/ares/ares/ares/debug/debug.hpp new file mode 100644 index 0000000000..5f764a23e9 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/debug/debug.hpp @@ -0,0 +1,39 @@ +namespace ares { + +struct Debug { + auto reset() -> void; + + template auto unhandled(P&&... p) -> void { + return _unhandled({forward

(p)...}); + } + + template auto unimplemented(P&&... p) -> void { + return _unimplemented({forward

(p)...}); + } + + template auto unusual(P&&... p) -> void { + return _unusual({forward

(p)...}); + } + + template auto unverified(P&&... p) -> void { + return _unverified({forward

(p)...}); + } + +private: + auto _unhandled(const string&) -> void; + auto _unimplemented(const string&) -> void; + auto _unusual(const string&) -> void; + auto _unverified(const string&) -> void; + + u64 _totalNotices = 0; + vector _unhandledNotices; + vector _unimplementedNotices; + vector _unusualNotices; + vector _unverifiedNotices; +}; + +extern Debug _debug; + +} + +#define debug(function, ...) if constexpr(1) ares::_debug.function(__VA_ARGS__) diff --git a/waterbox/ares64/ares/ares/ares/inline.hpp b/waterbox/ares64/ares/ares/ares/inline.hpp new file mode 100644 index 0000000000..f72a69893d --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/inline.hpp @@ -0,0 +1,4 @@ +#include +#include +#include +#include diff --git a/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.cpp b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.cpp new file mode 100644 index 0000000000..cb3a7384e8 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.cpp @@ -0,0 +1,22 @@ +#include + +namespace ares::Memory { + +#if defined(PLATFORM_MACOS) && defined(ARCHITECTURE_ARM64) +//stub for unsupported platforms +FixedAllocator::FixedAllocator() { +} +#else +alignas(4096) u8 fixedBuffer[128_MiB]; + +FixedAllocator::FixedAllocator() { + _allocator.resize(sizeof(fixedBuffer), 0, fixedBuffer); +} +#endif + +auto FixedAllocator::get() -> bump_allocator& { + static FixedAllocator allocator; + return allocator._allocator; +} + +} diff --git a/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.hpp b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.hpp new file mode 100644 index 0000000000..09d8d85e44 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/memory/fixed-allocator.hpp @@ -0,0 +1,14 @@ +#pragma once + +namespace ares::Memory { + +struct FixedAllocator { + static auto get() -> bump_allocator&; + +private: + FixedAllocator(); + + bump_allocator _allocator; +}; + +} diff --git a/waterbox/ares64/ares/ares/ares/memory/memory.hpp 
b/waterbox/ares64/ares/ares/ares/memory/memory.hpp new file mode 100644 index 0000000000..9d2ea3c5cf --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/memory/memory.hpp @@ -0,0 +1,30 @@ +#pragma once + +namespace ares::Memory { + +inline auto mirror(u32 address, u32 size) -> u32 { + if(size == 0) return 0; + u32 base = 0; + u32 mask = 1 << 31; + while(address >= size) { + while(!(address & mask)) mask >>= 1; + address -= mask; + if(size > mask) { + size -= mask; + base += mask; + } + mask >>= 1; + } + return base + address; +} + +inline auto reduce(u32 address, u32 mask) -> u32 { + while(mask) { + u32 bits = (mask & -mask) - 1; + address = address >> 1 & ~bits | address & bits; + mask = (mask & mask - 1) >> 1; + } + return address; +} + +} diff --git a/waterbox/ares64/ares/ares/ares/memory/readable.hpp b/waterbox/ares64/ares/ares/ares/memory/readable.hpp new file mode 100644 index 0000000000..23dae3494d --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/memory/readable.hpp @@ -0,0 +1,73 @@ +#pragma once + +#include + +namespace ares::Memory { + +template +struct Readable { + ~Readable() { reset(); } + + auto reset() -> void { + delete[] self.data; + self.data = nullptr; + self.size = 0; + self.mask = 0; + } + + auto allocate(u32 size, T fill = (T)~0ull) -> void { + if(!size) return reset(); + delete[] self.data; + self.size = size; + self.mask = bit::round(self.size) - 1; + self.data = new T[self.mask + 1]; + memory::fill(self.data, self.mask + 1, fill); + } + + auto fill(T fill = ~0ull) -> void { + for(u32 address : range(self.size)) { + self.data[address] = fill; + } + } + + auto load(VFS::File fp) -> void { + if(!self.size) allocate(fp->size()); + fp->read({self.data, min(fp->size(), self.size * sizeof(T))}); + for(u32 address = self.size; address <= self.mask; address++) { + self.data[address] = self.data[mirror(address, self.size)]; + } + } + + auto save(VFS::File fp) -> void { + fp->write({self.data, min(fp->size(), self.size * sizeof(T))}); + } + + explicit 
operator bool() const { return (bool)self.data; } + auto data() const -> const T* { return self.data; } + auto size() const -> u32 { return self.size; } + auto mask() const -> u32 { return self.mask; } + + auto operator[](u32 address) const -> T { return self.data[address & self.mask]; } + auto read(u32 address) const -> T { return self.data[address & self.mask]; } + auto write(u32 address, T data) const -> void {} + auto program(u32 address, T data) const -> void { self.data[address & self.mask] = data; } + + auto begin() -> T* { return &self.data[0]; } + auto end() -> T* { return &self.data[self.size]; } + + auto begin() const -> const T* { return &self.data[0]; } + auto end() const -> const T* { return &self.data[self.size]; } + + auto serialize(serializer& s) -> void { + s(array_span{self.data, self.size}); + } + +//private: + struct { + T* data = nullptr; + u32 size = 0; + u32 mask = 0; + } self; +}; + +} diff --git a/waterbox/ares64/ares/ares/ares/memory/writable.hpp b/waterbox/ares64/ares/ares/ares/memory/writable.hpp new file mode 100644 index 0000000000..ebde9ebc70 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/memory/writable.hpp @@ -0,0 +1,75 @@ +#pragma once + +#include + +namespace ares::Memory { + +template +struct Writable { + ~Writable() { reset(); } + + auto reset() -> void { + delete[] self.data; + self.data = nullptr; + self.size = 0; + self.mask = 0; + } + + auto allocate(u32 size, T fill = (T)~0ull) -> void { + if(!size) return reset(); + delete[] self.data; + self.size = size; + self.mask = bit::round(self.size) - 1; + self.data = new T[self.mask + 1]; + memory::fill(self.data, self.mask + 1, fill); + } + + auto fill(T fill = ~0ull) -> void { + for(u32 address : range(self.size)) { + self.data[address] = fill; + } + } + + auto load(VFS::File fp) -> void { + if(!self.size) allocate(fp->size()); + fp->read({self.data, min(fp->size(), self.size * sizeof(T))}); + for(u32 address = self.size; address <= self.mask; address++) { + 
self.data[address] = self.data[mirror(address, self.size)]; + } + } + + auto save(VFS::File fp) -> void { + fp->write({self.data, min(fp->size(), self.size * sizeof(T))}); + } + + explicit operator bool() const { return (bool)self.data; } + auto data() -> T* { return self.data; } + auto data() const -> const T* { return self.data; } + auto size() const -> u32 { return self.size; } + auto mask() const -> u32 { return self.mask; } + + auto operator[](u32 address) -> T& { return self.data[address & self.mask]; } + auto operator[](u32 address) const -> T { return self.data[address & self.mask]; } + auto read(u32 address) const -> T { return self.data[address & self.mask]; } + auto write(u32 address, T data) -> void { self.data[address & self.mask] = data; } + auto program(u32 address, T data) -> void { self.data[address & self.mask] = data; } + + auto begin() -> T* { return &self.data[0]; } + auto end() -> T* { return &self.data[self.size]; } + + auto begin() const -> const T* { return &self.data[0]; } + auto end() const -> const T* { return &self.data[self.size]; } + + auto serialize(serializer& s) -> void { + s(array_span{self.data, self.size}); + } + +private: + struct { + T* data = nullptr; + u32 size = 0; + u32 mask = 0; + } self; +}; + +} diff --git a/waterbox/ares64/ares/ares/ares/node/attribute.hpp b/waterbox/ares64/ares/ares/ares/node/attribute.hpp new file mode 100644 index 0000000000..6a88d1d5c1 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/attribute.hpp @@ -0,0 +1,8 @@ +struct Attribute { + Attribute(const string& name, const any& value = {}) : name(name), value(value) {} + auto operator==(const Attribute& source) const -> bool { return name == source.name; } + auto operator< (const Attribute& source) const -> bool { return name < source.name; } + + string name; + any value; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/audio/audio.hpp b/waterbox/ares64/ares/ares/ares/node/audio/audio.hpp new file mode 100644 index 0000000000..4f34b18658 
--- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/audio/audio.hpp @@ -0,0 +1,4 @@ +struct Audio : Object { + DeclareClass(Audio, "audio") + using Object::Object; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/audio/stream.cpp b/waterbox/ares64/ares/ares/ares/node/audio/stream.cpp new file mode 100644 index 0000000000..88ad3bf702 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/audio/stream.cpp @@ -0,0 +1,131 @@ +auto Stream::setChannels(u32 channels) -> void { + _channels.reset(); + _channels.resize(channels); +} + +auto Stream::setFrequency(f64 frequency) -> void { + _frequency = frequency; + setResamplerFrequency(_resamplerFrequency); +} + +auto Stream::setResamplerFrequency(f64 resamplerFrequency) -> void { + _resamplerFrequency = resamplerFrequency; + + for(auto& channel : _channels) { + channel.nyquist.reset(); + channel.resampler.reset(_frequency, _resamplerFrequency); + } + + if(_frequency >= _resamplerFrequency * 2) { + //add a low-pass filter to prevent aliasing during resampling + f64 cutoffFrequency = min(25000.0, _resamplerFrequency / 2.0 - 2000.0); + for(auto& channel : _channels) { + u32 passes = 3; + for(u32 pass : range(passes)) { + DSP::IIR::Biquad filter; + f64 q = DSP::IIR::Biquad::butterworth(passes * 2, pass); + filter.reset(DSP::IIR::Biquad::Type::LowPass, cutoffFrequency, _frequency, q); + channel.nyquist.append(filter); + } + } + } +} + +auto Stream::setMuted(bool muted) -> void { + _muted = muted; +} + +auto Stream::resetFilters() -> void { + for(auto& channel : _channels) { + channel.filters.reset(); + } +} + +auto Stream::addLowPassFilter(f64 cutoffFrequency, u32 order, u32 passes) -> void { + for(auto& channel : _channels) { + for(u32 pass : range(passes)) { + if(order == 1) { + Filter filter{Filter::Mode::OnePole, Filter::Type::LowPass, Filter::Order::First}; + filter.onePole.reset(DSP::IIR::OnePole::Type::LowPass, cutoffFrequency, _frequency); + channel.filters.append(filter); + } + if(order == 2) { + Filter 
filter{Filter::Mode::Biquad, Filter::Type::LowPass, Filter::Order::Second}; + f64 q = DSP::IIR::Biquad::butterworth(passes * 2, pass); + filter.biquad.reset(DSP::IIR::Biquad::Type::LowPass, cutoffFrequency, _frequency, q); + channel.filters.append(filter); + } + } + } +} + +auto Stream::addHighPassFilter(f64 cutoffFrequency, u32 order, u32 passes) -> void { + for(auto& channel : _channels) { + for(u32 pass : range(passes)) { + if(order == 1) { + Filter filter{Filter::Mode::OnePole, Filter::Type::HighPass, Filter::Order::First}; + filter.onePole.reset(DSP::IIR::OnePole::Type::HighPass, cutoffFrequency, _frequency); + channel.filters.append(filter); + } + if(order == 2) { + Filter filter{Filter::Mode::Biquad, Filter::Type::HighPass, Filter::Order::Second}; + f64 q = DSP::IIR::Biquad::butterworth(passes * 2, pass); + filter.biquad.reset(DSP::IIR::Biquad::Type::HighPass, cutoffFrequency, _frequency, q); + channel.filters.append(filter); + } + } + } +} + +auto Stream::addLowShelfFilter(f64 cutoffFrequency, u32 order, f64 gain, f64 slope) -> void { + for(auto& channel : _channels) { + if(order == 2) { + Filter filter{Filter::Mode::Biquad, Filter::Type::LowShelf, Filter::Order::Second}; + f64 q = DSP::IIR::Biquad::shelf(gain, slope); + filter.biquad.reset(DSP::IIR::Biquad::Type::LowShelf, cutoffFrequency, _frequency, q); + channel.filters.append(filter); + } + } +} + +auto Stream::addHighShelfFilter(f64 cutoffFrequency, u32 order, f64 gain, f64 slope) -> void { + for(auto& channel : _channels) { + if(order == 2) { + Filter filter{Filter::Mode::Biquad, Filter::Type::HighShelf, Filter::Order::Second}; + f64 q = DSP::IIR::Biquad::shelf(gain, slope); + filter.biquad.reset(DSP::IIR::Biquad::Type::HighShelf, cutoffFrequency, _frequency, q); + channel.filters.append(filter); + } + } +} + +auto Stream::pending() const -> bool { + return _channels && _channels[0].resampler.pending(); +} + +auto Stream::read(f64 samples[]) -> u32 { + for(u32 c : range(_channels.size())) { + 
samples[c] = _channels[c].resampler.read() * !muted(); + } + return _channels.size(); +} + +auto Stream::write(const f64 samples[]) -> void { + for(u32 c : range(_channels.size())) { + f64 sample = samples[c] + 1e-25; //constant offset used to suppress denormals + for(auto& filter : _channels[c].filters) { + switch(filter.mode) { + case Filter::Mode::OnePole: sample = filter.onePole.process(sample); break; + case Filter::Mode::Biquad: sample = filter.biquad.process(sample); break; + } + } + for(auto& filter : _channels[c].nyquist) { + sample = filter.process(sample); + } + _channels[c].resampler.write(sample); + } + + //if there are samples pending, then alert the frontend to possibly process them. + //this will generally happen when every audio stream has pending samples to be mixed. + if(pending()) platform->audio(shared()); +} diff --git a/waterbox/ares64/ares/ares/ares/node/audio/stream.hpp b/waterbox/ares64/ares/ares/ares/node/audio/stream.hpp new file mode 100644 index 0000000000..366593b7fc --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/audio/stream.hpp @@ -0,0 +1,49 @@ +struct Stream : Audio { + DeclareClass(Stream, "audio.stream") + using Audio::Audio; + + auto channels() const -> u32 { return _channels.size(); } + auto frequency() const -> f64 { return _frequency; } + auto resamplerFrequency() const -> f64 { return _resamplerFrequency; } + auto muted() const -> bool { return _muted; } + + auto setChannels(u32 channels) -> void; + auto setFrequency(f64 frequency) -> void; + auto setResamplerFrequency(f64 resamplerFrequency) -> void; + auto setMuted(bool muted) -> void; + + auto resetFilters() -> void; + auto addLowPassFilter(f64 cutoffFrequency, u32 order, u32 passes = 1) -> void; + auto addHighPassFilter(f64 cutoffFrequency, u32 order, u32 passes = 1) -> void; + auto addLowShelfFilter(f64 cutoffFrequency, u32 order, f64 gain, f64 slope) -> void; + auto addHighShelfFilter(f64 cutoffFrequency, u32 order, f64 gain, f64 slope) -> void; + + auto 
pending() const -> bool; + auto read(f64 samples[]) -> u32; + auto write(const f64 samples[]) -> void; + + template + auto frame(P&&... p) -> void { + if(runAhead()) return; + f64 samples[sizeof...(p)] = {forward

(p)...}; + write(samples); + } + +protected: + struct Filter { + enum class Mode : u32 { OnePole, Biquad } mode; + enum class Type : u32 { None, LowPass, HighPass, LowShelf, HighShelf } type; + enum class Order : u32 { None, First, Second } order; + DSP::IIR::OnePole onePole; + DSP::IIR::Biquad biquad; + }; + struct Channel { + vector filters; + vector nyquist; + DSP::Resampler::Cubic resampler; + }; + vector _channels; + f64 _frequency = 48000.0; + f64 _resamplerFrequency = 48000.0; + bool _muted = false; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/class.hpp b/waterbox/ares64/ares/ares/ares/node/class.hpp new file mode 100644 index 0000000000..b171cdcf3c --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/class.hpp @@ -0,0 +1,34 @@ +//horrible implementation of run-time introspection: +//allow converting a unique class string to a derived Node type. + +struct Class { + struct Instance { + const string identifier; + const function create; + }; + + static auto classes() -> vector& { + static vector classes; + return classes; + } + + template static auto register() -> void { + if(!classes().find([&](auto instance) { return instance.identifier == T::identifier(); })) { + classes().append({T::identifier(), &T::create}); + } else { + throw; + } + } + + static auto create(string identifier) -> Node::Object { + if(auto index = classes().find([&](auto instance) { return instance.identifier == identifier; })) { + return classes()[*index].create(); + } + if(identifier == "Object") throw; //should never occur: detects unregistered classes + return create("Object"); + } + + template struct Register { + Register() { Class::register(); } + }; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/component/component.hpp b/waterbox/ares64/ares/ares/ares/node/component/component.hpp new file mode 100644 index 0000000000..edee5c3ff4 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/component/component.hpp @@ -0,0 +1,4 @@ +struct Component : Object { + 
DeclareClass(Component, "component"); + using Object::Object; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/component/real-time-clock.hpp b/waterbox/ares64/ares/ares/ares/node/component/real-time-clock.hpp new file mode 100644 index 0000000000..c9395f46eb --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/component/real-time-clock.hpp @@ -0,0 +1,30 @@ +struct RealTimeClock : Component { + DeclareClass(RealTimeClock, "component.real-time-clock") + using Component::Component; + + auto update() -> void { if(_update) return _update(); } + auto timestamp() const -> u64 { return _timestamp; } + + auto setUpdate(function update) -> void { _update = update; } + auto setTimestamp(u64 timestamp) -> void { _timestamp = timestamp; } + + auto synchronize(u64 timestamp = 0) -> void { + if(!timestamp) timestamp = chrono::timestamp(); + _timestamp = timestamp; + update(); + } + + auto serialize(string& output, string depth) -> void override { + Component::serialize(output, depth); + output.append(depth, " timestamp: ", _timestamp, "\n"); + } + + auto unserialize(Markup::Node node) -> void override { + Component::unserialize(node); + _timestamp = node["timestamp"].natural(); + } + +protected: + function _update; + u64 _timestamp; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/debugger.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/debugger.hpp new file mode 100644 index 0000000000..a5b5954a70 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/debugger/debugger.hpp @@ -0,0 +1,4 @@ +struct Debugger : Object { + DeclareClass(Debugger, "debugger") + using Object::Object; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/graphics.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/graphics.hpp new file mode 100644 index 0000000000..d3a46c7a82 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/debugger/graphics.hpp @@ -0,0 +1,26 @@ +struct Graphics : Debugger { + DeclareClass(Graphics, "debugger.graphics") + + Graphics(string name = {}) 
: Debugger(name) { + } + + auto width() const -> u32 { return _width; } + auto height() const -> u32 { return _height; } + auto capture() const -> vector { if(_capture) return _capture(); return {}; } + + auto setSize(u32 width, u32 height) -> void { _width = width, _height = height; } + auto setCapture(function ()> capture) -> void { _capture = capture; } + + auto serialize(string& output, string depth) -> void override { + Debugger::serialize(output, depth); + } + + auto unserialize(Markup::Node node) -> void override { + Debugger::unserialize(node); + } + +protected: + u32 _width = 0; + u32 _height = 0; + function ()> _capture; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/memory.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/memory.hpp new file mode 100644 index 0000000000..3b46a1094e --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/debugger/memory.hpp @@ -0,0 +1,27 @@ +struct Memory : Debugger { + DeclareClass(Memory, "debugger.memory") + + Memory(string name = {}) : Debugger(name) { + } + + auto size() const -> u32 { return _size; } + auto read(u32 address) const -> n8 { if(_read) return _read(address); return 0; } + auto write(u32 address, u8 data) const -> void { if(_write) return _write(address, data); } + + auto setSize(u32 size) -> void { _size = size; } + auto setRead(function read) -> void { _read = read; } + auto setWrite(function write) -> void { _write = write; } + + auto serialize(string& output, string depth) -> void override { + Debugger::serialize(output, depth); + } + + auto unserialize(Markup::Node node) -> void override { + Debugger::unserialize(node); + } + +protected: + u32 _size = 0; + function _read; + function _write; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/properties.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/properties.hpp new file mode 100644 index 0000000000..9c85950cde --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/debugger/properties.hpp @@ -0,0 +1,21 @@ +struct 
Properties : Debugger { + DeclareClass(Properties, "debugger.properties") + + Properties(string name = {}) : Debugger(name) { + } + + auto query() const -> string { if(_query) return _query(); return {}; } + + auto setQuery(function query) -> void { _query = query; } + + auto serialize(string& output, string depth) -> void override { + Debugger::serialize(output, depth); + } + + auto unserialize(Markup::Node node) -> void override { + Debugger::unserialize(node); + } + +protected: + function _query; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp new file mode 100644 index 0000000000..3f530bfc66 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp @@ -0,0 +1,125 @@ +struct Instruction : Tracer { + DeclareClass(Instruction, "debugger.tracer.instruction") + + Instruction(string name = {}, string component = {}) : Tracer(name, component) { + setMask(_mask); + setDepth(_depth); + } + + auto addressBits() const -> u32 { return _addressBits; } + auto addressMask() const -> u32 { return _addressMask; } + auto mask() const -> bool { return _mask; } + auto depth() const -> u32 { return _depth; } + + auto setAddressBits(u32 addressBits, u32 addressMask = 0) -> void { + _addressBits = addressBits; + _addressMask = addressMask; + } + + auto setMask(bool mask) -> void { + _mask = mask; + } + + auto setDepth(u32 depth) -> void { + _depth = depth; + _history.reset(); + _history.resize(depth); + for(auto& history : _history) history = ~0; + } + + auto address(u32 address) -> bool { + address &= (1ull << _addressBits) - 1; //mask upper bits of address + _address = address; + address >>= _addressMask; //clip unneeded alignment bits (to reduce _masks size) + + if(_mask && updateMasks()) { + if(_masks[address >> 3] & 1 << (address & 7)) return false; //do not trace twice + _masks[address >> 3] |= 1 << (address & 7); + } + + if(_depth) { + 
for(auto history : _history) { + if(_address == history) { + _omitted++; + return false; //do not trace again if recently traced + } + } + for(auto index : range(_depth - 1)) { + _history[index] = _history[index + 1]; + } + _history.last() = _address; + } + + return true; + } + + //mark an already-executed address as not executed yet for trace masking. + //call when writing to executable RAM to support self-modifying code. + auto invalidate(u32 address) -> void { + if(unlikely(_mask && updateMasks())) { + address &= (1ull << _addressBits) - 1; + address >>= _addressMask; + _masks[address >> 3] &= ~(1 << (address & 7)); + } + } + + auto notify(const string& instruction, const string& context, const string& extra = {}) -> void { + if(!enabled()) return; + + if(_omitted) { + PlatformLog({ + "[Omitted: ", _omitted, "]\n"} + ); + _omitted = 0; + } + + string output{ + _component, " ", + hex(_address, _addressBits + 3 >> 2), " ", + instruction, " ", + context, " ", + extra + }; + PlatformLog({output.strip(), "\n"}); + } + + auto serialize(string& output, string depth) -> void override { + Tracer::serialize(output, depth); + output.append(depth, " addressBits: ", _addressBits, "\n"); + output.append(depth, " addressMask: ", _addressMask, "\n"); + output.append(depth, " mask: ", _mask, "\n"); + output.append(depth, " depth: ", _depth, "\n"); + } + + auto unserialize(Markup::Node node) -> void override { + Tracer::unserialize(node); + _addressBits = node["addressBits"].natural(); + _addressMask = node["addressMask"].natural(); + _mask = node["mask"].boolean(); + _depth = node["depth"].natural(); + + setMask(_mask); + setDepth(_depth); + } + +protected: + auto updateMasks() -> bool { + auto size = 1ull << _addressBits >> _addressMask >> 3; + if(!_mask || !size) return _masks.reset(), false; + if(_masks.size() == size) return true; + _masks.reset(); + _masks.resize(size); + return true; + } + + u32 _addressBits = 32; + u32 _addressMask = 0; + bool _mask = false; + u32 _depth 
= 4; + +//unserialized: + n64 _address = 0; + n64 _omitted = 0; + vector _history; + vector _masks; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/notification.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/notification.hpp new file mode 100644 index 0000000000..a9f7331d38 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/notification.hpp @@ -0,0 +1,18 @@ +struct Notification : Tracer { + DeclareClass(Notification, "debugger.tracer.notification") + + Notification(string name = {}, string component = {}) : Tracer(name, component) { + } + + auto notify(const string& message = {}) -> void { + if(!enabled()) return; + + if(message) { + PlatformLog({_component, " ", _name, ": ", message, "\n"}); + } else { + PlatformLog({_component, " ", _name, "\n"}); + } + } + +protected: +}; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/tracer.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/tracer.hpp new file mode 100644 index 0000000000..82f38e201a --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/tracer.hpp @@ -0,0 +1,29 @@ +struct Tracer : Debugger { + DeclareClass(Tracer, "debugger.tracer") + + Tracer(string name = {}, string component = {}) : Debugger(name) { + _component = component; + } + + auto component() const -> string { return _component; } + auto enabled() const -> bool { return _enabled; } + + auto setComponent(string component) -> void { _component = component; } + auto setEnabled(bool enabled) -> void { _enabled = enabled; } + + auto serialize(string& output, string depth) -> void override { + Debugger::serialize(output, depth); + output.append(depth, " component: ", _component, "\n"); + output.append(depth, " enabled: ", _enabled, "\n"); + } + + auto unserialize(Markup::Node node) -> void override { + Debugger::unserialize(node); + _component = node["component"].string(); + _enabled = node["enabled"].boolean(); + } + +protected: + string _component; + bool 
_enabled = false; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/input/axis.hpp b/waterbox/ares64/ares/ares/ares/node/input/axis.hpp new file mode 100644 index 0000000000..866fc5eb4e --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/input/axis.hpp @@ -0,0 +1,12 @@ +struct Axis : Input { + DeclareClass(Axis, "input.axis") + using Input::Input; + + auto value() const -> s64 { return _value; } + auto setValue(s64 value) -> void { _value = value; } + +protected: + s64 _value = 0; + s64 _minimum = -32768; + s64 _maximum = +32767; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/input/button.hpp b/waterbox/ares64/ares/ares/ares/node/input/button.hpp new file mode 100644 index 0000000000..deb1a88d93 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/input/button.hpp @@ -0,0 +1,10 @@ +struct Button : Input { + DeclareClass(Button, "input.button") + using Input::Input; + + auto value() const -> bool { return _value; } + auto setValue(bool value) -> void { _value = value; } + +protected: + bool _value = 0; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/input/input.hpp b/waterbox/ares64/ares/ares/ares/node/input/input.hpp new file mode 100644 index 0000000000..04ef9f4126 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/input/input.hpp @@ -0,0 +1,4 @@ +struct Input : Object { + DeclareClass(Input, "input") + using Object::Object; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/input/rumble.hpp b/waterbox/ares64/ares/ares/ares/node/input/rumble.hpp new file mode 100644 index 0000000000..cad47ebd0a --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/input/rumble.hpp @@ -0,0 +1,10 @@ +struct Rumble : Input { + DeclareClass(Rumble, "input.rumble") + using Input::Input; + + auto enable() const -> bool { return _enable; } + auto setEnable(bool enable) -> void { _enable = enable; } + +protected: + bool _enable = 0; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/input/trigger.hpp b/waterbox/ares64/ares/ares/ares/node/input/trigger.hpp 
new file mode 100644 index 0000000000..6a092439bd --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/input/trigger.hpp @@ -0,0 +1,12 @@ +struct Trigger : Input { + DeclareClass(Trigger, "input.trigger") + using Input::Input; + + auto value() const -> s64 { return _value; } + auto setValue(s64 value) -> void { _value = value; } + +protected: + s64 _value = 0; + s64 _minimum = 0; + s64 _maximum = +32767; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/node.cpp b/waterbox/ares64/ares/ares/ares/node/node.cpp new file mode 100644 index 0000000000..c8b96ef7f0 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/node.cpp @@ -0,0 +1,9 @@ +namespace ares::Core { + namespace Video { + #include + #include + } + namespace Audio { + #include + } +} diff --git a/waterbox/ares64/ares/ares/ares/node/node.hpp b/waterbox/ares64/ares/ares/ares/node/node.hpp new file mode 100644 index 0000000000..e91ed880d4 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/node.hpp @@ -0,0 +1,185 @@ +namespace ares::Core { + struct Object; + struct System; + struct Peripheral; + struct Port; + namespace Component { + struct Component; + struct RealTimeClock; + } + namespace Video { + struct Video; + struct Sprite; + struct Screen; + } + namespace Audio { + struct Audio; + struct Stream; + } + namespace Input { + struct Input; + struct Button; + struct Axis; + struct Trigger; + struct Rumble; + } + namespace Setting { + struct Setting; + struct Boolean; + struct Natural; + struct Integer; + struct Real; + struct String; + } + namespace Debugger { + struct Debugger; + struct Memory; + struct Graphics; + struct Properties; + namespace Tracer { + struct Tracer; + struct Notification; + struct Instruction; + } + } +} + +namespace ares::Node { + using Object = shared_pointer; + using System = shared_pointer; + using Peripheral = shared_pointer; + using Port = shared_pointer; + namespace Component { + using Component = shared_pointer; + using RealTimeClock = shared_pointer; + } + namespace 
Video { + using Video = shared_pointer; + using Sprite = shared_pointer; + using Screen = shared_pointer; + } + namespace Audio { + using Audio = shared_pointer; + using Stream = shared_pointer; + } + namespace Input { + using Input = shared_pointer; + using Button = shared_pointer; + using Axis = shared_pointer; + using Trigger = shared_pointer; + using Rumble = shared_pointer; + } + namespace Setting { + using Setting = shared_pointer; + using Boolean = shared_pointer; + using Natural = shared_pointer; + using Integer = shared_pointer; + using Real = shared_pointer; + using String = shared_pointer; + } + namespace Debugger { + using Debugger = shared_pointer; + using Memory = shared_pointer; + using Graphics = shared_pointer; + using Properties = shared_pointer; + namespace Tracer { + using Tracer = shared_pointer; + using Notification = shared_pointer; + using Instruction = shared_pointer; + } + } +} + +namespace ares::Core { + // forward declarations + static auto PlatformAttach(Node::Object) -> void; + static auto PlatformDetach(Node::Object) -> void; + static auto PlatformLog(string_view) -> void; + + #include + #include + #include + #include + #include + #include + namespace Component { + #include + #include + } + namespace Video { + #include + #include + #include + } + namespace Audio { + #include + #include + } + namespace Input { + #include + #include + #include + #include + #include + } + namespace Setting { + #include + #include + #include + #include + #include + #include + } + namespace Debugger { + #include + #include + #include + #include + namespace Tracer { + #include + #include + #include + } + } +} + +namespace ares::Node { + static inline auto create(string identifier) -> Object { + return Core::Class::create(identifier); + } + + static inline auto serialize(Object node) -> string { + if(!node) return {}; + string result; + node->serialize(result, {}); + return result; + } + + static inline auto unserialize(string markup) -> Object { + auto 
document = BML::unserialize(markup); + if(!document) return {}; + auto node = Core::Class::create(document["node"].string()); + node->unserialize(document["node"]); + return node; + } + + static inline auto parent(Object child) -> Object { + if(!child || !child->parent()) return {}; + if(auto parent = child->parent().acquire()) return parent; + return {}; + } + + template + static inline auto find(Object from, string name) -> Object { + if(!from) return {}; + if(auto object = from->find(name)) return object; + return {}; + } + + template + static inline auto enumerate(Object from) -> vector { + vector objects; + if(from) from->enumerate(objects); + return objects; + } +} diff --git a/waterbox/ares64/ares/ares/ares/node/object.hpp b/waterbox/ares64/ares/ares/ares/node/object.hpp new file mode 100644 index 0000000000..5225a404c7 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/object.hpp @@ -0,0 +1,235 @@ +//identifier() is static, allowing template to access via T::identifier() +//identity() is virtual, allowing T* to access via T->identity() + +#define DeclareClass(Type, Name) \ + static auto identifier() -> string { return Name; } \ + static auto create() -> Node::Object { return new Type; } \ + auto identity() const -> string override { return Name; } \ + private: static inline Class::Register register; public: \ + +struct Object : shared_pointer_this { + static auto identifier() -> string { return "Object"; } + static auto create() -> Node::Object { return new Object; } + virtual auto identity() const -> string { return "Object"; } + private: static inline Class::Register register; public: +//DeclareClass(Object, "object") + + Object(string name = {}) : _name(name) {} + virtual ~Object() = default; + + auto name() const -> string { return _name; } + auto parent() const -> shared_pointer_weak { return _parent; } + + auto setName(string_view name) -> void { _name = name; } + + auto prepend(Node::Object node) -> Node::Object { + if(auto found = find(node)) 
return found; + _nodes.prepend(node); + node->_parent = shared(); + PlatformAttach(node); + return node; + } + + template + auto prepend(P&&... p) -> Node::Object { + using Type = typename T::type; + return prepend(shared_pointer::create(forward

(p)...)); + } + + auto append(Node::Object node) -> Node::Object { + if(auto found = find(node)) return found; + _nodes.append(node); + node->_parent = shared(); + PlatformAttach(node); + return node; + } + + template + auto append(P&&... p) -> Node::Object { + using Type = typename T::type; + return append(shared_pointer::create(forward

(p)...)); + } + + auto remove(Node::Object node) -> void { + if(auto index = _nodes.find(node)) { + PlatformDetach(node); + node->reset(); + node->_parent.reset(); + _nodes.remove(*index); + } + } + + auto reset() -> void { + for(auto& node : _nodes) { + PlatformDetach(node); + node->reset(); + node->_parent.reset(); + } + _nodes.reset(); + } + + template + auto cast() -> shared_pointer { + if(dynamic_cast(this)) return shared(); + return {}; + } + + template + auto is() -> bool { + return (bool)cast(); + } + + template + auto find() -> vector> { + vector> result; + if(dynamic_cast(this)) { + if(auto instance = shared()) result.append(instance); + } + for(auto& node : _nodes) result.append(node->find()); + return result; + } + + template + auto find(u32 index) -> shared_pointer { + auto result = find(); + if(index < result.size()) return result[index]; + return {}; + } + + auto find(Node::Object source) -> Node::Object { + if(!source) return {}; + for(auto& node : _nodes) { + if(node->identity() == source->identity() && node->_name == source->_name) return node; + } + return {}; + } + + template + auto find(string name) -> T { + using Type = typename T::type; + auto path = name.split("/"); + name = path.takeFirst(); + for(auto& node : _nodes) { + if(node->_name != name) continue; + if(path) return node->find(path.merge("/")); + if(node->identity() == Type::identifier()) return node; + } + return {}; + } + + template + auto scan(string name) -> T { + using Type = typename T::type; + for(auto& node : _nodes) { + if(node->identity() == Type::identifier() && node->_name == name) return node; + if(auto result = node->scan(name)) return result; + } + return {}; + } + + template + auto enumerate(vector& objects) -> void { + using Type = typename T::type; + if(auto instance = cast()) objects.append(instance); + for(auto& node : _nodes) node->enumerate(objects); + } + + auto pak() -> VFS::Pak { + return _pak; + } + + auto setPak(VFS::Pak pak) -> bool { + _pak = pak; + 
return (bool)_pak; + } + + template + auto attribute(const string& name) const -> T { + if(auto attribute = _attributes.find(name)) { + if(attribute->value.is()) return attribute->value.get(); + } + return {}; + } + + template + auto hasAttribute(const string& name) const -> bool { + if(auto attribute = _attributes.find(name)) { + if(attribute->value.is()) return true; + } + return false; + } + + template + auto setAttribute(const string& name, const U& value = {}) -> void { + if constexpr(is_same_v && !is_same_v) return setAttribute(name, string{value}); + if(auto attribute = _attributes.find(name)) { + if((const T&)value) attribute->value = (const T&)value; + else _attributes.remove(*attribute); + } else { + if((const T&)value) _attributes.insert({name, (const T&)value}); + } + } + + virtual auto load(Node::Object source) -> bool { + if(!source || identity() != source->identity() || _name != source->_name) return false; + _attributes = source->_attributes; + return true; + } + + auto save() -> string { + string markup; + serialize(markup, {}); + return markup; + } + + virtual auto serialize(string& output, string depth) -> void { + output.append(depth, "node: ", identity(), "\n"); + output.append(depth, " name: ", _name, "\n"); + for(auto& attribute : _attributes) { + if(!attribute.value.is()) continue; + output.append(depth, " attribute\n"); + output.append(depth, " name: ", attribute.name, "\n"); + output.append(depth, " value: ", attribute.value.get(), "\n"); + } + depth.append(" "); + for(auto& node : _nodes) { + node->serialize(output, depth); + } + } + + virtual auto unserialize(Markup::Node markup) -> void { + if(!markup) return; + _name = markup["name"].string(); + _attributes.reset(); + for(auto& attribute : markup.find("attribute")) { + _attributes.insert({attribute["name"].string(), attribute["value"].string()}); + } + for(auto& leaf : markup.find("node")) { + auto node = Class::create(leaf.string()); + append(node); + node->unserialize(leaf); + } + } 
+ + virtual auto copy(Node::Object source) -> void { + _attributes = source->_attributes; + for(auto& from : source->_nodes) { + for(auto& to : _nodes) { + if(from->identity() != to->identity()) continue; + if(from->name() != to->name()) continue; + to->copy(from); + break; + } + } + } + + auto begin() { return _nodes.begin(); } + auto end() { return _nodes.end(); } + +protected: + string _name; + VFS::Pak _pak; + set _attributes; + shared_pointer_weak _parent; + vector _nodes; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/peripheral.hpp b/waterbox/ares64/ares/ares/ares/node/peripheral.hpp new file mode 100644 index 0000000000..fd567643a3 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/peripheral.hpp @@ -0,0 +1,4 @@ +struct Peripheral : Object { + DeclareClass(Peripheral, "peripheral") + using Object::Object; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/port.hpp b/waterbox/ares64/ares/ares/ares/node/port.hpp new file mode 100644 index 0000000000..ad34168617 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/port.hpp @@ -0,0 +1,74 @@ +struct Port : Object { + DeclareClass(Port, "port") + using Object::Object; + + auto type() const -> string { return _type; } + auto family() const -> string { return _family; } + auto hotSwappable() const -> bool { return _hotSwappable; } + auto supported() const -> vector { return _supported; } + + auto setAllocate(function allocate) -> void { _allocate = allocate; } + auto setConnect(function connect) -> void { _connect = connect; } + auto setDisconnect(function disconnect) -> void { _disconnect = disconnect; } + auto setType(string type) -> void { _type = type; } + auto setFamily(string family) -> void { _family = family; } + auto setHotSwappable(bool hotSwappable) -> void { _hotSwappable = hotSwappable; } + auto setSupported(vector supported) -> void { _supported = supported; } + + auto connected() -> Node::Peripheral { + return find(0); + } + + auto allocate(string name = {}) -> Node::Peripheral { + 
disconnect(); + if(_allocate) return _allocate(name); + return {}; + } + + auto connect() -> void { + if(_connect) _connect(); + } + + auto disconnect() -> void { + if(auto peripheral = connected()) { + if(_disconnect) _disconnect(); + remove(peripheral); + } + } + + auto serialize(string& output, string depth) -> void override { + Object::serialize(output, depth); + output.append(depth, " type: ", _type, "\n"); + output.append(depth, " family: ", _family, "\n"); + output.append(depth, " hotSwappable: ", _hotSwappable, "\n"); + } + + auto unserialize(Markup::Node node) -> void override { + Object::unserialize(node); + _type = node["type"].string(); + _family = node["family"].string(); + _hotSwappable = node["hotSwappable"].boolean(); + } + + auto copy(Node::Object object) -> void override { + if(auto source = object->cast()) { + Object::copy(source); + if(auto peripheral = source->find(0)) { + if(auto node = allocate(peripheral->name())) { + node->copy(peripheral); + connect(); + node->copy(peripheral); + } + } + } + } + +protected: + function _allocate; + function _connect; + function _disconnect; + string _type; + string _family; + bool _hotSwappable = false; + vector _supported; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/setting/boolean.hpp b/waterbox/ares64/ares/ares/ares/node/setting/boolean.hpp new file mode 100644 index 0000000000..0f0aa3dfc8 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/setting/boolean.hpp @@ -0,0 +1,36 @@ +struct Boolean : Setting { + DeclareClass(Boolean, "setting.boolean") + + Boolean(string name = {}, bool value = {}, function modify = {}) : Setting(name) { + _currentValue = value; + _latchedValue = value; + _modify = modify; + } + + auto modify(bool value) const -> void { if(_modify) return _modify(value); } + auto value() const -> bool { return _currentValue; } + auto latch() const -> bool { return _latchedValue; } + + auto setModify(function modify) { _modify = modify; } + + auto setValue(bool value) -> void { + 
_currentValue = value; + if(_dynamic) setLatch(); + } + + auto setLatch() -> void override { + if(_latchedValue == _currentValue) return; + _latchedValue = _currentValue; + modify(_latchedValue); + } + + auto readValue() const -> string override { return value(); } + auto readLatch() const -> string override { return latch(); } + auto readAllowedValues() const -> vector override { return {"false", "true"}; } + auto writeValue(string value) -> void override { setValue(value.boolean()); } + +protected: + function _modify; + bool _currentValue = {}; + bool _latchedValue = {}; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/setting/integer.hpp b/waterbox/ares64/ares/ares/ares/node/setting/integer.hpp new file mode 100644 index 0000000000..979a4e6385 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/setting/integer.hpp @@ -0,0 +1,47 @@ +struct Integer : Setting { + DeclareClass(Integer, "setting.integer") + + Integer(string name = {}, s64 value = {}, function modify = {}) : Setting(name) { + _currentValue = value; + _latchedValue = value; + _modify = modify; + } + + auto modify(s64 value) const -> void { if(_modify) return _modify(value); } + auto value() const -> s64 { return _currentValue; } + auto latch() const -> s64 { return _latchedValue; } + + auto setModify(function modify) { _modify = modify; } + + auto setValue(s64 value) -> void { + if(_allowedValues && !_allowedValues.find(value)) return; + _currentValue = value; + if(_dynamic) setLatch(); + } + + auto setLatch() -> void override { + if(_latchedValue == _currentValue) return; + _latchedValue = _currentValue; + modify(_latchedValue); + } + + auto setAllowedValues(vector allowedValues) -> void { + _allowedValues = allowedValues; + if(_allowedValues && !_allowedValues.find(_currentValue)) setValue(_allowedValues.first()); + } + + auto readValue() const -> string override { return value(); } + auto readLatch() const -> string override { return latch(); } + auto readAllowedValues() const -> vector 
override { + vector values; + for(auto value : _allowedValues) values.append(value); + return values; + } + auto writeValue(string value) -> void override { setValue(value.integer()); } + +protected: + function _modify; + s64 _currentValue = {}; + s64 _latchedValue = {}; + vector _allowedValues; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/setting/natural.hpp b/waterbox/ares64/ares/ares/ares/node/setting/natural.hpp new file mode 100644 index 0000000000..d4645e30f5 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/setting/natural.hpp @@ -0,0 +1,47 @@ +struct Natural : Setting { + DeclareClass(Natural, "setting.natural") + + Natural(string name = {}, u64 value = {}, function modify = {}) : Setting(name) { + _currentValue = value; + _latchedValue = value; + _modify = modify; + } + + auto modify(u64 value) const -> void { if(_modify) return _modify(value); } + auto value() const -> u64 { return _currentValue; } + auto latch() const -> u64 { return _latchedValue; } + + auto setModify(function modify) { _modify = modify; } + + auto setValue(u64 value) -> void { + if(_allowedValues && !_allowedValues.find(value)) return; + _currentValue = value; + if(_dynamic) setLatch(); + } + + auto setLatch() -> void override { + if(_latchedValue == _currentValue) return; + _latchedValue = _currentValue; + modify(_latchedValue); + } + + auto setAllowedValues(vector allowedValues) -> void { + _allowedValues = allowedValues; + if(_allowedValues && !_allowedValues.find(_currentValue)) setValue(_allowedValues.first()); + } + + auto readValue() const -> string override { return value(); } + auto readLatch() const -> string override { return latch(); } + auto readAllowedValues() const -> vector override { + vector values; + for(auto value : _allowedValues) values.append(value); + return values; + } + auto writeValue(string value) -> void override { setValue(value.natural()); } + +protected: + function _modify; + u64 _currentValue = {}; + u64 _latchedValue = {}; + vector 
_allowedValues; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/setting/real.hpp b/waterbox/ares64/ares/ares/ares/node/setting/real.hpp new file mode 100644 index 0000000000..65e4e4e3a4 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/setting/real.hpp @@ -0,0 +1,47 @@ +struct Real : Setting { + DeclareClass(Real, "setting.real") + + Real(string name = {}, f64 value = {}, function modify = {}) : Setting(name) { + _currentValue = value; + _latchedValue = value; + _modify = modify; + } + + auto modify(f64 value) const -> void { if(_modify) return _modify(value); } + auto value() const -> f64 { return _currentValue; } + auto latch() const -> f64 { return _latchedValue; } + + auto setModify(function modify) { _modify = modify; } + + auto setValue(f64 value) -> void { + if(_allowedValues && !_allowedValues.find(value)) return; + _currentValue = value; + if(_dynamic) setLatch(); + } + + auto setLatch() -> void override { + if(_latchedValue == _currentValue) return; + _latchedValue = _currentValue; + modify(_latchedValue); + } + + auto setAllowedValues(vector allowedValues) -> void { + _allowedValues = allowedValues; + if(_allowedValues && !_allowedValues.find(_currentValue)) setValue(_allowedValues.first()); + } + + auto readValue() const -> string override { return value(); } + auto readLatch() const -> string override { return latch(); } + auto readAllowedValues() const -> vector override { + vector values; + for(auto value : _allowedValues) values.append(value); + return values; + } + auto writeValue(string value) -> void override { setValue(value.real()); } + +protected: + function _modify; + f64 _currentValue = {}; + f64 _latchedValue = {}; + vector _allowedValues; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/setting/setting.hpp b/waterbox/ares64/ares/ares/ares/node/setting/setting.hpp new file mode 100644 index 0000000000..4e0ae3e935 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/setting/setting.hpp @@ -0,0 +1,46 @@ +struct Setting : 
Object { + DeclareClass(Setting, "setting") + using Object::Object; + + auto dynamic() const -> bool { return _dynamic; } + + auto setDynamic(bool dynamic) -> void { + _dynamic = dynamic; + } + + virtual auto setLatch() -> void {} + + virtual auto readValue() const -> string { return {}; } + virtual auto readLatch() const -> string { return {}; } + virtual auto readAllowedValues() const -> vector { return {}; } + virtual auto writeValue(string value) -> void {} + + auto load(Node::Object source) -> bool override { + if(!Object::load(source)) return false; + if(auto setting = source->cast>()) writeValue(setting->readValue()); + return true; + } + + auto copy(Node::Object object) -> void override { + if(auto source = object->cast()) { + Object::copy(source); + writeValue(source->readValue()); + setLatch(); + } + } + + auto serialize(string& output, string depth) -> void override { + Object::serialize(output, depth); + output.append(depth, " dynamic: ", _dynamic, "\n"); + output.append(depth, " value: ", readValue(), "\n"); + } + + auto unserialize(Markup::Node node) -> void override { + Object::unserialize(node); + _dynamic = node["dynamic"].boolean(); + writeValue(node["value"].string()); + } + +protected: + bool _dynamic = false; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/setting/string.hpp b/waterbox/ares64/ares/ares/ares/node/setting/string.hpp new file mode 100644 index 0000000000..71278a728f --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/setting/string.hpp @@ -0,0 +1,43 @@ +struct String : Setting { + DeclareClass(String, "setting.string") + + String(string name = {}, string value = {}, function modify = {}) : Setting(name) { + _currentValue = value; + _latchedValue = value; + _modify = modify; + } + + auto modify(string value) const -> void { if(_modify) return _modify(value); } + auto value() const -> string { return _currentValue; } + auto latch() const -> string { return _latchedValue; } + + auto setModify(function modify) { _modify = 
modify; } + + auto setValue(string value) -> void { + if(_allowedValues && !_allowedValues.find(value)) return; + _currentValue = value; + if(_dynamic) setLatch(); + } + + auto setLatch() -> void override { + if(_latchedValue == _currentValue) return; + _latchedValue = _currentValue; + modify(_latchedValue); + } + + auto setAllowedValues(vector allowedValues) -> void { + _allowedValues = allowedValues; + if(_allowedValues && !_allowedValues.find(_currentValue)) setValue(_allowedValues.first()); + } + + auto readValue() const -> string override { return value(); } + auto readLatch() const -> string override { return latch(); } + auto readAllowedValues() const -> vector override { return _allowedValues; } + auto writeValue(string value) -> void override { setValue(value); } + +protected: + function _modify; + string _currentValue = {}; + string _latchedValue = {}; + vector _allowedValues; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/system.hpp b/waterbox/ares64/ares/ares/ares/node/system.hpp new file mode 100644 index 0000000000..a32cffc143 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/system.hpp @@ -0,0 +1,29 @@ +struct System : Object { + DeclareClass(System, "system") + using Object::Object; + + auto game() -> string { if(_game) return _game(); return {}; } + auto run() -> void { if(_run) return _run(); } + auto power(bool reset = false) -> void { if(_power) return _power(reset); } + auto save() -> void { if(_save) return _save(); } + auto unload() -> void { if(_unload) return _unload(); } + auto serialize(bool synchronize = true) -> serializer { if(_serialize) return _serialize(synchronize); return {}; } + auto unserialize(serializer& s) -> bool { if(_unserialize) return _unserialize(s); return false; } + + auto setGame(function game) -> void { _game = game; } + auto setRun(function run) -> void { _run = run; } + auto setPower(function power) -> void { _power = power; } + auto setSave(function save) -> void { _save = save; } + auto 
setUnload(function unload) -> void { _unload = unload; } + auto setSerialize(function serialize) -> void { _serialize = serialize; } + auto setUnserialize(function unserialize) -> void { _unserialize = unserialize; } + +protected: + function _game; + function _run; + function _power; + function _save; + function _unload; + function _serialize; + function _unserialize; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/video/screen.cpp b/waterbox/ares64/ares/ares/ares/node/video/screen.cpp new file mode 100644 index 0000000000..19fe96a044 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/video/screen.cpp @@ -0,0 +1,388 @@ +Screen::Screen(string name, u32 width, u32 height) : Video(name) { + _canvasWidth = width; + _canvasHeight = height; + + if(width && height) { + _inputA = alloc_invisible(width * height); + _inputB = alloc_invisible(width * height); + _output = alloc_invisible(width * height); + _rotate = alloc_invisible(width * height); + + if constexpr(ares::Video::Threaded) { + _thread = nall::thread::create({&Screen::main, this}); + } + } +} + +Screen::~Screen() { + if constexpr(ares::Video::Threaded) { + if(_canvasWidth && _canvasHeight) { + _kill = true; + _thread.join(); + } + } +} + +auto Screen::main(uintptr_t) -> void { + while(!_kill) { + usleep(1); + if(_frame) { + refresh(); + _frame = false; + } + } +} + +auto Screen::quit() -> void { + _kill = true; + _thread.join(); + _sprites.reset(); +} + +auto Screen::power() -> void { + lock_guard lock(_mutex); + memory::fill(_inputA.data(), _canvasWidth * _canvasHeight, _fillColor); + memory::fill(_inputB.data(), _canvasWidth * _canvasHeight, _fillColor); + memory::fill(_output.data(), _canvasWidth * _canvasHeight, _fillColor); + memory::fill(_rotate.data(), _canvasWidth * _canvasHeight, _fillColor); +} + +auto Screen::pixels(bool frame) -> array_span { + if(frame == 0) return {_inputA.data(), _canvasWidth * _canvasHeight}; + if(frame == 1) return {_inputB.data(), _canvasWidth * _canvasHeight}; + 
return {}; +} + +auto Screen::resetPalette() -> void { + lock_guard lock(_mutex); + _palette.reset(); +} + +auto Screen::resetSprites() -> void { + lock_guard lock(_mutex); + _sprites.reset(); +} + +auto Screen::setRefresh(function refresh) -> void { + lock_guard lock(_mutex); + _refresh = refresh; +} + +auto Screen::setViewport(u32 x, u32 y, u32 width, u32 height) -> void { + lock_guard lock(_mutex); + _viewportX = x; + _viewportY = y; + _viewportWidth = width; + _viewportHeight = height; +} + +auto Screen::setSize(u32 width, u32 height) -> void { + lock_guard lock(_mutex); + _width = width; + _height = height; +} + +auto Screen::setScale(f64 scaleX, f64 scaleY) -> void { + lock_guard lock(_mutex); + _scaleX = scaleX; + _scaleY = scaleY; +} + +auto Screen::setAspect(f64 aspectX, f64 aspectY) -> void { + lock_guard lock(_mutex); + _aspectX = aspectX; + _aspectY = aspectY; +} + +auto Screen::setSaturation(f64 saturation) -> void { + lock_guard lock(_mutex); + _saturation = saturation; + _palette.reset(); +} + +auto Screen::setGamma(f64 gamma) -> void { + lock_guard lock(_mutex); + _gamma = gamma; + _palette.reset(); +} + +auto Screen::setLuminance(f64 luminance) -> void { + lock_guard lock(_mutex); + _luminance = luminance; + _palette.reset(); +} + +auto Screen::setFillColor(u32 fillColor) -> void { + lock_guard lock(_mutex); + _fillColor = fillColor; +} + +auto Screen::setColorBleed(bool colorBleed) -> void { + lock_guard lock(_mutex); + _colorBleed = colorBleed; +} + +auto Screen::setInterframeBlending(bool interframeBlending) -> void { + lock_guard lock(_mutex); + _interframeBlending = interframeBlending; +} + +auto Screen::setRotation(u32 rotation) -> void { + lock_guard lock(_mutex); + _rotation = rotation; +} + +auto Screen::setProgressive(bool progressiveDouble) -> void { + lock_guard lock(_mutex); + _interlace = false; + _progressive = true; + _progressiveDouble = progressiveDouble; +} + +auto Screen::setInterlace(bool interlaceField) -> void { + lock_guard 
lock(_mutex); + _progressive = false; + _interlace = true; + _interlaceField = interlaceField; +} + +auto Screen::attach(Node::Video::Sprite sprite) -> void { + lock_guard lock(_mutex); + if(_sprites.find(sprite)) return; + _sprites.append(sprite); +} + +auto Screen::detach(Node::Video::Sprite sprite) -> void { + lock_guard lock(_mutex); + if(!_sprites.find(sprite)) return; + _sprites.removeByValue(sprite); +} + +auto Screen::colors(u32 colors, function color) -> void { + lock_guard lock(_mutex); + _colors = colors; + _color = color; + _palette.reset(); +} + +auto Screen::frame() -> void { + if(runAhead()) return; + while(_frame) spinloop(); + + lock_guard lock(_mutex); + _inputA.swap(_inputB); + _frame = true; + if constexpr(!ares::Video::Threaded) { + refresh(); + _frame = false; + } +} + +auto Screen::refresh() -> void { + lock_guard lock(_mutex); + if(runAhead()) return; + + refreshPalette(); + if(_refresh) _refresh(); + + auto viewX = _viewportX; + auto viewY = _viewportY; + auto viewWidth = _viewportWidth; + auto viewHeight = _viewportHeight; + + auto pitch = _canvasWidth; + auto width = _canvasWidth; + auto height = _canvasHeight; + auto input = _inputB.data(); + auto output = _output.data(); + + for(u32 y : range(height)) { + auto source = input + y * pitch; + auto target = output + y * width; + + if(_interlace) { + if((_interlaceField & 1) == (y & 1)) { + for(u32 x : range(width)) { + auto color = _palette[*source++]; + *target++ = color; + } + } + } else if(_progressive && _progressiveDouble) { + source = input + (y & ~1) * pitch; + for(u32 x : range(width)) { + auto color = _palette[*source++]; + *target++ = color; + } + } else if(_interframeBlending) { + n32 mask = 1 << 24 | 1 << 16 | 1 << 8 | 1 << 0; + for(u32 x : range(width)) { + auto a = *target; + auto b = _palette[*source++]; + *target++ = (a + b - ((a ^ b) & mask)) >> 1; + } + } else { + for(u32 x : range(width)) { + auto color = _palette[*source++]; + *target++ = color; + } + } + } + + 
if(_colorBleed) { + n32 mask = 1 << 24 | 1 << 16 | 1 << 8 | 1 << 0; + for(u32 y : range(height)) { + auto target = output + y * width; + for(u32 x : range(width)) { + auto a = target[x]; + auto b = target[x + (x != width - 1)]; + target[x] = (a + b - ((a ^ b) & mask)) >> 1; + } + } + } + + for(auto& sprite : _sprites) { + if(!sprite->visible()) continue; + + n32 alpha = 255u << 24; + for(int y : range(sprite->height())) { + s32 pixelY = sprite->y() + y; + if(pixelY < 0 || pixelY >= height) continue; + + auto source = sprite->image().data() + y * sprite->width(); + auto target = &output[pixelY * width]; + for(s32 x : range(sprite->width())) { + s32 pixelX = sprite->x() + x; + if(pixelX < 0 || pixelX >= width) continue; + + auto pixel = source[x]; + if(pixel >> 24) target[pixelX] = alpha | pixel; + } + } + } + + if(_rotation == 90) { + //rotate left + for(u32 y : range(height)) { + auto source = output + y * width; + for(u32 x : range(width)) { + auto target = _rotate.data() + (width - 1 - x) * height + y; + *target = *source++; + } + } + output = _rotate.data(); + swap(width, height); + swap(viewWidth, viewHeight); + } + + if(_rotation == 180) { + //rotate upside down + for(u32 y : range(height)) { + auto source = output + y * width; + for(u32 x : range(width)) { + auto target = _rotate.data() + (height - 1 - y) * width + (width - 1 - x); + *target = *source++; + } + } + output = _rotate.data(); + } + + if(_rotation == 270) { + //rotate right + for(u32 y : range(height)) { + auto source = output + y * width; + for(u32 x : range(width)) { + auto target = _rotate.data() + x * height + (height - 1 - y); + *target = *source++; + } + } + output = _rotate.data(); + swap(width, height); + swap(viewWidth, viewHeight); + } + + platform->video(shared(), output + viewX + viewY * width, width * sizeof(u32), viewWidth, viewHeight); + memory::fill(_inputB.data(), width * height, _fillColor); +} + +auto Screen::refreshPalette() -> void { + lock_guard lock(_mutex); + if(_palette) 
return; + + //generate the color lookup palettes to convert native colors to ARGB8888 + _palette = new u32[_colors]; + for(u32 index : range(_colors)) { + n64 color = _color(index); + n16 b = color.bit( 0,15); + n16 g = color.bit(16,31); + n16 r = color.bit(32,47); + n16 a = 65535; + + if(_saturation != 1.0) { + n16 grayscale = uclamp<16>((r + g + b) / 3); + f64 inverse = max(0.0, 1.0 - _saturation); + r = uclamp<16>(r * _saturation + grayscale * inverse); + g = uclamp<16>(g * _saturation + grayscale * inverse); + b = uclamp<16>(b * _saturation + grayscale * inverse); + } + + if(_gamma != 1.0) { + f64 reciprocal = 1.0 / 32767.0; + r = r > 32767 ? r : n16(32767 * pow(r * reciprocal, _gamma)); + g = g > 32767 ? g : n16(32767 * pow(g * reciprocal, _gamma)); + b = b > 32767 ? b : n16(32767 * pow(b * reciprocal, _gamma)); + } + + if(_luminance != 1.0) { + r = uclamp<16>(r * _luminance); + g = uclamp<16>(g * _luminance); + b = uclamp<16>(b * _luminance); + } + + a >>= 8; + r >>= 8; + g >>= 8; + b >>= 8; + + _palette[index] = a << 24 | r << 16 | g << 8 | b << 0; + } +} + +auto Screen::serialize(string& output, string depth) -> void { + Video::serialize(output, depth); + output.append(depth, " width: ", _width, "\n"); + output.append(depth, " height: ", _height, "\n"); + output.append(depth, " scaleX: ", _scaleX, "\n"); + output.append(depth, " scaleY: ", _scaleY, "\n"); + output.append(depth, " aspectX: ", _aspectX, "\n"); + output.append(depth, " aspectY: ", _aspectY, "\n"); + output.append(depth, " colors: ", _colors, "\n"); + output.append(depth, " saturation: ", _saturation, "\n"); + output.append(depth, " gamma: ", _gamma, "\n"); + output.append(depth, " luminance: ", _luminance, "\n"); + output.append(depth, " fillColor: ", _fillColor, "\n"); + output.append(depth, " colorBleed: ", _colorBleed, "\n"); + output.append(depth, " interlace: ", _interlace, "\n"); + output.append(depth, " interframeBlending: ", _interframeBlending, "\n"); + output.append(depth, " 
rotation: ", _rotation, "\n"); +} + +auto Screen::unserialize(Markup::Node node) -> void { + Video::unserialize(node); + _width = node["width"].natural(); + _height = node["height"].natural(); + _scaleX = node["scaleX"].real(); + _scaleY = node["scaleY"].real(); + _aspectX = node["aspectX"].real(); + _aspectY = node["aspectY"].real(); + _colors = node["colors"].natural(); + _saturation = node["saturation"].real(); + _gamma = node["gamma"].real(); + _luminance = node["luminance"].real(); + _fillColor = node["fillColor"].natural(); + _colorBleed = node["colorBleed"].boolean(); + _interlace = node["interlace"].natural(); + _interframeBlending = node["interframeBlending"].boolean(); + _rotation = node["rotation"].natural(); + resetPalette(); + resetSprites(); +} diff --git a/waterbox/ares64/ares/ares/ares/node/video/screen.hpp b/waterbox/ares64/ares/ares/ares/node/video/screen.hpp new file mode 100644 index 0000000000..67bc39b4cf --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/video/screen.hpp @@ -0,0 +1,106 @@ +struct Screen : Video { + DeclareClass(Screen, "video.screen") + using Video::Video; + + Screen(string name = {}, u32 width = 0, u32 height = 0); + ~Screen(); + auto main(uintptr_t) -> void; + auto quit() -> void; + auto power() -> void; + + auto canvasWidth() const -> u32 { return _canvasWidth; } + auto canvasHeight() const -> u32 { return _canvasHeight; } + auto width() const -> u32 { return _width; } + auto height() const -> u32 { return _height; } + auto scaleX() const -> f64 { return _scaleX; } + auto scaleY() const -> f64 { return _scaleY; } + auto aspectX() const -> f64 { return _aspectX; } + auto aspectY() const -> f64 { return _aspectY; } + auto colors() const -> u32 { return _colors; } + auto pixels(bool frame = 0) -> array_span; + + auto saturation() const -> double { return _saturation; } + auto gamma() const -> double { return _gamma; } + auto luminance() const -> double { return _luminance; } + + auto fillColor() const -> u32 { return 
_fillColor; } + auto colorBleed() const -> bool { return _colorBleed; } + auto interframeBlending() const -> bool { return _interframeBlending; } + auto rotation() const -> u32 { return _rotation; } + + auto resetPalette() -> void; + auto resetSprites() -> void; + + auto setRefresh(function refresh) -> void; + auto setViewport(u32 x, u32 y, u32 width, u32 height) -> void; + + auto setSize(u32 width, u32 height) -> void; + auto setScale(f64 scaleX, f64 scaleY) -> void; + auto setAspect(f64 aspectX, f64 aspectY) -> void; + + auto setSaturation(f64 saturation) -> void; + auto setGamma(f64 gamma) -> void; + auto setLuminance(f64 luminance) -> void; + + auto setFillColor(u32 fillColor) -> void; + auto setColorBleed(bool colorBleed) -> void; + auto setInterframeBlending(bool interframeBlending) -> void; + auto setRotation(u32 rotation) -> void; + + auto setProgressive(bool progressiveDouble = false) -> void; + auto setInterlace(bool interlaceField) -> void; + + auto attach(Node::Video::Sprite) -> void; + auto detach(Node::Video::Sprite) -> void; + + auto colors(u32 colors, function color) -> void; + auto frame() -> void; + auto refresh() -> void; + + auto serialize(string& output, string depth) -> void override; + auto unserialize(Markup::Node node) -> void override; + +private: + auto refreshPalette() -> void; + +protected: + u32 _canvasWidth = 0; + u32 _canvasHeight = 0; + u32 _width = 0; + u32 _height = 0; + f64 _scaleX = 1.0; + f64 _scaleY = 1.0; + f64 _aspectX = 1.0; + f64 _aspectY = 1.0; + u32 _colors = 0; + f64 _saturation = 1.0; + f64 _gamma = 1.0; + f64 _luminance = 1.0; + u32 _fillColor = 0; + bool _colorBleed = false; + bool _interframeBlending = false; + u32 _rotation = 0; //counter-clockwise (90 = left, 270 = right) + + function _color; + unique_pointer _inputA; + unique_pointer _inputB; + unique_pointer _output; + unique_pointer _rotate; + unique_pointer _palette; + vector _sprites; + +//unserialized: + nall::thread _thread; + recursive_mutex _mutex; + 
atomic _kill = false; + atomic _frame = false; + function _refresh; + bool _progressive = false; + bool _progressiveDouble = false; + bool _interlace = false; + bool _interlaceField = false; + u32 _viewportX = 0; + u32 _viewportY = 0; + u32 _viewportWidth = 0; + u32 _viewportHeight = 0; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/video/sprite.cpp b/waterbox/ares64/ares/ares/ares/node/video/sprite.cpp new file mode 100644 index 0000000000..2b455b2a7c --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/video/sprite.cpp @@ -0,0 +1,26 @@ +auto Sprite::setVisible(bool visible) -> void { + _visible = visible; +} + +auto Sprite::setPosition(u32 x, u32 y) -> void { + _x = x; + _y = y; +} + +auto Sprite::setImage(nall::image image, bool invert) -> void { + _width = image.width(); + _height = image.height(); + _pixels = new u32[_width * _height]; + image.transform(0, 32, 255u << 24, 255u << 16, 255u << 8, 255u << 0); + memory::copy(_pixels.data(), _width * _height * sizeof(u32), image.data(), image.size()); + if(!invert) return; + + for(u32 y : range(_height)) { + auto data = _pixels.data() + y * _width; + for(u32 x : range(_width)) { + auto pixel = data[x]; + pixel ^= 0xffffff; + data[x] = pixel; + } + } +} diff --git a/waterbox/ares64/ares/ares/ares/node/video/sprite.hpp b/waterbox/ares64/ares/ares/ares/node/video/sprite.hpp new file mode 100644 index 0000000000..7a3877c4f1 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/video/sprite.hpp @@ -0,0 +1,23 @@ +struct Sprite : Video { + DeclareClass(Sprite, "video.sprite") + using Video::Video; + + auto visible() const -> bool { return _visible; } + auto x() const -> u32 { return _x; } + auto y() const -> u32 { return _y; } + auto width() const -> u32 { return _width; } + auto height() const -> u32 { return _height; } + auto image() const -> array_view { return {_pixels.data(), _width * _height}; } + + auto setVisible(bool visible) -> void; + auto setPosition(u32 x, u32 y) -> void; + auto 
setImage(nall::image, bool invert = false) -> void; + +protected: + bool _visible = false; + u32 _x = 0; + u32 _y = 0; + u32 _width = 0; + u32 _height = 0; + unique_pointer _pixels; +}; diff --git a/waterbox/ares64/ares/ares/ares/node/video/video.hpp b/waterbox/ares64/ares/ares/ares/node/video/video.hpp new file mode 100644 index 0000000000..db1fa9f695 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/node/video/video.hpp @@ -0,0 +1,4 @@ +struct Video : Object { + DeclareClass(Video, "video"); + using Object::Object; +}; diff --git a/waterbox/ares64/ares/ares/ares/platform.hpp b/waterbox/ares64/ares/ares/ares/platform.hpp new file mode 100644 index 0000000000..92114345b7 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/platform.hpp @@ -0,0 +1,33 @@ +#pragma once + +namespace ares { + +enum class Event : u32 { + None, + Step, + Frame, + Power, + Synchronize, +}; + +struct Platform { + virtual auto attach(Node::Object) -> void {} + virtual auto detach(Node::Object) -> void {} + virtual auto pak(Node::Object) -> shared_pointer { return {}; } + virtual auto event(Event) -> void {} + virtual auto log(string_view message) -> void {} + virtual auto video(Node::Video::Screen, const u32* data, u32 pitch, u32 width, u32 height) -> void {} + virtual auto audio(Node::Audio::Stream) -> void {} + virtual auto input(Node::Input::Input) -> void {} +}; + +extern Platform* platform; + +} + +namespace ares::Core { + // forward declarations + auto PlatformAttach(Node::Object node) -> void { if(platform && node->name()) platform->attach(node); } + auto PlatformDetach(Node::Object node) -> void { if(platform && node->name()) platform->detach(node); } + auto PlatformLog(string_view text) -> void { if(platform) platform->log(text); } +} diff --git a/waterbox/ares64/ares/ares/ares/random.hpp b/waterbox/ares64/ares/ares/ares/random.hpp new file mode 100644 index 0000000000..bc7f000017 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/random.hpp @@ -0,0 +1,96 @@ +#pragma once + 
+namespace ares { + +struct Random { + enum class Entropy : u32 { None, Low, High }; + + auto operator()() -> n64 { + return random(); + } + + auto entropy(Entropy entropy) -> void { + _entropy = entropy; + seed(); + } + + auto seed(maybe seed = nothing, maybe sequence = nothing) -> void { + if(!seed) seed = (n32)clock(); + if(!sequence) sequence = 0; + + _state = 0; + _increment = sequence() << 1 | 1; + step(); + _state += seed(); + step(); + } + + auto random() -> n64 { + if(_entropy == Entropy::None) return 0; + return (n64)step() << 32 | (n64)step() << 0; + } + + auto bias(n64 bias) -> n64 { + if(_entropy == Entropy::None) return bias; + return random(); + } + + auto bound(n64 bound) -> n64 { + n64 threshold = -bound % bound; + while(true) { + n64 result = random(); + if(result >= threshold) return result % bound; + } + } + + auto array(array_span buffer) -> void { + if(_entropy == Entropy::None) { + memory::fill(buffer.data(), buffer.size()); + return; + } + + if(_entropy == Entropy::High) { + for(n32 address : range(buffer.size())) { + buffer[address] = random(); + } + return; + } + + //Entropy::Low + u32 lobit = random() & 3; + u32 hibit = (lobit + 8 + (random() & 3)) & 15; + u32 lovalue = random() & 255; + u32 hivalue = random() & 255; + if((random() & 3) == 0) lovalue = 0; + if((random() & 1) == 0) hivalue = ~lovalue; + + for(n32 address : range(buffer.size())) { + n8 value = address.bit(lobit) ? 
lovalue : hivalue; + if(address.bit(hibit)) value = ~value; + if((random() & 511) == 0) value.bit(random() & 7) ^= 1; + if((random() & 2047) == 0) value.bit(random() & 7) ^= 1; + buffer[address] = value; + } + } + + auto serialize(serializer& s) -> void { + s((u32&)_entropy); + s(_state); + s(_increment); + } + +private: + auto step() -> n32 { + n64 state = _state; + _state = state * 6364136223846793005ull + _increment; + n32 xorshift = (state >> 18 ^ state) >> 27; + n32 rotate = state >> 59; + return xorshift >> rotate | xorshift << (-rotate & 31); + } + + Entropy _entropy = Entropy::High; + n64 _state; + n64 _increment; +}; + +} diff --git a/waterbox/ares64/ares/ares/ares/resource/GNUmakefile b/waterbox/ares64/ares/ares/ares/resource/GNUmakefile new file mode 100644 index 0000000000..8218cabbba --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/resource/GNUmakefile @@ -0,0 +1,6 @@ +all: + sourcery resource.bml resource.cpp resource.hpp + +clean: + rm resource.cpp + rm resource.hpp diff --git a/waterbox/ares64/ares/ares/ares/resource/icon.png b/waterbox/ares64/ares/ares/ares/resource/icon.png new file mode 100644 index 0000000000..eae8ef637b Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/icon.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/icon@2x.png b/waterbox/ares64/ares/ares/ares/resource/icon@2x.png new file mode 100644 index 0000000000..361fd6c556 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/icon@2x.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/logo.png b/waterbox/ares64/ares/ares/ares/resource/logo.png new file mode 100644 index 0000000000..4193e22587 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/logo.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/logo@2x.png b/waterbox/ares64/ares/ares/ares/resource/logo@2x.png new file mode 100644 index 0000000000..b543715b0f Binary files /dev/null and 
b/waterbox/ares64/ares/ares/ares/resource/logo@2x.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/resource.bml b/waterbox/ares64/ares/ares/ares/resource/resource.bml new file mode 100644 index 0000000000..1b4f14c1a0 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/resource/resource.bml @@ -0,0 +1,29 @@ +namespace name=Resource + namespace name=Ares + binary name=Icon1x file=icon.png + binary name=Icon2x file=icon@2x.png + binary name=Logo1x file=logo.png + binary name=Logo2x file=logo@2x.png + namespace name=Sprite + namespace name=SuperFamicom + binary name=CrosshairBlue file=sprite/sfc/crosshair-blue.png + binary name=CrosshairGreen file=sprite/sfc/crosshair-green.png + binary name=CrosshairRed file=sprite/sfc/crosshair-red.png + namespace name=WonderSwan + binary name=Auxiliary0 file=sprite/ws/auxiliary-0.png + binary name=Auxiliary1 file=sprite/ws/auxiliary-1.png + binary name=Auxiliary2 file=sprite/ws/auxiliary-2.png + binary name=Headphones file=sprite/ws/headphones.png + binary name=Initialized file=sprite/ws/initialized.png + binary name=LowBattery file=sprite/ws/low-battery.png + binary name=Orientation0 file=sprite/ws/orientation-0.png + binary name=Orientation1 file=sprite/ws/orientation-1.png + binary name=PoweredOn file=sprite/ws/powered-on.png + binary name=Sleeping file=sprite/ws/sleeping.png + binary name=VolumeA0 file=sprite/ws/volume-a0.png + binary name=VolumeA1 file=sprite/ws/volume-a1.png + binary name=VolumeA2 file=sprite/ws/volume-a2.png + binary name=VolumeB0 file=sprite/ws/volume-b0.png + binary name=VolumeB1 file=sprite/ws/volume-b1.png + binary name=VolumeB2 file=sprite/ws/volume-b2.png + binary name=VolumeB3 file=sprite/ws/volume-b3.png diff --git a/waterbox/ares64/ares/ares/ares/resource/resource.cpp b/waterbox/ares64/ares/ares/ares/resource/resource.cpp new file mode 100644 index 0000000000..f605cb990c --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/resource/resource.cpp @@ -0,0 +1,1640 @@ +#include "resource.hpp" 
+ +namespace Resource { +namespace Ares { +const unsigned char Icon1x[6562] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,128,0,0,0,128,8,6,0,0,0,195,62,97, + 203,0,0,0,4,103,65,77,65,0,0,177,143,11,252,97,5,0,0,0,32,99,72,82,77,0,0,122,38,0,0,128, + 132,0,0,250,0,0,0,128,232,0,0,117,48,0,0,234,96,0,0,58,152,0,0,23,112,156,186,81,60,0,0,0, + 6,98,75,71,68,0,0,0,0,0,0,249,67,187,127,0,0,0,9,112,72,89,115,0,0,0,96,0,0,0,96,0, + 240,107,66,207,0,0,0,7,116,73,77,69,7,228,3,25,12,0,21,79,119,140,126,0,0,24,145,73,68,65,84,120, + 218,237,157,121,116,85,199,157,231,63,117,151,183,191,39,61,45,32,33,132,22,4,72,236,251,106,22,35,28,176,143,77, + 66,156,100,156,196,91,210,233,57,246,36,221,227,142,51,137,167,113,220,238,196,113,22,187,211,238,100,38,238,201,44,113, + 60,167,51,157,105,143,103,226,156,147,165,19,239,110,27,219,128,1,177,25,132,132,192,32,208,246,164,247,244,246,119,111, + 205,31,87,74,220,182,65,79,210,91,132,185,159,131,14,7,206,189,87,181,124,235,87,85,191,170,250,21,216,216,216,216, + 216,216,216,216,216,216,216,92,93,136,98,39,96,10,229,91,22,59,81,197,224,106,20,128,120,215,207,40,38,176,5,88, + 1,164,128,40,208,3,156,31,249,59,50,242,127,233,98,39,62,215,104,197,78,64,17,144,124,112,107,111,6,238,23,32, + 220,110,45,237,243,234,26,144,25,142,165,195,241,184,209,37,165,108,7,78,0,135,129,99,64,47,150,40,174,104,174,70, + 11,112,41,124,64,43,240,5,135,174,172,219,184,174,90,189,121,103,131,195,231,213,221,157,93,97,113,170,51,76,123,103, + 216,60,125,38,18,29,28,74,245,198,226,153,54,96,15,240,26,150,40,66,88,150,228,138,194,22,192,251,41,1,118,10, + 193,23,155,26,75,154,239,253,226,98,121,243,77,13,94,77,19,250,80,56,205,133,158,24,71,223,14,177,255,96,31,123, + 15,244,154,237,167,194,67,161,161,228,113,195,144,207,2,191,7,222,2,194,197,206,68,182,216,2,184,52,117,192,93,46, + 167,122,251,206,235,235,60,187,239,93,174,55,214,251,61,82,34,20,1,25,67,18,26,74,210,222,17,230,229,87,187,121, + 
241,213,110,121,248,88,40,52,16,74,188,41,37,191,0,126,11,116,1,70,177,51,114,57,212,98,39,96,10,51,4,188, + 156,49,228,145,35,199,67,205,47,191,214,93,81,95,231,79,214,215,250,29,114,164,225,120,220,26,181,53,62,214,175,174, + 226,134,235,106,197,166,245,213,238,96,137,163,41,60,156,254,72,56,146,186,193,48,100,21,208,7,12,48,69,187,7,91, + 0,151,199,0,78,2,47,244,246,37,42,159,125,241,220,92,175,71,75,45,108,41,83,53,77,81,165,132,209,31,151,75, + 165,174,214,199,230,13,213,108,223,90,171,214,213,250,43,162,209,244,134,190,129,228,141,153,140,89,3,92,192,18,195,148, + 154,110,218,2,200,142,16,240,92,44,158,73,191,178,231,194,170,88,60,147,94,189,172,82,115,58,85,85,190,171,58,165, + 4,33,4,193,82,39,43,151,86,178,125,235,76,49,187,33,80,50,56,148,90,219,219,31,223,145,201,200,82,224,52,48, + 88,236,12,141,98,11,32,123,82,192,27,233,140,121,97,239,129,222,245,125,3,73,177,126,245,116,213,237,210,254,149,8, + 224,143,86,193,235,209,89,186,176,156,235,182,206,20,51,103,248,130,221,23,99,27,251,7,18,155,77,73,18,232,24,249, + 102,81,177,5,48,62,12,224,144,105,210,117,232,200,192,250,158,222,184,118,205,218,170,15,20,193,40,163,66,88,177,184, + 130,173,155,106,20,183,91,171,238,236,138,92,55,28,77,207,1,58,129,139,197,204,144,45,128,137,241,182,148,116,30,62, + 22,218,20,26,76,106,215,172,173,82,157,14,245,178,101,41,129,96,169,147,13,107,170,88,190,164,194,209,215,159,88,244, + 78,119,180,213,48,100,18,120,155,34,89,3,91,0,19,231,164,148,156,107,59,58,176,197,148,176,126,213,116,93,211,196, + 229,69,48,50,70,104,152,229,167,117,115,13,30,143,86,118,162,125,232,218,225,104,186,6,203,153,52,88,232,76,216,2, + 152,28,199,165,100,240,224,225,254,173,229,65,167,185,108,113,133,131,44,124,43,82,130,199,173,179,102,197,52,22,182,148, + 57,58,187,194,203,206,95,136,173,196,154,113,156,45,100,6,108,1,76,158,35,233,140,169,189,213,214,183,126,209,252,178, + 244,236,250,128,83,202,236,28,108,66,8,154,26,2,108,88,83,37,66,131,169,89,167,58,195,91,50,134,236,197,90,107, + 
40,136,223,192,22,192,228,49,129,67,177,88,166,254,200,177,129,150,173,155,106,140,178,160,211,33,179,156,237,75,9,229, + 101,46,54,173,175,66,8,17,108,59,26,218,156,76,25,41,224,16,5,88,125,180,5,144,27,146,192,225,158,190,196,166, + 161,112,42,216,186,185,70,213,117,37,235,178,149,18,220,46,141,213,43,166,81,90,234,240,28,56,220,191,33,26,203,168, + 192,94,242,60,56,180,5,144,59,66,192,197,147,29,67,59,234,102,250,88,178,176,60,235,174,0,172,89,130,166,9,150, + 45,170,160,166,218,235,216,119,160,111,117,100,56,237,2,222,192,18,88,94,176,5,144,91,58,13,67,86,158,236,24,90, + 217,186,169,198,40,47,115,101,221,21,140,162,40,208,50,55,200,172,90,159,190,239,64,223,202,161,112,202,129,181,236,156, + 23,75,96,11,32,183,152,192,201,254,129,228,22,32,184,117,227,12,93,8,161,76,228,67,205,115,74,153,57,195,171,189, + 177,175,119,121,100,56,13,240,58,144,201,117,130,109,1,228,158,65,32,211,217,21,185,110,221,170,233,70,221,76,159,115, + 188,86,96,148,121,77,165,76,159,230,209,247,236,237,89,17,141,101,34,88,99,130,156,206,14,108,1,228,135,211,241,132, + 177,42,30,207,204,218,190,173,86,29,203,65,116,41,132,128,230,185,165,120,220,154,99,207,222,158,21,201,148,113,6,56, + 146,203,132,218,2,200,15,9,32,126,174,59,122,253,218,149,211,105,168,243,79,216,10,40,138,96,97,115,144,68,202,240, + 236,61,208,183,204,48,228,62,224,76,174,18,106,11,32,127,156,79,166,204,181,166,41,103,109,111,157,169,41,202,196,198, + 2,0,186,166,178,100,97,57,103,207,69,131,71,142,135,230,0,47,144,35,183,177,45,128,252,145,4,196,133,158,248,246, + 214,77,53,178,122,186,103,194,86,64,2,110,183,198,252,230,32,251,15,245,205,58,223,29,243,0,207,147,3,71,145,45, + 128,252,210,27,139,103,90,43,203,93,21,27,215,85,101,181,78,112,41,70,61,134,53,213,94,241,210,171,221,205,195,209, + 244,121,172,13,168,147,194,22,64,126,25,6,102,134,35,233,117,31,189,161,94,241,120,180,73,157,195,144,18,234,106,125, + 
164,210,166,227,181,55,47,54,155,166,124,5,107,171,217,132,177,5,144,127,226,145,72,122,215,154,149,211,196,220,217,37, + 238,137,118,3,163,168,138,194,188,166,82,142,28,11,149,159,58,29,246,1,255,204,36,156,68,182,0,242,79,56,147,49, + 55,150,5,157,245,173,155,107,52,96,194,131,65,176,198,3,62,175,78,213,116,15,207,191,114,190,105,120,56,221,9,28, + 156,232,247,108,1,228,159,20,80,151,72,24,107,63,126,99,131,238,113,107,147,46,115,41,97,230,12,47,131,131,41,253, + 245,125,61,51,165,228,159,177,182,177,143,155,73,169,209,38,107,246,188,115,62,154,110,239,8,199,133,200,205,89,28,77, + 83,184,253,150,57,44,108,9,46,7,238,100,130,117,105,91,128,194,144,72,38,141,27,23,52,151,149,173,89,81,233,50, + 115,112,50,64,74,40,43,117,97,154,136,23,94,233,174,53,12,249,28,214,73,230,113,97,91,128,194,16,2,142,183,29, + 237,87,83,105,51,103,155,60,36,176,243,250,58,86,44,173,152,13,220,198,4,26,180,45,128,194,144,4,142,182,29,29, + 136,15,71,211,102,142,122,1,164,148,76,171,116,243,217,79,54,225,114,170,159,4,22,140,247,27,182,0,10,199,201,190, + 129,164,54,20,78,41,185,26,7,128,213,21,108,223,90,203,210,69,229,245,192,39,24,167,179,201,22,64,225,232,140,68, + 82,241,179,231,162,145,92,30,201,30,181,2,187,110,108,64,215,148,143,3,245,227,121,223,22,64,225,184,24,141,101,134, + 206,158,27,206,228,227,80,254,142,214,90,102,55,6,230,1,59,198,243,158,45,128,194,17,51,77,217,223,219,151,112,229, + 178,11,0,48,77,201,172,153,94,90,55,213,104,192,78,32,144,237,187,182,0,10,71,10,8,15,71,211,222,124,124,92, + 211,20,174,223,86,75,89,208,185,26,88,150,237,123,182,0,10,71,6,24,14,71,82,166,105,202,156,71,13,49,77,201, + 226,5,101,44,158,95,86,134,21,235,40,43,108,1,20,14,3,72,12,71,51,105,195,144,57,63,245,35,37,148,150,56, + 217,184,190,26,44,1,148,103,243,158,45,128,194,97,2,233,84,202,48,164,156,236,154,224,7,35,4,108,92,87,77,121, + 153,171,133,44,125,2,182,0,62,68,152,166,100,78,99,128,57,141,129,32,176,46,155,119,108,1,20,14,5,208,29,14, + 
85,21,185,158,6,140,32,37,148,150,58,89,177,180,18,96,45,48,230,128,211,22,64,225,80,1,151,207,171,57,84,117, + 226,27,68,199,66,87,5,203,151,84,224,118,107,139,129,202,177,158,183,5,80,56,116,192,95,18,112,8,69,153,216,57, + 129,108,48,37,44,104,14,18,44,113,84,98,133,191,189,44,182,0,10,135,14,248,189,30,61,175,241,133,165,148,84,77, + 243,80,87,235,247,3,139,198,122,222,22,64,225,240,40,138,40,175,172,112,37,242,52,9,0,172,113,128,207,167,211,212, + 24,0,152,195,24,1,193,109,1,20,142,233,94,143,86,82,91,227,211,242,29,42,210,229,80,104,172,15,0,52,97,5, + 193,190,36,182,0,10,71,99,192,239,112,215,214,248,252,249,14,21,42,129,250,89,126,60,110,173,14,91,0,83,134,166, + 138,50,87,166,36,160,155,249,236,2,0,144,48,163,202,131,219,165,249,129,105,151,123,212,22,64,97,112,2,243,23,45, + 40,115,251,188,186,82,128,250,103,122,165,27,183,91,117,3,51,46,247,172,45,128,194,16,4,154,23,205,47,51,28,186, + 162,231,251,151,73,9,94,175,78,176,212,233,100,12,95,128,45,128,194,208,82,18,112,204,92,190,164,130,92,236,8,30, + 27,137,174,43,4,252,14,29,75,124,151,100,44,1,216,23,74,228,134,117,181,53,94,189,169,33,224,206,123,255,63,130, + 174,41,248,125,58,76,98,16,248,222,155,181,108,38,70,0,184,102,195,154,42,45,88,234,84,11,84,255,104,170,192,227, + 209,192,114,64,93,18,219,2,228,159,22,183,91,91,180,109,115,141,161,170,249,115,1,191,23,161,8,28,186,2,99,212, + 225,88,199,149,109,1,76,158,237,77,13,1,223,138,165,149,110,179,48,3,0,164,4,69,88,219,196,24,163,14,199,178, + 0,83,234,122,147,43,144,42,96,199,245,219,106,69,121,153,75,47,148,249,31,15,246,44,32,191,180,86,148,187,230,222, + 180,163,78,17,162,112,101,45,132,181,42,152,206,152,48,70,35,190,92,162,36,176,9,184,155,49,70,146,54,31,72,0, + 184,101,219,230,26,173,101,94,176,96,230,127,20,105,74,210,233,201,9,0,172,187,116,191,13,108,43,104,234,63,28,108, + 11,248,29,107,110,253,212,28,225,116,40,5,191,162,55,99,72,98,177,12,140,17,72,106,44,1,164,70,54,47,253,9, + 
80,90,232,76,92,193,148,2,159,255,200,181,53,142,213,43,166,21,188,245,3,164,211,38,35,33,102,35,151,123,110,44, + 1,68,221,46,205,112,58,213,117,192,199,10,158,139,43,151,93,101,65,231,218,63,189,163,69,184,157,106,17,6,127,130, + 116,198,36,28,73,165,25,35,158,224,88,2,232,241,251,116,117,243,250,106,85,8,238,2,26,10,157,149,43,144,70,224, + 174,79,125,108,182,186,106,89,165,215,40,66,235,23,2,162,209,52,161,193,100,146,49,130,70,140,37,128,243,82,146,222, + 117,99,131,99,222,156,210,102,224,46,174,206,43,231,179,69,7,238,158,215,84,50,239,174,207,181,232,154,150,253,165,17, + 185,68,0,23,123,227,196,227,153,56,208,125,185,103,199,180,0,195,177,116,196,235,209,60,95,190,123,145,116,58,212,91, + 25,231,233,211,171,140,29,78,167,250,153,123,238,94,44,27,235,3,158,98,244,253,0,8,56,223,29,35,158,48,34,76, + 210,2,68,98,177,76,87,71,87,152,143,223,212,224,249,248,77,13,30,224,47,129,217,197,201,217,148,166,9,216,125,243, + 77,13,238,93,55,214,123,164,148,69,243,162,10,224,244,153,8,177,120,166,139,73,14,2,135,129,246,142,211,17,84,85, + 56,254,227,95,44,213,23,182,4,231,3,247,3,37,197,202,224,20,164,4,216,189,176,165,172,249,107,255,126,169,195,227, + 214,198,125,83,72,46,73,164,12,58,186,194,0,237,192,101,119,33,143,37,128,12,112,162,189,99,136,161,112,154,134,58, + 191,231,161,221,171,168,172,112,125,20,248,34,99,172,52,93,37,232,192,151,42,43,220,31,125,104,247,74,26,234,252,197, + 51,253,88,3,192,225,225,12,237,29,97,128,19,140,113,203,72,54,238,201,182,211,103,34,145,11,61,49,164,68,108,185, + 102,134,255,190,123,150,10,143,91,251,115,224,86,174,238,5,35,1,220,234,113,107,127,126,223,61,75,196,150,107,102,248, + 77,179,120,166,31,172,187,8,187,47,198,56,125,38,18,6,218,198,122,62,27,1,28,15,13,165,122,143,190,29,66,17, + 32,4,202,237,183,204,245,222,245,185,22,77,85,197,3,192,46,174,78,17,8,96,151,166,42,15,220,253,249,249,234,237, + 183,204,245,22,210,223,127,41,20,1,71,143,135,24,28,74,245,2,199,199,124,62,139,111,246,198,227,153,182,183,14,246, + 
145,49,36,82,130,67,87,245,175,124,105,137,255,142,79,207,245,43,138,248,46,112,35,87,151,8,4,112,163,162,136,239, + 220,241,233,57,254,123,191,180,216,239,208,139,225,240,121,63,233,140,100,255,161,62,226,137,204,33,160,119,172,231,179,17, + 64,20,216,179,247,64,31,131,67,73,132,176,142,31,249,124,186,227,175,239,91,233,255,236,39,155,202,20,69,252,13,87, + 143,37,80,128,93,138,34,254,230,51,159,104,42,127,240,190,149,126,159,71,119,20,106,171,215,229,16,2,6,135,146,236, + 59,208,11,214,45,99,177,177,222,201,214,81,161,38,146,198,71,175,221,88,227,174,157,225,67,202,63,220,118,169,94,179, + 182,74,29,10,167,212,182,163,3,91,76,83,14,1,71,177,162,97,124,24,113,0,183,106,154,242,240,29,159,158,91,250, + 208,238,85,254,146,128,195,97,78,129,202,7,80,85,193,193,35,3,252,248,167,199,67,241,120,230,81,160,107,204,119,178, + 252,246,112,60,145,185,174,190,214,95,187,126,77,21,163,249,125,151,8,52,195,148,28,58,210,223,154,78,155,58,86,248, + 242,188,221,118,89,36,2,192,61,94,143,182,251,75,95,88,224,190,255,43,203,253,1,223,212,169,124,176,6,128,63,123, + 170,157,223,62,123,118,63,240,3,32,62,214,59,217,10,32,14,52,8,69,108,186,126,91,45,46,215,31,95,147,18,28, + 14,69,93,191,122,186,94,22,116,154,251,15,246,173,139,197,51,13,88,215,155,13,20,187,80,114,196,108,224,59,149,229, + 174,59,31,248,218,10,253,139,95,88,224,119,187,180,41,97,246,71,177,204,127,138,199,30,111,163,235,236,240,79,129,95, + 101,243,222,120,124,213,102,56,146,218,185,97,77,149,187,126,150,159,247,230,93,83,133,186,108,113,133,99,201,194,242,244, + 225,99,161,150,222,254,248,102,224,34,112,154,43,183,75,112,0,215,3,127,187,176,165,108,227,99,15,175,83,118,221,216, + 16,208,84,161,77,161,186,7,44,243,255,250,254,94,254,203,79,142,13,36,18,198,183,201,194,252,195,248,4,16,138,39, + 140,245,193,82,103,211,150,107,170,185,68,148,19,209,88,31,112,182,110,174,49,194,145,84,105,123,71,120,71,38,99,78, + 
199,114,72,12,22,187,144,198,73,3,240,85,151,75,221,125,243,206,198,25,143,61,188,94,95,190,164,194,39,101,241,167, + 122,31,132,97,74,254,235,19,199,121,233,181,238,23,128,255,68,150,93,240,120,4,144,4,252,195,195,233,29,215,93,91, + 171,148,5,157,124,80,43,144,18,17,44,117,58,90,55,213,104,179,106,253,180,119,132,151,15,132,18,91,177,182,38,157, + 198,186,84,113,42,83,10,220,34,4,223,109,106,44,217,254,224,215,86,232,95,254,119,139,189,211,42,220,174,98,59,121, + 46,133,162,8,186,206,14,243,189,31,30,204,244,245,39,30,3,254,37,219,119,199,187,92,57,48,52,148,250,200,172,90, + 223,180,85,203,42,185,148,25,148,18,116,93,81,23,47,40,119,110,219,92,99,42,66,4,59,186,34,219,226,241,204,42, + 44,33,157,103,234,13,18,3,88,43,157,223,42,11,58,63,127,219,191,153,91,241,189,191,94,163,111,217,48,195,175,105, + 162,96,7,58,38,130,16,130,127,124,250,20,255,231,23,157,71,77,83,62,196,56,172,237,120,5,48,104,154,178,122,120, + 56,189,105,251,214,90,188,222,203,47,5,72,137,40,11,186,28,91,54,206,208,215,173,154,110,196,227,153,89,231,186,99, + 215,39,83,198,26,172,249,116,15,89,204,85,243,89,118,192,116,172,221,78,127,85,18,112,220,117,211,246,89,13,15,63, + 176,90,187,227,150,185,254,138,50,151,115,170,182,250,63,100,64,8,122,250,226,60,252,253,183,56,243,206,240,223,3,255, + 111,60,239,79,100,195,194,64,223,64,98,71,83,99,73,233,146,133,229,140,213,50,164,4,33,132,82,55,211,231,220,190, + 173,86,93,187,114,154,52,77,57,235,66,79,124,123,60,145,185,14,168,193,154,101,68,152,196,245,103,227,41,51,172,214, + 190,4,248,130,16,220,95,81,238,250,236,206,235,235,26,190,113,223,74,253,223,222,57,223,211,56,203,239,1,242,126,140, + 59,23,40,138,224,233,95,118,242,228,63,158,236,204,24,242,1,172,129,247,184,10,99,188,168,192,119,214,175,158,254,149, + 39,31,191,150,202,10,55,217,78,135,132,176,18,156,76,25,153,99,111,15,198,127,249,155,46,243,87,191,63,75,71,103, + 56,18,139,103,218,128,151,129,215,128,183,177,174,89,201,213,120,193,201,200,17,109,96,61,112,141,203,165,46,106,106,40, + 
241,223,240,145,90,110,218,94,167,180,204,11,186,157,14,69,51,77,57,166,168,167,10,66,8,122,122,227,220,126,247,243, + 188,246,230,197,239,97,237,213,24,215,140,107,162,230,109,177,211,161,62,253,237,7,86,207,254,147,219,154,179,22,192,187, + 81,20,129,148,152,253,3,137,204,254,131,125,177,223,189,240,142,242,202,158,11,153,119,206,71,211,225,72,234,156,148,156, + 0,14,99,205,32,58,177,148,29,195,178,18,105,172,208,171,163,49,119,21,44,97,106,88,83,55,15,150,105,111,192,10, + 148,212,34,4,205,1,191,163,182,182,198,171,175,95,83,165,183,110,156,97,172,90,62,205,93,94,230,210,133,64,41,230, + 18,238,68,17,66,240,223,158,60,198,95,126,243,205,83,201,148,177,139,44,86,255,222,247,141,9,254,110,5,184,127,241, + 130,178,191,122,242,241,107,149,198,250,0,19,45,192,81,171,96,24,210,8,13,38,141,246,142,112,124,223,193,62,218,142, + 246,171,109,71,7,18,253,3,73,53,28,73,197,99,177,204,144,97,202,126,32,140,213,93,36,249,227,158,119,29,112,97, + 29,96,9,168,138,40,247,120,180,146,128,223,225,46,47,115,26,139,230,151,187,23,207,47,203,44,95,90,65,83,99,192, + 29,44,113,170,170,42,212,43,169,181,191,175,2,20,65,199,233,48,183,221,245,188,209,118,116,224,27,192,67,252,177,65, + 100,95,254,147,72,67,189,162,136,167,254,226,238,69,43,118,223,187,12,69,153,252,88,73,8,75,213,138,128,84,218,76, + 15,71,211,230,80,56,165,156,61,23,141,156,61,55,156,233,237,75,184,162,177,180,119,112,40,101,70,99,153,116,42,101, + 24,0,14,135,170,250,188,154,94,18,112,40,94,143,30,173,172,112,37,106,107,124,90,109,141,215,95,18,112,152,62,175, + 174,56,116,69,55,165,181,144,117,165,86,250,187,49,76,201,67,143,238,231,239,254,254,240,155,166,41,63,73,150,142,159, + 247,149,249,36,211,113,231,140,42,207,127,254,241,99,155,60,155,215,87,147,235,45,208,163,130,16,35,41,29,117,62,153, + 166,52,12,67,154,163,81,183,133,16,66,85,133,50,26,129,83,74,9,214,159,15,77,133,191,27,85,17,188,240,47,221, + 
252,233,61,47,197,46,92,140,221,13,60,57,225,111,77,50,45,237,145,225,116,83,111,127,98,241,214,77,53,120,61,122, + 206,143,19,143,174,60,74,105,69,195,30,49,219,138,16,168,66,136,145,31,84,41,173,126,124,212,172,143,254,124,216,80, + 132,160,111,32,193,215,191,181,151,67,71,250,255,55,240,8,69,188,60,58,5,156,126,231,220,240,86,175,87,47,95,179, + 98,26,121,10,132,109,51,130,97,74,126,244,63,142,242,15,255,116,242,132,105,202,47,3,103,38,243,189,92,28,92,184, + 96,152,50,121,162,125,104,235,252,230,160,99,78,67,201,135,178,229,77,5,20,69,240,220,75,231,249,230,163,251,163,225, + 112,234,1,178,92,241,187,28,185,58,185,114,124,56,154,174,57,221,21,89,190,97,77,149,40,47,115,217,34,200,49,138, + 34,56,217,49,196,125,15,190,46,79,180,15,253,4,120,148,49,78,254,102,67,174,4,144,6,218,206,117,199,150,13,132, + 146,245,155,214,87,227,113,229,61,36,238,85,131,162,8,66,131,73,30,120,120,47,191,123,225,220,203,192,127,32,139,253, + 126,217,144,203,179,107,131,64,123,123,231,208,102,69,136,178,53,43,166,161,105,246,120,96,178,8,1,137,164,193,247,127, + 212,198,147,63,63,113,202,48,228,159,1,135,114,245,253,92,31,94,60,107,24,178,231,240,177,208,230,210,18,135,119,233, + 162,242,156,248,7,174,86,132,0,195,144,60,241,15,111,243,183,143,183,245,198,226,198,215,128,95,231,242,119,228,227,244, + 234,177,100,202,72,30,56,220,127,77,117,149,199,49,127,94,240,170,216,42,156,107,4,150,31,227,169,103,58,249,230,35, + 111,69,6,6,147,15,2,63,97,2,222,190,203,145,15,1,72,224,80,52,150,81,246,29,236,91,83,91,227,211,155,231, + 150,218,34,24,7,163,142,175,103,126,221,197,253,223,218,27,191,208,19,123,4,248,33,121,88,45,205,215,249,245,12,176, + 55,50,156,118,238,61,208,183,178,166,218,171,205,155,83,138,237,34,24,27,33,172,22,244,204,111,186,216,253,208,155,201, + 115,231,163,127,7,124,151,60,237,155,200,103,0,131,20,240,70,56,146,210,95,223,223,187,124,90,165,91,111,158,83,106, + 
143,9,46,195,104,159,255,212,51,157,220,255,208,155,241,115,221,209,31,0,15,51,198,17,239,201,144,239,8,22,73,96, + 79,100,56,45,247,236,189,184,210,227,210,28,11,90,202,112,232,170,237,39,120,15,138,34,72,38,77,126,242,179,19,124, + 227,145,253,195,23,123,226,143,2,223,33,143,149,15,249,23,0,88,62,130,55,98,177,204,208,158,189,61,43,19,9,195, + 179,100,97,57,30,183,102,139,96,132,209,121,254,99,143,183,241,253,31,181,245,133,172,1,223,15,40,192,118,185,66,197, + 176,201,0,251,82,41,179,107,255,193,190,229,157,103,34,193,133,205,65,42,108,143,33,138,34,104,239,24,226,235,223,218, + 203,255,252,249,201,83,177,120,230,171,192,19,20,102,123,92,193,4,0,214,244,229,136,97,200,125,199,79,12,206,222,127, + 176,175,174,186,202,35,234,103,249,81,21,229,170,243,26,42,66,96,152,146,231,94,58,207,87,31,124,93,62,251,210,185, + 151,51,150,147,231,215,228,120,170,119,57,138,17,197,234,12,240,194,249,11,49,239,139,175,118,207,75,167,77,199,188,166, + 82,124,94,253,170,177,6,170,42,232,235,79,240,163,255,126,132,111,60,178,63,122,242,212,208,19,192,189,76,96,75,215, + 164,211,82,164,50,24,4,158,139,70,51,239,188,190,183,167,229,208,209,254,242,234,233,30,102,206,240,162,170,202,135,86, + 8,138,34,200,24,146,151,95,187,192,215,31,222,203,255,122,234,212,201,161,72,234,1,172,133,157,156,248,246,199,75,177, + 4,0,214,224,240,128,97,200,87,58,78,71,188,207,191,124,126,118,127,40,233,152,53,211,71,121,153,139,15,83,168,1, + 33,4,66,8,58,187,194,252,240,199,135,121,248,251,7,162,135,142,244,255,147,97,200,47,99,45,233,22,164,191,255,192, + 180,21,187,112,70,240,1,55,43,138,248,179,249,205,193,37,159,251,204,60,109,231,142,58,166,79,115,143,236,236,185,50, + 77,130,85,241,86,208,198,95,252,234,52,79,252,236,132,113,236,68,232,45,195,144,63,0,254,47,86,20,182,226,166,177, + 216,9,120,79,90,106,129,59,92,78,245,206,165,139,202,235,111,253,212,28,101,123,107,45,211,43,221,0,19,222,121,92, + 
104,70,157,93,61,189,113,126,243,236,89,126,246,84,187,220,127,176,175,51,145,52,158,196,26,225,79,104,3,103,62,152, + 74,2,24,69,5,230,1,183,185,93,234,45,139,22,148,207,250,216,13,245,202,13,215,213,82,87,235,67,211,20,166,226, + 118,238,209,237,237,153,140,201,217,119,162,252,230,185,179,60,253,203,211,230,193,195,253,103,227,137,204,83,192,79,153,130, + 209,83,166,162,0,70,209,128,22,224,19,186,174,220,220,88,31,152,219,186,169,70,223,209,58,147,37,11,203,9,150,58, + 173,155,49,138,40,134,209,74,151,18,66,131,73,218,142,14,240,235,223,159,229,217,23,207,165,59,186,194,237,169,148,249, + 52,240,115,172,104,93,147,222,189,147,151,60,20,59,1,89,160,0,117,192,14,33,216,89,90,226,92,189,120,126,89,112, + 211,134,106,177,105,93,53,77,141,1,130,165,78,52,85,144,239,125,255,239,62,183,144,206,72,6,135,146,156,236,8,243, + 242,171,221,188,248,106,183,60,124,108,32,20,26,76,190,46,37,207,0,191,197,154,242,78,169,22,255,190,60,21,59,1, + 227,36,0,44,5,182,169,170,104,13,150,56,91,154,26,3,37,43,150,86,42,203,151,84,176,96,94,144,170,233,30,124, + 62,29,151,99,196,185,244,135,243,1,35,255,120,23,163,66,121,255,42,165,53,120,251,195,121,4,172,240,171,195,195,25, + 186,47,198,56,122,60,196,254,67,125,236,61,208,107,182,119,132,135,6,135,146,71,13,67,62,7,252,14,43,62,82,184, + 216,5,149,45,87,154,0,70,81,176,14,123,46,0,214,1,107,221,110,109,113,176,196,81,89,87,235,247,54,53,6,148, + 198,250,0,245,179,252,204,168,242,48,189,210,141,207,171,163,105,10,186,174,160,169,2,161,88,45,25,172,11,150,164,41, + 201,24,214,61,59,233,140,73,52,154,230,98,111,156,243,221,49,78,159,137,208,209,21,166,189,35,108,158,62,19,137,14, + 134,83,61,113,235,48,235,107,192,30,172,120,72,33,10,232,193,203,21,87,170,0,222,139,23,168,192,26,51,44,4,230, + 10,65,147,219,165,213,185,92,106,192,227,214,92,193,82,167,51,224,119,104,126,191,46,60,110,13,93,83,70,47,86,36, + 53,82,233,177,88,134,200,112,90,134,35,169,76,104,48,153,140,197,51,137,68,194,8,199,19,153,46,41,105,199,58,168, + 
218,134,213,167,247,82,220,216,6,57,225,195,34,128,247,162,97,137,194,15,76,195,186,66,125,26,86,248,23,31,214,97, + 210,119,95,141,43,71,126,210,88,115,243,16,86,240,138,238,145,191,35,88,1,51,51,217,38,192,198,198,198,198,198,198, + 198,198,198,198,198,198,198,102,10,242,255,1,73,55,77,63,249,154,219,213,0,0,0,37,116,69,88,116,100,97,116,101, + 58,99,114,101,97,116,101,0,50,48,50,48,45,48,51,45,50,53,84,48,51,58,48,48,58,50,49,43,48,57,58,48, + 48,37,63,130,7,0,0,0,37,116,69,88,116,100,97,116,101,58,109,111,100,105,102,121,0,50,48,50,48,45,48,51, + 45,50,53,84,48,51,58,48,48,58,50,49,43,48,57,58,48,48,84,98,58,187,0,0,0,0,73,69,78,68,174,66, + 96,130, +}; +const unsigned char Icon2x[9662] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,1,0,0,0,1,0,8,6,0,0,0,92,114,168, + 102,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,5,26,105,84,88,116,88,77, + 76,58,99,111,109,46,97,100,111,98,101,46,120,109,112,0,0,0,0,0,60,63,120,112,97,99,107,101,116,32,98,101, + 103,105,110,61,34,239,187,191,34,32,105,100,61,34,87,53,77,48,77,112,67,101,104,105,72,122,114,101,83,122,78,84, + 99,122,107,99,57,100,34,63,62,32,60,120,58,120,109,112,109,101,116,97,32,120,109,108,110,115,58,120,61,34,97,100, + 111,98,101,58,110,115,58,109,101,116,97,47,34,32,120,58,120,109,112,116,107,61,34,65,100,111,98,101,32,88,77,80, + 32,67,111,114,101,32,53,46,54,45,99,49,52,53,32,55,57,46,49,54,51,52,57,57,44,32,50,48,49,56,47,48, + 56,47,49,51,45,49,54,58,52,48,58,50,50,32,32,32,32,32,32,32,32,34,62,32,60,114,100,102,58,82,68,70, + 32,120,109,108,110,115,58,114,100,102,61,34,104,116,116,112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,49,57, + 57,57,47,48,50,47,50,50,45,114,100,102,45,115,121,110,116,97,120,45,110,115,35,34,62,32,60,114,100,102,58,68, + 101,115,99,114,105,112,116,105,111,110,32,114,100,102,58,97,98,111,117,116,61,34,34,32,120,109,108,110,115,58,120,109, + 
112,61,34,104,116,116,112,58,47,47,110,115,46,97,100,111,98,101,46,99,111,109,47,120,97,112,47,49,46,48,47,34, + 32,120,109,108,110,115,58,100,99,61,34,104,116,116,112,58,47,47,112,117,114,108,46,111,114,103,47,100,99,47,101,108, + 101,109,101,110,116,115,47,49,46,49,47,34,32,120,109,108,110,115,58,112,104,111,116,111,115,104,111,112,61,34,104,116, + 116,112,58,47,47,110,115,46,97,100,111,98,101,46,99,111,109,47,112,104,111,116,111,115,104,111,112,47,49,46,48,47, + 34,32,120,109,108,110,115,58,120,109,112,77,77,61,34,104,116,116,112,58,47,47,110,115,46,97,100,111,98,101,46,99, + 111,109,47,120,97,112,47,49,46,48,47,109,109,47,34,32,120,109,108,110,115,58,115,116,69,118,116,61,34,104,116,116, + 112,58,47,47,110,115,46,97,100,111,98,101,46,99,111,109,47,120,97,112,47,49,46,48,47,115,84,121,112,101,47,82, + 101,115,111,117,114,99,101,69,118,101,110,116,35,34,32,120,109,112,58,67,114,101,97,116,111,114,84,111,111,108,61,34, + 65,100,111,98,101,32,80,104,111,116,111,115,104,111,112,32,67,67,32,50,48,49,57,32,40,77,97,99,105,110,116,111, + 115,104,41,34,32,120,109,112,58,67,114,101,97,116,101,68,97,116,101,61,34,50,48,50,49,45,48,56,45,50,55,84, + 50,48,58,49,49,58,52,52,43,48,51,58,48,48,34,32,120,109,112,58,77,111,100,105,102,121,68,97,116,101,61,34, + 50,48,50,49,45,48,57,45,48,51,84,50,49,58,53,53,43,48,51,58,48,48,34,32,120,109,112,58,77,101,116,97, + 100,97,116,97,68,97,116,101,61,34,50,48,50,49,45,48,57,45,48,51,84,50,49,58,53,53,43,48,51,58,48,48, + 34,32,100,99,58,102,111,114,109,97,116,61,34,105,109,97,103,101,47,112,110,103,34,32,112,104,111,116,111,115,104,111, + 112,58,67,111,108,111,114,77,111,100,101,61,34,51,34,32,112,104,111,116,111,115,104,111,112,58,73,67,67,80,114,111, + 102,105,108,101,61,34,115,82,71,66,32,73,69,67,54,49,57,54,54,45,50,46,49,34,32,120,109,112,77,77,58,73, + 110,115,116,97,110,99,101,73,68,61,34,120,109,112,46,105,105,100,58,57,57,49,53,52,98,56,52,45,100,100,54,48, + 
45,52,100,101,54,45,97,54,57,101,45,54,55,49,49,100,54,48,49,57,98,57,55,34,32,120,109,112,77,77,58,68, + 111,99,117,109,101,110,116,73,68,61,34,120,109,112,46,100,105,100,58,57,57,49,53,52,98,56,52,45,100,100,54,48, + 45,52,100,101,54,45,97,54,57,101,45,54,55,49,49,100,54,48,49,57,98,57,55,34,32,120,109,112,77,77,58,79, + 114,105,103,105,110,97,108,68,111,99,117,109,101,110,116,73,68,61,34,120,109,112,46,100,105,100,58,57,57,49,53,52, + 98,56,52,45,100,100,54,48,45,52,100,101,54,45,97,54,57,101,45,54,55,49,49,100,54,48,49,57,98,57,55,34, + 62,32,60,120,109,112,77,77,58,72,105,115,116,111,114,121,62,32,60,114,100,102,58,83,101,113,62,32,60,114,100,102, + 58,108,105,32,115,116,69,118,116,58,97,99,116,105,111,110,61,34,99,114,101,97,116,101,100,34,32,115,116,69,118,116, + 58,105,110,115,116,97,110,99,101,73,68,61,34,120,109,112,46,105,105,100,58,57,57,49,53,52,98,56,52,45,100,100, + 54,48,45,52,100,101,54,45,97,54,57,101,45,54,55,49,49,100,54,48,49,57,98,57,55,34,32,115,116,69,118,116, + 58,119,104,101,110,61,34,50,48,50,49,45,48,56,45,50,55,84,50,48,58,49,49,58,52,52,43,48,51,58,48,48, + 34,32,115,116,69,118,116,58,115,111,102,116,119,97,114,101,65,103,101,110,116,61,34,65,100,111,98,101,32,80,104,111, + 116,111,115,104,111,112,32,67,67,32,50,48,49,57,32,40,77,97,99,105,110,116,111,115,104,41,34,47,62,32,60,47, + 114,100,102,58,83,101,113,62,32,60,47,120,109,112,77,77,58,72,105,115,116,111,114,121,62,32,60,47,114,100,102,58, + 68,101,115,99,114,105,112,116,105,111,110,62,32,60,47,114,100,102,58,82,68,70,62,32,60,47,120,58,120,109,112,109, + 101,116,97,62,32,60,63,120,112,97,99,107,101,116,32,101,110,100,61,34,114,34,63,62,126,10,193,94,0,0,32,74, + 73,68,65,84,120,218,237,157,7,120,85,69,250,198,79,122,51,64,2,9,65,136,161,44,144,64,64,8,1,130,139,64, + 32,148,16,52,20,19,66,2,1,87,141,96,89,89,117,109,8,130,178,128,168,43,118,212,85,97,117,45,232,95,220,181, + 
128,53,161,153,32,189,151,132,14,2,162,20,197,10,100,254,243,221,51,147,76,46,9,105,183,156,115,238,251,62,207,251, + 60,130,64,206,61,231,251,126,119,206,204,55,223,104,140,49,13,134,97,207,52,110,2,12,3,0,48,12,3,0,48,12, + 3,0,48,12,3,0,48,12,3,0,48,12,3,0,48,12,3,0,48,12,3,0,48,12,3,0,48,12,3,0,48,12, + 3,0,48,12,3,0,48,12,91,29,0,144,169,228,195,237,47,28,200,29,36,28,40,28,160,252,154,254,76,4,183,47, + 110,155,121,4,0,64,142,212,25,238,2,238,111,133,183,136,95,47,226,126,130,251,54,238,97,220,29,185,67,112,187,0, + 0,200,98,241,36,124,150,123,21,247,73,229,247,42,51,65,226,83,238,185,220,185,220,93,197,72,2,2,0,32,19,170, + 192,46,193,127,108,210,56,48,191,109,235,6,103,3,3,125,88,53,48,144,62,199,189,134,123,30,119,38,247,229,184,173, + 0,0,100,30,253,153,251,43,53,169,189,189,189,142,253,109,82,231,229,59,86,103,94,248,124,113,26,251,215,211,125,216, + 180,123,18,88,214,200,54,172,83,135,112,230,239,231,93,29,20,246,113,47,228,30,195,221,24,183,24,0,128,140,175,193, + 226,155,188,44,145,27,134,250,111,89,186,104,232,246,211,251,39,48,213,39,74,114,89,225,167,233,236,229,167,250,176,219, + 243,226,89,207,110,145,44,192,191,202,17,195,121,238,66,238,105,220,221,185,189,112,171,1,0,200,152,162,228,204,224,62, + 160,36,240,133,132,43,155,44,63,188,53,231,172,61,8,84,31,223,61,142,45,125,111,40,155,113,95,34,75,29,24,205, + 66,47,243,171,10,8,251,185,31,227,78,196,237,6,0,32,99,42,148,123,54,247,111,50,113,253,252,188,15,46,120,174, + 239,186,75,65,192,126,148,240,225,91,67,216,29,19,227,89,199,216,176,170,96,176,135,123,22,119,28,110,57,0,0,25, + 79,109,237,231,7,58,180,15,91,249,237,246,177,103,107,10,2,233,29,69,153,236,177,135,147,216,85,61,154,210,28,67, + 101,48,88,161,233,43,11,193,184,237,0,0,100,172,215,130,60,77,175,25,176,37,107,64,128,247,158,207,254,47,109,71, + 109,33,32,189,115,117,38,155,243,80,79,150,148,24,201,188,188,46,2,193,41,238,231,52,189,246,0,2,0,32,131,40, + 
134,123,185,146,168,191,229,229,198,22,212,21,2,210,219,11,51,216,244,123,187,177,22,151,135,84,54,42,88,201,125,13, + 38,14,1,0,200,24,242,230,190,131,251,119,153,164,237,218,52,88,85,151,87,2,123,127,207,231,12,222,120,177,63,27, + 148,220,162,178,87,132,245,220,57,220,126,0,0,0,0,185,95,61,52,125,54,223,150,160,129,129,190,187,87,127,49,124, + 127,125,33,32,189,105,197,117,108,226,245,113,44,56,216,183,178,21,4,122,29,241,5,0,0,0,200,189,162,2,159,79, + 101,114,242,119,249,211,175,207,239,191,222,81,16,32,31,220,146,205,230,76,235,193,46,111,118,209,235,193,94,1,2,31, + 0,0,0,128,220,59,65,120,47,247,5,145,152,231,38,79,234,180,220,145,16,144,53,6,143,63,146,196,154,69,5,219, + 131,96,51,119,42,0,0,0,64,238,213,104,238,95,69,82,150,166,167,198,228,159,218,55,161,212,209,32,248,174,56,151, + 205,155,117,21,139,138,12,178,7,193,231,220,241,0,0,0,0,185,79,73,220,199,101,82,82,245,224,201,189,227,47,56, + 26,2,100,62,233,104,91,57,104,216,192,95,133,192,31,220,79,114,55,2,0,0,0,200,61,162,194,161,98,77,41,26, + 250,97,207,248,243,206,128,0,185,100,125,22,187,62,187,61,243,241,169,176,106,240,157,166,175,24,0,0,0,0,228,6, + 69,105,122,99,17,91,66,182,140,9,45,60,94,156,251,187,179,32,64,94,253,197,8,150,210,183,185,253,107,193,18,77, + 175,93,0,0,0,0,200,197,10,227,46,146,201,24,19,29,90,196,215,249,207,57,19,2,228,133,207,247,179,159,31,248, + 73,211,235,22,124,0,0,0,0,114,173,26,106,122,151,33,253,117,32,54,108,165,179,230,4,84,31,216,156,205,114,179, + 218,217,151,24,83,69,97,43,0,0,0,128,92,171,6,154,210,99,160,115,199,240,21,206,88,29,168,204,75,22,165,242, + 42,197,134,42,4,104,47,67,30,0,0,0,64,174,85,19,117,78,96,120,90,203,124,87,0,128,124,108,231,56,118,243, + 132,56,251,209,192,155,154,137,87,10,0,0,200,140,106,198,93,34,18,176,116,234,221,221,86,186,10,2,228,119,23,12, + 100,77,35,130,236,75,138,19,1,0,0,0,114,157,218,104,250,18,157,109,205,190,54,205,69,28,181,100,72,157,138,20, + 
8,252,102,198,87,2,0,0,50,179,122,107,162,98,144,15,203,79,57,114,3,81,77,204,231,31,108,123,11,252,124,43, + 52,53,125,73,211,15,76,1,0,0,0,200,5,162,178,225,82,77,236,34,116,196,86,226,186,76,16,218,189,18,172,214, + 76,210,206,28,0,128,172,160,89,154,210,79,192,213,0,32,23,175,203,98,87,247,138,82,33,112,68,211,15,58,1,0, + 0,0,200,201,162,194,156,207,100,242,229,77,136,43,112,7,4,104,115,209,95,114,218,219,47,21,14,2,0,0,0,200, + 249,10,215,244,125,253,182,9,185,47,22,167,237,114,7,4,200,52,47,160,116,33,162,115,12,38,1,0,0,0,228,124, + 245,212,244,163,197,88,96,128,79,49,95,183,255,197,93,16,248,247,11,201,204,238,56,180,105,0,0,0,0,57,95,83, + 101,210,93,213,35,106,153,187,0,64,254,242,191,195,88,100,147,10,147,131,116,8,170,23,0,0,65,206,157,15,88,33, + 18,174,116,225,11,253,214,187,19,2,212,143,48,38,250,50,21,2,243,53,189,25,42,0,0,65,78,82,75,238,31,41, + 190,249,193,163,7,142,238,24,251,179,59,33,176,101,213,117,172,117,203,6,42,4,94,211,12,178,163,16,0,128,172,170, + 219,101,194,245,239,211,188,192,157,0,32,239,94,59,154,118,48,170,16,88,104,132,145,0,0,0,89,85,148,92,95,139, + 100,59,199,15,26,221,225,110,8,236,225,229,195,124,7,163,10,129,103,1,0,8,114,158,98,53,113,32,41,29,77,238, + 170,173,195,213,65,32,182,93,35,21,2,79,2,0,16,228,60,205,145,201,54,227,129,196,85,238,6,128,60,203,208,110, + 78,96,42,0,0,65,206,17,29,77,254,45,197,58,111,246,121,212,29,123,5,42,243,54,126,134,225,21,45,42,172,14, + 76,2,0,32,200,57,186,94,38,218,136,97,173,242,141,0,0,242,218,252,145,172,73,120,160,218,134,124,16,0,0,65, + 206,153,16,92,171,233,219,134,207,236,221,48,230,164,81,32,240,21,47,22,10,10,42,59,179,144,150,46,175,4,0,32, + 200,241,74,149,163,128,212,148,104,195,140,2,100,247,97,101,239,192,65,205,133,91,137,1,0,200,147,180,92,36,217,217, + 93,107,70,159,48,18,4,30,190,63,81,157,15,160,22,232,1,0,0,4,57,86,253,101,146,13,78,110,97,168,81,0, + 121,66,118,59,251,146,97,0,0,130,156,49,10,160,227,199,15,109,205,254,201,72,0,56,81,146,203,122,245,104,170,66, + 
224,6,0,0,130,28,171,107,101,130,221,148,27,91,96,180,81,0,127,53,97,81,77,203,142,45,167,126,135,137,0,0, + 4,57,78,180,29,119,59,197,190,175,143,215,97,87,28,49,86,91,127,194,123,12,250,150,55,26,165,38,39,13,1,0, + 8,114,156,242,228,40,96,238,140,158,133,70,3,0,121,214,212,238,234,171,192,27,0,0,4,57,78,65,220,63,80,252, + 71,69,4,175,53,34,0,168,229,248,144,1,21,206,29,200,6,0,32,200,113,122,90,36,214,133,13,5,35,15,25,17, + 2,116,248,136,210,110,252,180,166,247,57,0,0,32,200,1,234,36,191,93,211,83,99,242,141,8,0,242,162,215,82,212, + 179,8,169,211,145,55,0,0,65,142,81,161,166,111,18,58,98,132,173,194,85,57,111,124,156,250,42,112,59,0,0,65, + 142,209,68,153,88,252,192,207,205,70,5,0,157,74,172,108,31,166,253,2,87,0,0,16,84,127,209,81,227,182,54,226, + 189,147,162,10,140,10,0,242,71,111,15,81,95,5,62,1,0,32,200,49,178,157,40,196,55,227,28,255,97,207,248,243, + 70,134,192,216,204,182,234,171,192,24,0,0,130,234,175,27,101,82,45,122,37,101,147,145,1,176,127,83,182,186,42,112, + 92,115,64,129,16,0,0,225,53,128,47,5,106,6,221,32,100,239,87,159,237,171,142,2,30,7,0,32,168,254,162,227, + 188,169,49,199,78,163,3,128,204,231,43,212,46,66,237,1,0,8,170,159,166,139,132,42,229,189,250,142,25,29,0,43, + 151,92,75,75,151,18,2,31,2,0,16,84,63,37,201,97,245,148,59,187,172,48,195,40,96,252,152,10,189,3,134,0, + 0,16,84,119,209,49,93,167,40,31,58,119,8,55,5,0,138,215,101,177,6,161,254,18,0,155,181,58,86,8,2,0, + 16,164,139,214,214,153,191,191,247,62,51,0,128,60,237,158,4,117,20,144,1,0,64,80,221,53,69,206,3,24,173,95, + 96,85,230,103,28,176,136,198,101,109,197,183,105,117,56,112,20,0,128,32,93,125,229,183,233,188,217,189,138,204,50,10, + 152,57,165,66,223,128,28,0,0,130,234,166,96,77,148,5,167,13,140,206,55,11,0,104,159,64,84,100,89,113,208,110, + 110,95,0,0,130,234,38,91,171,176,22,205,67,86,155,5,0,228,71,167,247,172,115,137,48,0,0,65,229,122,155,114, + 130,247,227,59,100,38,0,28,223,61,78,45,17,94,11,0,64,80,61,39,2,247,111,26,115,218,76,16,120,224,206,174, + 
234,40,32,25,0,128,160,218,171,172,101,248,251,175,15,218,98,38,0,236,219,56,134,5,7,151,157,49,248,17,0,0, + 65,181,87,7,9,128,25,15,36,174,50,19,0,200,55,142,139,149,0,40,21,159,5,0,128,160,90,40,72,36,15,203, + 24,222,38,223,108,0,88,95,48,82,61,100,244,89,0,0,130,106,175,99,148,23,93,59,55,89,110,54,0,144,83,250, + 54,87,187,8,7,3,0,16,84,59,125,77,121,17,209,56,104,189,25,1,240,250,252,100,117,50,112,28,0,0,65,181, + 211,187,148,23,129,1,62,37,102,4,0,63,234,76,45,12,90,6,0,64,80,237,68,199,114,211,187,244,119,102,4,0, + 121,242,164,78,234,40,32,14,0,128,160,154,235,31,34,113,206,25,249,172,128,234,38,3,149,14,194,51,1,0,8,170, + 185,238,148,223,158,102,43,6,82,221,35,33,66,2,160,4,0,128,160,154,107,130,4,192,186,130,145,7,205,10,128,217, + 211,122,168,175,1,9,0,0,4,213,76,89,50,113,120,239,189,61,102,5,192,206,111,70,171,53,1,143,2,0,16,84, + 51,141,144,0,248,98,113,218,46,179,2,128,156,212,189,169,4,192,1,110,47,0,0,130,170,215,80,9,128,143,223,30, + 178,205,204,0,176,219,38,220,13,0,128,160,234,53,64,38,205,123,11,140,125,82,80,117,230,45,206,85,0,60,8,0, + 64,144,7,1,128,28,219,174,145,4,192,74,0,0,130,60,232,21,128,124,123,94,188,4,192,121,238,112,0,0,130,60, + 100,18,144,252,223,255,12,86,95,3,50,1,0,8,186,180,44,177,12,40,253,93,113,46,11,9,241,147,0,120,21,0, + 128,160,75,107,130,102,129,66,32,213,131,251,183,168,178,42,16,0,128,160,138,186,75,2,224,192,150,236,51,86,0,192, + 67,247,118,83,95,3,154,1,0,16,84,181,102,137,68,249,195,172,155,129,236,189,100,81,170,10,128,81,0,0,4,85, + 173,23,53,125,59,240,113,43,36,191,108,27,30,224,239,35,1,240,79,0,0,130,170,150,108,8,82,108,21,0,144,187, + 119,45,219,29,248,13,0,0,65,85,171,80,51,113,75,176,170,124,219,77,29,37,0,254,224,246,7,0,32,168,114,153, + 186,41,104,85,126,241,201,171,213,121,128,206,0,0,4,93,172,178,182,224,163,71,152,175,45,248,165,188,106,105,186,10, + 128,177,0,0,4,93,44,83,31,12,114,41,159,224,205,66,253,253,188,47,234,15,0,0,64,80,185,202,142,6,251,224, + 
205,193,91,172,4,0,114,135,216,48,9,128,37,0,0,4,93,172,169,34,65,74,15,109,201,249,209,106,0,200,28,222, + 90,2,224,48,0,0,65,23,107,17,229,132,159,159,247,65,171,37,63,249,193,187,19,212,179,3,131,1,0,8,170,168, + 29,148,19,209,205,67,138,172,8,128,151,230,245,81,39,2,99,1,0,8,42,23,125,35,210,158,121,54,108,112,76,190, + 21,1,176,244,189,161,42,0,82,1,0,8,42,87,178,76,142,103,30,237,189,218,138,0,216,185,58,83,5,192,45,0, + 0,4,85,50,1,184,123,109,214,247,86,4,0,223,220,68,37,206,18,0,115,1,0,8,42,215,82,202,135,0,127,239, + 189,86,76,126,233,182,173,27,74,0,188,3,0,64,144,46,31,238,211,148,15,157,59,134,175,176,50,0,122,245,40,59, + 43,224,75,0,0,130,116,245,146,239,198,15,222,149,96,105,0,164,13,186,66,2,96,19,0,0,65,186,30,150,239,255, + 188,151,254,49,43,3,32,55,171,157,4,192,17,0,0,130,116,209,30,121,22,18,228,187,195,202,201,79,158,60,169,147, + 4,192,111,238,2,128,15,226,13,50,144,34,184,47,80,46,12,25,16,157,111,117,0,60,124,127,162,186,20,232,227,14, + 0,248,35,230,32,3,233,38,205,66,39,1,85,231,167,102,95,165,2,32,192,29,0,8,64,204,65,6,18,205,134,83, + 15,192,99,39,247,142,191,96,117,0,204,127,162,66,99,144,32,119,0,32,16,49,7,25,104,248,127,142,146,161,119,175, + 168,2,171,39,63,249,213,103,251,170,0,8,118,7,0,130,16,119,144,65,116,139,76,134,247,95,31,184,217,19,0,240, + 198,139,253,85,0,92,230,14,0,4,35,238,32,131,104,53,37,130,143,143,215,17,171,156,1,80,157,23,189,150,162,2, + 32,20,35,0,200,83,213,73,38,194,240,180,150,249,158,144,252,149,28,22,218,16,0,128,60,85,207,136,36,184,176,97, + 217,168,195,0,0,38,1,33,207,17,125,9,157,164,36,136,138,8,94,235,41,201,143,87,0,8,210,117,179,76,130,199, + 31,73,42,244,36,0,24,97,18,16,35,0,200,157,242,210,68,235,47,95,95,239,67,223,151,228,158,243,36,0,188,242, + 140,251,151,1,81,8,4,185,83,195,101,2,228,77,136,43,240,164,228,55,74,33,16,21,95,156,17,23,80,128,120,132, + 92,172,149,20,123,94,94,218,169,195,91,115,206,122,26,0,140,80,10,236,43,18,95,94,68,50,98,18,114,145,82,100, + 
220,121,194,198,159,202,60,227,62,247,111,6,34,29,229,254,73,92,196,74,196,37,228,34,21,138,111,255,179,187,214,140, + 62,225,137,0,184,99,98,188,76,254,95,233,134,184,19,0,249,10,137,6,33,54,33,39,43,77,198,155,85,219,126,215, + 196,99,51,219,86,56,29,200,93,0,216,162,233,235,176,63,138,139,89,195,237,141,24,133,156,36,138,173,245,226,219,255, + 244,190,141,217,167,60,21,0,67,7,150,181,4,219,232,78,0,216,190,253,155,52,14,84,71,1,19,16,167,144,147,116, + 163,140,179,235,210,91,121,236,183,63,57,169,123,89,83,208,47,220,9,128,119,233,103,243,22,197,191,208,70,12,113,65, + 199,184,27,32,86,33,7,43,84,188,114,218,54,253,28,221,49,246,103,79,6,192,159,90,53,48,68,91,240,39,232,103, + 7,6,250,208,172,228,74,101,20,48,11,241,10,57,88,143,203,248,154,61,173,199,215,158,156,252,118,7,131,60,234,78, + 0,220,46,31,10,159,141,45,109,212,208,127,147,248,245,239,220,29,16,179,144,131,68,59,254,254,160,216,226,49,182,217, + 83,182,252,214,240,104,176,73,238,4,192,48,121,33,159,47,78,99,75,23,13,221,174,137,131,25,53,125,89,16,19,130, + 80,125,69,205,103,139,68,76,157,251,124,241,208,157,158,156,252,70,59,28,52,94,94,8,213,38,211,197,37,95,221,172, + 64,185,184,91,17,191,80,61,53,89,198,211,224,228,22,249,158,158,252,228,23,159,188,218,48,199,131,135,200,11,153,118, + 79,130,237,226,104,114,198,207,207,251,160,248,125,90,30,140,65,12,67,117,84,43,77,20,154,249,251,123,239,63,182,115, + 220,47,0,192,4,54,229,174,174,50,249,75,53,177,43,215,157,7,131,124,75,63,63,107,100,155,178,11,92,240,92,223, + 117,226,226,232,15,46,215,112,134,0,84,123,81,169,249,42,25,232,175,207,239,191,30,201,175,59,35,189,181,4,192,33, + 121,179,220,9,0,219,105,172,157,58,132,87,184,200,158,137,145,203,148,97,202,20,196,51,84,75,77,151,241,211,59,41, + 170,0,137,95,238,184,246,97,50,175,62,54,2,0,230,218,134,104,126,222,236,187,226,220,178,139,164,225,26,95,170,40, + 17,23,74,51,184,61,16,211,80,13,117,149,38,218,124,243,37,230,226,99,187,198,253,138,196,215,77,57,70,185,38,242, + 
106,182,17,0,48,78,146,186,240,211,244,10,23,91,240,191,97,197,154,190,89,129,254,255,1,238,38,136,109,168,26,53, + 230,222,39,98,230,183,47,22,167,237,66,226,151,123,229,146,107,213,9,192,108,35,0,224,74,121,65,47,63,213,231,162, + 11,206,203,141,85,87,5,62,195,124,0,84,205,123,255,151,50,94,110,189,161,195,50,36,189,93,35,144,127,86,88,1, + 136,55,2,0,2,196,16,159,253,245,230,248,74,47,186,67,108,24,170,4,161,154,104,174,140,147,246,127,106,180,10,9, + 127,177,111,187,169,163,204,35,42,182,243,55,2,0,72,182,99,153,147,18,35,43,189,232,111,183,143,61,75,239,114,202, + 210,69,22,98,29,178,83,182,92,57,10,10,242,221,229,233,181,254,85,57,177,75,132,4,64,161,122,243,220,13,128,39, + 233,26,2,252,125,24,159,176,169,244,194,139,62,27,190,143,182,112,106,229,77,12,122,35,230,33,161,62,154,126,206,189, + 173,197,215,154,47,71,30,64,178,95,108,62,177,174,78,0,62,110,36,0,100,200,161,219,167,255,55,180,202,15,240,214, + 191,250,111,148,15,154,251,123,238,118,136,125,143,87,27,238,239,228,106,209,194,23,250,97,189,191,10,127,252,206,16,245, + 253,127,132,145,0,112,185,188,176,135,239,79,188,116,21,211,157,93,86,104,229,69,66,244,90,16,133,28,240,88,81,220, + 236,145,175,134,15,221,211,109,37,18,189,106,79,253,123,130,10,128,40,35,1,64,147,75,55,169,3,163,171,253,32,233, + 169,49,106,3,145,45,98,233,7,242,44,209,146,240,86,25,7,163,174,105,133,58,255,106,60,40,185,5,83,190,56,53, + 163,1,96,1,93,71,232,101,126,236,68,73,110,117,251,153,75,123,36,68,168,149,130,27,184,195,144,19,30,163,134,154, + 222,62,206,246,252,169,106,20,9,126,105,31,223,61,142,133,132,248,201,124,121,197,136,0,24,35,31,40,189,171,84,247, + 129,78,238,29,127,161,67,251,10,203,131,171,68,96,64,214,86,35,238,175,229,115,239,24,23,182,130,98,1,73,126,105, + 127,240,198,32,117,248,127,157,17,1,16,174,137,94,0,127,155,212,169,70,31,234,135,61,227,207,243,214,70,95,43,31, + 108,157,134,106,65,43,139,98,100,181,124,222,49,209,161,69,158,118,164,151,3,214,255,207,139,251,104,56,0,104,98,109, + 
146,197,199,133,215,124,104,83,156,251,123,203,152,208,66,187,57,129,102,200,21,75,78,248,109,147,207,185,85,76,131,66, + 122,246,72,238,154,57,182,93,35,153,31,43,42,187,185,70,1,192,52,177,150,203,118,20,101,214,248,195,209,72,192,174, + 90,176,4,75,132,150,82,123,101,182,223,54,236,167,103,142,196,174,153,183,21,102,168,195,255,41,70,6,64,162,86,126, + 92,115,109,27,29,150,210,182,79,229,131,254,192,221,23,185,99,122,245,210,202,215,249,89,247,174,17,203,240,206,95,59, + 63,58,189,167,10,128,174,70,6,128,151,92,14,252,115,207,168,186,116,59,45,29,158,214,146,150,8,101,157,0,21,13, + 161,108,216,188,202,214,202,11,191,74,177,212,87,55,247,236,22,41,147,127,143,200,49,195,2,128,100,219,208,225,237,237, + 85,171,215,128,10,5,15,119,119,163,215,129,223,181,242,189,3,79,105,250,78,49,200,28,162,29,159,211,21,144,255,62, + 237,239,40,242,169,235,240,159,94,169,181,106,54,210,25,9,0,101,175,1,115,30,234,89,231,15,78,45,160,148,189,3, + 114,43,49,86,8,140,47,58,54,190,108,75,47,127,134,39,169,4,28,201,92,55,207,156,210,93,29,254,119,49,3,0, + 72,182,157,127,85,237,14,172,169,87,127,49,124,127,96,160,239,110,229,6,236,231,238,137,28,51,172,168,147,143,108,8, + 107,219,213,135,141,61,245,115,194,149,77,100,236,239,186,212,141,55,26,0,102,203,213,128,13,203,70,213,111,7,20,111, + 7,21,223,33,124,133,2,1,106,21,53,135,219,15,249,102,168,33,255,189,154,232,11,161,137,253,252,216,210,91,63,175, + 249,106,132,58,252,159,105,38,0,180,151,239,127,119,222,210,217,33,55,35,111,66,92,129,50,161,36,215,67,91,34,247, + 220,46,106,221,189,74,121,46,191,222,150,23,143,210,94,7,152,26,236,104,118,253,255,205,2,0,146,109,93,191,105,68, + 80,181,123,3,106,234,101,31,93,91,76,195,74,229,166,252,44,190,121,208,102,204,245,162,83,159,242,180,242,163,225,233, + 188,186,18,244,240,115,140,41,103,40,119,196,189,45,168,238,97,24,17,0,227,101,96,252,231,165,254,142,107,138,192,95, + 9,6,38,183,160,27,114,65,1,1,149,19,119,68,78,186,76,212,139,78,173,222,44,229,239,170,203,49,228,119,156,23, + 
62,223,79,253,246,207,49,35,0,232,196,146,147,116,109,180,141,209,225,55,136,55,142,224,221,81,14,40,55,233,119,49, + 247,16,138,252,116,154,104,179,214,99,234,187,62,157,216,243,198,139,3,54,32,105,29,235,148,190,205,101,92,159,226,14, + 54,35,0,72,207,105,250,121,238,245,158,12,172,162,69,210,47,162,112,232,15,5,4,39,184,239,192,107,129,195,135,251, + 185,220,199,212,201,88,170,220,164,126,143,72,88,199,122,93,254,72,91,29,141,184,207,79,215,228,1,25,21,0,29,229, + 100,224,77,185,177,206,60,45,117,7,29,27,173,4,39,121,45,247,16,228,110,189,53,148,123,189,122,111,195,26,249,111, + 194,41,189,206,243,13,99,99,213,179,255,98,205,12,0,210,39,98,77,152,237,89,159,229,212,27,247,236,220,222,171,125, + 125,189,15,217,129,128,222,85,175,65,30,215,90,189,197,228,83,217,189,228,35,185,163,84,165,73,37,219,72,84,231,120, + 239,134,49,44,56,216,87,222,243,255,214,244,97,25,25,0,3,100,0,61,120,119,130,211,111,32,77,68,141,24,214,42, + 159,175,159,158,177,3,1,29,82,122,173,24,206,66,85,15,245,211,229,10,142,86,94,205,119,250,186,244,86,249,56,157, + 215,249,190,111,114,23,53,102,251,88,1,0,36,106,244,193,34,26,7,86,217,54,220,9,36,61,153,154,18,77,32,56, + 107,7,130,157,98,249,42,8,249,94,97,194,246,102,77,175,54,83,19,255,167,97,131,99,242,247,109,204,62,133,228,116, + 65,219,111,158,27,145,77,202,150,254,214,212,230,1,26,29,0,57,50,168,158,152,153,228,210,155,186,123,109,214,247,131, + 147,91,228,219,237,43,96,98,133,130,54,25,117,242,224,196,239,44,38,153,78,218,37,254,41,130,103,241,218,209,63,32, + 49,93,231,57,211,122,168,241,153,97,37,0,80,217,174,237,164,224,102,81,193,46,27,5,168,62,180,53,251,39,62,17, + 89,224,235,227,117,216,14,4,114,158,96,162,166,111,100,177,186,34,185,111,209,148,214,92,210,52,127,66,103,57,30,222, + 154,131,153,125,23,155,191,186,170,133,63,59,107,187,138,101,116,0,104,98,25,73,223,37,200,73,231,174,27,77,61,232, + 230,206,232,89,24,21,17,188,214,174,152,72,246,91,163,93,135,55,106,214,218,121,72,96,187,137,251,115,241,25,213,207, + 
124,33,170,105,240,26,222,192,165,16,253,249,220,103,58,79,67,121,38,181,238,129,97,6,0,16,209,182,211,181,54,9, + 15,100,71,182,229,184,253,166,243,218,132,195,116,70,1,159,221,62,82,201,168,224,130,248,150,156,206,157,100,178,186,2, + 186,86,234,196,51,67,211,207,109,180,7,29,205,232,31,161,26,138,141,43,174,59,130,4,116,175,41,23,104,126,76,60, + 155,173,117,153,168,54,3,0,52,65,54,219,7,157,126,111,55,67,61,132,165,139,134,110,167,194,22,94,128,113,172,18, + 24,144,127,20,223,160,211,197,178,162,145,90,152,7,139,101,59,218,23,241,161,166,183,83,187,232,51,120,121,121,253,64, + 37,187,239,45,72,217,132,165,60,227,120,202,93,93,213,231,52,170,46,1,96,22,0,16,217,54,106,226,0,145,93,107, + 70,27,238,97,80,191,186,119,94,29,176,145,38,14,121,237,194,78,173,188,171,13,171,228,117,97,7,247,59,154,222,168, + 145,150,24,59,56,121,117,33,72,252,140,116,241,51,23,137,107,56,95,197,53,150,134,4,249,238,24,50,32,58,159,146, + 30,189,248,140,231,221,107,71,219,114,65,60,175,205,117,93,166,54,11,0,72,41,50,64,175,207,110,111,248,7,180,189, + 40,243,24,157,103,72,61,9,120,221,251,190,42,18,205,222,71,53,125,131,210,123,220,47,104,250,94,238,191,137,121,144, + 12,225,65,226,94,164,136,255,150,191,63,94,252,217,153,226,239,190,39,38,41,143,214,228,103,7,248,123,239,237,204,175, + 245,193,187,18,86,236,92,157,121,28,73,102,108,231,100,180,85,159,223,160,186,38,149,153,0,80,86,29,72,245,206,5, + 31,94,99,170,7,198,71,45,39,230,205,238,85,68,203,100,45,154,135,84,86,121,232,50,251,249,121,31,164,107,160,107, + 121,102,206,159,139,104,201,19,73,101,30,47,255,248,90,181,230,127,113,125,18,202,108,0,136,149,27,120,248,123,183,233, + 31,228,129,45,217,103,62,120,115,240,150,25,247,39,174,204,24,222,38,255,202,78,225,203,35,26,7,173,167,253,241,252, + 1,83,75,236,115,117,72,240,115,244,119,249,191,81,76,255,86,215,206,77,150,211,191,61,227,129,196,85,244,179,14,109, + 201,249,17,73,100,94,243,57,24,214,171,71,83,117,39,107,91,79,2,0,105,158,12,246,5,207,245,179,252,3,223,191, + 
105,204,105,222,226,233,224,215,159,166,239,165,9,71,50,205,53,144,229,175,233,255,209,159,33,160,32,73,172,237,151,230, + 245,81,97,63,183,190,201,100,70,0,208,105,192,182,3,35,162,34,131,120,130,100,35,48,96,143,217,240,163,44,251,209, + 220,78,3,79,4,0,105,156,164,96,110,86,59,4,7,236,17,206,26,213,70,253,246,207,116,68,34,153,21,0,164,255, + 137,250,115,219,17,200,8,16,216,202,254,240,173,33,106,167,223,143,29,149,68,102,6,64,75,238,159,232,51,180,105,217, + 128,186,252,32,80,96,203,214,251,183,188,34,84,38,63,109,87,143,6,0,116,77,150,67,162,137,215,199,33,88,96,171, + 119,250,33,223,234,200,4,50,59,0,124,100,19,10,26,30,189,187,96,32,2,6,182,148,223,254,215,0,117,232,95,224, + 232,198,52,102,7,0,169,149,24,22,217,102,72,141,88,38,12,195,117,113,241,186,44,181,209,7,117,249,141,113,116,242, + 88,1,0,21,86,5,168,45,50,21,75,32,128,96,179,23,252,12,238,223,66,29,250,143,113,70,226,88,5,0,164,55, + 141,208,55,0,134,157,176,207,127,161,179,146,198,74,0,104,164,233,167,0,51,63,95,111,182,100,81,42,2,9,54,237, + 146,31,223,43,34,147,191,196,17,5,63,158,0,0,82,23,238,95,232,115,209,187,211,142,162,76,4,20,108,42,111,43, + 204,80,171,253,126,229,238,230,204,132,177,26,0,42,204,7,116,239,26,193,190,43,206,69,96,193,166,48,29,236,153,148, + 24,169,14,253,39,56,59,89,172,8,0,210,75,242,38,254,37,167,61,130,11,54,133,199,141,174,176,199,255,89,87,36, + 138,85,1,224,167,41,135,84,204,156,210,29,1,6,27,218,211,238,73,176,239,54,237,15,0,212,79,205,184,15,200,6, + 34,255,126,33,25,129,6,27,210,180,173,93,105,240,65,237,231,155,187,42,73,172,12,0,82,71,81,64,193,120,131,12, + 246,217,251,105,8,56,216,88,77,101,223,77,181,197,166,82,231,223,217,149,9,98,117,0,104,162,95,218,31,178,82,144, + 142,80,70,224,193,70,240,55,95,142,96,141,195,3,213,238,62,253,93,157,28,158,0,0,210,13,242,253,234,242,102,33, + 108,211,138,235,16,128,176,91,189,245,235,12,22,221,252,50,245,56,239,9,238,72,12,79,1,0,105,154,132,64,107,190, + 
125,120,231,55,216,51,0,187,199,84,159,162,108,239,37,223,239,174,164,240,36,0,144,102,203,155,222,182,117,67,91,111, + 117,4,36,236,74,151,172,207,98,177,109,27,169,201,255,15,119,38,132,167,1,192,75,172,175,218,110,126,231,142,225,108, + 15,127,32,8,76,216,85,187,251,226,227,194,213,228,127,218,221,9,225,105,0,144,16,120,89,62,132,118,109,26,162,100, + 24,118,186,105,155,122,92,251,48,251,13,62,222,0,128,123,68,141,68,94,83,231,4,182,172,194,196,32,236,28,211,164, + 115,76,244,101,106,242,191,170,25,228,208,88,79,5,128,28,9,60,41,31,74,115,190,58,176,22,75,132,176,131,205,207, + 107,176,173,60,41,201,255,188,17,190,249,1,128,114,205,144,15,39,60,44,0,219,136,97,135,249,243,197,105,182,35,237, + 149,228,159,99,180,224,7,0,116,77,149,15,41,48,208,135,45,124,190,31,2,24,174,151,95,125,182,175,90,225,87,234, + 206,165,62,0,160,102,186,94,86,12,82,19,198,251,38,119,65,32,195,117,50,117,164,82,106,251,233,124,199,60,163,6, + 61,0,112,113,217,240,25,57,26,152,144,221,14,253,4,224,26,251,248,238,113,246,91,122,105,31,202,0,35,7,60,0, + 112,177,226,53,209,90,140,220,165,83,99,172,16,192,53,170,238,163,6,52,74,242,211,174,190,43,141,30,236,0,64,229, + 186,156,187,72,62,76,106,47,246,9,38,7,225,75,244,240,179,155,236,91,197,29,101,134,64,7,0,170,86,0,247,124, + 249,80,169,73,227,172,169,221,209,114,28,174,208,186,155,186,247,42,13,60,101,39,31,127,179,4,57,0,80,189,198,114, + 255,44,31,112,242,213,151,179,157,171,81,57,136,178,222,44,251,190,253,212,192,243,47,102,11,110,0,160,102,162,206,172, + 251,212,87,130,69,175,165,32,17,60,212,111,189,60,192,126,200,79,173,187,187,152,49,176,1,128,154,171,33,247,27,242, + 161,211,82,97,222,248,56,156,74,236,65,166,83,122,169,201,172,146,248,178,166,191,129,89,131,26,0,168,189,50,184,79, + 202,0,160,125,221,139,95,31,132,4,177,184,63,126,103,8,251,83,171,6,106,226,159,17,175,135,166,22,0,80,55,181, + 228,94,161,142,6,198,102,182,101,251,55,101,35,89,44,230,189,27,198,176,172,81,109,236,191,245,151,113,95,97,133,64, + 
6,0,234,46,218,208,241,87,238,159,212,185,129,87,158,233,139,196,177,200,12,255,203,79,245,81,79,233,33,255,200,125, + 155,102,160,205,60,0,128,49,106,6,222,87,191,33,18,187,68,216,54,130,32,145,204,233,101,31,93,195,122,245,104,106, + 255,173,255,145,85,190,245,1,0,231,40,155,251,184,12,24,170,5,31,63,166,157,109,185,8,73,101,158,166,29,244,42, + 167,212,241,147,143,114,103,90,53,104,1,0,199,170,17,247,19,154,216,84,68,110,16,234,111,59,245,229,200,182,28,36, + 153,65,125,152,63,155,41,119,117,101,151,133,248,169,137,255,27,247,163,102,158,225,7,0,220,167,182,220,139,212,33,36, + 245,127,159,126,111,55,219,82,18,146,206,56,203,122,180,115,175,105,68,144,253,112,255,115,238,56,79,8,84,0,192,185, + 26,204,189,81,13,174,168,200,32,54,119,70,79,118,108,23,234,7,220,101,170,221,168,34,241,215,115,167,120,82,128,2, + 0,206,23,181,30,187,134,123,131,26,108,84,73,70,61,7,208,149,216,117,166,123,77,163,176,168,166,193,246,137,191,141, + 59,215,74,179,251,0,128,49,151,13,169,136,104,151,26,124,1,254,62,44,107,100,27,219,49,81,72,82,231,120,227,242, + 81,108,226,245,113,44,40,200,215,62,241,247,106,122,179,14,31,79,13,74,0,192,245,242,229,206,226,94,171,6,35,205, + 60,15,236,215,156,189,62,63,153,157,40,65,19,146,250,154,238,33,157,8,157,210,183,185,253,172,62,249,27,49,179,239, + 227,233,193,8,0,184,87,189,185,63,212,244,158,113,101,1,74,5,69,244,141,85,248,105,58,146,185,150,166,195,95,39, + 79,234,100,187,135,118,73,95,42,38,247,174,65,216,1,0,70,83,7,77,223,71,126,74,13,90,42,49,166,46,51,179, + 249,132,213,246,194,12,36,120,21,222,198,239,205,63,30,236,110,43,192,162,123,102,151,248,180,111,227,105,79,153,213,7, + 0,204,173,32,49,25,181,220,46,136,109,195,216,164,238,77,109,43,8,128,129,126,186,238,156,135,122,178,164,196,200,202, + 146,158,190,237,11,184,115,184,3,17,86,0,128,25,21,171,233,7,71,22,219,195,128,220,33,54,140,253,245,230,120,246, + 191,55,7,123,68,227,82,106,184,249,193,27,131,216,109,55,117,180,63,92,83,53,77,176,62,162,233,117,24,16,0,96, + 
25,37,104,122,85,218,222,202,2,63,132,87,176,13,25,16,205,30,226,75,92,116,176,137,21,106,12,104,173,158,250,48, + 78,253,123,2,27,148,220,130,5,7,251,86,149,244,116,79,232,212,231,174,8,19,0,192,19,68,221,137,166,112,175,228, + 62,95,89,82,248,251,121,179,158,221,34,109,223,150,47,205,235,195,86,45,77,55,244,202,2,141,96,86,46,185,150,189, + 248,228,213,236,214,27,59,218,230,61,252,42,246,217,83,125,78,124,246,41,2,140,16,0,224,177,10,211,244,229,44,58, + 108,178,164,138,132,177,153,18,170,35,127,109,200,28,222,218,86,247,78,96,88,250,222,80,151,245,55,164,237,181,244,179, + 232,103,82,162,211,53,100,164,183,182,189,202,92,34,217,165,233,53,232,21,77,175,163,8,195,99,7,0,160,202,69,173, + 168,71,113,255,83,211,219,154,255,81,77,98,233,199,161,241,35,172,232,152,116,218,2,155,54,232,10,219,225,22,180,148, + 246,200,3,137,108,222,172,171,216,252,39,174,102,11,158,235,103,51,117,63,162,119,113,50,253,183,252,125,250,51,244,103, + 233,239,220,49,49,222,246,111,208,191,69,19,151,109,91,55,180,21,60,213,228,90,196,53,23,137,207,48,82,51,73,123, + 109,0,0,0,48,162,252,184,59,139,217,112,154,67,88,194,125,168,134,137,232,10,31,228,254,68,211,15,205,204,17,215, + 234,135,199,6,0,64,206,21,45,55,210,186,248,80,238,91,184,31,227,126,155,251,43,238,205,220,71,52,125,59,108,93, + 19,251,55,241,111,108,18,255,230,91,220,115,197,207,74,21,63,27,203,115,0,0,100,112,249,136,68,37,96,4,115,135, + 10,55,18,150,191,14,22,127,38,80,67,121,45,0,0,195,176,181,141,155,0,195,0,0,12,195,0,0,12,195,0,0, + 12,195,0,0,12,195,0,0,12,195,0,0,12,195,0,0,12,195,0,0,12,195,0,0,12,195,0,0,12,195,0,0, + 12,195,0,0,12,195,38,241,255,3,73,49,223,76,174,54,46,60,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char Logo1x[11533] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,1,94,0,0,0,85,8,6,0,0,0,1,155,140, + 109,0,0,0,4,103,65,77,65,0,0,177,143,11,252,97,5,0,0,0,32,99,72,82,77,0,0,122,38,0,0,128, + 
132,0,0,250,0,0,0,128,232,0,0,117,48,0,0,234,96,0,0,58,152,0,0,23,112,156,186,81,60,0,0,0, + 9,112,72,89,115,0,0,0,96,0,0,0,96,0,240,107,66,207,0,0,0,7,116,73,77,69,7,228,8,10,18,21, + 15,166,100,212,10,0,0,0,6,98,75,71,68,0,0,0,0,0,0,249,67,187,127,0,0,43,252,73,68,65,84,120, + 218,237,93,7,84,84,71,23,126,219,119,217,66,89,58,75,239,189,72,17,193,130,2,162,8,42,96,3,1,177,199,168, + 81,163,177,229,55,246,168,137,137,189,151,104,140,37,246,216,176,165,153,24,75,236,198,196,222,59,29,236,194,252,115,223, + 190,85,68,80,145,89,88,240,125,231,220,99,52,176,251,222,148,111,110,31,138,98,193,130,5,11,22,44,88,176,96,193, + 130,5,11,22,44,88,176,96,193,130,5,11,22,44,180,7,14,22,46,35,2,44,150,88,172,176,40,177,136,216,225,97, + 193,130,5,11,237,145,47,64,129,101,57,135,195,57,169,52,20,237,183,183,149,111,83,200,133,115,241,191,125,130,37,10, + 139,45,75,198,44,88,176,96,65,158,128,221,176,124,105,109,37,189,52,168,143,247,211,21,243,35,208,148,49,33,40,35, + 217,245,65,131,96,179,139,230,166,146,141,92,46,103,0,254,25,127,44,18,118,200,88,176,96,193,130,12,120,88,66,12, + 244,133,171,123,164,185,101,255,123,176,253,211,252,171,93,208,197,99,157,208,206,245,45,209,248,145,65,37,45,34,173,239, + 90,154,235,109,225,112,168,174,248,103,173,75,105,205,44,88,176,96,193,162,10,0,215,67,255,144,122,166,23,118,172,109, + 241,184,240,90,70,73,225,245,12,4,114,243,76,103,180,107,67,75,52,168,143,207,51,31,79,163,211,2,1,247,115,252, + 179,78,44,1,179,96,193,130,69,213,1,65,183,72,59,27,249,129,197,51,27,63,200,187,210,165,4,11,2,13,24,8, + 24,254,251,196,190,68,90,11,246,247,49,62,199,231,115,135,226,159,183,96,135,141,5,11,22,44,170,14,111,19,165,120, + 231,140,73,97,69,185,151,187,20,3,225,106,164,224,90,6,45,39,255,72,66,195,7,250,61,119,180,87,252,133,127,62, + 129,98,3,113,44,88,176,96,81,101,56,25,25,138,182,205,250,42,172,72,163,249,150,37,96,248,243,231,205,173,80,82, + 
188,67,129,76,42,152,77,169,51,33,88,176,96,193,130,69,21,224,106,110,170,247,243,202,133,77,31,98,162,125,141,124, + 65,192,5,113,227,159,20,52,121,116,72,137,131,157,226,0,254,157,8,138,245,253,178,96,193,130,69,149,16,228,230,108, + 112,252,247,109,113,143,53,90,110,89,1,31,48,200,214,213,49,40,188,190,249,13,252,59,221,40,117,129,6,11,22,44, + 88,176,168,44,56,106,221,53,49,42,66,117,227,210,241,78,207,242,175,150,79,190,26,237,247,216,111,137,40,161,149,125, + 129,128,207,29,65,177,185,191,44,88,176,96,241,222,224,115,185,156,241,195,7,250,229,99,130,45,174,136,120,53,190,223, + 11,71,59,162,140,20,215,71,34,17,111,2,254,93,41,59,124,44,88,176,96,241,126,176,180,180,144,254,188,115,125,203, + 199,154,212,178,55,145,239,213,147,201,168,71,186,219,99,161,144,55,158,213,124,89,176,96,193,226,253,145,144,208,202,254, + 206,157,179,169,207,193,167,251,46,228,155,222,201,229,33,143,199,129,124,95,62,59,124,44,88,176,96,81,121,232,41,228, + 194,85,107,22,55,123,244,54,173,87,67,190,231,254,238,128,226,91,216,230,225,223,77,141,111,201,102,155,177,96,193,130, + 197,251,32,166,77,75,187,91,119,177,214,251,54,226,165,3,110,152,124,15,255,220,22,133,4,154,94,193,191,27,198,230, + 153,177,96,193,130,69,229,33,83,26,137,55,209,253,28,222,65,235,213,100,59,108,248,62,26,217,168,100,191,80,234,30, + 192,44,88,176,96,193,162,146,232,218,191,151,87,94,254,213,46,37,239,66,188,26,25,51,60,176,68,44,226,77,161,88, + 127,47,11,22,44,88,84,26,142,1,62,198,167,206,31,233,248,236,109,65,182,151,69,22,25,232,202,137,100,20,27,109, + 147,133,127,191,57,151,203,58,29,88,176,96,193,162,50,224,203,164,130,239,54,126,31,253,206,238,6,141,203,1,170,219, + 84,150,210,93,248,51,140,216,97,100,193,130,5,139,202,161,215,136,129,254,249,133,21,244,112,168,72,114,46,165,163,126, + 61,61,159,194,239,27,232,179,13,205,88,176,96,193,162,50,168,31,23,99,123,227,222,249,180,231,149,33,94,72,49,251, + 
107,87,27,228,234,108,112,152,98,123,249,178,96,193,130,69,165,96,225,229,110,248,247,185,195,29,158,191,169,127,67,69, + 50,164,159,239,115,252,25,31,201,229,108,47,29,22,44,88,176,120,87,72,44,204,244,126,218,183,61,254,73,69,93,203, + 222,148,219,251,199,142,120,228,96,167,216,71,169,175,150,103,193,130,5,11,22,239,0,142,88,204,155,187,110,89,212,163, + 202,4,216,52,146,125,49,29,26,233,60,196,159,19,207,225,84,75,134,3,124,9,220,150,220,6,190,83,135,36,14,75, + 12,165,238,99,28,130,197,7,139,7,22,27,44,50,74,183,83,239,96,76,193,100,209,199,226,192,60,183,47,150,250,204, + 251,180,32,48,62,173,177,180,100,190,163,170,128,171,173,132,88,140,177,56,98,241,100,158,23,198,189,9,51,15,213,49, + 215,141,176,248,49,227,165,194,34,166,170,175,135,181,102,12,76,177,184,96,241,198,82,143,153,175,24,230,25,227,117,84, + 96,45,52,99,198,235,195,4,159,207,165,184,28,206,132,165,179,154,20,23,221,232,90,105,226,5,178,94,179,184,25,50, + 208,23,46,161,212,55,29,107,27,6,98,125,227,223,28,35,146,158,185,181,72,127,172,75,226,26,147,246,208,37,42,165, + 200,49,162,93,190,109,104,203,28,11,223,134,247,148,78,62,231,21,150,14,127,8,165,250,235,57,60,222,56,252,252,157, + 24,162,208,171,193,105,23,48,68,209,24,220,68,92,190,112,134,88,161,220,170,175,114,250,203,216,217,255,178,165,111,163, + 123,118,13,90,229,58,53,109,95,224,18,149,92,132,223,235,81,149,198,166,101,151,199,246,225,241,79,249,98,189,211,148, + 250,118,235,74,91,101,88,156,129,76,240,225,62,132,39,20,47,146,24,154,102,26,218,184,29,49,113,9,184,106,233,215, + 248,158,93,88,28,60,111,62,243,188,15,181,63,215,169,15,157,35,59,21,226,247,202,133,241,82,58,120,255,39,85,90, + 236,225,10,132,51,152,57,182,101,200,145,228,225,104,130,37,12,75,31,158,64,52,71,207,200,108,167,161,173,251,113,51, + 247,224,155,214,193,209,217,14,141,19,242,170,235,253,171,178,22,172,2,34,158,115,184,188,237,212,135,220,241,144,81,82, + 
135,205,152,212,224,201,251,16,47,228,255,94,56,210,17,133,6,155,93,96,180,37,109,35,28,147,90,94,151,77,55,81, + 247,204,92,212,125,135,46,73,14,234,182,61,27,117,219,118,31,101,252,116,7,117,217,120,3,165,172,58,135,146,22,28, + 68,177,147,126,66,13,63,153,142,60,91,247,122,102,225,19,126,71,98,96,178,19,15,254,64,70,91,225,86,195,184,193, + 161,8,55,74,119,23,234,41,86,99,162,56,139,55,106,97,64,202,103,37,77,135,45,66,113,223,100,162,118,139,14,163, + 212,31,47,226,231,190,137,50,182,220,197,239,145,69,191,79,85,199,165,231,174,2,20,249,249,114,36,16,75,151,85,226, + 112,134,84,25,127,14,135,251,25,30,171,29,152,92,174,97,178,123,28,210,99,28,138,26,245,3,106,61,109,15,234,240, + 221,113,148,186,246,50,130,181,208,245,149,231,205,169,182,185,134,239,133,121,78,254,225,95,212,102,230,47,168,201,103,243, + 145,71,171,238,79,141,236,60,206,113,249,130,47,25,141,188,42,144,99,137,228,9,132,211,245,173,28,143,226,3,49,63, + 160,243,208,146,102,35,150,162,214,211,247,162,142,203,78,162,180,245,87,81,198,230,219,168,235,214,123,213,248,254,239,39, + 61,118,230,163,160,140,47,16,126,167,225,120,124,62,120,119,195,192,41,99,66,138,138,174,87,46,165,172,52,249,14,232, + 237,253,12,78,122,109,22,84,136,228,134,240,199,168,250,189,38,162,158,187,139,232,73,212,93,41,64,61,48,225,0,233, + 244,220,93,136,122,238,41,66,189,246,60,160,255,95,218,186,171,40,110,106,38,242,239,52,184,88,233,232,125,137,203,227, + 143,99,92,18,218,0,152,163,225,88,75,156,107,236,236,119,217,59,177,239,243,230,99,127,68,201,43,206,208,7,4,60, + 27,60,23,60,31,252,55,60,51,45,240,252,164,198,34,51,143,62,112,240,115,164,112,184,111,61,99,64,187,109,33,208, + 147,175,194,7,212,221,192,244,207,75,226,166,238,160,15,4,154,196,153,113,132,249,239,185,75,75,207,251,222,243,172,126, + 182,238,248,125,59,126,119,2,19,204,40,164,176,176,63,201,184,88,42,187,49,192,61,213,14,91,119,59,237,194,90,21, + 
52,28,56,147,62,24,129,232,95,204,89,233,49,168,177,247,175,132,224,231,236,178,233,22,178,14,138,202,97,220,88,31, + 60,6,125,53,182,254,131,247,37,94,252,123,104,229,194,166,72,46,19,204,209,178,143,75,33,53,182,250,21,52,11,221, + 39,222,55,47,64,250,249,241,159,160,177,128,6,35,51,85,29,2,205,134,176,15,208,31,155,247,139,173,252,155,100,55, + 194,27,23,200,22,190,191,39,108,90,216,176,213,240,174,64,72,41,43,255,67,38,46,1,151,222,162,253,193,243,134,8, + 245,228,171,177,69,83,16,249,249,50,90,155,165,15,45,102,172,106,211,28,107,14,177,216,41,91,17,62,240,224,26,173, + 22,88,123,127,215,185,243,23,74,21,107,29,26,183,125,216,98,226,70,108,61,221,126,121,48,238,44,168,181,235,30,230, + 177,205,140,159,145,212,216,226,87,216,203,31,118,100,77,77,147,35,103,127,21,254,236,125,92,13,154,156,222,227,191,39, + 34,15,55,195,131,248,179,12,181,248,184,33,224,203,2,211,178,182,109,196,55,110,80,252,103,139,9,235,145,210,209,231, + 58,248,49,9,152,96,250,120,98,7,25,59,249,94,13,255,100,26,74,163,9,172,168,218,200,246,149,247,195,36,223,124, + 204,26,36,212,83,172,124,67,128,81,142,73,233,83,76,206,55,155,12,153,71,187,58,128,104,234,194,28,195,251,199,78, + 222,130,228,230,118,71,241,123,218,189,45,228,130,37,217,208,206,227,98,227,79,231,208,238,131,186,50,14,26,226,13,233, + 49,30,220,12,163,120,130,15,188,232,74,170,39,160,120,92,206,228,229,115,35,158,191,47,241,130,220,249,47,21,181,106, + 110,123,151,137,46,147,15,2,138,233,88,212,176,160,174,163,95,144,85,93,18,88,148,224,126,48,176,113,61,135,223,51, + 160,10,67,229,40,208,147,173,113,107,145,254,180,253,146,163,12,225,214,236,198,245,78,236,91,140,159,43,131,195,45,215, + 189,107,133,231,118,153,107,243,212,39,29,150,30,175,149,218,237,187,184,35,176,85,3,132,51,66,40,211,127,83,192,115, + 160,85,64,68,94,219,89,191,189,116,251,212,149,49,192,239,2,7,137,109,253,22,121,224,250,98,157,12,216,196,147,74, + 
5,139,55,175,108,254,240,125,210,201,74,251,121,7,246,161,253,188,73,90,59,35,140,204,118,199,127,187,171,118,187,25, + 222,66,190,160,233,8,36,178,213,239,153,245,16,40,85,90,236,15,237,61,233,133,47,176,70,223,7,111,182,206,107,46, + 32,51,143,224,107,248,217,92,203,59,36,68,10,163,237,129,93,254,87,66,107,119,117,240,64,213,204,107,194,156,63,144, + 204,204,102,127,5,22,33,248,32,250,217,132,196,20,130,235,169,46,174,111,120,167,182,179,127,71,50,83,213,31,144,153, + 196,210,46,54,243,172,85,178,93,135,246,182,125,86,217,2,138,87,252,188,88,91,158,49,169,1,226,241,56,35,140,12, + 181,98,70,4,96,109,224,126,250,134,235,117,80,35,42,29,124,184,137,108,67,99,243,213,254,222,74,185,203,131,229,230, + 182,39,154,14,95,242,226,179,106,222,204,46,66,45,38,108,64,34,185,193,58,38,200,87,26,182,98,133,114,103,131,62, + 83,80,247,237,57,53,174,149,107,125,94,55,222,64,170,122,77,239,49,169,132,47,93,125,106,43,160,173,133,79,120,86, + 135,165,199,234,180,82,17,250,209,36,208,250,199,9,244,228,44,235,194,6,168,231,107,124,226,242,241,78,207,223,181,53, + 100,69,249,188,235,151,71,33,125,133,112,62,233,0,27,79,64,239,217,129,245,82,71,212,136,159,178,58,5,162,213,160, + 245,114,249,194,89,149,24,34,15,153,137,234,48,4,164,116,138,192,240,179,248,117,24,84,130,159,175,55,151,247,138,123, + 215,8,111,190,53,33,221,199,209,169,79,61,234,50,233,50,2,239,233,18,157,2,133,70,205,203,204,157,155,194,202,241, + 84,252,55,59,235,44,233,130,171,5,210,19,237,194,226,10,40,117,129,11,11,140,166,29,19,28,239,228,92,74,47,126, + 95,210,213,148,15,239,219,30,143,172,44,164,63,49,57,152,36,33,17,235,27,111,107,245,213,182,58,188,56,95,6,219, + 218,45,60,132,20,150,14,208,124,232,93,202,176,77,69,114,195,173,13,7,204,208,57,45,15,178,18,44,188,195,110,150, + 209,242,4,88,203,251,210,171,109,159,231,144,119,250,33,144,174,70,220,99,51,158,224,247,79,40,157,234,199,19,138,23, + 
128,38,88,151,53,126,216,179,137,115,255,68,216,34,59,64,177,109,100,95,100,52,12,153,248,191,224,194,170,248,119,53, + 153,13,71,127,77,64,110,206,6,127,50,57,136,36,225,13,69,7,105,235,174,212,253,141,138,223,15,220,41,150,126,141, + 239,128,54,244,182,152,35,38,177,41,190,237,7,148,64,226,188,46,141,13,108,54,136,230,139,245,149,112,16,211,165,161, + 140,214,155,104,29,20,149,7,190,223,186,110,189,188,166,241,70,37,63,40,163,241,182,180,9,105,158,15,5,16,117,121, + 93,131,203,41,172,239,84,196,225,114,191,148,24,154,86,142,163,234,40,247,74,148,70,226,13,187,55,198,62,169,42,241, + 66,103,179,51,7,218,163,0,95,227,99,36,157,231,204,102,253,216,63,121,200,139,148,168,154,151,2,173,110,20,168,192, + 114,106,218,190,144,82,151,244,190,9,241,170,192,72,157,36,49,120,30,38,146,223,191,84,234,144,29,214,122,142,64,246, + 134,54,44,151,151,133,5,133,116,177,5,20,136,116,221,114,143,14,52,190,187,220,163,73,146,104,160,143,137,232,99,146, + 133,91,91,124,52,5,18,98,133,209,150,232,49,171,235,188,21,7,150,141,67,163,182,69,239,147,167,206,173,163,228,235, + 219,168,129,197,197,235,167,83,170,228,223,213,100,53,192,21,240,245,131,204,142,19,206,229,21,11,36,178,205,141,7,207, + 165,203,50,59,45,63,93,179,242,253,105,252,28,255,209,101,154,64,144,90,73,127,202,204,67,80,211,78,169,27,1,85, + 4,115,169,177,229,159,177,147,127,210,218,198,213,84,99,65,46,106,175,202,200,222,135,116,48,201,210,191,49,164,23,250, + 105,246,16,214,120,38,5,166,141,212,74,49,10,108,238,164,5,7,80,196,176,69,116,65,10,30,191,71,120,179,23,218, + 134,182,204,7,173,242,93,197,182,126,203,2,215,152,180,199,45,39,109,38,122,8,193,154,81,58,122,67,170,160,166,87, + 69,43,251,134,173,139,160,180,156,228,250,41,93,65,167,11,210,251,231,71,140,235,204,254,111,74,221,212,232,195,38,94, + 161,128,174,162,25,50,102,88,96,97,101,111,159,168,176,103,195,209,142,40,44,196,252,4,97,63,142,10,107,189,153,6, + 42,231,83,74,7,239,147,74,7,175,26,23,99,103,191,51,102,158,33,55,28,35,146,30,64,32,76,27,5,29,238,177, 
+ 93,225,118,143,196,114,243,234,140,233,11,158,135,248,36,245,43,214,212,191,147,118,19,64,217,107,231,213,231,232,74,163, + 232,209,43,81,147,193,243,16,248,145,161,223,196,91,101,224,12,20,144,50,20,220,12,153,165,210,226,130,204,60,66,110, + 193,225,73,74,59,7,173,20,180,84,120,62,199,38,137,15,228,230,118,135,177,118,61,19,44,36,44,209,148,186,91,89, + 48,124,119,37,36,16,75,123,83,247,160,107,41,43,207,18,241,189,106,114,180,37,6,38,91,153,236,14,1,86,38,86, + 52,131,96,232,158,34,98,7,16,252,9,85,130,80,45,215,104,208,108,84,191,231,4,20,220,109,76,141,10,4,80,157, + 35,59,34,46,95,240,181,145,163,119,229,243,247,41,117,13,121,93,34,95,83,91,107,249,111,7,118,181,121,82,149,52, + 178,106,208,120,33,177,28,186,50,153,234,144,88,50,121,169,145,60,161,120,117,189,212,225,197,208,176,71,11,17,240,152, + 10,198,196,222,192,218,229,12,52,223,33,105,18,3,33,130,22,15,190,89,143,248,30,79,77,92,235,93,150,24,154,238, + 225,139,245,22,242,4,194,177,248,0,28,197,225,241,222,77,184,220,209,180,105,169,14,36,240,49,33,206,11,239,255,45, + 49,237,28,222,27,114,94,65,59,21,201,12,182,80,234,54,136,74,66,123,212,216,200,206,227,40,157,83,75,224,144,0, + 114,133,180,57,252,185,99,152,170,68,47,51,207,250,55,59,175,185,72,134,216,241,103,128,5,22,212,245,139,18,19,23, + 255,43,120,60,54,225,185,154,0,5,71,88,6,99,249,180,134,101,16,22,247,247,153,8,216,104,208,135,192,161,14,17, + 111,122,247,84,183,236,156,203,85,203,102,208,182,143,183,54,164,227,97,77,252,95,112,67,144,209,228,94,164,222,64,35, + 17,255,178,95,166,111,69,183,59,24,9,26,37,105,255,104,50,214,150,188,218,244,126,138,201,246,23,88,31,204,122,151, + 80,85,239,158,22,106,225,19,126,79,237,139,46,32,147,249,177,232,111,100,229,223,36,135,33,22,210,137,161,13,108,66, + 154,231,102,108,190,69,196,146,1,95,179,99,147,36,8,172,197,48,253,26,62,246,239,52,152,76,38,3,248,143,127,186, + 131,60,226,122,60,229,242,232,94,41,46,229,228,76,215,90,168,56,28,206,41,124,120,143,170,43,218,174,149,133,116,239, + 
207,155,91,61,46,36,160,237,86,67,86,131,46,67,164,167,52,223,67,172,113,15,104,47,235,174,64,181,23,52,86,113, + 42,231,251,172,244,85,206,39,147,22,146,211,118,225,115,192,119,13,205,105,48,49,124,65,145,189,77,132,11,57,201,208, + 51,130,196,248,0,89,65,167,50,219,6,177,16,124,236,77,145,239,1,205,229,112,121,95,133,126,52,153,200,248,194,103, + 36,205,63,80,218,199,201,23,202,244,127,132,194,18,18,110,6,24,83,104,17,41,148,42,160,72,165,206,53,158,49,49, + 50,16,29,180,52,215,251,143,210,82,47,130,234,130,141,138,230,196,79,250,118,247,204,205,37,164,237,86,67,30,175,78, + 19,175,212,216,114,175,186,190,190,136,144,54,119,24,242,120,95,107,56,196,104,75,221,160,197,34,49,215,6,83,85,229, + 220,172,227,35,240,27,83,228,111,202,112,55,113,13,184,146,252,195,25,98,190,221,192,244,145,8,147,227,84,198,13,69, + 142,113,213,110,128,22,230,94,13,238,146,242,69,195,154,128,54,166,120,242,198,51,21,91,166,134,118,30,167,59,46,63, + 69,228,243,33,141,208,57,146,158,187,184,58,153,118,229,96,167,200,252,184,187,231,19,185,76,176,152,170,217,219,3,170, + 156,201,224,229,110,116,234,216,111,137,68,124,187,21,84,174,85,39,192,167,39,102,180,108,32,42,43,74,221,5,202,177, + 26,4,76,113,63,3,27,215,227,208,148,155,196,70,130,72,112,204,248,117,72,168,39,95,85,142,137,47,149,24,152,100, + 66,240,132,152,175,20,63,115,120,191,111,17,95,36,89,70,122,93,203,76,172,104,183,72,96,250,231,196,92,12,137,243, + 246,35,125,149,19,4,112,109,9,175,35,232,94,147,6,157,193,212,227,91,72,70,59,95,123,25,89,250,54,188,203,4, + 238,0,1,214,65,145,247,33,189,172,202,45,30,233,156,239,107,144,243,125,131,42,191,23,70,237,135,161,129,104,209,138, + 249,77,81,114,146,19,212,91,119,53,51,145,212,198,215,80,200,164,130,21,243,190,105,248,160,128,64,38,67,133,189,26, + 12,180,170,240,2,25,193,117,242,77,176,12,192,132,49,83,172,80,238,80,88,216,31,80,58,120,31,51,113,9,248,207, + 212,45,240,34,150,203,213,33,198,206,126,215,176,198,241,148,84,102,3,16,106,112,247,177,16,136,25,130,77,244,178,239, + 
30,162,10,108,150,13,26,42,145,239,162,211,156,254,69,248,61,160,137,141,159,54,44,69,185,185,221,161,132,185,127,18, + 115,195,64,62,55,254,92,184,54,38,21,136,178,138,210,5,75,63,46,143,63,25,91,45,251,156,154,117,120,216,118,214, + 175,196,170,200,192,149,208,116,216,98,104,122,180,162,148,223,53,26,175,151,199,164,44,22,72,163,99,10,51,32,155,163, + 39,227,155,79,35,44,112,149,81,27,74,125,159,91,40,4,7,41,117,230,146,118,125,201,152,76,224,143,193,211,38,54, + 64,160,41,250,120,26,253,67,213,190,14,234,224,11,27,210,45,213,45,251,222,249,180,42,231,237,150,219,157,236,35,237, + 118,39,163,212,89,4,179,177,134,121,10,18,178,225,118,2,240,111,65,199,50,200,21,4,243,45,101,213,89,186,35,86, + 117,10,221,184,103,103,30,153,226,137,173,247,145,67,227,132,215,146,205,153,219,55,190,128,171,111,200,101,6,20,33,104, + 144,206,244,133,208,198,245,67,109,192,133,1,153,18,100,10,75,238,163,240,254,211,160,205,228,115,159,164,254,88,250,85, + 73,124,219,15,40,174,151,58,162,4,198,0,8,151,104,71,55,38,211,192,58,40,42,23,143,67,211,82,201,22,177,30, + 113,61,158,145,204,99,238,132,215,125,88,223,175,225,80,42,241,105,87,245,113,121,85,232,207,123,6,65,87,183,22,233, + 143,236,195,91,23,90,248,132,223,198,123,240,152,80,166,191,150,195,225,12,98,130,192,90,35,225,216,94,93,220,31,129, + 121,190,102,73,228,35,115,51,189,189,84,213,239,83,170,22,40,213,221,194,218,69,52,180,188,118,246,80,135,167,36,93, + 12,175,246,227,181,209,70,63,94,240,57,54,21,201,12,54,66,240,7,242,102,161,139,147,250,218,154,82,215,192,236,46, + 83,85,86,221,66,40,200,5,239,102,96,227,2,215,197,152,149,53,133,161,165,30,180,214,35,69,188,80,165,229,208,176, + 77,33,69,246,230,139,23,7,61,95,172,247,29,28,140,196,114,85,75,231,172,18,146,210,107,136,228,237,14,240,121,144, + 247,140,45,178,165,101,98,30,100,137,151,177,92,52,21,123,218,92,219,234,106,192,44,90,209,128,222,201,112,59,70,189, + 
180,17,37,22,62,97,119,249,98,233,82,134,128,137,167,220,58,53,9,183,188,122,245,100,50,104,119,197,223,140,15,45, + 84,200,133,107,25,159,162,174,35,218,199,83,121,230,207,204,214,143,181,65,186,47,110,160,112,37,126,3,133,18,155,129, + 227,45,124,195,179,34,255,183,28,117,217,124,171,212,21,40,117,179,51,89,211,17,75,16,214,234,203,235,240,22,6,77, + 164,137,248,6,75,71,219,45,236,15,17,206,98,208,192,209,196,197,255,18,164,168,125,72,61,25,52,164,11,233,110,74, + 71,159,43,212,235,41,129,205,28,35,146,30,210,93,217,106,245,213,85,133,47,246,98,234,218,75,244,33,99,100,231,113, + 145,82,23,253,16,37,95,137,202,82,186,253,215,45,113,116,32,41,251,98,122,241,152,97,129,5,50,169,96,165,142,147, + 111,164,155,179,193,137,204,117,45,31,21,106,129,116,75,223,185,38,35,116,231,26,211,188,199,94,36,211,223,224,147,248, + 241,243,206,171,206,98,82,42,170,219,61,91,65,163,200,204,67,30,113,221,193,93,211,145,195,121,57,140,2,9,157,137, + 242,57,92,155,66,204,205,128,199,19,170,204,240,193,54,5,91,18,100,163,157,234,11,45,211,189,218,124,244,156,148,11, + 166,214,144,46,115,161,163,75,84,50,116,34,235,31,152,58,242,53,5,206,204,35,248,42,164,196,213,214,198,56,61,203, + 8,188,71,215,157,69,40,122,234,30,164,112,12,128,64,95,99,138,67,136,123,165,122,124,138,203,225,12,159,58,174,62, + 77,188,224,211,204,186,144,86,60,110,68,80,129,129,190,112,131,14,70,21,193,167,155,224,229,110,244,79,230,218,150,143, + 10,180,68,186,180,127,247,10,125,203,48,148,184,146,186,101,216,154,103,96,190,205,42,117,50,106,180,226,14,138,221,144, + 143,18,55,231,161,228,173,121,168,203,142,124,212,45,179,212,2,216,85,106,1,212,234,13,171,14,116,25,59,251,129,214, + 96,95,102,60,228,82,99,203,95,160,124,151,20,241,130,201,232,24,209,174,108,183,44,82,16,192,165,149,116,3,152,61, + 69,31,14,241,98,2,234,190,61,155,78,119,227,139,36,75,168,242,243,217,197,34,185,225,166,152,9,235,233,30,24,181, + 
134,92,119,170,247,93,250,246,124,212,9,239,195,132,77,185,168,197,186,92,212,100,117,14,10,89,145,141,188,191,203,66, + 174,75,115,145,89,191,85,136,167,111,6,41,165,68,139,90,66,219,182,178,203,185,115,54,149,38,27,32,95,232,99,59, + 115,114,88,145,149,165,116,31,19,105,231,234,0,233,194,132,15,104,212,192,226,202,190,237,241,143,181,74,186,120,12,206, + 31,233,136,66,131,204,46,148,67,24,239,3,35,174,212,112,181,97,199,73,200,122,238,93,100,61,47,11,217,204,187,143, + 236,230,223,71,206,139,178,144,231,146,44,84,111,121,54,10,91,153,141,154,253,152,131,90,173,207,69,73,155,115,105,82, + 78,223,158,135,186,226,197,209,189,244,162,169,37,196,12,155,48,122,244,42,36,208,147,255,80,78,46,109,176,117,80,100, + 54,185,204,137,66,212,110,241,223,72,95,229,12,85,134,166,90,88,127,206,38,174,245,174,166,172,58,87,231,173,148,87, + 202,173,49,233,66,206,46,38,214,237,21,90,193,106,77,176,189,67,227,132,135,80,25,87,83,227,83,222,254,128,125,3, + 251,39,13,239,163,78,91,242,176,178,147,139,98,241,254,106,186,38,7,53,248,33,27,249,47,203,70,30,120,255,57,45, + 204,162,247,163,53,222,151,42,144,185,204,159,32,179,110,33,105,253,14,5,212,203,114,113,34,144,219,90,203,246,252,182, + 53,14,105,204,118,134,128,75,54,175,108,254,40,200,223,228,60,16,30,85,179,37,179,174,18,9,127,113,106,7,231,172, + 51,7,218,63,213,38,233,106,242,119,87,47,110,6,249,187,75,168,170,87,17,9,56,124,225,4,69,236,167,207,85,179, + 111,35,235,249,89,244,228,106,68,85,90,230,170,255,173,52,41,187,227,69,225,135,23,7,156,192,141,87,229,160,232,181, + 185,40,126,67,46,106,199,104,203,176,160,186,150,213,152,117,136,156,189,19,62,126,14,41,78,140,169,174,54,91,132,116, + 92,102,104,80,198,40,114,149,106,88,11,109,60,120,14,226,10,132,211,137,39,85,171,139,60,210,212,110,134,15,199,167, + 155,190,254,26,242,79,254,172,132,233,27,241,54,5,68,42,16,75,151,193,197,173,234,107,143,10,181,163,169,238,122,93, + 
99,205,216,193,16,43,222,15,73,120,95,196,225,253,17,181,54,7,53,194,251,37,24,239,27,95,172,185,186,47,126,73, + 174,54,229,144,171,245,155,100,65,46,50,74,159,137,40,30,127,52,87,70,168,87,150,80,72,243,74,191,255,13,14,40, + 41,75,104,64,196,167,247,183,123,218,61,213,45,199,64,95,4,229,123,141,40,194,149,53,111,1,144,125,119,103,71,253, + 35,223,78,12,45,194,90,249,115,109,147,46,72,246,197,116,148,145,236,10,38,107,60,167,10,39,28,135,71,15,85,156, + 196,191,85,142,213,212,243,175,145,238,187,136,170,204,34,161,137,25,47,30,251,5,47,137,217,231,187,108,20,244,61,104, + 204,57,248,36,87,155,75,173,55,230,161,118,63,169,23,99,234,118,245,226,236,166,209,156,119,85,44,68,47,133,92,125, + 30,153,186,7,93,165,212,181,246,175,22,77,24,154,237,34,121,53,12,104,102,46,81,201,218,170,118,226,241,49,169,68, + 141,90,161,211,166,52,169,118,148,16,233,135,206,99,118,97,173,138,120,66,241,44,38,199,252,93,96,3,55,135,4,164, + 124,86,146,250,227,37,117,160,234,13,218,239,43,100,90,134,80,53,154,42,172,219,212,109,121,168,227,22,53,169,198,111, + 204,69,49,120,125,71,96,141,53,12,107,172,129,216,82,244,94,154,133,220,202,16,171,117,101,201,245,141,196,155,131,76, + 7,111,65,60,185,201,15,164,173,127,167,160,0,147,243,255,30,104,143,202,18,27,252,29,252,190,171,22,53,123,212,56, + 204,226,138,72,200,155,65,169,219,204,9,180,76,184,109,141,149,226,45,152,0,179,15,236,166,187,141,149,144,206,211,125, + 83,153,176,131,157,98,31,85,245,86,144,166,124,99,219,223,77,135,108,167,39,207,186,42,147,175,17,32,111,248,172,249, + 217,72,53,231,46,109,6,89,149,18,248,187,245,236,91,200,6,139,221,156,91,200,113,222,45,228,58,255,54,242,92,120, + 27,249,47,185,131,130,151,221,65,225,223,223,65,17,43,239,162,230,171,239,162,184,181,247,80,194,250,123,168,253,198,251, + 40,121,115,22,74,195,139,28,22,59,237,222,200,172,64,227,216,245,186,175,236,245,108,134,34,4,62,63,172,45,173,45, + 
39,31,210,223,202,191,201,61,82,151,124,130,118,5,85,118,134,182,238,167,41,117,243,39,210,176,50,180,243,56,67,170, + 179,215,11,146,171,105,129,40,190,38,245,12,191,23,20,177,180,250,122,59,242,140,239,249,76,102,170,130,108,158,14,20, + 115,211,70,37,96,193,19,136,166,91,5,68,228,68,12,93,136,192,53,67,191,239,238,7,168,251,174,34,212,117,103,33, + 202,200,44,192,26,106,62,182,216,242,81,123,76,168,9,180,150,154,135,154,51,132,26,190,18,124,172,57,180,27,192,115, + 105,54,114,213,144,234,130,44,172,116,100,209,174,58,85,89,107,145,196,222,122,3,241,154,141,216,131,248,74,235,173,20, + 225,242,115,142,72,196,155,12,197,20,21,221,218,0,255,126,249,68,167,103,51,39,135,61,8,11,49,187,168,39,225,47, + 196,191,7,161,99,99,66,169,22,64,228,208,64,165,167,153,137,100,71,199,4,199,123,91,87,199,60,206,186,144,94,92, + 29,90,110,105,25,220,215,7,204,227,222,114,217,251,159,45,140,73,242,177,34,102,192,51,235,185,247,8,44,128,44,154, + 108,45,39,157,66,202,30,139,144,162,229,160,103,210,240,212,66,189,144,118,249,122,193,73,111,21,73,25,129,127,147,130, + 132,36,229,203,235,39,229,25,132,180,45,50,75,24,94,226,58,255,22,242,194,139,29,220,27,160,69,131,31,12,76,182, + 102,120,67,128,166,209,10,155,113,109,55,229,210,218,71,7,188,105,82,24,119,7,4,8,95,37,236,2,228,219,97,32, + 92,10,217,139,83,234,82,72,166,119,192,192,192,212,225,168,23,108,252,157,85,119,137,168,171,169,22,65,202,218,60,74, + 59,109,78,163,156,34,218,61,32,85,52,1,105,87,80,118,91,221,69,49,47,4,91,34,16,244,108,191,228,40,106,61, + 125,47,130,11,69,161,80,1,42,8,37,134,166,187,241,251,246,40,39,231,186,82,134,52,228,169,11,36,210,197,230,94, + 13,206,121,38,244,125,236,211,111,46,242,30,157,137,60,191,62,130,220,103,158,71,46,179,175,32,167,57,215,145,253,156, + 27,88,73,184,129,172,103,221,192,74,195,13,100,53,179,124,81,105,100,214,155,228,38,178,198,202,136,90,49,201,34,75, + 188,195,118,34,158,161,229,70,138,124,3,35,202,59,44,196,252,234,217,195,29,80,193,213,140,10,131,78,152,128,75,174, + 
96,2,94,62,55,226,17,38,199,187,246,182,242,125,124,62,119,18,254,253,214,76,22,132,226,29,31,78,200,144,118,0, + 150,110,82,169,96,169,191,183,242,244,160,62,62,121,123,55,181,122,114,31,107,217,85,189,186,231,125,114,119,255,218,217, + 26,185,58,25,28,174,132,121,85,161,182,43,48,119,57,104,62,106,31,77,152,85,213,114,85,88,131,53,234,50,11,137, + 28,130,46,112,132,18,240,99,118,98,92,63,193,4,164,30,150,47,229,205,251,149,148,245,57,151,118,113,104,252,207,182, + 140,171,195,17,107,33,224,238,0,51,15,2,20,16,5,6,45,37,8,107,43,65,243,206,35,35,207,240,203,229,100,198, + 136,5,10,227,159,26,142,223,130,58,108,45,164,77,72,240,85,119,222,86,134,192,25,18,239,198,16,121,247,10,34,211, + 180,123,36,51,23,110,183,120,162,141,10,67,166,9,204,72,8,48,245,34,144,205,208,11,46,73,156,247,39,50,243,172, + 95,164,116,240,190,172,116,244,169,9,185,104,100,239,249,175,129,181,203,65,61,35,243,76,158,80,4,57,214,189,25,75, + 150,100,23,62,56,113,161,255,68,27,138,39,24,203,81,152,174,229,91,184,237,23,58,6,255,43,242,108,122,93,226,215, + 242,158,36,32,62,91,47,176,13,9,201,209,11,74,200,149,134,117,46,52,236,56,169,216,234,219,75,228,200,23,92,13, + 3,55,34,174,212,104,153,54,14,118,142,80,200,157,240,229,168,96,244,54,13,147,33,96,58,239,247,200,47,9,79,231, + 125,211,240,97,215,20,215,172,208,32,179,179,42,75,233,110,104,186,195,225,80,163,33,231,15,75,6,150,100,44,157,153, + 147,244,83,76,212,83,140,12,69,171,157,29,244,255,108,222,84,117,101,72,63,223,252,117,203,162,30,159,63,210,241,25, + 4,245,10,171,89,195,125,225,219,189,148,142,250,246,240,132,20,178,158,6,250,239,95,45,200,81,247,35,232,44,143,236, + 243,132,136,182,59,231,14,50,72,28,93,204,149,26,254,136,63,215,67,11,147,207,231,202,148,203,77,62,89,87,105,151, + 72,121,38,159,10,127,134,241,128,13,136,43,55,94,89,142,75,202,89,236,26,126,197,230,155,115,200,1,19,183,134,188, + 93,74,17,184,215,210,44,58,32,2,36,14,62,188,96,172,121,215,95,161,214,190,193,12,5,13,28,82,126,64,11,143, + 
90,155,135,34,230,159,70,74,151,122,224,102,80,105,65,219,21,138,228,70,27,162,39,109,65,93,118,22,209,169,71,26, + 129,67,162,172,148,254,255,26,129,3,69,35,233,153,69,168,201,200,239,65,59,95,206,4,172,106,66,236,152,12,5,67, + 70,1,170,174,172,37,32,98,125,102,158,156,152,181,236,69,80,160,176,163,33,71,36,93,99,220,235,59,114,238,189,5, + 185,72,217,109,1,226,240,4,227,121,114,19,173,12,140,3,116,249,58,184,167,45,122,87,243,30,126,174,72,173,153,150, + 92,59,149,252,236,240,207,109,159,111,90,17,253,104,238,215,225,69,147,71,135,60,253,252,83,255,7,216,116,207,251,172, + 191,111,254,232,97,245,30,79,29,31,250,100,201,172,198,69,187,55,196,62,61,253,103,187,103,119,206,166,130,43,161,68, + 147,71,92,19,132,171,113,165,108,89,21,3,45,32,119,17,240,237,234,241,12,44,182,152,14,222,90,245,201,95,144,141, + 140,63,250,30,242,8,183,85,209,252,123,99,69,150,200,185,193,69,171,175,255,35,163,37,224,207,80,180,26,2,238,154, + 174,76,128,145,241,51,208,134,80,186,162,229,160,231,111,204,238,40,39,219,227,21,41,237,211,3,146,239,181,20,113,197, + 178,153,90,114,51,40,69,214,94,199,28,167,28,67,238,75,114,232,232,248,11,89,82,142,44,126,93,220,74,203,146,92, + 100,218,126,52,52,197,233,201,229,241,41,22,132,45,20,107,111,138,35,16,77,82,118,95,64,19,38,25,226,205,70,250, + 241,195,138,105,37,146,203,35,255,208,140,79,179,91,183,206,110,143,238,157,75,171,52,17,194,207,3,17,3,137,65,103, + 47,16,112,77,0,177,50,228,90,66,255,251,117,245,207,84,228,210,168,110,129,155,38,174,156,232,132,90,70,219,192,141, + 169,209,4,10,38,26,232,5,196,101,171,102,92,83,251,102,171,224,215,85,77,191,130,176,73,118,31,127,102,184,118,74, + 82,232,205,223,153,246,69,207,35,67,186,86,223,92,64,98,247,38,208,29,204,237,53,205,90,106,244,189,201,39,107,201, + 105,35,216,162,144,69,244,120,196,184,186,180,1,47,137,79,243,219,86,120,46,85,111,59,32,222,65,96,124,228,81,125, + 224,210,207,120,150,38,181,2,11,129,133,203,65,243,209,127,86,221,197,167,81,10,102,221,68,122,129,109,115,24,183,156, + 
246,10,21,140,12,69,171,22,76,107,132,10,174,233,6,49,86,135,140,25,86,175,4,2,140,85,141,90,74,252,90,64, + 26,217,56,163,212,111,9,104,187,57,200,108,228,207,16,77,221,73,105,175,95,50,159,43,195,100,216,159,16,25,210,169, + 55,91,65,67,95,79,189,222,60,222,94,228,84,255,130,213,87,255,18,211,172,45,39,157,68,66,187,128,211,148,246,74, + 220,99,100,77,123,62,182,158,119,143,88,86,138,188,89,47,56,40,98,89,142,36,207,93,28,190,112,170,126,235,17,197, + 100,2,218,247,105,242,182,24,119,8,9,44,221,181,213,255,227,213,83,222,219,195,232,31,72,171,170,238,0,87,77,184, + 24,54,44,143,66,54,42,217,47,4,2,106,0,35,124,226,254,101,49,230,64,213,79,220,133,185,180,155,129,35,148,76, + 19,251,198,104,107,174,29,68,206,161,4,201,16,155,101,109,70,66,54,195,71,216,228,43,229,248,166,173,136,78,242,232, + 126,79,73,70,155,141,251,172,64,92,137,124,190,54,252,148,140,155,164,167,65,194,23,37,228,252,133,57,200,160,253,4, + 112,53,12,229,74,13,89,170,172,58,184,140,223,184,33,87,79,255,7,121,228,71,143,173,190,189,72,48,176,6,254,221, + 121,136,35,16,127,109,152,58,77,187,111,194,20,85,36,198,70,219,100,159,255,187,67,157,213,124,33,136,119,120,111,91, + 20,92,207,20,162,239,97,132,134,47,76,47,56,41,151,78,109,33,48,233,38,253,127,196,196,162,248,145,137,14,195,78, + 53,32,40,16,193,238,170,136,249,228,25,41,109,206,106,218,37,36,246,138,186,5,89,50,101,157,26,120,99,44,54,233, + 187,138,156,155,1,11,4,48,241,103,183,211,202,142,150,27,3,249,254,207,40,125,6,57,127,33,163,65,137,156,27,128, + 43,6,110,171,133,235,218,29,153,128,147,149,142,136,138,170,184,82,85,68,113,121,253,56,66,189,233,28,145,244,219,154, + 20,174,88,62,3,130,194,2,115,231,253,122,129,109,114,149,61,23,35,213,204,235,68,83,201,32,155,72,27,229,194,111, + 246,254,241,56,67,123,164,185,61,190,121,166,51,237,7,173,75,164,11,135,201,185,195,29,80,124,11,91,104,234,156,218, + 48,212,156,64,90,8,125,131,71,95,131,164,177,132,200,37,11,169,166,93,70,250,241,195,159,98,173,244,146,208,198,231, + 
136,208,218,251,16,65,57,40,180,245,189,110,58,104,19,49,55,131,217,80,58,223,17,18,205,203,94,103,98,35,116,8, + 252,207,114,242,105,114,110,134,41,103,144,200,49,248,44,69,254,202,28,117,222,155,107,67,10,111,240,175,137,70,200,153, + 103,183,24,123,8,97,77,186,88,47,56,49,87,236,17,113,85,228,18,118,1,207,241,121,176,62,106,90,240,252,93,100, + 50,144,202,131,7,158,199,27,202,110,243,233,160,102,141,202,71,203,17,172,93,139,177,7,233,28,95,245,28,17,206,223, + 29,177,23,241,141,109,127,102,180,234,106,131,84,79,194,159,62,164,159,239,243,187,231,82,107,52,235,128,52,233,66,15, + 226,244,142,46,15,121,92,206,103,20,185,106,20,14,54,123,231,18,213,234,128,164,230,222,67,86,83,207,33,203,47,143, + 35,203,137,199,200,202,164,147,136,238,33,65,202,140,78,26,3,102,244,0,142,168,180,75,154,214,20,18,229,205,122,63, + 33,230,43,197,223,101,210,111,13,226,234,25,124,71,105,33,169,29,128,181,29,138,43,150,205,48,254,248,7,178,196,203, + 104,190,180,204,185,141,173,132,203,244,252,66,86,73,205,202,89,122,61,72,124,91,228,129,233,94,65,154,100,15,136,240, + 219,44,204,83,91,1,53,42,57,47,170,56,181,85,181,166,104,49,0,178,115,122,113,248,213,127,187,188,161,66,46,92, + 60,242,83,255,98,53,249,102,212,9,210,5,77,94,40,228,141,163,42,95,18,249,38,136,120,70,170,109,230,159,255,162, + 133,141,154,245,114,179,18,21,82,26,2,214,206,103,92,133,12,12,184,191,47,160,172,213,142,9,108,46,104,40,36,15, + 36,112,145,224,207,78,209,150,9,40,246,104,162,214,120,123,127,71,126,62,95,171,74,212,1,193,239,104,49,250,47,132, + 77,247,63,42,112,53,8,121,10,147,31,77,7,109,214,242,120,232,128,44,200,70,230,95,252,129,4,150,110,71,40,237, + 148,161,191,19,140,129,124,65,243,5,183,67,109,245,249,194,115,95,56,210,17,117,73,118,121,40,18,242,38,80,234,59, + 207,136,70,85,5,86,238,7,44,198,29,214,234,41,172,155,11,85,147,129,97,179,139,122,189,250,201,82,104,235,119,202, + 242,203,19,196,220,12,160,157,137,92,26,92,162,180,120,85,21,79,237,227,29,5,157,169,136,249,120,117,124,14,141,210, + 
166,67,161,192,56,161,125,189,242,134,196,89,236,218,240,10,104,231,68,75,114,117,81,230,222,69,242,232,190,112,176,247, + 198,135,111,141,70,15,13,193,237,208,45,213,237,241,185,191,59,212,186,108,7,120,222,163,191,37,162,182,173,236,242,249, + 124,238,240,114,124,144,68,198,72,104,235,123,20,92,2,31,34,241,26,118,248,18,97,237,115,24,54,255,203,142,75,188, + 172,73,183,71,234,94,196,164,74,56,55,32,174,76,89,94,101,28,193,232,26,237,193,232,162,223,122,120,113,157,215,240, + 52,129,164,208,142,16,72,138,120,173,22,69,157,239,93,110,241,75,93,92,203,224,59,198,214,235,158,234,72,33,123,39, + 159,47,144,86,203,40,155,156,63,118,196,211,26,164,174,251,125,225,249,64,160,42,173,65,176,217,117,136,226,107,113,179, + 234,11,173,189,255,6,223,233,135,69,188,89,116,52,89,175,94,235,108,234,245,155,170,57,16,1,87,246,88,76,208,205, + 144,141,244,153,202,56,138,163,245,106,215,16,189,160,132,28,34,89,42,186,44,144,109,49,22,242,85,221,14,86,64,54, + 144,239,189,226,125,202,202,107,219,56,128,53,37,241,137,129,98,170,230,213,176,190,222,61,225,30,2,37,222,30,70,103, + 22,78,107,132,160,194,77,87,93,15,160,229,222,248,39,5,77,250,34,184,196,222,86,254,23,165,190,77,67,155,57,33, + 98,190,177,237,78,243,255,253,70,251,136,62,28,109,55,27,153,143,250,29,241,77,237,127,165,212,77,146,74,195,76,96, + 237,125,204,98,194,81,50,135,209,139,202,184,198,229,85,198,105,197,210,227,155,57,253,1,254,190,58,61,167,152,76,149, + 93,231,34,142,64,52,197,116,192,250,242,198,129,108,241,139,78,146,174,186,25,21,196,14,56,60,193,24,138,112,11,200, + 170,91,96,234,67,192,19,26,221,64,121,49,221,219,225,170,238,148,0,107,14,130,61,155,98,81,98,156,125,190,84,143, + 15,205,156,109,180,61,46,118,171,17,164,148,125,3,109,27,63,8,159,96,105,55,67,202,84,132,77,243,47,248,198,54, + 175,87,126,53,76,123,0,141,126,136,153,129,208,148,90,97,90,94,101,28,121,63,175,1,212,212,112,6,65,36,159,104, + 
170,146,174,9,158,31,105,120,106,145,90,203,227,148,183,225,59,201,155,19,44,126,209,65,210,165,155,81,181,27,7,153, + 50,43,40,178,55,139,107,161,68,143,195,233,230,233,102,120,250,203,81,193,37,144,23,75,247,96,168,33,13,24,190,23, + 228,196,190,68,52,244,19,191,231,14,118,138,253,248,25,219,86,199,6,45,85,157,21,43,13,75,41,130,147,243,67,33, + 94,186,150,61,56,9,82,144,26,149,30,14,199,237,248,32,18,136,191,82,118,157,71,214,205,208,90,93,25,71,85,95, + 147,25,43,190,169,195,126,32,252,58,105,102,131,155,1,91,36,2,107,47,184,175,174,188,70,76,80,252,178,132,116,241, + 139,46,189,63,184,202,244,19,70,149,112,229,202,117,53,153,197,80,233,178,83,177,136,55,49,44,196,236,234,180,137,161, + 72,115,147,5,221,113,172,26,124,184,154,64,223,241,223,19,209,184,17,129,37,254,222,202,255,248,124,46,228,231,154,215, + 192,88,40,224,70,82,173,228,126,234,234,166,29,67,167,32,193,33,87,182,171,155,177,192,202,253,16,185,44,15,77,101, + 92,36,84,198,121,85,215,132,50,141,220,35,69,78,33,215,204,134,239,38,223,104,91,23,220,12,61,22,35,142,72,175, + 162,251,234,200,22,191,232,146,150,11,41,116,227,255,70,178,198,25,143,185,18,249,130,26,226,140,42,215,76,123,137,197, + 188,201,112,65,230,231,131,3,138,225,2,77,184,189,184,144,233,70,70,42,16,167,33,91,144,155,255,164,160,204,117,45, + 209,160,62,222,207,188,61,140,78,9,4,220,145,76,138,17,167,70,70,65,173,245,214,23,218,250,159,55,27,186,163,238, + 109,210,242,82,144,210,103,33,14,95,56,81,242,122,63,137,102,210,6,157,10,172,9,22,104,152,13,205,132,202,184,45, + 90,202,74,121,227,204,130,53,35,80,121,157,49,74,155,129,232,126,0,90,78,222,175,206,212,41,200,58,161,224,190,186, + 178,193,36,142,22,138,95,106,56,16,172,41,188,128,162,17,104,102,37,180,175,119,145,226,114,251,104,33,189,180,218,9, + 216,137,203,229,244,183,81,201,246,194,213,241,83,199,213,71,191,110,137,163,11,23,128,56,161,45,100,17,227,146,128,130, + 
12,77,230,65,126,153,76,4,186,205,36,254,255,133,76,207,95,32,218,156,75,233,232,226,209,142,104,39,38,91,208,110, + 99,34,173,239,90,152,233,193,157,247,25,76,141,57,167,198,7,64,78,7,133,155,11,84,158,255,64,94,164,213,212,179, + 234,13,10,126,95,77,209,2,115,119,84,173,95,196,115,110,35,105,131,228,66,170,76,45,187,196,175,37,84,58,77,0, + 82,38,215,23,53,7,25,36,210,149,113,159,188,210,128,167,122,225,193,21,203,231,97,173,251,174,97,202,215,200,124,244, + 126,166,63,64,246,203,106,170,215,230,56,75,167,45,22,186,195,155,173,95,69,247,213,113,225,125,213,197,47,185,181,103, + 93,190,40,12,201,102,136,54,87,29,60,155,126,153,46,3,198,235,168,68,228,26,126,13,107,249,211,32,94,165,11,188, + 65,18,112,111,74,40,159,199,25,161,178,148,110,111,18,102,113,181,87,23,247,71,223,140,15,69,235,151,71,161,63,51, + 91,211,110,137,203,199,59,209,217,7,183,255,235,76,107,200,183,255,237,140,110,156,78,65,151,142,117,66,255,252,213,14, + 253,190,45,30,173,89,18,9,217,9,168,75,178,235,131,208,96,179,11,102,166,146,13,152,220,7,80,234,174,243,18,29, + 125,127,119,142,88,54,91,236,218,240,154,126,252,240,98,147,126,171,233,141,10,145,97,48,153,161,95,175,170,244,125,82, + 181,77,102,223,162,223,71,96,229,254,55,126,215,178,173,249,141,248,102,78,251,205,63,255,149,254,185,42,127,23,220,199, + 53,245,28,146,248,52,191,203,204,121,77,2,210,17,3,241,193,50,22,50,30,36,254,173,178,160,241,59,220,82,0,126, + 96,232,76,7,102,57,220,44,13,189,54,212,243,124,93,71,231,240,54,82,246,92,130,128,92,43,32,31,43,161,141,207, + 105,104,236,3,190,124,221,95,151,215,233,30,214,144,249,98,57,229,31,122,46,76,63,253,9,25,101,204,70,138,230,253, + 159,65,3,39,190,210,122,23,197,229,13,166,212,55,96,240,168,58,14,32,71,184,238,35,22,43,70,131,13,244,133,139, + 108,173,229,153,62,158,70,71,194,235,155,95,140,105,102,125,51,190,133,237,45,172,33,223,138,139,177,189,25,29,161,186, + 209,32,216,236,188,167,155,225,33,107,43,217,86,185,76,48,135,82,95,37,20,69,169,155,162,8,107,201,123,131,131,208, + 
29,75,15,174,68,127,33,223,212,225,23,145,67,224,25,177,103,211,27,18,191,216,187,122,1,241,247,177,100,213,74,169, + 215,58,11,107,13,185,88,251,156,88,206,123,7,115,101,70,23,37,190,45,8,125,87,124,150,196,59,42,151,43,53,220, + 160,99,38,33,52,79,129,50,175,84,14,95,48,145,167,48,93,13,115,140,53,200,19,120,108,46,129,63,26,107,255,119, + 37,1,113,247,116,114,174,241,28,10,172,189,110,83,112,47,90,249,183,43,68,242,244,205,174,195,225,82,11,214,228,125, + 252,156,119,37,222,209,183,197,110,141,174,8,109,253,79,242,77,29,127,225,201,77,86,114,120,252,177,248,93,218,51,218, + 173,148,250,128,193,97,200,216,152,113,17,216,51,190,89,39,230,79,248,187,21,19,176,17,213,161,119,150,49,14,124,7, + 230,196,245,101,52,184,218,44,229,93,68,5,233,56,126,90,248,46,27,29,159,99,30,51,199,166,88,172,169,151,119,139, + 121,107,105,60,72,136,15,85,113,163,125,227,90,180,14,125,153,160,171,11,179,78,76,153,185,224,82,44,88,176,96,193, + 130,5,11,22,44,88,176,96,193,130,5,11,22,44,88,176,96,193,130,69,37,241,127,238,146,187,72,236,7,218,235,0, + 0,0,37,116,69,88,116,100,97,116,101,58,99,114,101,97,116,101,0,50,48,50,48,45,48,56,45,49,48,84,48,57, + 58,50,49,58,49,53,43,48,57,58,48,48,81,58,8,144,0,0,0,37,116,69,88,116,100,97,116,101,58,109,111,100, + 105,102,121,0,50,48,50,48,45,48,56,45,49,48,84,48,57,58,50,49,58,49,53,43,48,57,58,48,48,32,103,176, + 44,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char Logo2x[18991] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,2,188,0,0,0,170,8,6,0,0,0,79,170,156, + 236,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,6,132,105,84,88,116,88,77, + 76,58,99,111,109,46,97,100,111,98,101,46,120,109,112,0,0,0,0,0,60,63,120,112,97,99,107,101,116,32,98,101, + 103,105,110,61,34,239,187,191,34,32,105,100,61,34,87,53,77,48,77,112,67,101,104,105,72,122,114,101,83,122,78,84, + 99,122,107,99,57,100,34,63,62,32,60,120,58,120,109,112,109,101,116,97,32,120,109,108,110,115,58,120,61,34,97,100, + 
111,98,101,58,110,115,58,109,101,116,97,47,34,32,120,58,120,109,112,116,107,61,34,65,100,111,98,101,32,88,77,80, + 32,67,111,114,101,32,53,46,54,45,99,49,52,53,32,55,57,46,49,54,51,52,57,57,44,32,50,48,49,56,47,48, + 56,47,49,51,45,49,54,58,52,48,58,50,50,32,32,32,32,32,32,32,32,34,62,32,60,114,100,102,58,82,68,70, + 32,120,109,108,110,115,58,114,100,102,61,34,104,116,116,112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,49,57, + 57,57,47,48,50,47,50,50,45,114,100,102,45,115,121,110,116,97,120,45,110,115,35,34,62,32,60,114,100,102,58,68, + 101,115,99,114,105,112,116,105,111,110,32,114,100,102,58,97,98,111,117,116,61,34,34,32,120,109,108,110,115,58,120,109, + 112,61,34,104,116,116,112,58,47,47,110,115,46,97,100,111,98,101,46,99,111,109,47,120,97,112,47,49,46,48,47,34, + 32,120,109,108,110,115,58,100,99,61,34,104,116,116,112,58,47,47,112,117,114,108,46,111,114,103,47,100,99,47,101,108, + 101,109,101,110,116,115,47,49,46,49,47,34,32,120,109,108,110,115,58,112,104,111,116,111,115,104,111,112,61,34,104,116, + 116,112,58,47,47,110,115,46,97,100,111,98,101,46,99,111,109,47,112,104,111,116,111,115,104,111,112,47,49,46,48,47, + 34,32,120,109,108,110,115,58,120,109,112,77,77,61,34,104,116,116,112,58,47,47,110,115,46,97,100,111,98,101,46,99, + 111,109,47,120,97,112,47,49,46,48,47,109,109,47,34,32,120,109,108,110,115,58,115,116,69,118,116,61,34,104,116,116, + 112,58,47,47,110,115,46,97,100,111,98,101,46,99,111,109,47,120,97,112,47,49,46,48,47,115,84,121,112,101,47,82, + 101,115,111,117,114,99,101,69,118,101,110,116,35,34,32,120,109,112,58,67,114,101,97,116,111,114,84,111,111,108,61,34, + 65,100,111,98,101,32,80,104,111,116,111,115,104,111,112,32,67,67,32,50,48,49,57,32,40,77,97,99,105,110,116,111, + 115,104,41,34,32,120,109,112,58,67,114,101,97,116,101,68,97,116,101,61,34,50,48,50,49,45,48,56,45,50,55,84, + 50,48,58,49,49,58,52,52,43,48,51,58,48,48,34,32,120,109,112,58,77,111,100,105,102,121,68,97,116,101,61,34, + 
50,48,50,49,45,48,57,45,48,51,84,50,50,58,49,51,58,53,52,43,48,51,58,48,48,34,32,120,109,112,58,77, + 101,116,97,100,97,116,97,68,97,116,101,61,34,50,48,50,49,45,48,57,45,48,51,84,50,50,58,49,51,58,53,52, + 43,48,51,58,48,48,34,32,100,99,58,102,111,114,109,97,116,61,34,105,109,97,103,101,47,112,110,103,34,32,112,104, + 111,116,111,115,104,111,112,58,67,111,108,111,114,77,111,100,101,61,34,51,34,32,112,104,111,116,111,115,104,111,112,58, + 73,67,67,80,114,111,102,105,108,101,61,34,115,82,71,66,32,73,69,67,54,49,57,54,54,45,50,46,49,34,32,120, + 109,112,77,77,58,73,110,115,116,97,110,99,101,73,68,61,34,120,109,112,46,105,105,100,58,97,98,54,101,100,57,50, + 102,45,50,49,53,102,45,52,55,56,53,45,97,98,100,101,45,57,52,97,55,54,99,53,53,56,97,53,48,34,32,120, + 109,112,77,77,58,68,111,99,117,109,101,110,116,73,68,61,34,97,100,111,98,101,58,100,111,99,105,100,58,112,104,111, + 116,111,115,104,111,112,58,98,99,101,99,53,54,54,98,45,52,101,97,57,45,101,49,52,57,45,98,50,97,101,45,48, + 100,56,52,51,101,102,48,50,55,57,56,34,32,120,109,112,77,77,58,79,114,105,103,105,110,97,108,68,111,99,117,109, + 101,110,116,73,68,61,34,120,109,112,46,100,105,100,58,102,50,99,97,50,51,102,99,45,53,49,57,99,45,52,56,55, + 54,45,97,57,101,101,45,100,50,51,100,52,99,50,51,50,99,51,51,34,62,32,60,112,104,111,116,111,115,104,111,112, + 58,84,101,120,116,76,97,121,101,114,115,62,32,60,114,100,102,58,66,97,103,62,32,60,114,100,102,58,108,105,32,112, + 104,111,116,111,115,104,111,112,58,76,97,121,101,114,78,97,109,101,61,34,97,114,101,115,34,32,112,104,111,116,111,115, + 104,111,112,58,76,97,121,101,114,84,101,120,116,61,34,97,114,101,115,34,47,62,32,60,47,114,100,102,58,66,97,103, + 62,32,60,47,112,104,111,116,111,115,104,111,112,58,84,101,120,116,76,97,121,101,114,115,62,32,60,120,109,112,77,77, + 58,72,105,115,116,111,114,121,62,32,60,114,100,102,58,83,101,113,62,32,60,114,100,102,58,108,105,32,115,116,69,118, + 
116,58,97,99,116,105,111,110,61,34,99,114,101,97,116,101,100,34,32,115,116,69,118,116,58,105,110,115,116,97,110,99, + 101,73,68,61,34,120,109,112,46,105,105,100,58,102,50,99,97,50,51,102,99,45,53,49,57,99,45,52,56,55,54,45, + 97,57,101,101,45,100,50,51,100,52,99,50,51,50,99,51,51,34,32,115,116,69,118,116,58,119,104,101,110,61,34,50, + 48,50,49,45,48,56,45,50,55,84,50,48,58,49,49,58,52,52,43,48,51,58,48,48,34,32,115,116,69,118,116,58, + 115,111,102,116,119,97,114,101,65,103,101,110,116,61,34,65,100,111,98,101,32,80,104,111,116,111,115,104,111,112,32,67, + 67,32,50,48,49,57,32,40,77,97,99,105,110,116,111,115,104,41,34,47,62,32,60,114,100,102,58,108,105,32,115,116, + 69,118,116,58,97,99,116,105,111,110,61,34,115,97,118,101,100,34,32,115,116,69,118,116,58,105,110,115,116,97,110,99, + 101,73,68,61,34,120,109,112,46,105,105,100,58,97,98,54,101,100,57,50,102,45,50,49,53,102,45,52,55,56,53,45, + 97,98,100,101,45,57,52,97,55,54,99,53,53,56,97,53,48,34,32,115,116,69,118,116,58,119,104,101,110,61,34,50, + 48,50,49,45,48,57,45,48,51,84,50,50,58,49,51,58,53,52,43,48,51,58,48,48,34,32,115,116,69,118,116,58, + 115,111,102,116,119,97,114,101,65,103,101,110,116,61,34,65,100,111,98,101,32,80,104,111,116,111,115,104,111,112,32,67, + 67,32,50,48,49,57,32,40,77,97,99,105,110,116,111,115,104,41,34,32,115,116,69,118,116,58,99,104,97,110,103,101, + 100,61,34,47,34,47,62,32,60,47,114,100,102,58,83,101,113,62,32,60,47,120,109,112,77,77,58,72,105,115,116,111, + 114,121,62,32,60,47,114,100,102,58,68,101,115,99,114,105,112,116,105,111,110,62,32,60,47,114,100,102,58,82,68,70, + 62,32,60,47,120,58,120,109,112,109,101,116,97,62,32,60,63,120,112,97,99,107,101,116,32,101,110,100,61,34,114,34, + 63,62,255,127,105,182,0,0,67,81,73,68,65,84,120,218,237,157,7,120,85,197,182,199,79,26,36,129,4,72,232,189, + 133,222,33,244,222,123,9,45,33,141,174,128,34,2,42,130,52,169,10,34,138,229,42,118,164,40,98,3,20,203,179,87, + 
64,20,148,98,232,82,165,43,234,149,139,48,111,173,157,217,97,115,76,224,156,228,156,147,153,125,254,235,251,126,223,125, + 79,66,216,123,102,205,204,127,207,172,89,203,33,132,112,0,0,0,0,0,0,96,87,208,8,0,0,0,0,0,0,130, + 23,0,0,0,0,0,0,8,94,0,0,0,0,0,0,32,120,1,0,0,0,0,0,128,224,5,0,0,0,0,0,0, + 130,23,0,0,0,0,0,0,8,94,0,0,0,0,0,0,193,139,70,0,0,0,0,0,0,126,47,120,97,55,180,64, + 34,15,145,151,8,35,66,37,97,242,191,69,17,65,104,166,172,13,3,17,0,0,0,0,16,188,122,219,255,17,105,196, + 65,249,191,95,19,175,18,11,137,209,68,15,34,150,40,79,228,131,224,5,0,0,0,0,128,224,213,205,158,35,174,18, + 127,133,4,7,30,47,93,34,223,217,144,144,64,110,84,147,203,196,5,226,40,177,155,88,39,133,112,25,8,94,0,0, + 0,0,0,8,94,93,172,29,177,145,184,24,16,16,112,113,120,82,181,3,235,87,119,253,253,173,149,157,197,67,115,155, + 137,17,73,85,69,251,214,37,69,237,26,81,162,112,116,168,8,12,8,48,197,48,239,8,63,78,244,39,106,218,117,7, + 24,3,17,0,0,0,0,16,188,246,48,142,233,109,78,124,31,16,224,184,82,178,120,248,95,179,167,52,76,59,119,32, + 245,202,133,67,67,197,169,180,20,113,240,135,4,177,227,139,1,98,195,154,174,226,206,177,117,68,141,234,133,68,96,96, + 128,185,11,124,138,255,46,49,141,40,5,193,11,0,0,0,0,0,193,171,170,241,69,182,201,196,1,218,237,189,20,31, + 87,233,208,103,27,123,95,56,127,112,232,85,22,190,86,206,31,76,21,91,63,142,19,15,206,110,34,122,117,45,39,42, + 87,140,20,65,65,134,0,254,75,238,24,167,16,149,228,197,57,8,94,0,0,0,0,0,8,94,165,50,59,148,36,94, + 34,254,137,42,148,247,252,154,103,58,28,112,22,188,86,206,238,79,21,123,191,139,23,239,175,235,46,18,250,87,18,121, + 210,227,128,121,231,247,4,177,86,134,60,64,240,2,0,0,0,0,64,240,42,183,219,123,59,177,159,197,235,232,212,106, + 251,127,249,49,241,210,141,132,175,201,225,29,9,98,209,236,166,162,101,211,226,34,50,34,143,25,243,251,46,209,83,166, + 66,131,224,5,0,0,0,0,128,224,85,198,98,136,173,20,175,251,15,133,45,252,146,182,117,240,57,87,68,47,115,124, + 
87,146,248,230,253,190,34,53,33,70,200,236,15,127,19,59,137,100,93,66,29,48,16,1,0,0,0,0,193,235,31,22, + 46,51,50,252,55,166,98,129,179,27,215,116,59,225,170,232,53,217,246,73,127,49,102,120,13,81,190,108,132,185,227,203, + 57,127,251,17,145,16,188,0,0,0,0,128,224,133,224,85,193,66,228,69,180,203,17,249,67,254,251,228,226,150,123,221, + 21,189,116,249,77,252,188,117,176,152,125,111,35,145,47,60,152,59,239,146,20,190,53,32,120,1,0,0,0,0,193,11, + 193,171,138,245,33,142,231,203,23,252,219,99,15,182,56,158,89,6,7,87,248,238,163,126,34,121,112,140,40,28,21,202, + 157,248,15,177,132,168,8,193,11,0,0,0,0,8,94,8,94,21,178,56,240,142,236,9,170,206,118,105,218,164,6,7, + 178,43,122,41,207,175,248,246,131,126,162,77,139,18,220,145,92,241,237,52,145,10,193,11,0,0,0,0,8,94,8,94, + 21,44,150,216,155,55,111,208,197,165,243,155,31,203,142,224,181,66,187,197,130,226,131,205,116,102,171,137,218,16,188,0, + 0,0,0,128,224,133,224,205,77,11,32,202,18,191,4,211,78,239,188,251,98,15,228,68,240,114,124,239,161,237,67,196, + 168,148,106,230,165,182,95,137,33,16,188,0,0,0,0,128,224,133,229,182,113,73,226,99,209,81,121,47,172,95,221,245, + 120,78,119,122,57,204,225,217,101,173,69,213,202,5,204,48,135,199,136,194,16,188,0,0,0,0,128,224,133,229,230,78, + 111,61,226,98,190,240,144,191,190,120,183,247,209,156,138,94,102,207,230,193,162,93,171,146,220,193,87,120,23,153,168,5, + 193,11,0,0,0,0,8,94,88,110,90,18,241,223,122,181,163,207,80,104,194,223,158,16,189,28,230,48,227,174,6,34, + 44,204,72,97,118,136,24,32,211,163,65,240,2,0,0,0,0,130,23,230,115,11,34,102,115,24,66,253,218,209,219,60, + 33,120,205,16,135,111,62,232,43,74,149,8,55,243,246,78,133,224,5,0,0,0,0,4,47,44,55,83,150,173,100,209, + 123,223,228,250,135,73,172,94,245,148,240,253,252,157,222,162,73,195,162,220,225,255,35,30,242,85,92,47,6,34,0,0, + 0,0,32,120,97,206,86,138,51,44,132,133,6,93,216,253,205,160,63,61,37,120,153,227,187,146,68,183,142,101,205,203, + 
108,235,32,120,1,0,0,0,0,193,11,203,45,107,71,92,40,91,58,226,192,190,109,241,23,61,41,122,15,239,24,34, + 70,36,85,19,121,242,4,114,231,191,67,148,129,224,5,0,0,0,0,4,47,44,55,108,26,239,196,142,25,94,253,215, + 236,86,98,203,138,179,251,83,197,109,163,106,154,249,122,63,149,241,195,16,188,0,0,0,0,128,224,133,249,212,74,16, + 187,11,20,200,115,97,243,135,253,206,120,82,240,50,191,166,37,139,73,183,213,17,97,161,193,166,232,45,3,193,11,0, + 0,0,0,8,94,152,175,141,203,3,95,46,86,56,108,151,167,119,121,141,157,94,202,224,48,113,92,29,115,167,119,19, + 4,47,0,0,0,0,32,120,97,185,97,92,41,237,234,195,243,154,29,243,180,224,53,118,122,127,78,22,183,143,174,41, + 242,132,24,49,189,124,145,45,26,130,23,0,0,0,0,16,188,48,95,90,13,226,98,149,74,5,142,30,223,157,116,217, + 27,162,247,236,254,20,49,52,161,170,185,211,251,16,4,47,0,0,0,0,32,120,97,190,52,46,61,60,151,248,231,225, + 249,205,210,188,33,120,25,170,238,70,41,203,202,176,67,92,38,166,56,60,84,145,13,3,17,0,0,0,0,16,188,48, + 87,140,11,82,236,15,12,12,184,120,228,167,196,63,188,37,122,143,82,158,222,70,245,138,152,21,217,250,67,240,2,0, + 0,0,0,130,23,230,75,187,139,184,114,223,164,250,199,188,37,120,205,138,108,37,138,25,101,136,15,202,112,10,8,94, + 0,0,0,0,64,240,194,124,98,149,136,63,106,215,136,58,114,242,231,228,127,188,41,122,191,122,191,143,8,15,51,210, + 149,29,118,228,176,4,49,6,34,0,0,0,0,32,120,97,238,216,19,1,1,142,75,111,175,234,114,196,155,130,151,11, + 83,76,191,171,1,59,199,21,226,81,8,94,0,0,0,0,64,240,194,124,101,97,196,159,197,139,132,125,239,77,193,203, + 80,222,95,209,182,101,73,51,115,67,31,8,94,0,0,0,0,64,240,194,124,101,171,56,47,239,7,175,119,63,238,109, + 209,251,243,230,193,162,74,229,130,236,36,191,18,181,32,120,1,0,0,0,0,193,11,243,133,13,230,20,101,227,111,169, + 117,193,219,130,151,121,246,209,214,230,46,239,42,8,94,0,0,0,0,64,240,194,124,97,165,136,223,27,214,43,114,252, + 244,222,148,43,222,22,188,231,168,252,240,176,196,170,102,126,222,84,8,94,0,0,0,0,64,240,194,124,97,111,208,229, 
+ 181,63,143,252,152,248,63,95,236,242,114,81,138,202,21,35,217,89,78,17,21,33,120,1,0,0,0,0,193,11,243,182, + 53,225,56,222,59,111,173,189,217,23,130,151,47,176,45,123,160,5,59,203,85,135,155,165,135,49,16,1,0,0,0,0, + 193,11,203,174,29,10,11,11,222,235,11,193,107,210,44,182,40,59,204,63,68,21,8,94,0,0,0,0,64,240,194,188, + 109,47,179,158,252,250,253,62,39,125,37,120,233,223,18,209,81,161,236,52,95,16,145,16,188,0,0,0,0,128,224,133, + 121,211,110,231,16,131,249,211,27,31,242,149,224,61,75,23,216,146,6,199,176,211,92,34,250,66,240,2,0,0,0,0, + 130,23,230,77,235,200,153,19,198,142,168,241,59,197,216,94,245,149,232,221,250,81,63,179,236,240,199,68,32,4,47,0, + 0,0,0,32,120,97,222,178,210,196,223,109,91,150,56,78,165,128,175,248,50,150,247,158,9,245,204,220,188,253,33,120, + 1,0,0,0,0,193,11,243,150,241,238,234,129,98,69,194,142,158,74,75,246,169,224,221,67,21,216,202,151,137,96,231, + 249,145,136,130,224,5,0,0,0,0,4,47,204,91,246,92,96,96,192,197,147,123,124,43,120,57,77,217,232,161,213,217, + 121,254,38,186,67,240,2,0,0,0,0,130,23,230,45,75,102,77,249,209,91,61,15,249,82,240,50,223,126,216,79,132, + 132,4,178,3,173,131,224,5,0,0,0,0,4,47,204,91,198,249,112,197,212,73,245,191,246,181,224,101,146,6,197,152, + 197,40,106,120,83,240,194,96,48,24,12,6,131,101,165,43,32,38,236,111,193,28,86,208,170,69,137,207,115,67,240,126, + 245,94,31,17,25,17,194,78,180,214,145,69,198,6,8,94,24,12,6,131,193,96,16,188,176,156,218,193,232,66,161,219, + 115,67,240,30,223,149,40,90,54,41,198,78,116,130,168,104,83,193,91,155,120,2,184,196,163,196,195,196,98,98,30,49, + 141,184,131,72,33,122,58,210,75,98,151,32,2,48,108,125,242,49,92,146,104,70,244,34,82,137,9,178,79,230,203,62, + 90,74,44,83,200,127,38,18,97,154,180,47,251,112,17,162,14,209,142,136,39,198,16,147,137,153,196,66,71,122,25,118, + 213,218,56,39,227,154,223,105,186,244,163,4,249,222,149,137,32,63,30,103,236,175,49,142,244,52,161,131,137,177,196,189, + 
196,28,57,198,30,33,30,195,218,144,35,120,45,41,13,193,11,99,219,18,28,28,120,44,55,4,47,51,119,90,44,59, + 209,101,41,106,188,46,120,71,189,247,155,207,112,164,87,147,19,192,43,252,74,124,79,108,144,11,195,16,162,130,47,251, + 87,119,200,194,137,214,82,104,241,162,250,190,35,61,115,202,105,141,253,98,162,66,237,27,34,63,120,135,72,31,125,93, + 250,236,113,140,223,127,241,11,241,161,20,199,131,136,80,27,141,179,0,249,97,51,212,50,206,118,17,231,209,239,62,37, + 8,130,23,198,147,204,229,115,7,134,94,201,13,193,187,239,187,120,145,39,253,242,218,122,27,10,94,222,201,16,181,250, + 141,21,45,110,95,2,110,66,243,177,15,138,102,183,46,16,77,70,207,21,177,67,103,136,250,9,147,169,237,198,136,152, + 142,9,162,108,147,174,162,104,181,70,34,60,170,248,205,38,53,22,107,95,200,93,200,88,94,108,32,110,13,95,12,146, + 139,238,120,226,77,98,239,205,22,136,240,232,18,162,88,245,198,162,108,211,110,162,74,167,33,134,31,115,159,196,14,155, + 105,244,81,179,91,23,138,230,227,22,41,227,63,242,185,215,228,98,27,243,233,195,72,98,21,241,195,205,218,55,180,64, + 97,17,85,161,166,40,89,183,149,168,212,182,191,168,209,115,132,168,51,96,188,104,144,52,69,52,30,49,91,52,29,61, + 79,185,54,206,201,184,230,119,106,144,120,183,225,71,149,218,13,48,222,59,178,100,197,27,181,17,127,124,205,37,202,106, + 54,214,248,116,164,141,220,157,255,242,102,126,80,160,84,37,81,170,126,91,81,177,77,156,168,209,107,164,168,23,63,81, + 52,74,189,47,125,140,141,121,64,180,24,183,24,235,67,206,231,133,181,216,225,133,177,113,150,4,241,203,143,67,46,230, + 214,46,239,224,126,149,216,145,254,146,11,134,45,4,175,220,221,49,6,91,159,165,31,10,250,111,192,67,12,223,112,74, + 36,189,178,79,196,61,241,133,232,50,231,85,99,97,168,220,110,160,136,40,94,46,171,69,101,141,35,61,253,93,144,159, + 137,220,0,185,187,184,54,171,197,182,68,237,22,134,208,226,69,181,251,130,55,197,128,167,190,22,201,175,30,16,35,54, + 
158,209,198,31,70,190,123,206,250,78,227,124,60,198,187,16,203,137,35,153,181,47,11,90,246,77,246,209,78,51,94,54, + 124,54,113,245,207,98,216,91,39,48,150,205,241,188,254,87,49,228,229,93,162,251,194,183,13,113,92,177,117,191,204,124, + 149,133,99,146,226,31,148,92,72,233,157,172,252,160,74,231,196,140,113,54,112,249,102,145,186,238,23,49,106,211,5,248, + 128,183,160,182,181,244,65,123,8,94,24,219,74,238,255,159,55,15,62,155,91,130,247,253,215,123,136,160,160,0,33,99, + 151,236,34,120,235,154,131,45,113,77,26,38,31,31,193,98,173,247,146,247,140,93,200,34,85,26,100,182,112,190,66,12, + 228,227,124,155,10,220,58,114,119,123,75,102,11,111,173,190,183,138,206,179,86,137,193,207,127,47,134,189,125,210,30,125, + 190,246,160,245,29,155,123,185,141,43,200,56,219,77,153,181,111,181,110,169,162,253,148,103,69,220,147,95,66,212,230,240, + 163,150,63,190,26,13,157,238,220,198,219,136,91,136,188,10,140,183,104,185,163,191,225,95,31,146,117,90,26,187,243,189, + 31,126,95,164,190,126,4,125,154,11,36,174,218,99,246,199,49,217,95,16,188,48,199,243,220,255,63,126,49,224,84,110, + 9,222,189,91,227,69,229,10,145,66,198,183,133,219,68,240,142,49,39,191,17,27,79,99,2,202,37,120,23,165,195,212, + 231,69,153,198,157,51,19,191,83,237,176,235,203,241,142,196,157,153,9,48,222,93,236,120,223,75,34,97,197,79,98,196, + 59,103,109,217,199,125,31,253,200,250,206,225,94,218,189,227,75,147,27,157,219,183,116,163,14,162,213,132,71,197,144,149, + 187,109,219,190,185,13,127,152,241,238,120,38,254,221,57,151,198,91,121,226,1,231,231,169,208,178,183,232,58,247,53,145, + 242,218,33,244,155,2,88,124,230,117,8,94,152,105,79,115,255,111,253,184,223,137,220,18,188,103,247,167,136,158,93,140, + 227,232,83,68,77,155,8,222,215,248,241,59,78,95,129,201,71,37,241,59,237,5,58,50,237,235,188,112,222,170,169,208, + 229,16,160,251,157,23,94,222,221,238,187,236,19,191,217,97,172,209,123,148,215,194,25,100,72,200,117,237,203,113,182,221, + 
230,189,142,157,59,31,195,27,7,113,143,127,238,60,118,199,251,112,188,113,86,141,5,206,59,250,28,86,53,244,205,227, + 232,35,149,124,133,62,62,157,195,156,32,120,97,108,207,112,255,127,255,89,220,201,220,18,188,204,194,153,141,205,108,13, + 113,54,17,188,251,248,241,7,191,240,3,38,32,5,233,189,244,3,17,211,33,222,58,41,62,160,203,110,175,12,93,248, + 215,142,238,128,167,191,241,187,93,70,22,245,150,54,136,240,96,27,255,107,71,151,63,94,237,18,6,162,51,28,179,93, + 187,255,109,214,126,31,235,131,241,54,142,248,201,252,55,235,15,185,75,164,172,59,140,254,80,56,180,205,226,31,205,32, + 120,97,166,189,196,253,191,235,235,129,167,115,83,240,110,249,168,159,8,8,48,226,120,151,233,46,120,201,90,92,11,103, + 56,131,9,72,97,218,76,122,220,121,199,104,128,226,98,247,58,161,91,119,208,4,227,194,143,191,94,126,225,120,89,217, + 22,175,122,168,125,107,202,212,81,25,109,220,250,206,101,8,87,80,16,206,252,96,233,167,78,94,26,111,29,173,190,192, + 23,60,113,39,67,125,56,134,222,210,111,97,16,188,48,211,248,6,187,216,183,45,254,124,110,10,222,243,7,135,138,234, + 85,10,178,67,237,177,129,224,125,153,31,157,83,203,96,242,209,96,114,164,155,243,78,162,119,178,130,66,119,172,76,211, + 100,60,35,167,9,235,243,200,255,249,253,237,126,75,159,181,243,192,46,222,146,235,66,67,40,53,30,118,116,181,201,206, + 193,180,244,240,152,155,229,188,195,143,118,215,3,78,125,39,251,237,78,75,127,66,240,194,140,99,187,171,191,166,37,95, + 202,77,193,203,76,24,83,219,116,210,210,186,10,94,249,236,198,123,224,200,75,175,248,192,6,137,247,88,23,207,4,133, + 196,238,92,235,37,180,94,15,109,66,159,17,124,17,207,210,95,101,115,184,171,155,241,187,216,15,16,159,171,143,232,173, + 217,231,150,235,138,11,120,96,188,229,147,39,141,198,239,228,76,17,252,113,133,246,214,231,174,134,197,31,66,33,120,97, + 86,251,146,66,9,46,228,182,216,101,54,172,233,106,134,53,140,212,88,240,14,51,7,27,118,135,180,206,219,200,116,207, + 
101,161,203,85,208,86,155,207,211,238,158,229,232,35,107,28,54,165,159,203,105,252,46,31,133,91,251,156,99,187,209,182, + 154,9,28,250,56,177,244,97,151,28,142,185,8,98,145,249,251,122,46,218,136,54,214,55,76,237,57,167,190,133,224,133, + 57,126,10,15,11,78,83,65,240,110,255,124,128,40,28,29,42,100,134,3,237,4,175,60,22,53,6,91,219,187,159,194, + 228,163,169,232,173,29,55,206,186,128,6,228,146,216,77,176,10,49,44,188,255,134,211,129,201,246,89,158,211,54,230,252, + 196,248,64,213,23,142,179,150,125,249,76,14,198,92,21,71,122,105,99,227,119,197,191,176,29,109,171,97,254,102,203,188, + 217,6,130,23,230,108,103,171,84,46,248,141,10,130,247,192,15,9,162,86,245,40,118,170,157,14,153,143,87,51,193,155, + 215,28,108,200,206,160,55,28,58,32,251,114,84,46,136,221,137,25,41,176,250,223,110,28,219,162,79,254,13,151,60,150, + 237,212,43,27,109,60,195,250,65,129,138,87,122,51,240,153,45,102,95,238,206,65,138,191,12,127,136,127,233,71,180,171, + 222,197,38,152,24,8,94,152,213,34,57,126,55,113,96,204,103,42,8,222,83,105,41,162,125,235,146,236,84,92,166,179, + 156,134,130,183,169,57,216,32,82,244,134,227,100,45,19,103,17,31,138,221,165,25,98,119,224,29,232,139,44,251,231,93, + 107,255,4,103,87,236,114,122,41,136,93,219,237,236,85,117,211,31,194,173,98,151,127,23,218,84,79,250,44,253,208,234, + 7,145,16,188,48,171,181,229,190,127,102,89,155,31,84,16,188,204,176,196,42,236,84,231,137,88,13,5,239,115,252,200, + 28,67,132,201,199,86,241,188,115,125,36,118,231,155,255,102,167,153,43,209,7,55,160,82,187,1,102,223,220,237,102,27, + 47,206,136,137,190,251,105,180,165,141,136,29,62,203,244,137,165,110,248,67,32,241,184,233,19,156,191,21,109,169,47,205, + 199,62,104,250,192,163,153,244,53,4,175,159,27,167,93,185,114,96,91,194,31,170,8,222,197,247,55,53,11,80,116,215, + 80,240,242,206,180,81,102,20,147,143,254,112,126,91,95,237,242,90,51,49,224,226,212,205,146,202,239,207,214,173,124,107, + 
14,99,142,255,69,91,218,139,1,79,125,109,250,196,143,110,248,196,36,132,161,217,135,18,181,91,152,62,208,26,130,23, + 230,108,159,210,133,181,83,148,146,236,138,42,130,247,141,151,187,152,14,59,82,39,193,75,86,151,31,55,186,98,45,76, + 60,54,130,11,59,72,127,156,225,69,223,153,105,46,186,92,166,20,237,126,99,58,207,90,237,246,165,66,178,17,230,223, + 233,58,103,45,218,209,134,112,121,95,139,95,132,184,224,19,29,28,150,42,133,104,67,91,133,161,5,64,240,194,172,198, + 113,75,127,84,175,90,232,200,233,189,41,202,8,222,109,159,246,55,29,118,190,46,130,215,154,157,161,217,152,7,48,249, + 216,243,50,140,71,75,215,90,124,231,30,248,78,182,99,53,39,184,216,198,183,155,127,167,97,202,52,180,163,77,225,44, + 27,22,223,40,119,19,159,104,136,75,139,182,13,65,187,39,139,62,135,224,245,99,171,79,92,162,11,107,191,157,59,48, + 244,170,42,130,151,196,183,8,9,54,114,241,174,209,72,240,22,49,7,91,255,255,124,133,201,199,70,36,189,178,207,58, + 145,54,240,176,223,220,237,176,148,175,69,123,187,144,115,245,141,163,89,166,29,202,162,141,107,155,63,223,120,196,108,180, + 161,255,8,222,218,174,108,80,112,209,10,136,93,59,20,161,217,105,237,251,188,16,188,48,103,139,39,254,185,123,124,221, + 83,170,136,93,147,10,229,34,216,177,190,214,72,240,54,48,7,27,31,171,97,2,178,211,49,233,49,235,68,218,211,131, + 62,83,197,252,189,124,217,6,109,157,173,248,221,40,23,218,249,9,236,226,65,240,58,249,68,23,243,231,80,81,207,30, + 116,152,250,252,77,227,250,33,120,253,219,22,114,191,175,124,186,253,65,213,4,111,243,216,98,236,88,123,53,18,188,19, + 248,81,185,126,55,38,31,91,31,161,15,243,116,8,12,199,8,163,157,93,167,253,148,103,205,190,120,203,157,112,17,20, + 149,240,59,193,91,249,6,126,193,133,141,68,199,233,43,208,110,54,128,203,194,91,250,125,210,13,250,29,130,215,143,237, + 139,192,192,128,191,142,238,74,252,75,53,193,219,183,71,121,118,172,11,26,9,222,119,141,203,48,243,214,97,2,178,33, + 
13,18,239,241,232,197,53,178,169,72,131,148,61,138,215,106,110,246,69,215,155,180,113,95,132,139,248,181,224,141,202,194, + 47,110,193,105,156,205,250,253,173,19,46,149,150,246,148,224,13,132,118,212,206,10,16,127,197,214,47,242,141,106,98,151, + 185,37,181,58,59,214,85,246,45,213,5,47,89,24,142,199,252,102,87,113,149,7,252,101,188,3,229,167,179,5,199,199, + 203,182,219,225,66,59,191,135,80,6,191,142,183,207,236,150,126,65,243,207,249,35,22,109,230,95,97,78,158,18,188,121, + 160,31,181,51,46,93,122,101,195,234,46,251,84,20,188,83,38,212,203,8,62,215,64,240,142,229,103,173,209,107,36,38, + 31,155,210,115,241,59,166,63,126,154,67,95,41,136,140,12,217,131,43,23,90,22,181,59,93,201,105,92,189,199,112,136, + 93,127,26,167,139,54,154,254,241,113,22,126,113,175,233,67,124,12,142,54,179,7,77,70,222,111,246,251,242,155,204,11, + 30,17,188,161,208,143,90,89,94,226,64,225,232,208,19,39,247,36,93,81,81,240,206,158,210,208,116,224,112,149,5,47, + 89,176,57,129,114,169,83,76,62,246,36,238,241,207,77,127,220,150,67,127,25,154,17,83,74,199,112,104,91,119,118,113, + 14,88,5,111,51,87,46,3,218,34,99,10,9,118,62,57,26,184,124,179,241,225,213,121,214,42,163,66,92,235,137,143, + 137,86,119,60,34,90,142,95,234,21,248,119,183,189,235,63,162,239,163,31,105,19,255,28,59,108,102,70,74,203,81,55, + 40,31,60,96,249,183,24,83,182,137,223,61,227,114,22,29,79,9,222,48,104,72,173,172,41,241,247,224,126,21,207,156, + 59,144,170,164,224,93,56,179,177,233,192,249,21,23,188,53,205,193,150,184,106,143,72,93,247,11,176,66,11,53,103,57, + 208,125,55,133,23,72,217,207,59,115,224,43,92,42,251,91,254,61,44,92,176,88,101,59,156,33,203,124,200,124,59,219, + 252,153,250,9,147,245,91,188,223,57,43,226,95,250,81,180,191,247,57,81,103,192,120,81,162,78,75,235,59,231,42,92, + 178,85,245,246,43,90,61,214,124,222,166,55,250,216,196,174,191,125,232,247,248,103,102,159,127,231,194,28,12,193,235,135, + 54,157,227,99,151,204,107,118,88,69,177,203,44,157,159,113,49,165,128,226,130,55,89,149,5,73,117,194,10,22,17,165, + 
27,182,23,77,70,207,53,118,171,120,113,215,101,82,181,20,159,216,157,211,172,12,53,122,142,192,66,149,13,120,215,81, + 182,225,127,110,208,206,83,204,118,78,121,237,144,86,185,133,121,71,245,6,227,135,47,240,238,226,144,26,226,77,98,5, + 241,44,241,20,183,135,151,224,223,253,162,249,145,198,12,122,110,155,14,249,153,127,203,194,55,140,247,232,52,115,165,222, + 59,154,52,111,242,142,59,159,16,129,19,46,101,103,128,224,245,95,227,11,134,223,231,9,9,252,253,200,143,137,127,64, + 240,122,68,244,222,79,156,38,206,131,127,193,11,245,239,89,45,228,9,43,126,210,109,119,113,123,54,125,36,197,46,177, + 131,156,166,141,203,176,242,69,190,134,201,247,138,106,221,82,69,153,216,78,198,110,100,145,42,13,188,138,108,195,238,89, + 180,113,136,217,198,124,228,175,75,86,1,222,57,205,100,108,172,38,38,19,109,57,238,123,148,151,74,90,143,114,179,26, + 96,237,184,113,234,158,194,144,79,202,182,251,41,147,231,111,198,127,86,172,122,99,173,82,109,241,59,181,24,183,88,84, + 233,52,196,40,89,159,55,127,65,108,158,100,77,27,8,94,152,179,245,224,203,106,99,71,212,216,162,170,216,213,41,164, + 1,184,181,195,89,148,232,76,44,182,78,84,221,23,190,165,252,226,211,119,217,199,230,243,110,206,198,187,135,154,239,218, + 109,254,27,218,22,223,232,114,255,43,162,124,243,158,42,44,108,33,89,180,115,42,255,57,139,3,29,218,148,219,211,233, + 189,120,71,181,94,102,217,5,20,24,191,202,183,109,151,217,107,204,118,124,59,171,211,21,29,178,162,112,40,24,87,4, + 188,129,255,95,38,254,36,254,0,6,39,92,77,23,137,75,107,254,101,252,97,178,51,34,34,207,249,195,59,134,92,86, + 89,240,206,210,228,210,26,200,246,2,26,73,44,114,104,146,15,147,69,185,124,214,15,178,241,174,213,117,60,102,55,143, + 137,235,37,76,202,108,209,253,137,83,180,17,179,120,72,17,221,120,135,69,198,41,123,147,172,196,110,94,243,217,122,60, + 184,65,249,75,104,78,130,102,45,209,94,241,241,122,159,17,23,61,228,46,101,219,213,210,166,11,157,158,189,172,14,33, + 
25,12,159,76,100,50,206,30,147,31,28,117,137,66,88,63,114,228,199,30,17,188,81,196,255,201,152,34,152,186,198,139, + 210,165,248,126,149,206,157,61,144,122,85,101,193,171,83,90,50,144,237,201,39,84,23,145,210,97,218,11,166,63,190,146, + 141,247,124,148,255,110,163,212,251,52,218,209,61,110,45,182,97,178,222,145,94,204,33,88,65,95,138,207,248,168,88,119, + 88,105,177,235,212,166,115,53,25,171,111,168,28,255,234,20,203,25,239,244,236,29,205,63,227,112,28,85,253,162,94,252, + 68,235,59,112,21,212,10,88,39,212,20,188,124,51,54,205,145,94,40,160,35,116,165,178,246,10,241,207,154,103,59,252, + 162,178,216,101,70,167,232,83,120,2,228,104,2,154,199,115,79,236,208,25,106,231,121,28,53,199,92,136,22,185,249,126, + 67,116,139,221,109,57,254,97,103,81,198,33,40,229,20,247,35,190,204,101,196,21,171,220,182,53,123,143,182,182,235,8, + 77,198,104,94,213,63,38,134,188,188,203,218,174,165,156,158,255,9,254,239,45,110,95,162,203,71,208,61,88,27,212,22, + 188,108,7,137,191,136,119,16,211,171,164,53,230,84,100,77,99,139,109,59,187,95,205,84,100,86,250,116,47,39,228,165, + 39,7,4,175,173,39,160,145,220,197,124,241,73,147,130,7,99,220,120,183,252,230,223,235,56,125,133,22,98,215,41,83, + 0,135,156,84,212,192,135,238,118,152,69,38,20,110,91,167,29,188,86,26,141,209,97,70,252,110,231,36,117,243,239,210, + 7,179,108,215,71,50,121,254,221,252,103,156,105,69,197,103,79,88,177,211,234,23,189,176,46,232,33,120,127,14,9,9, + 60,78,255,123,145,104,13,125,169,148,69,16,223,133,135,7,95,220,253,237,160,223,85,23,187,76,179,216,98,236,88,123, + 33,120,109,63,1,25,59,160,149,219,13,84,250,38,189,101,65,74,114,227,221,90,153,127,143,99,97,85,23,187,13,146, + 166,88,223,115,170,38,254,19,225,208,160,200,4,135,236,88,218,182,171,70,227,51,92,245,176,163,225,235,127,181,182,109, + 181,172,158,95,213,112,134,54,147,30,183,62,127,16,214,5,61,4,239,55,165,74,230,59,31,24,24,192,191,117,167,60, + 6,129,169,97,44,42,46,199,199,85,58,165,195,238,46,83,161,108,4,59,214,215,16,188,182,159,128,140,116,93,49,29, 
+ 19,116,200,239,153,101,74,172,44,222,237,33,254,59,77,71,207,83,94,236,246,122,104,147,245,29,167,104,228,63,157,148, + 255,168,184,254,200,58,69,179,241,57,214,161,120,101,64,75,118,134,141,153,60,255,32,254,179,138,173,251,41,59,246,44, + 233,246,90,97,77,208,71,240,190,26,18,18,32,198,141,172,121,32,32,192,113,69,230,16,12,132,214,204,117,227,99,213, + 239,131,131,3,47,126,181,169,207,73,29,196,238,233,189,41,34,36,56,192,204,69,9,193,107,239,9,232,118,238,226,154, + 125,110,81,184,164,237,126,171,96,137,117,183,208,4,199,23,170,44,118,57,115,132,229,253,102,104,230,63,75,204,103,231, + 208,19,13,226,51,3,52,106,219,140,139,128,29,166,62,175,195,233,75,207,76,222,225,101,35,47,243,61,203,85,207,239, + 189,3,235,129,94,130,151,111,21,138,13,171,187,254,94,178,120,62,206,17,119,136,131,199,161,55,115,213,120,209,93,206, + 121,119,31,154,219,52,77,7,177,203,108,251,180,127,70,61,116,8,94,219,79,64,115,141,12,6,67,167,43,43,8,157, + 98,236,74,187,179,51,86,173,251,80,229,119,119,107,244,26,105,190,219,3,154,249,78,25,179,95,146,94,217,167,124,254, + 102,87,63,150,20,105,219,140,10,146,181,251,223,166,172,239,14,126,225,7,235,216,44,63,42,139,66,36,92,192,65,197, + 231,111,122,203,124,243,217,151,96,61,208,75,240,222,194,191,227,173,85,93,196,131,179,155,252,76,187,188,151,100,158,198, + 60,208,157,185,102,124,220,247,87,195,122,69,142,159,216,157,124,89,23,193,251,250,138,206,230,36,48,18,130,215,246,19, + 208,51,220,197,173,239,92,166,67,89,97,35,77,158,59,233,214,226,158,248,66,105,177,219,102,242,147,89,222,110,215,192, + 119,30,84,249,116,192,105,247,113,152,70,237,58,217,186,43,205,187,212,170,250,111,167,25,47,91,219,56,56,171,252,187, + 170,86,116,172,59,104,2,50,51,104,42,120,185,130,215,63,75,230,52,19,231,15,166,94,29,28,87,137,119,120,175,200, + 35,75,152,239,45,72,238,42,92,125,123,85,151,35,186,136,93,102,209,236,166,102,53,153,238,118,20,188,100,209,68,23, + 
57,54,248,72,246,53,226,19,226,71,121,50,114,210,145,94,146,151,171,200,252,151,115,39,203,246,176,35,198,132,223,117, + 222,58,133,115,240,190,104,46,74,175,186,216,191,29,50,226,74,169,106,146,202,185,118,45,98,97,136,102,11,87,9,213, + 219,56,254,197,29,214,246,205,175,248,197,191,214,50,69,224,110,243,153,57,171,132,202,98,151,41,85,191,173,217,190,61, + 50,121,175,122,25,39,0,107,246,42,249,252,252,161,47,159,145,63,252,139,66,148,234,35,120,185,10,206,249,225,73,85, + 13,209,242,229,166,62,23,10,71,229,61,39,23,240,170,208,159,62,53,190,48,248,31,218,101,255,223,216,225,53,126,210, + 73,236,50,195,134,84,49,83,146,53,178,131,224,37,43,196,183,251,137,55,137,99,14,212,60,255,23,42,47,172,229,154, + 245,48,159,179,143,139,253,253,31,29,222,171,207,210,15,173,125,80,80,179,133,235,126,213,219,152,11,141,56,174,21,236, + 80,141,15,136,111,137,95,50,27,143,92,104,69,249,172,34,215,10,163,188,156,133,143,180,83,253,163,200,233,20,64,200, + 44,87,92,42,247,176,38,176,190,219,207,89,186,56,14,153,248,138,120,143,88,41,195,92,199,201,13,128,130,118,19,188, + 229,136,35,29,90,151,20,167,210,82,104,151,119,232,213,117,47,116,60,44,119,113,182,18,249,160,67,125,102,124,51,245, + 239,54,45,74,156,57,185,39,233,178,78,98,247,87,242,157,246,228,67,114,34,46,171,171,224,149,199,218,3,137,143,50, + 91,80,74,55,234,96,28,197,114,12,23,231,104,237,185,104,163,24,240,212,215,198,209,27,239,70,164,190,126,196,184,21, + 205,41,119,184,96,193,136,119,206,218,22,149,69,33,47,148,238,164,12,178,94,86,75,126,245,128,210,130,161,86,223,91, + 205,247,26,175,153,216,205,72,69,214,123,233,7,186,164,34,83,154,18,181,91,24,185,108,57,124,103,196,198,51,202,139, + 93,167,66,19,45,178,240,147,94,142,140,210,229,199,212,125,151,149,187,141,242,221,97,5,139,216,125,99,227,8,177,129, + 83,30,18,49,186,11,94,22,180,187,106,215,136,18,7,127,72,184,86,49,43,181,218,126,74,85,246,15,253,217,147,136, + 
231,245,137,213,34,142,230,207,23,114,238,195,55,122,28,215,109,119,247,0,249,78,173,234,81,66,166,182,11,215,77,240, + 146,21,225,155,238,206,131,189,114,135,193,162,243,172,85,34,113,213,158,116,145,167,73,137,89,127,199,18,35,184,206,197, + 254,55,202,152,242,81,171,202,239,149,188,246,160,213,63,195,53,19,188,25,177,153,137,107,210,148,110,103,22,189,93,238, + 127,69,57,186,47,124,75,244,125,244,35,67,56,234,82,1,208,221,204,23,100,253,84,79,169,150,89,152,17,251,52,95, + 148,213,131,159,68,252,11,219,197,160,103,191,51,50,78,240,7,104,183,249,111,136,118,83,158,17,141,71,204,22,53,122, + 143,162,185,176,77,86,2,120,167,220,5,46,167,163,224,101,123,173,72,225,80,177,227,243,1,25,2,230,200,79,137,151, + 42,87,140,100,101,255,55,49,20,122,212,171,198,183,150,211,130,130,2,127,223,248,74,215,51,186,137,93,102,59,249,78, + 225,168,80,35,94,242,70,142,169,162,224,37,155,238,60,168,187,47,120,211,56,182,130,120,212,15,167,10,107,131,93,244, + 129,231,249,231,219,76,122,66,233,119,179,92,150,89,160,107,238,102,101,83,145,1,239,22,154,160,2,18,150,113,217,246, + 6,126,18,167,155,224,181,43,188,201,195,59,217,93,230,188,106,236,102,103,34,126,57,12,172,174,110,130,119,84,64,64, + 128,88,191,166,235,117,34,38,109,203,224,179,49,149,34,207,210,159,255,79,126,117,33,63,175,231,45,159,20,137,87,238, + 25,95,247,132,46,5,38,156,161,11,118,130,125,136,222,99,184,46,130,87,166,160,250,209,28,188,117,250,223,46,18,87, + 255,172,252,133,15,112,99,56,180,196,50,33,215,112,43,247,46,77,238,234,238,34,29,179,190,87,73,13,5,47,23,164, + 49,22,79,248,169,255,81,103,224,29,46,133,25,113,94,94,29,66,26,252,85,0,15,88,254,173,53,206,221,100,161,55, + 179,197,120,90,240,150,230,223,51,105,108,157,127,9,153,77,175,117,63,17,25,17,194,55,206,185,252,112,13,232,83,143, + 90,152,188,233,127,101,104,66,149,237,191,254,156,124,89,71,177,203,220,113,75,173,140,20,73,170,11,94,25,43,253,177, + 
57,88,171,247,24,38,226,158,252,18,19,154,77,224,184,106,203,68,28,230,78,118,6,142,189,86,245,189,186,205,123,61, + 203,84,78,26,136,221,94,58,92,8,4,94,10,49,154,185,210,229,204,34,50,243,132,241,179,92,92,5,237,167,104,225, + 155,117,135,141,60,236,78,194,247,238,156,204,19,195,222,253,205,49,232,237,11,142,30,235,206,59,218,172,57,231,104,180, + 226,172,163,218,179,103,60,46,120,217,210,106,84,43,196,151,214,254,37,102,158,88,220,114,111,254,252,33,191,201,91,136, + 77,229,142,8,44,103,22,34,243,81,254,211,190,85,201,95,142,237,74,210,86,236,158,59,152,42,170,85,41,200,14,181, + 231,102,142,153,219,130,151,108,162,57,56,171,116,78,52,226,225,48,121,217,139,216,225,179,92,62,246,183,238,238,170,44, + 198,248,66,146,67,195,18,194,178,141,131,204,103,231,139,158,240,81,255,130,47,242,90,124,247,14,23,252,165,138,14,39, + 46,224,90,197,71,203,69,90,102,236,205,250,56,121,227,5,71,220,155,231,29,157,215,158,115,180,88,117,214,81,239,197, + 179,142,152,229,103,28,101,254,115,58,83,188,33,120,31,227,35,233,109,159,196,101,42,106,30,91,212,226,56,149,185,229, + 188,162,251,100,102,7,88,206,196,238,83,156,9,163,66,185,136,29,135,182,39,92,210,85,236,50,91,254,175,159,25,206, + 240,168,202,130,87,62,159,49,40,27,38,223,139,201,202,166,148,109,218,205,156,120,123,185,147,57,32,238,241,207,149,125, + 39,142,101,180,44,40,61,52,19,188,25,2,134,47,204,192,71,253,11,203,241,247,90,23,253,37,204,244,151,129,203,55, + 163,13,53,161,245,196,199,172,115,84,223,17,155,126,115,196,175,191,224,232,245,250,121,71,251,87,206,57,26,191,124,214, + 81,243,185,51,142,242,79,157,206,82,216,250,82,240,114,160,248,229,69,179,155,100,42,106,56,93,217,244,187,26,28,200, + 155,55,232,119,153,166,162,57,118,122,179,101,156,246,234,1,110,235,58,181,162,143,238,223,22,255,183,206,98,151,89,48, + 179,177,89,112,34,206,155,130,151,191,6,251,190,113,222,49,100,195,5,7,15,38,55,22,220,162,142,244,114,199,198,96, + 
228,129,137,9,202,47,46,198,20,114,193,55,90,100,92,144,81,248,146,162,83,154,181,178,154,9,222,65,25,33,35,212, + 63,240,83,255,129,243,2,91,252,182,189,27,62,243,18,255,157,86,119,60,130,118,84,57,166,119,19,165,103,219,112,65, + 208,186,44,104,125,22,37,187,141,177,246,119,35,119,133,173,47,5,111,77,226,116,239,174,229,196,217,253,41,153,139,222, + 67,67,175,46,93,208,156,119,122,249,18,219,81,162,62,244,171,219,23,212,214,113,24,67,249,242,17,59,14,124,175,247, + 206,46,195,190,210,179,75,57,118,166,83,206,49,222,158,22,188,214,1,80,150,168,242,204,25,227,40,164,249,202,179,142, + 142,175,158,115,244,38,49,156,64,95,148,195,55,93,55,113,118,177,30,89,115,124,39,38,42,251,194,41,117,100,95,175, + 116,113,97,125,153,127,190,233,232,121,74,191,87,203,241,75,205,247,122,79,195,203,106,90,100,192,0,94,45,31,60,215, + 77,159,105,102,254,93,164,131,204,229,77,4,18,181,180,174,10,90,95,5,173,179,130,214,91,65,235,174,160,245,87,208, + 58,44,202,56,17,209,126,212,117,37,221,85,21,188,156,59,245,251,152,138,5,196,222,239,226,111,40,114,30,154,211,108, + 95,116,84,232,5,250,249,63,137,100,25,163,5,187,121,234,177,87,248,130,90,59,138,217,61,184,93,127,177,203,236,221, + 26,47,42,85,136,100,103,218,102,230,223,245,133,224,189,25,28,15,36,159,199,24,120,229,187,142,16,125,86,164,9,10, + 138,199,36,102,83,156,170,31,181,116,97,81,45,168,197,206,227,245,249,75,251,106,40,120,185,162,147,145,247,19,126,234, + 31,196,191,244,163,213,103,103,102,211,111,182,169,30,106,100,155,185,147,214,197,193,36,106,41,252,64,116,120,229,156,104, + 70,162,182,238,11,103,4,173,163,255,18,180,55,163,244,35,135,173,125,223,70,85,193,203,54,53,40,40,64,188,247,90, + 247,155,10,157,245,171,187,30,207,159,47,152,5,239,127,229,229,43,164,44,203,218,170,57,210,75,248,93,73,165,108,12, + 58,95,80,115,230,221,181,221,4,21,40,97,103,186,199,249,165,115,83,240,202,65,178,40,163,66,218,178,35,25,3,178, + 
210,242,211,162,214,243,103,68,227,21,103,69,155,213,231,68,215,215,206,139,126,111,158,55,142,102,248,136,6,19,160,158, + 240,194,232,78,81,6,178,62,252,179,92,130,88,229,247,226,42,90,242,157,118,107,40,118,235,32,247,174,95,231,193,94, + 146,3,223,25,143,204,30,158,13,61,224,117,142,215,59,94,247,120,253,227,117,144,215,67,119,69,237,205,136,74,93,102, + 246,255,243,42,11,222,146,196,95,241,113,149,92,18,59,95,109,234,123,180,126,157,104,206,211,123,149,88,37,211,155,193, + 174,89,94,78,124,207,225,31,244,33,113,113,202,29,245,78,80,9,94,219,136,93,102,64,159,138,66,238,244,23,87,73, + 240,202,56,105,99,208,21,159,241,185,203,3,149,143,104,98,232,168,166,14,125,221,54,125,249,172,104,187,230,156,232,70, + 19,68,28,4,177,45,170,56,57,45,168,198,7,81,147,81,115,116,9,211,88,168,161,224,125,210,104,227,209,115,225,163, + 254,32,172,174,207,38,194,68,140,242,64,101,190,62,75,63,68,251,186,32,104,121,157,226,245,138,215,173,38,180,126,241, + 58,22,147,69,232,129,55,41,185,112,135,217,255,71,85,22,188,108,27,243,132,4,138,125,219,226,93,18,60,135,119,12, + 185,212,184,97,209,173,82,244,158,36,186,66,231,26,198,59,222,203,136,191,105,39,252,28,87,80,59,119,64,207,162,18, + 89,145,182,117,176,8,33,95,161,119,92,159,89,3,228,150,224,37,27,109,78,148,69,110,91,237,241,193,92,153,142,121, + 106,211,151,113,44,125,33,183,166,47,101,14,214,239,67,241,77,131,223,190,32,82,222,193,78,132,2,225,12,125,93,88, + 76,243,234,16,215,237,148,157,33,78,51,177,91,16,233,165,252,7,46,218,227,36,118,155,120,192,135,238,228,223,21,211, + 49,193,184,184,233,183,169,191,104,93,225,245,165,143,188,28,198,235,14,175,63,188,14,85,206,70,216,129,47,8,41,93, + 211,244,131,186,42,11,94,142,201,189,60,119,90,172,91,226,135,50,56,28,14,11,11,62,79,127,247,130,44,213,234,175, + 187,189,156,185,162,29,177,37,32,192,113,185,109,139,146,103,62,124,179,199,9,59,9,93,147,57,83,27,153,217,25,146, + 
85,17,188,178,237,141,129,86,248,150,231,115,101,160,151,123,234,180,17,208,207,95,215,148,138,37,67,20,115,208,255,32, + 41,138,71,98,129,244,236,98,187,106,143,187,197,38,50,142,218,19,215,164,233,242,94,181,53,19,188,253,29,40,17,235, + 23,88,46,85,154,233,41,171,121,208,143,158,176,171,31,141,148,98,150,215,133,222,22,49,219,88,238,206,242,58,194,235, + 137,138,130,246,102,20,140,155,97,250,195,3,42,11,222,74,92,96,162,101,211,226,226,248,238,36,215,139,15,28,72,189, + 186,251,219,65,127,86,174,88,96,191,220,237,229,34,4,117,253,76,236,230,39,150,115,88,8,183,193,136,164,170,59,79, + 236,78,250,159,29,197,238,177,93,137,162,69,147,98,66,86,224,171,168,130,224,181,22,17,200,223,110,164,210,147,65,217, + 167,174,133,78,112,44,85,43,154,228,58,209,100,199,151,6,6,190,117,65,36,179,40,70,248,132,203,180,189,235,63,230, + 228,250,162,139,139,232,36,135,44,39,173,116,92,242,19,95,92,119,227,89,51,193,107,228,189,110,62,246,65,248,168,141, + 105,144,52,197,234,163,124,159,167,176,135,253,168,140,117,231,88,151,75,108,60,127,243,60,206,243,57,207,235,60,191,183, + 146,177,179,25,161,6,154,138,89,87,40,58,121,189,217,103,159,169,44,120,249,40,126,109,100,68,30,241,245,251,125,221, + 22,66,20,10,113,113,236,240,26,167,162,11,229,61,47,119,255,30,151,233,203,2,108,46,116,147,228,173,210,43,245,106, + 71,31,127,123,85,215,163,118,20,186,25,241,219,239,245,17,84,114,154,157,232,213,172,46,44,230,130,224,53,68,76,254, + 54,195,108,49,97,148,149,225,19,148,172,91,52,120,233,172,113,115,182,29,197,102,117,89,123,222,56,218,26,248,54,132, + 177,9,95,60,147,147,107,31,23,23,209,239,141,197,147,4,165,202,239,21,59,108,166,249,94,243,53,140,223,253,136,159, + 189,199,131,27,32,12,109,72,223,101,31,83,181,202,36,183,42,108,229,192,151,120,83,101,169,249,111,117,95,248,86,238, + 11,89,25,98,192,243,49,207,203,60,63,243,60,205,243,181,170,97,6,190,164,248,204,235,62,214,195,84,21,188,102,78, + 
222,171,201,131,99,178,37,134,184,72,5,85,223,58,83,172,104,216,46,185,219,251,7,231,225,179,105,38,135,88,226,59, + 22,247,20,207,250,251,146,121,77,211,104,103,252,178,157,197,46,147,56,48,70,200,190,173,158,85,195,248,82,240,202,76, + 24,233,151,212,166,127,230,119,147,75,37,154,96,171,211,68,203,249,17,249,178,130,177,99,76,57,19,169,38,185,113,145, + 129,42,222,136,84,155,198,23,15,125,243,152,91,187,160,156,42,71,135,204,1,78,23,128,202,106,40,120,143,32,126,215, + 102,151,163,40,39,46,103,13,105,144,120,143,213,55,63,240,85,124,185,220,65,54,254,221,90,253,198,138,65,207,126,231, + 185,226,46,52,63,242,60,201,243,37,207,155,60,127,242,60,202,243,41,207,171,60,191,86,130,144,117,153,18,247,127,107, + 245,145,104,149,5,47,219,27,124,33,233,219,15,251,229,72,24,61,254,96,203,99,49,149,10,112,145,138,127,136,67,92, + 7,62,171,35,112,141,140,75,31,118,147,73,235,255,155,47,60,248,34,101,182,56,243,217,198,222,39,236,46,116,153,111, + 62,232,107,94,86,91,123,163,70,242,177,224,125,132,255,201,2,125,166,97,178,185,201,174,177,41,142,235,190,112,214,184, + 248,208,98,213,57,209,238,21,222,57,78,15,169,232,79,71,112,9,82,32,235,16,107,108,169,230,244,138,139,139,230,26, + 29,142,218,59,76,123,209,124,175,13,26,138,221,140,143,10,22,238,16,139,154,95,10,165,216,89,222,85,117,186,148,198, + 204,206,5,223,154,97,125,134,210,13,219,139,78,51,87,94,231,103,35,165,128,229,121,172,191,12,41,224,249,141,231,57, + 158,239,120,222,227,249,207,20,177,101,177,54,120,94,240,206,217,108,245,147,162,170,11,222,238,156,97,96,84,74,117,222, + 177,205,145,64,226,29,207,165,243,155,165,113,106,46,62,242,151,59,190,79,74,225,168,91,230,133,30,4,239,92,115,181, + 185,127,154,198,22,251,110,247,183,131,47,114,12,179,63,136,93,246,133,81,169,213,217,121,254,150,62,146,235,130,87,102, + 6,73,159,252,30,253,69,253,201,224,201,83,162,228,162,221,162,200,248,87,69,161,33,139,68,100,215,9,34,127,235,161, + 
34,95,203,100,229,136,236,49,89,148,95,188,83,84,127,54,61,222,184,145,33,144,211,211,181,241,5,139,158,150,29,228, + 228,141,23,140,170,60,185,152,157,97,144,11,139,101,49,29,50,7,56,149,72,238,161,161,224,53,62,42,154,141,121,0, + 130,81,227,50,221,28,182,16,59,116,70,102,66,151,69,103,105,95,249,19,87,207,164,249,197,65,243,140,131,230,27,179, + 168,208,100,231,231,138,238,50,78,148,186,103,131,30,235,128,127,237,240,70,169,46,120,163,136,159,42,148,141,16,63,111, + 25,236,17,177,116,116,103,210,159,247,77,170,127,172,118,141,168,163,148,193,224,146,204,223,186,82,230,170,45,165,112,46, + 221,166,114,128,115,220,223,149,136,252,33,231,105,71,247,220,43,207,118,248,229,172,205,82,141,221,140,61,223,14,18,229, + 202,228,103,231,217,65,20,202,109,193,43,171,252,25,131,170,64,223,251,148,158,0,74,47,61,104,77,200,173,13,44,122, + 221,205,82,193,49,108,166,72,110,72,113,109,156,211,184,149,101,39,153,133,114,223,55,211,227,144,57,127,100,234,59,191, + 101,107,55,153,69,171,229,89,43,185,32,196,90,155,63,159,178,238,176,178,98,99,240,243,223,91,223,171,180,102,98,55, + 212,124,246,212,55,142,66,60,106,146,214,47,97,197,78,35,69,95,179,91,23,136,138,109,226,50,155,11,56,116,97,16, + 17,146,29,191,160,241,237,160,113,238,160,241,238,160,113,239,160,241,239,160,121,192,65,243,129,131,230,5,7,205,15,14, + 154,39,28,52,95,56,104,222,112,208,252,225,160,121,196,65,243,201,205,46,42,115,1,153,77,206,207,155,183,74,115,81, + 160,215,20,81,120,220,42,35,158,180,212,210,3,198,102,3,196,168,143,98,120,103,125,101,237,143,252,170,11,94,135,116, + 110,49,237,206,250,30,21,77,191,254,156,252,207,250,85,93,142,20,47,18,246,189,140,3,229,112,7,222,253,125,131,115, + 247,41,34,116,35,101,14,192,131,114,55,243,106,158,60,65,191,143,29,81,115,203,47,59,134,252,207,95,118,116,157,185, + 123,124,93,211,129,251,221,172,1,125,36,120,155,152,131,170,212,162,61,106,14,254,39,78,24,59,185,153,44,32,239,201, + 
147,142,123,137,91,137,81,138,49,213,16,188,221,38,248,172,173,42,60,157,158,210,173,166,140,71,230,163,71,174,221,206, + 169,122,58,72,177,204,241,117,124,89,132,143,42,91,207,122,205,108,203,119,93,20,99,195,51,202,9,175,255,85,89,1, + 210,121,214,106,171,159,4,107,38,120,59,168,30,35,109,94,184,42,219,180,155,255,209,164,171,40,19,219,73,148,168,211, + 82,68,87,172,37,194,10,21,189,209,7,239,102,98,30,81,142,251,118,232,187,191,57,146,104,167,149,202,208,58,104,252, + 57,104,28,58,122,72,209,74,227,211,65,227,212,65,227,213,65,227,214,65,227,215,65,227,216,65,227,217,65,227,58,199, + 197,7,92,16,69,165,229,174,239,231,55,250,128,231,252,176,97,117,186,136,124,205,19,68,68,135,91,68,100,247,137,70, + 40,28,167,209,42,24,55,83,61,6,206,17,81,73,75,140,52,155,188,107,90,250,241,227,122,8,222,105,31,95,87,8, + 72,7,193,203,71,248,159,80,126,93,177,245,163,56,175,8,168,15,94,239,113,124,252,45,181,126,107,220,160,232,9,218, + 245,253,83,10,224,67,50,62,150,75,11,118,150,49,191,65,94,142,201,173,35,119,154,23,202,1,195,169,197,174,20,42, + 152,247,228,160,190,21,207,44,153,215,236,240,145,159,18,255,244,71,145,107,66,23,17,69,56,249,130,188,125,29,160,136, + 224,125,196,28,84,42,126,189,151,92,240,131,243,132,251,2,209,156,69,140,183,23,128,28,78,48,25,133,25,138,76,88, + 167,236,164,26,86,191,167,217,174,253,249,185,121,97,165,250,239,142,106,180,59,68,101,51,141,69,151,194,48,140,157,35, + 138,221,227,247,122,214,184,240,50,102,153,33,156,123,191,158,30,146,193,169,131,184,150,124,226,198,244,60,201,180,176,231, + 110,170,167,107,151,130,102,104,24,206,48,83,245,146,176,78,217,5,64,58,135,121,189,39,30,150,107,97,36,143,35,30, + 79,190,16,172,94,152,191,56,142,252,126,142,129,39,118,18,191,217,165,175,114,43,199,188,59,112,200,158,185,177,163,114, + 90,50,103,227,227,130,75,137,3,43,139,179,251,83,189,38,166,78,239,75,185,114,244,167,196,203,119,142,169,253,45,9, + 
236,52,217,80,87,101,106,51,222,97,61,32,197,2,23,57,136,97,193,144,195,221,219,182,50,76,225,83,25,83,124,73, + 198,23,139,192,192,128,63,99,235,23,249,102,195,234,46,251,78,236,73,254,199,95,119,115,173,80,232,134,72,28,84,89, + 200,118,234,235,74,35,123,91,240,74,63,72,143,221,125,228,144,114,3,190,80,226,98,231,137,170,183,70,11,70,57,243, + 185,75,204,217,162,228,132,202,113,208,238,236,32,144,229,49,127,190,228,131,187,92,14,207,168,40,119,157,57,68,163,230, + 243,233,59,207,156,126,136,119,159,57,21,17,95,130,225,218,244,237,105,7,186,35,221,236,230,90,245,44,166,249,162,12, + 215,174,143,147,161,27,92,41,41,97,67,186,168,78,150,162,122,88,22,194,186,88,205,166,230,123,181,208,53,156,161,235, + 188,117,170,87,4,227,28,226,189,252,144,158,242,210,117,27,153,43,191,88,78,119,224,52,154,215,10,202,119,230,118,24, + 70,76,32,238,147,187,216,15,200,44,16,170,177,132,120,154,120,141,72,83,121,205,187,174,240,196,192,185,230,56,123,88, + 39,193,27,65,124,89,56,42,84,124,185,169,143,175,46,70,93,253,230,253,190,39,231,79,143,61,56,118,100,141,223,219, + 181,44,113,188,104,145,176,163,36,68,47,90,22,184,255,202,112,3,46,107,252,161,116,134,21,196,243,210,57,150,203,255, + 127,181,252,194,227,93,219,159,136,51,150,223,113,133,118,44,79,85,139,41,248,11,9,250,223,238,25,95,247,244,202,167, + 219,31,60,190,43,233,47,127,23,184,206,124,73,121,119,163,10,133,26,73,164,165,79,168,32,120,151,164,103,102,152,170, + 222,113,206,125,159,88,197,216,98,158,104,53,91,24,154,94,155,88,15,43,57,161,70,143,122,214,108,223,87,93,124,167, + 73,170,22,37,225,91,226,44,172,153,74,15,95,87,93,45,136,227,25,121,167,154,68,182,113,84,76,85,152,140,99,99, + 134,210,38,57,218,72,72,112,59,58,188,154,14,93,40,116,144,240,54,232,78,71,206,36,190,51,224,216,201,56,11,3, + 232,104,154,196,248,53,232,184,154,68,121,166,208,205,119,227,88,59,43,172,151,2,227,95,216,238,217,244,115,239,102,13, + 
223,200,231,15,137,204,224,157,251,193,111,95,163,213,172,117,102,219,110,226,247,231,246,176,182,15,183,151,217,118,220,142, + 102,155,114,251,154,109,221,74,30,221,51,220,31,220,47,220,63,220,79,220,95,244,97,228,168,65,199,249,85,159,73,63, + 210,167,126,53,40,235,7,162,18,120,125,110,254,138,253,183,216,221,239,40,45,120,195,27,245,51,199,89,188,78,130,151, + 173,42,199,217,54,107,84,52,119,118,23,247,167,94,57,149,150,114,229,228,158,228,43,31,175,239,117,120,234,196,250,95, + 181,106,86,252,243,232,66,161,219,131,131,3,143,201,93,224,27,29,1,92,13,8,8,184,64,226,54,173,106,229,130,223, + 12,25,16,243,217,51,203,218,108,63,176,45,225,143,95,211,146,175,242,238,242,249,131,216,197,189,81,102,134,198,13,139, + 154,101,132,43,187,234,52,222,20,188,214,157,164,18,115,191,83,235,114,218,178,35,215,165,237,209,116,82,157,145,157,11, + 107,190,36,127,171,20,179,141,71,187,240,62,25,151,27,75,204,254,90,233,133,130,219,92,62,235,28,13,253,38,70,245, + 140,41,28,23,105,86,5,131,128,2,26,142,177,109,134,224,157,246,145,210,243,88,80,161,146,25,151,110,117,19,188,14, + 25,215,115,245,177,7,91,228,56,77,153,231,5,89,234,213,195,59,134,252,78,217,36,206,254,248,197,128,83,20,107,122, + 252,135,207,250,159,252,233,203,129,167,246,125,23,127,158,68,237,37,8,215,236,139,221,71,22,52,55,195,75,22,187,227, + 48,94,22,188,113,170,238,64,22,74,124,200,99,233,88,114,113,82,221,159,254,49,177,85,201,201,180,212,146,189,214,54, + 174,231,194,251,52,207,240,23,250,32,81,54,147,7,29,83,90,222,171,164,134,126,195,151,89,69,88,189,30,202,166,4, + 180,180,111,42,4,20,208,108,124,101,124,184,151,90,178,79,217,121,172,212,67,105,230,24,59,235,161,247,246,185,224,173, + 68,156,166,34,18,226,208,246,33,16,131,126,194,193,237,9,162,82,133,8,118,150,83,68,5,21,4,175,117,208,171,22, + 188,207,226,219,178,160,78,208,116,82,77,52,68,11,93,8,83,182,78,251,93,239,88,219,57,226,38,239,19,96,61,241, + 
41,243,196,73,117,47,122,76,88,119,93,118,6,13,125,199,168,132,197,187,168,74,126,80,92,127,250,210,11,34,10,104, + 54,190,140,204,57,249,219,142,80,122,119,183,232,228,13,230,24,251,92,87,193,203,150,194,161,13,67,19,170,8,186,200, + 5,65,104,115,184,143,135,13,169,106,134,50,164,184,235,44,94,20,188,13,84,13,220,143,74,94,106,14,244,103,116,223, + 65,40,114,199,107,234,30,251,119,189,195,108,231,41,46,188,83,152,249,78,69,39,189,173,238,238,238,99,215,149,72,158, + 168,169,255,188,103,248,14,221,208,86,53,23,182,165,141,59,65,68,1,141,198,86,121,211,119,139,207,248,92,105,193,203, + 57,241,229,179,46,208,89,240,102,84,208,121,238,177,214,16,133,54,103,249,35,173,77,167,93,153,29,71,241,162,224,93, + 174,98,124,169,211,238,81,55,77,39,213,218,215,50,25,236,84,54,175,177,165,157,203,186,240,78,81,25,241,187,243,183, + 41,93,152,196,242,94,117,52,245,31,227,22,57,231,11,213,160,141,59,66,72,1,141,198,214,194,107,167,84,39,212,21, + 188,79,92,87,253,178,187,238,130,151,23,196,83,85,40,180,97,55,85,221,130,48,180,105,69,181,205,131,5,247,49,245, + 245,73,162,150,42,130,87,150,148,84,50,118,183,228,130,237,214,129,94,93,211,73,117,88,70,251,210,142,163,138,19,106, + 212,176,199,205,54,254,196,197,119,106,173,250,59,25,165,56,233,50,157,124,206,61,26,47,202,191,27,237,76,194,82,205, + 24,233,195,218,127,148,2,191,19,186,33,86,177,203,241,177,42,239,238,150,124,224,71,235,24,139,209,93,240,102,92,24, + 162,76,9,202,93,96,3,158,185,168,214,182,69,137,140,220,177,217,117,18,47,9,94,101,47,197,240,173,89,203,64,47, + 168,233,228,186,194,168,73,63,236,9,53,5,11,85,25,178,180,241,96,23,223,105,181,17,87,74,121,33,149,206,91,73, + 149,158,228,123,61,160,169,239,112,165,43,227,118,182,186,187,79,215,157,14,140,132,160,2,138,143,169,4,235,253,131,130, + 253,103,171,95,112,226,182,235,170,68,134,218,65,240,178,45,227,60,182,51,238,106,224,213,130,20,192,231,41,224,196,180, + 
73,245,133,44,194,177,52,39,14,226,37,193,203,126,39,10,197,47,80,48,80,127,189,199,74,41,230,226,4,187,55,189, + 216,196,102,53,119,15,174,175,92,23,233,194,251,148,208,97,119,215,41,123,64,140,166,190,211,195,248,24,173,221,73,233, + 5,185,216,61,155,204,118,254,134,40,12,97,5,20,220,209,237,107,198,195,155,185,195,139,207,250,74,139,146,194,225,141, + 251,155,227,107,136,7,219,36,215,5,111,52,151,0,14,11,13,22,95,188,219,27,98,209,38,124,254,78,47,17,22,26, + 100,150,153,44,172,146,224,37,203,159,113,249,136,196,165,122,11,233,187,86,209,18,162,225,68,91,67,117,113,200,151,206, + 220,217,61,32,187,155,127,54,162,243,237,74,47,18,252,129,33,223,105,175,198,11,245,56,29,110,144,27,57,156,219,12, + 211,62,94,90,99,63,105,68,140,2,6,163,137,241,178,218,219,115,196,22,231,58,2,5,7,220,175,133,208,77,159,199, + 182,88,159,61,175,157,4,47,91,53,22,189,197,138,132,145,80,130,232,213,157,207,54,246,18,197,139,133,11,89,198,185, + 122,78,157,195,11,130,183,66,198,133,170,133,59,212,219,125,164,75,94,150,193,62,69,195,234,106,198,238,121,129,94,83, + 116,216,5,189,233,46,186,220,41,73,207,56,113,231,235,126,83,134,51,23,253,103,129,225,63,116,67,91,245,133,185,228, + 3,63,57,251,18,239,166,77,151,101,119,185,244,108,73,89,134,54,18,100,74,120,54,125,164,251,77,138,68,1,115,206, + 162,208,0,85,139,183,100,69,68,167,177,230,243,223,239,225,185,69,9,193,107,198,84,94,138,173,95,68,28,219,149,4, + 225,168,41,71,119,37,138,6,245,10,179,83,92,146,49,218,14,5,5,111,155,140,29,72,138,229,84,113,192,23,26,178, + 200,121,226,186,32,115,24,159,212,128,244,143,137,69,187,117,200,161,26,239,194,36,89,74,135,234,106,78,153,3,122,106, + 44,120,141,236,41,81,73,15,107,177,56,151,94,118,148,118,122,135,67,92,101,159,112,55,253,35,216,252,187,249,90,38, + 3,130,171,69,70,180,31,37,34,187,79,20,81,41,143,138,98,247,126,144,94,80,130,62,238,117,18,186,70,177,9,122, + 238,236,250,134,78,130,151,119,81,238,34,254,215,173,67,25,113,240,7,20,165,208,174,184,196,15,9,162,43,245,157,204, + 
183,59,69,246,169,138,130,247,73,35,126,151,68,165,202,3,191,248,244,207,156,143,76,181,66,213,194,12,188,171,239,106, + 177,9,57,73,54,209,161,42,17,199,230,89,222,43,90,99,193,251,170,113,225,113,212,179,122,45,212,15,239,55,242,6, + 71,246,152,68,241,199,157,69,72,233,154,34,168,64,113,17,24,94,64,4,134,70,128,76,144,190,122,135,155,254,17,126, + 173,28,252,86,237,4,29,184,201,101,53,202,219,238,173,162,57,42,9,94,211,30,226,103,225,66,5,103,246,167,64,72, + 106,115,73,45,69,164,198,87,49,75,7,63,232,73,135,240,130,224,253,57,125,183,238,27,61,118,144,104,23,154,23,211, + 82,139,246,80,184,195,46,229,41,181,248,103,165,171,144,69,116,190,205,156,76,239,115,113,146,156,108,196,239,118,26,135, + 236,12,190,17,188,111,27,213,15,199,190,12,1,96,83,120,46,179,136,154,60,110,250,71,83,213,79,232,128,71,138,230, + 140,243,194,220,162,156,224,229,75,108,175,135,132,4,138,113,35,107,138,95,127,78,134,160,84,156,147,212,71,220,87,220, + 103,212,119,107,101,130,126,85,5,111,94,45,110,219,3,95,228,79,141,114,113,146,220,110,84,37,154,254,169,46,73,218, + 203,107,46,120,55,24,130,119,220,42,248,172,77,225,82,238,210,87,95,203,134,127,188,168,67,122,64,144,227,82,239,81, + 254,32,120,77,51,38,189,59,110,169,45,206,162,252,176,186,59,187,212,55,19,199,214,49,29,244,109,111,56,130,135,5, + 111,67,254,149,121,202,214,197,4,227,143,199,101,19,214,153,190,250,161,139,19,100,91,213,67,52,236,146,157,193,210,230, + 175,27,130,247,214,23,224,179,246,223,197,27,239,166,111,68,168,90,48,8,120,32,21,89,147,129,110,157,190,217,73,240, + 114,242,241,143,56,93,217,157,99,106,27,187,136,16,152,106,241,107,90,178,184,115,92,29,17,74,125,68,125,245,169,236, + 51,213,5,239,80,227,178,67,179,120,76,48,56,46,115,101,130,124,37,61,95,243,66,165,223,141,243,73,203,247,90,102, + 3,193,251,130,113,105,45,117,25,252,214,142,225,12,215,95,74,106,235,166,111,220,110,230,147,69,91,218,108,215,255,214, + 
23,77,159,88,239,197,185,69,89,193,203,22,196,101,63,249,217,110,27,85,147,98,122,177,211,171,10,220,23,99,135,215, + 204,40,205,42,251,202,161,129,224,157,101,164,60,234,121,55,38,25,127,91,104,169,148,166,101,161,109,227,194,228,88,57, + 35,227,4,149,185,212,68,200,183,181,129,224,53,202,159,22,232,55,29,126,107,67,156,46,141,70,185,225,23,97,25,249, + 211,39,190,137,182,180,211,220,76,119,84,44,62,209,221,95,5,175,153,18,232,45,142,15,77,73,168,34,14,124,159,0, + 193,153,219,217,24,182,15,17,169,212,23,50,102,119,35,23,47,243,166,3,120,88,240,42,91,97,13,248,180,186,90,41, + 23,38,199,33,25,199,167,10,231,177,228,155,234,150,247,42,103,3,193,123,171,177,139,215,122,40,252,214,134,68,143,92, + 110,250,234,58,55,253,162,138,14,31,160,32,199,101,132,35,253,89,240,154,182,142,51,0,112,202,178,163,59,19,33,60, + 115,9,206,145,220,165,125,25,51,27,195,171,190,232,120,15,11,222,231,113,92,234,167,23,34,174,175,174,22,224,194,228, + 248,136,113,57,102,208,124,181,143,2,199,174,240,90,26,159,92,18,188,45,248,93,242,86,106,12,191,181,33,121,171,52, + 55,125,181,163,155,126,209,87,135,120,122,144,163,11,183,99,189,60,183,104,35,120,249,230,255,98,206,211,91,175,118,52, + 85,243,66,69,54,95,243,233,134,94,162,73,195,162,220,225,255,35,22,201,140,26,186,9,222,21,70,142,207,97,79,96, + 162,241,51,56,110,91,78,170,169,46,78,142,159,167,151,159,222,160,118,121,91,218,9,149,239,117,107,25,123,148,140,13, + 67,218,41,123,82,98,238,119,166,175,30,200,134,95,60,101,156,206,37,62,132,182,180,17,145,61,38,155,62,241,164,15, + 230,22,109,4,175,105,92,156,226,82,145,232,80,241,9,9,48,100,112,240,77,38,134,47,222,237,45,184,244,179,172,160, + 54,197,151,29,238,97,193,251,156,177,195,75,213,104,48,217,248,109,220,96,176,11,19,99,104,70,177,9,138,47,211,228, + 2,80,164,29,4,175,108,255,221,70,42,56,42,166,1,255,181,9,215,151,244,158,154,13,159,56,144,94,108,226,59,180, + 165,109,66,25,214,88,125,162,14,4,111,230,21,217,250,19,7,57,131,195,244,187,26,136,243,7,33,74,189,5,183,237, + 
125,147,26,136,208,188,65,66,78,56,113,178,180,163,174,130,247,1,227,152,154,146,244,99,194,241,163,93,132,158,119,153, + 147,234,204,50,238,20,155,160,114,157,154,188,215,124,187,136,93,217,254,79,167,151,23,94,2,255,181,203,71,39,149,26, + 183,136,155,134,110,250,131,81,237,48,79,133,134,104,75,27,125,0,133,213,233,98,250,195,112,31,205,43,218,9,94,211, + 170,177,232,37,174,52,111,82,76,236,254,118,16,4,170,135,217,179,121,176,104,211,188,4,119,240,21,226,48,81,35,55, + 58,218,195,130,247,22,227,66,76,171,84,76,56,254,121,3,56,214,133,73,49,40,163,116,233,253,223,170,155,157,97,217, + 17,235,123,149,177,153,224,237,201,239,197,37,122,225,195,246,160,216,189,31,88,253,53,63,82,213,97,183,95,242,148,15, + 231,21,109,5,47,91,97,226,81,190,64,85,165,114,1,241,236,163,173,197,57,132,56,228,24,110,195,167,151,182,18,49, + 21,11,152,151,211,150,122,186,122,26,10,79,0,95,81,252,190,79,172,147,107,17,87,47,77,25,49,164,36,42,149,141, + 135,156,179,197,250,94,161,54,19,188,37,51,110,228,47,216,14,63,182,1,81,201,75,77,95,125,198,77,95,40,146,241, + 1,74,62,143,182,180,149,216,101,234,65,240,186,103,61,136,147,252,14,169,241,49,226,16,165,205,66,152,67,246,194,23, + 14,109,79,16,35,146,170,154,142,248,43,209,47,183,59,215,195,130,55,0,169,109,176,208,222,96,66,12,176,78,198,42, + 223,6,47,60,230,37,91,101,103,200,106,151,221,232,7,90,36,225,203,154,87,209,106,216,219,236,207,129,217,205,206,80, + 250,145,67,104,75,221,195,203,186,221,105,157,183,6,248,120,94,177,133,224,101,171,69,172,34,46,87,166,157,201,71,23, + 54,135,136,117,83,236,62,178,160,185,168,88,33,146,59,244,50,177,146,168,169,66,199,122,88,240,34,142,215,223,22,218, + 216,56,115,114,77,112,97,66,140,52,39,227,226,211,63,211,165,216,196,61,118,18,187,150,190,168,102,190,99,212,176,199, + 225,203,58,87,57,164,60,214,22,127,13,119,211,15,144,89,199,38,112,138,71,139,31,140,206,133,57,197,54,130,215,180, + 
4,130,223,236,106,253,58,133,197,23,239,244,70,152,195,77,50,48,124,185,169,143,136,77,79,55,198,225,11,167,136,20, + 149,58,212,11,130,183,182,14,55,240,129,199,143,207,74,186,48,33,198,92,11,103,56,170,174,128,88,122,208,250,94,13, + 236,40,120,101,127,220,159,209,31,36,242,225,211,154,158,178,208,7,139,236,199,181,110,246,255,181,19,57,186,244,134,182, + 180,197,197,97,230,206,92,154,79,108,39,120,217,42,18,15,17,255,68,71,133,138,164,65,49,98,203,71,253,32,112,157, + 216,252,127,253,196,144,1,149,69,116,161,188,230,174,46,231,214,45,175,90,103,122,65,240,134,103,148,168,188,243,13,76, + 70,118,78,123,51,97,157,57,193,110,118,113,66,156,100,100,103,232,52,86,237,184,228,105,31,187,85,68,67,115,209,251, + 158,177,203,59,244,49,248,180,142,187,187,215,159,70,180,119,179,239,155,167,23,33,137,69,91,234,188,179,219,127,118,174, + 139,93,59,11,94,211,42,19,159,112,238,216,176,176,32,49,249,182,58,148,121,96,144,95,199,247,242,187,239,161,140,22, + 83,39,214,23,249,194,131,205,188,186,95,203,227,67,37,205,211,130,87,58,254,29,233,105,167,70,99,66,178,49,97,245, + 123,154,147,236,48,23,39,196,79,140,15,161,137,111,169,93,108,162,85,138,249,94,99,236,44,118,157,99,56,11,14,156, + 11,191,214,173,216,4,101,58,177,136,157,104,55,251,126,73,122,191,207,65,91,106,26,202,18,209,113,140,181,255,39,228, + 242,92,98,107,193,107,198,228,245,38,222,231,119,44,95,38,66,140,76,174,38,190,249,160,175,95,238,232,222,58,172,134, + 40,87,38,191,233,124,159,17,125,136,8,149,59,208,75,130,55,191,57,8,85,207,181,10,178,185,208,206,219,102,157,104, + 35,220,185,13,206,19,181,178,249,76,233,178,165,229,189,130,236,46,120,157,67,27,32,126,180,190,145,31,228,70,159,7, + 95,203,206,176,25,109,169,91,24,11,157,200,56,245,253,56,5,230,17,219,11,94,235,237,124,22,190,59,136,191,131,130, + 2,69,255,94,21,140,248,213,99,187,146,108,43,114,249,221,190,126,191,175,72,77,136,17,193,193,129,220,89,127,19,59, + 
137,100,34,80,135,142,243,134,224,149,206,63,50,99,17,165,96,122,76,82,254,189,208,202,10,130,34,178,235,29,186,92, + 252,120,196,31,196,110,25,167,98,32,230,71,42,226,239,181,11,103,232,238,102,127,183,65,118,6,13,243,158,83,245,199, + 252,109,134,89,251,125,25,231,240,87,100,14,241,27,193,107,26,231,147,237,78,188,198,151,180,34,35,242,136,22,77,139, + 139,89,83,26,26,71,253,118,17,186,251,182,14,22,115,166,54,18,45,154,20,23,252,142,242,66,218,6,249,238,133,116, + 234,48,111,9,94,57,0,70,152,3,179,80,226,98,76,88,118,89,104,233,194,153,101,194,77,118,97,34,204,40,37,92, + 116,210,219,186,20,155,24,236,79,130,87,246,211,40,235,135,76,225,113,43,149,78,29,7,241,179,215,234,175,121,221,236, + 235,57,215,210,3,158,64,123,170,62,231,210,71,73,129,62,211,172,253,189,149,215,87,197,230,15,191,19,188,86,227,202, + 97,175,18,39,248,210,86,72,72,160,24,220,175,146,216,244,90,55,177,247,187,120,113,102,127,138,54,2,247,204,190,20, + 177,119,107,188,120,151,158,125,96,159,74,130,223,69,94,68,59,33,223,177,186,174,157,228,77,193,43,7,193,84,115,144, + 22,232,53,197,184,1,143,9,204,86,213,213,138,186,48,17,214,208,33,63,115,169,69,123,172,239,213,196,223,4,175,236, + 171,178,78,187,247,70,22,0,163,223,32,126,213,202,21,61,110,149,217,71,27,220,236,227,188,58,124,128,98,7,255,152, + 40,118,207,187,206,185,117,133,76,253,89,70,193,185,195,175,5,175,67,30,235,87,146,71,252,188,3,250,87,80,80,128, + 168,92,49,82,244,236,82,86,60,48,171,137,145,225,225,252,65,245,82,155,157,163,103,218,66,113,185,11,103,54,54,158, + 181,18,229,208,13,12,12,224,14,249,147,88,79,36,201,140,21,129,58,119,144,183,5,175,28,8,51,172,3,182,64,191, + 233,244,197,122,24,147,154,174,217,25,238,88,107,246,229,251,46,78,132,183,233,80,108,162,248,172,175,172,139,74,164,63, + 10,94,75,186,170,209,196,247,206,226,55,122,212,179,40,42,163,202,165,209,122,61,204,126,233,231,102,255,54,187,22,206, + 
128,121,88,41,145,251,248,113,81,236,238,119,140,76,54,206,99,79,150,128,110,162,240,188,225,247,130,215,217,74,16,247, + 18,219,100,78,218,203,1,1,1,162,122,213,66,98,194,152,218,98,253,234,46,98,199,23,3,196,193,31,18,196,169,52, + 223,237,0,159,74,75,22,7,232,223,220,254,249,0,177,126,85,23,49,254,214,90,162,90,76,65,193,207,38,119,114,79, + 201,103,158,34,223,193,54,230,11,193,43,7,3,239,130,63,106,29,192,124,195,180,248,244,79,149,206,201,10,254,77,190, + 102,241,102,31,166,186,56,17,238,52,138,77,204,248,92,237,248,93,202,82,32,223,107,169,191,138,221,76,250,174,53,177, + 218,121,241,205,83,161,161,113,196,90,228,142,215,140,157,113,84,107,243,241,105,196,67,63,103,59,117,30,217,75,40,14, + 164,206,46,46,95,26,140,30,254,164,181,136,143,21,206,242,52,141,55,14,53,152,43,32,120,179,176,112,121,204,217,79, + 6,93,239,225,54,10,8,160,184,177,232,80,81,187,70,148,104,223,186,164,24,54,164,170,88,124,127,83,241,198,203,157, + 197,182,79,227,196,233,189,57,23,193,252,59,248,119,189,177,162,147,88,68,191,155,255,141,246,173,74,138,90,213,163,68, + 97,202,43,204,207,32,29,109,143,20,104,253,228,179,134,219,177,35,124,37,120,45,131,162,5,241,178,243,192,206,215,60, + 129,226,124,31,18,69,239,218,40,74,62,184,19,34,88,143,56,87,87,178,51,148,119,232,80,194,150,118,158,45,239,85, + 21,98,55,211,114,196,237,100,8,87,102,11,51,237,56,118,23,5,122,223,43,162,71,46,167,113,252,142,40,49,127,155, + 113,201,6,49,162,94,45,125,253,166,155,253,24,134,221,93,223,95,242,45,245,80,26,9,219,45,162,232,228,245,34,42, + 233,97,218,193,29,39,242,86,105,33,178,24,75,95,201,156,229,197,52,155,35,32,120,221,176,82,242,102,255,90,185,35, + 116,132,56,47,119,88,13,71,8,161,76,8,21,203,71,138,102,177,197,68,223,30,229,197,45,67,171,139,41,19,234,137, + 251,239,109,36,22,76,143,21,75,23,52,51,224,255,155,255,27,255,25,255,12,255,44,255,157,138,229,34,69,112,80,160, + 213,177,46,203,127,227,136,252,55,215,202,139,86,165,252,165,209,125,45,120,157,210,84,113,168,195,246,172,22,208,192,136, + 
104,17,82,178,154,200,83,49,86,132,86,111,43,194,106,119,18,97,117,187,25,11,43,200,61,100,255,124,225,98,63,79, + 48,197,144,38,239,181,21,2,215,165,144,135,38,196,60,226,131,172,198,175,243,88,14,46,90,81,228,41,91,135,10,29, + 52,22,161,213,90,139,208,90,29,105,76,119,198,152,206,190,175,50,99,221,236,187,118,70,177,137,170,45,209,142,158,164, + 78,23,242,231,14,198,58,149,183,74,115,242,243,186,34,184,88,37,17,20,81,88,184,48,62,246,202,77,160,4,119,115, + 41,67,240,218,103,7,184,28,17,43,51,31,240,237,225,5,196,26,249,245,179,143,184,32,179,35,220,204,153,174,202,159, + 221,39,255,46,255,142,249,242,119,118,35,26,201,127,43,220,31,27,58,183,4,175,211,64,225,236,30,195,136,167,88,72, + 201,203,128,2,40,77,162,139,125,91,67,179,247,26,1,81,155,45,1,92,218,145,94,196,98,142,156,99,191,36,14,201, + 13,5,140,23,239,81,200,205,190,42,141,54,203,21,206,74,13,194,249,249,151,203,52,128,109,57,31,178,141,230,1,8, + 94,31,92,138,11,149,98,149,11,60,20,144,68,200,255,22,170,251,165,50,221,4,175,7,45,72,238,2,215,148,151,44, + 58,17,61,28,233,197,60,250,130,92,165,74,54,50,182,232,240,94,49,152,17,188,98,193,114,44,115,251,54,144,97,77, + 29,228,102,70,79,140,233,108,145,147,20,152,165,209,126,30,167,151,220,64,227,117,138,115,28,55,148,243,100,81,233,255, + 126,175,43,60,34,54,0,200,109,96,48,24,12,6,131,193,32,120,1,4,47,12,6,131,193,96,48,8,94,0,0,0, + 0,0,0,108,183,49,134,70,0,0,0,0,0,0,16,188,0,0,0,0,0,0,64,240,2,0,0,0,0,0,0,193, + 11,0,0,0,0,0,0,4,47,0,0,0,0,0,0,16,188,0,0,0,0,0,0,184,192,255,3,116,155,54,6,4, + 95,45,50,0,0,0,0,73,69,78,68,174,66,96,130, +}; +} +namespace Sprite { +namespace SuperFamicom { +const unsigned char CrosshairBlue[332] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,32,0,0,0,32,8,6,0,0,0,115,122,122, + 244,0,0,0,4,115,66,73,84,8,8,8,8,124,8,100,136,0,0,0,9,112,72,89,115,0,0,14,196,0,0,14, + 196,1,149,43,14,27,0,0,0,238,73,68,65,84,88,133,213,87,91,18,195,32,8,196,78,15,232,81,189,161,253,9, + 
25,52,98,121,57,76,246,43,137,44,11,24,69,11,232,209,55,99,69,235,76,74,184,69,107,229,245,91,27,220,137,124, + 75,140,58,21,165,34,181,246,199,251,100,167,174,200,32,124,137,119,124,134,177,252,116,108,224,44,120,44,190,156,56,102, + 163,204,228,182,107,173,80,31,93,225,67,30,189,112,124,85,41,145,120,36,88,191,159,96,33,23,78,101,47,242,127,90, + 156,213,73,159,2,111,0,33,21,179,150,63,132,151,62,5,243,78,136,217,236,118,173,85,198,86,30,20,152,154,13,192, + 118,251,125,216,90,121,212,118,215,112,86,224,26,142,133,247,152,2,73,195,64,155,190,248,166,229,229,255,132,8,243,146, + 242,234,120,43,224,58,241,68,4,16,138,212,110,120,58,136,119,28,72,16,169,103,194,33,136,63,68,209,184,103,74,83, + 239,5,0,215,26,167,231,123,124,103,130,53,221,140,94,113,55,100,131,9,242,151,139,31,79,50,234,237,105,206,30,22, + 0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char CrosshairGreen[329] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,32,0,0,0,32,8,6,0,0,0,115,122,122, + 244,0,0,0,4,115,66,73,84,8,8,8,8,124,8,100,136,0,0,0,9,112,72,89,115,0,0,14,196,0,0,14, + 196,1,149,43,14,27,0,0,0,235,73,68,65,84,88,133,213,87,65,18,195,32,8,196,78,31,230,211,253,153,61,180, + 52,18,145,1,193,97,178,39,141,44,139,24,69,11,216,209,133,177,98,117,166,37,92,162,77,176,170,118,223,26,163,78, + 68,71,145,198,244,169,157,57,35,84,248,43,222,255,109,154,254,113,140,114,102,222,18,239,165,120,251,181,42,0,232,103, + 114,217,85,226,163,27,124,232,163,87,142,115,153,82,137,71,98,233,247,21,44,228,194,169,217,171,252,159,22,95,234,164, + 47,129,55,128,144,140,237,166,63,132,151,190,4,247,147,16,103,35,157,90,220,140,119,121,80,224,94,108,0,164,227,119, + 182,221,229,13,182,82,193,225,176,42,56,59,188,105,9,52,5,3,109,58,243,205,202,203,255,9,17,251,91,202,169,227, + 205,128,235,198,19,17,64,40,82,171,225,233,32,158,113,33,65,164,222,9,105,16,50,81,55,238,88,210,212,119,1,0, + 
238,241,241,126,143,125,62,216,173,151,209,35,222,134,235,96,98,252,229,226,3,112,72,179,236,202,138,114,18,0,0,0, + 0,73,69,78,68,174,66,96,130, +}; +const unsigned char CrosshairRed[342] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,32,0,0,0,32,8,6,0,0,0,115,122,122, + 244,0,0,0,4,115,66,73,84,8,8,8,8,124,8,100,136,0,0,0,9,112,72,89,115,0,0,14,196,0,0,14, + 196,1,149,43,14,27,0,0,0,248,73,68,65,84,88,133,205,87,65,14,196,32,8,132,102,255,255,101,246,176,177,139, + 148,81,80,27,229,212,70,102,6,212,0,50,229,77,26,107,156,37,139,2,228,241,209,39,11,113,71,156,68,139,106,128, + 56,255,198,175,203,223,114,16,79,68,253,138,90,99,141,113,112,80,231,131,196,11,83,52,19,43,196,53,135,147,7,38, + 150,104,244,212,32,86,235,228,236,20,6,200,207,191,117,215,70,12,242,94,139,133,166,236,173,236,67,252,111,139,67,157, + 237,71,48,27,192,244,142,93,228,23,148,144,184,228,131,96,254,3,164,4,176,213,108,37,52,5,208,53,47,227,81,28, + 49,153,102,163,88,96,149,68,150,193,21,223,59,128,68,43,69,13,103,4,199,246,8,34,151,240,209,249,38,112,251,47, + 97,177,209,74,152,246,95,93,9,211,51,160,181,99,142,128,104,115,55,124,59,136,115,7,146,237,51,33,2,71,166,226, + 94,23,13,77,214,104,44,103,174,163,143,86,189,244,187,224,232,151,81,21,132,39,210,33,91,246,54,132,193,44,226,219, + 107,95,57,136,120,253,172,254,16,23,0,0,0,0,73,69,78,68,174,66,96,130, +}; +} +namespace WonderSwan { +const unsigned char Auxiliary0[117] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,39,73,68,65,84,40,145, + 99,96,24,5,12,12,12,12,12,140,216,4,255,255,255,255,31,174,128,145,17,67,13,19,57,54,145,165,105,20,64,1, + 0,79,229,4,6,25,160,104,81,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char Auxiliary1[134] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 
124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,56,73,68,65,84,40,145, + 99,96,24,212,128,17,155,224,255,255,255,255,195,21,48,50,98,168,193,16,64,214,128,75,35,10,7,155,6,108,26,153, + 112,41,194,7,200,210,68,185,159,176,105,196,22,122,131,28,0,0,93,187,20,10,151,47,54,180,0,0,0,0,73,69, + 78,68,174,66,96,130, +}; +const unsigned char Auxiliary2[136] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,58,73,68,65,84,40,145, + 99,96,160,23,96,196,38,248,255,255,255,255,112,5,140,140,24,106,24,113,41,198,80,136,164,153,137,34,231,225,179,5, + 221,54,178,108,162,204,121,12,12,244,10,8,92,54,98,139,39,250,1,0,103,1,20,14,22,78,11,159,0,0,0,0, + 73,69,78,68,174,66,96,130, +}; +const unsigned char Headphones[167] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,89,73,68,65,84,40,145, + 157,145,65,14,0,33,8,3,29,255,255,231,122,210,176,217,130,196,222,44,3,82,29,227,65,56,83,146,14,0,150,249, + 192,177,33,243,90,69,91,139,102,213,120,86,206,64,151,69,146,0,166,3,110,225,231,239,234,208,144,62,64,6,40,200, + 102,170,6,0,236,243,222,194,134,117,131,90,159,28,87,44,225,174,22,199,17,111,189,73,184,252,1,0,0,0,0,73, + 69,78,68,174,66,96,130, +}; +const unsigned char Initialized[136] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,58,73,68,65,84,40,145, + 99,96,160,23,96,132,49,254,255,255,255,159,160,98,70,70,70,184,38,98,52,32,107,100,193,102,18,54,128,108,48,19, + 177,54,32,3,178,52,161,56,143,88,191,209,207,121,100,197,19,253,0,0,19,184,20,16,97,125,73,73,0,0,0,0, + 73,69,78,68,174,66,96,130, +}; +const unsigned char LowBattery[144] = { + 
137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,66,73,68,65,84,40,145, + 99,96,32,3,48,98,19,252,255,255,255,127,184,2,70,70,12,53,76,184,52,192,20,35,27,128,19,160,43,194,166,9, + 195,38,98,192,80,212,68,40,196,112,218,132,79,35,11,54,65,108,17,74,80,19,81,17,74,42,0,0,182,121,32,4, + 246,65,53,182,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char Orientation0[148] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,70,73,68,65,84,40,145, + 99,96,32,3,48,98,19,252,255,255,255,127,184,2,70,70,12,53,24,2,200,26,112,105,100,34,199,121,100,105,162,220, + 79,216,252,131,77,51,156,129,79,3,186,70,242,3,2,102,11,54,247,35,139,195,212,81,39,244,136,137,92,178,0,0, + 81,131,32,17,104,219,146,91,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char Orientation1[150] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,72,73,68,65,84,40,145, + 173,145,49,14,0,32,8,196,172,241,255,95,214,197,1,1,3,81,186,114,69,192,214,126,152,155,76,22,41,29,5,192, + 198,149,228,201,55,113,120,175,68,77,136,36,51,26,208,179,97,73,189,164,15,113,236,36,201,92,239,233,159,70,52,82, + 25,11,152,51,36,20,92,248,53,240,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char PoweredOn[155] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,77,73,68,65,84,40,145, + 165,146,73,14,0,32,8,3,173,255,255,115,189,40,33,202,98,96,142,77,109,65,29,163,0,44,145,36,197,0,60,158, + 233,29,56,102,29,96,194,77,166,61,77,63,200,188,119,26,0,88,90,216,20,237,210,27,47,75,215,87,223,111,242,218, + 
238,7,46,253,136,18,11,199,234,51,248,48,4,102,223,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char Sleeping[154] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,76,73,68,65,84,40,145, + 189,145,193,10,0,32,8,67,155,244,255,191,188,46,17,166,44,242,80,59,137,250,112,195,214,94,136,36,75,139,30,72, + 48,167,98,173,0,40,11,0,160,156,172,65,5,220,154,42,116,132,187,178,112,117,41,133,61,100,50,53,56,253,199,252, + 2,166,180,225,223,26,147,225,59,237,75,250,15,99,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char VolumeA0[129] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,51,73,68,65,84,40,145, + 99,96,24,80,240,255,255,255,255,248,228,153,72,213,128,161,137,24,13,40,154,136,213,128,97,19,125,52,49,50,50,50, + 146,101,19,41,26,49,0,41,129,66,91,0,0,145,89,20,3,20,239,9,131,0,0,0,0,73,69,78,68,174,66,96, + 130, +}; +const unsigned char VolumeA1[142] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,64,73,68,65,84,40,145, + 197,142,49,14,0,32,8,196,60,254,255,231,186,56,9,18,209,24,59,222,81,160,181,175,0,100,189,85,5,39,205,194, + 106,129,101,3,146,20,229,238,189,29,238,36,73,154,75,128,40,119,48,40,159,63,146,158,208,1,54,254,39,241,163,168, + 22,227,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char VolumeA2[146] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,68,73,68,65,84,40,145, + 197,144,193,10,0,32,8,67,157,255,255,207,235,20,132,77,49,34,218,201,185,61,4,205,190,138,36,213,92,2,177,24, + 
189,87,225,244,0,176,102,158,1,170,44,47,117,117,7,1,64,12,73,82,237,55,117,190,151,130,71,192,51,13,184,33, + 63,219,60,97,21,237,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char VolumeB0[125] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,47,73,68,65,84,40,145, + 99,96,24,5,67,1,48,194,24,255,255,255,255,79,80,49,35,35,35,138,38,66,26,97,26,48,52,225,210,136,172,1, + 171,38,116,141,232,26,200,6,0,148,173,16,5,30,219,32,189,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char VolumeB1[137] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,59,73,68,65,84,40,145, + 99,96,24,212,128,17,155,224,255,255,255,255,195,21,48,50,98,85,131,83,3,54,254,16,0,112,79,18,227,118,88,160, + 160,132,12,62,141,200,161,136,17,156,216,52,162,7,59,117,226,137,24,0,0,139,197,27,254,82,191,68,170,0,0,0, + 0,73,69,78,68,174,66,96,130, +}; +const unsigned char VolumeB2[148] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,70,73,68,65,84,40,145, + 99,96,32,3,48,162,11,252,255,255,255,63,134,34,70,70,12,117,120,53,224,19,199,45,65,164,60,249,128,100,147,97, + 26,168,234,36,120,80,18,99,42,44,232,81,194,31,159,70,228,184,34,43,114,177,198,52,178,70,188,169,129,20,0,0, + 40,67,47,234,31,247,182,170,0,0,0,0,73,69,78,68,174,66,96,130, +}; +const unsigned char VolumeB3[153] = { + 137,80,78,71,13,10,26,10,0,0,0,13,73,72,68,82,0,0,0,13,0,0,0,13,8,6,0,0,0,114,235,228, + 124,0,0,0,9,112,72,89,115,0,0,11,19,0,0,11,19,1,0,154,156,24,0,0,0,75,73,68,65,84,40,145, + 157,146,73,14,0,48,8,2,245,255,143,166,167,54,90,13,46,28,149,105,26,80,100,33,253,7,0,16,76,170,193,71, + 
1,54,119,11,24,149,96,182,164,230,246,151,42,96,12,50,189,40,59,175,222,232,93,254,12,180,93,173,202,77,155,182, + 32,189,134,137,14,178,192,67,214,248,1,222,76,0,0,0,0,73,69,78,68,174,66,96,130, +}; +} +} +} diff --git a/waterbox/ares64/ares/ares/ares/resource/resource.hpp b/waterbox/ares64/ares/ares/ares/resource/resource.hpp new file mode 100644 index 0000000000..784bbaf5bc --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/resource/resource.hpp @@ -0,0 +1,34 @@ +namespace Resource { +namespace Ares { +extern const unsigned char Icon1x[6562]; +extern const unsigned char Icon2x[9662]; +extern const unsigned char Logo1x[11533]; +extern const unsigned char Logo2x[18991]; +} +namespace Sprite { +namespace SuperFamicom { +extern const unsigned char CrosshairBlue[332]; +extern const unsigned char CrosshairGreen[329]; +extern const unsigned char CrosshairRed[342]; +} +namespace WonderSwan { +extern const unsigned char Auxiliary0[117]; +extern const unsigned char Auxiliary1[134]; +extern const unsigned char Auxiliary2[136]; +extern const unsigned char Headphones[167]; +extern const unsigned char Initialized[136]; +extern const unsigned char LowBattery[144]; +extern const unsigned char Orientation0[148]; +extern const unsigned char Orientation1[150]; +extern const unsigned char PoweredOn[155]; +extern const unsigned char Sleeping[154]; +extern const unsigned char VolumeA0[129]; +extern const unsigned char VolumeA1[142]; +extern const unsigned char VolumeA2[146]; +extern const unsigned char VolumeB0[125]; +extern const unsigned char VolumeB1[137]; +extern const unsigned char VolumeB2[148]; +extern const unsigned char VolumeB3[153]; +} +} +} diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-blue.png b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-blue.png new file mode 100644 index 0000000000..56687d6221 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-blue.png differ diff --git 
a/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-green.png b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-green.png new file mode 100644 index 0000000000..d0441b529b Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-green.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-red.png b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-red.png new file mode 100644 index 0000000000..791e4af1a1 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/sfc/crosshair-red.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-0.png new file mode 100644 index 0000000000..8b8e5db6c0 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-0.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-1.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-1.png new file mode 100644 index 0000000000..1cee638d70 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-1.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-2.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-2.png new file mode 100644 index 0000000000..31fc3837ff Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/auxiliary-2.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/headphones.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/headphones.png new file mode 100644 index 0000000000..6330f49d2a Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/headphones.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/initialized.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/initialized.png new file mode 100644 index 
0000000000..f28602a0b2 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/initialized.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/low-battery.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/low-battery.png new file mode 100644 index 0000000000..a82ec5c2ab Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/low-battery.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-0.png new file mode 100644 index 0000000000..844ed5e4ec Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-0.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-1.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-1.png new file mode 100644 index 0000000000..11c3a3b555 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/orientation-1.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/powered-on.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/powered-on.png new file mode 100644 index 0000000000..afc54a349f Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/powered-on.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/sleeping.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/sleeping.png new file mode 100644 index 0000000000..ec4d088c45 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/sleeping.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a0.png new file mode 100644 index 0000000000..3071e062a4 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a0.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a1.png 
b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a1.png new file mode 100644 index 0000000000..37da86b24d Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a1.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a2.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a2.png new file mode 100644 index 0000000000..a48bfcb5ab Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-a2.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b0.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b0.png new file mode 100644 index 0000000000..f6e113656c Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b0.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b1.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b1.png new file mode 100644 index 0000000000..5261a8a048 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b1.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b2.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b2.png new file mode 100644 index 0000000000..e5981c05e2 Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b2.png differ diff --git a/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b3.png b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b3.png new file mode 100644 index 0000000000..ed2b406cac Binary files /dev/null and b/waterbox/ares64/ares/ares/ares/resource/sprite/ws/volume-b3.png differ diff --git a/waterbox/ares64/ares/ares/ares/scheduler/scheduler.cpp b/waterbox/ares64/ares/ares/ares/scheduler/scheduler.cpp new file mode 100644 index 0000000000..5a0e48c53c --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/scheduler/scheduler.cpp @@ -0,0 +1,137 @@ +inline auto Scheduler::reset() -> void { + 
_threads.reset(); +} + +inline auto Scheduler::threads() const -> u32 { + return _threads.size(); +} + +inline auto Scheduler::thread(u32 uniqueID) const -> maybe { + for(auto& thread : _threads) { + if(thread->_uniqueID == uniqueID) return *thread; + } + return {}; +} + +//if threads A and B both have a clock value of 0, it is ambiguous which should run first. +//to resolve this, a uniqueID is assigned to each thread when appended to the scheduler. +//the first unused ID is selected, to avoid the uniqueID growing in an unbounded fashion. +inline auto Scheduler::uniqueID() const -> u32 { + u32 uniqueID = 0; + while(thread(uniqueID)) uniqueID++; + return uniqueID; +} + +//find the clock time of the furthest behind thread. +inline auto Scheduler::minimum() const -> u64 { + u64 minimum = (u64)-1; + for(auto& thread : _threads) { + minimum = min(minimum, thread->_clock - thread->_uniqueID); + } + return minimum; +} + +//find the clock time of the furthest ahead thread. +inline auto Scheduler::maximum() const -> u64 { + u64 maximum = 0; + for(auto& thread : _threads) { + maximum = max(maximum, thread->_clock - thread->_uniqueID); + } + return maximum; +} + +inline auto Scheduler::append(Thread& thread) -> bool { + if(_threads.find(&thread)) return false; + thread._uniqueID = uniqueID(); + thread._clock = maximum() + thread._uniqueID; + _threads.append(&thread); + return true; +} + +inline auto Scheduler::remove(Thread& thread) -> void { + _threads.removeByValue(&thread); +} + +//power cycle and soft reset events: assigns the primary thread and resets all thread clocks. 
+inline auto Scheduler::power(Thread& thread) -> void { + _primary = _resume = thread.handle(); + for(auto& thread : _threads) { + thread->_clock = thread->_uniqueID; + } +} + +inline auto Scheduler::enter(Mode mode) -> Event { + if(mode == Mode::Run) { + _mode = mode; + _host = co_active(); + co_switch(_resume); + platform->event(_event); + return _event; + } + + if(mode == Mode::Synchronize) { + //run all threads to safe points, starting with the primary thread. + for(auto& thread : _threads) { + if(thread->handle() == _primary) { + _mode = Mode::SynchronizePrimary; + _host = co_active(); + do { + co_switch(_resume); + platform->event(_event); + } while(_event != Event::Synchronize); + } + } + for(auto& thread : _threads) { + if(thread->handle() != _primary) { + _mode = Mode::SynchronizeAuxiliary; + _host = co_active(); + _resume = thread->handle(); + do { + co_switch(_resume); + platform->event(_event); + } while(_event != Event::Synchronize); + } + } + return Event::Synchronize; + } + + return Event::None; +} + +inline auto Scheduler::exit(Event event) -> void { + //subtract the minimum time from all threads to prevent clock overflow. + auto reduce = minimum(); + for(auto& thread : _threads) { + thread->_clock -= reduce; + } + + //return to the thread that entered the scheduler originally. + _event = event; + _resume = co_active(); + co_switch(_host); +} + +//used to prevent auxiliary threads from blocking during synchronization. +//for instance, a secondary CPU waiting on an interrupt from the primary CPU. +//as other threads are not run during synchronization, this would otherwise cause a deadlock. +inline auto Scheduler::synchronizing() const -> bool { + return _mode == Mode::SynchronizeAuxiliary; +} + +//marks a safe point (typically the beginning of the entry point) of a thread. +//the scheduler may exit at these points for the purpose of synchronization. 
+inline auto Scheduler::synchronize() -> void { + if(co_active() == _primary) { + if(_mode == Mode::SynchronizePrimary) return exit(Event::Synchronize); + } else { + if(_mode == Mode::SynchronizeAuxiliary) return exit(Event::Synchronize); + } +} + +inline auto Scheduler::getSynchronize() -> bool { + return _synchronize; +} + +inline auto Scheduler::setSynchronize(bool synchronize) -> void { + _synchronize = synchronize; +} diff --git a/waterbox/ares64/ares/ares/ares/scheduler/scheduler.hpp b/waterbox/ares64/ares/ares/ares/scheduler/scheduler.hpp new file mode 100644 index 0000000000..51dd625dfc --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/scheduler/scheduler.hpp @@ -0,0 +1,47 @@ +struct Thread; + +struct Scheduler { + enum class Mode : u32 { + Run, + Synchronize, + SynchronizePrimary, + SynchronizeAuxiliary, + }; + + Scheduler() = default; + Scheduler(const Scheduler&) = delete; + auto operator=(const Scheduler&) = delete; + + auto reset() -> void; + auto threads() const -> u32; + auto thread(u32 threadID) const -> maybe; + auto uniqueID() const -> u32; + auto minimum() const -> u64; + auto maximum() const -> u64; + + auto append(Thread& thread) -> bool; + auto remove(Thread& thread) -> void; + + auto power(Thread& thread) -> void; + auto enter(Mode mode = Mode::Run) -> Event; + auto exit(Event event) -> void; + + auto synchronizing() const -> bool; + auto synchronize() -> void; + + auto getSynchronize() -> bool; + auto setSynchronize(bool) -> void; + +private: + cothread_t _host = nullptr; //program thread (used to exit scheduler) + cothread_t _resume = nullptr; //resume thread (used to enter scheduler) + cothread_t _primary = nullptr; //primary thread (used to synchronize components) + Mode _mode = Mode::Run; + Event _event = Event::Step; + vector _threads; + bool _synchronize = false; + + friend class Thread; +}; + +extern Scheduler scheduler; diff --git a/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp 
b/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp new file mode 100644 index 0000000000..01abd89d82 --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp @@ -0,0 +1,119 @@ +inline auto Thread::EntryPoints() -> vector& { + static vector entryPoints; + return entryPoints; +} + +inline auto Thread::Enter() -> void { + for(u32 index : range(EntryPoints().size())) { + if(co_active() == EntryPoints()[index].handle) { + auto entryPoint = EntryPoints()[index].entryPoint; + EntryPoints().remove(index); + while(true) { + scheduler.synchronize(); + entryPoint(); + } + } + } + struct ThreadNotFound{}; + throw ThreadNotFound{}; +} + +inline Thread::~Thread() { + destroy(); +} + +inline auto Thread::active() const -> bool { return co_active() == _handle; } +inline auto Thread::handle() const -> cothread_t { return _handle; } +inline auto Thread::frequency() const -> u64 { return _frequency; } +inline auto Thread::scalar() const -> u64 { return _scalar; } +inline auto Thread::clock() const -> u64 { return _clock; } + +inline auto Thread::setHandle(cothread_t handle) -> void { + _handle = handle; +} + +inline auto Thread::setFrequency(double frequency) -> void { + _frequency = frequency + 0.5; + _scalar = Second / _frequency; +} + +inline auto Thread::setScalar(u64 scalar) -> void { + _scalar = scalar; +} + +inline auto Thread::setClock(u64 clock) -> void { + _clock = clock; +} + +inline auto Thread::create(double frequency, function entryPoint) -> void { + if(!_handle) { + _handle = co_create(Thread::Size, &Thread::Enter); + } else { + co_derive(_handle, Thread::Size, &Thread::Enter); + } + EntryPoints().append({_handle, entryPoint}); + setFrequency(frequency); + setClock(0); + scheduler.append(*this); +} + +//returns a thread to its entry point (eg for a reset), without resetting the clock value +inline auto Thread::restart(function entryPoint) -> void { + co_derive(_handle, Thread::Size, &Thread::Enter); + EntryPoints().append({_handle, entryPoint}); +} + 
+inline auto Thread::destroy() -> void { + scheduler.remove(*this); + if(_handle) co_delete(_handle); + _handle = nullptr; +} + +inline auto Thread::step(u32 clocks) -> void { + _clock += _scalar * clocks; +} + +//ensure all threads are caught up to the current thread before proceeding. +inline auto Thread::synchronize() -> void { + //note: this will call Thread::synchronize(*this) at some point, but this is safe: + //the comparison will always fail as the current thread can never be behind itself. + for(auto thread : scheduler._threads) synchronize(*thread); +} + +//ensure the specified thread(s) are caught up the current thread before proceeding. +template +inline auto Thread::synchronize(Thread& thread, P&&... p) -> void { + //switching to another thread does not guarantee it will catch up before switching back. + while(thread.clock() < clock()) { + //disable synchronization for auxiliary threads during scheduler synchronization. + //synchronization can begin inside of this while loop. + if(scheduler.synchronizing()) break; + co_switch(thread.handle()); + } + //convenience: allow synchronizing multiple threads with one function call. + if constexpr(sizeof...(p) > 0) synchronize(forward

(p)...); +} + +inline auto Thread::serialize(serializer& s) -> void { + s(_frequency); + s(_scalar); + s(_clock); + + if(!scheduler._synchronize) { + static u8 stack[Thread::Size]; + bool resume = co_active() == _handle; + + if(s.reading()) { + s(stack); + s(resume); + memory::copy(_handle, stack, Thread::Size); + if(resume) scheduler._resume = _handle; + } + + if(s.writing()) { + memory::copy(stack, _handle, Thread::Size); + s(stack); + s(resume); + } + } +} diff --git a/waterbox/ares64/ares/ares/ares/scheduler/thread.hpp b/waterbox/ares64/ares/ares/ares/scheduler/thread.hpp new file mode 100644 index 0000000000..f40941f6ec --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/scheduler/thread.hpp @@ -0,0 +1,50 @@ +struct Scheduler; + +struct Thread { + enum : u64 { Second = (u64)-1 >> 1 }; + enum : u64 { Size = 16_KiB * sizeof(void*) }; + + struct EntryPoint { + cothread_t handle = nullptr; + function entryPoint; + }; + + static auto EntryPoints() -> vector&; + static auto Enter() -> void; + + Thread() = default; + Thread(const Thread&) = delete; + auto operator=(const Thread&) = delete; + virtual ~Thread(); + + explicit operator bool() const { return _handle; } + auto active() const -> bool; + auto handle() const -> cothread_t; + auto frequency() const -> u64; + auto scalar() const -> u64; + auto clock() const -> u64; + + auto setHandle(cothread_t handle) -> void; + auto setFrequency(double frequency) -> void; + auto setScalar(u64 scalar) -> void; + auto setClock(u64 clock) -> void; + + auto create(double frequency, function entryPoint) -> void; + auto restart(function entryPoint) -> void; + auto destroy() -> void; + + auto step(u32 clocks) -> void; + auto synchronize() -> void; + template auto synchronize(Thread&, P&&...) 
-> void; + + auto serialize(serializer& s) -> void; + +protected: + cothread_t _handle = nullptr; + u32 _uniqueID = 0; + u64 _frequency = 0; + u64 _scalar = 0; + u64 _clock = 0; + + friend class Scheduler; +}; diff --git a/waterbox/ares64/ares/ares/ares/types.hpp b/waterbox/ares64/ares/ares/ares/types.hpp new file mode 100644 index 0000000000..8a0880929f --- /dev/null +++ b/waterbox/ares64/ares/ares/ares/types.hpp @@ -0,0 +1,134 @@ +using b1 = nall::Boolean; + +using i1 = nall::Integer< 1>; using s1 = nall::IntegerPrimitive< 1>; +using i2 = nall::Integer< 2>; using s2 = nall::IntegerPrimitive< 2>; +using i3 = nall::Integer< 3>; using s3 = nall::IntegerPrimitive< 3>; +using i4 = nall::Integer< 4>; using s4 = nall::IntegerPrimitive< 4>; +using i5 = nall::Integer< 5>; using s5 = nall::IntegerPrimitive< 5>; +using i6 = nall::Integer< 6>; using s6 = nall::IntegerPrimitive< 6>; +using i7 = nall::Integer< 7>; using s7 = nall::IntegerPrimitive< 7>; +using i8 = nall::Integer< 8>; +using i9 = nall::Integer< 9>; using s9 = nall::IntegerPrimitive< 9>; +using i10 = nall::Integer<10>; using s10 = nall::IntegerPrimitive<10>; +using i11 = nall::Integer<11>; using s11 = nall::IntegerPrimitive<11>; +using i12 = nall::Integer<12>; using s12 = nall::IntegerPrimitive<12>; +using i13 = nall::Integer<13>; using s13 = nall::IntegerPrimitive<13>; +using i14 = nall::Integer<14>; using s14 = nall::IntegerPrimitive<14>; +using i15 = nall::Integer<15>; using s15 = nall::IntegerPrimitive<15>; +using i16 = nall::Integer<16>; +using i17 = nall::Integer<17>; using s17 = nall::IntegerPrimitive<17>; +using i18 = nall::Integer<18>; using s18 = nall::IntegerPrimitive<18>; +using i19 = nall::Integer<19>; using s19 = nall::IntegerPrimitive<19>; +using i20 = nall::Integer<20>; using s20 = nall::IntegerPrimitive<20>; +using i21 = nall::Integer<21>; using s21 = nall::IntegerPrimitive<21>; +using i22 = nall::Integer<22>; using s22 = nall::IntegerPrimitive<22>; +using i23 = nall::Integer<23>; using s23 = 
nall::IntegerPrimitive<23>; +using i24 = nall::Integer<24>; using s24 = nall::IntegerPrimitive<24>; +using i25 = nall::Integer<25>; using s25 = nall::IntegerPrimitive<25>; +using i26 = nall::Integer<26>; using s26 = nall::IntegerPrimitive<26>; +using i27 = nall::Integer<27>; using s27 = nall::IntegerPrimitive<27>; +using i28 = nall::Integer<28>; using s28 = nall::IntegerPrimitive<28>; +using i29 = nall::Integer<29>; using s29 = nall::IntegerPrimitive<29>; +using i30 = nall::Integer<30>; using s30 = nall::IntegerPrimitive<30>; +using i31 = nall::Integer<31>; using s31 = nall::IntegerPrimitive<31>; +using i32 = nall::Integer<32>; +using i33 = nall::Integer<33>; using s33 = nall::IntegerPrimitive<33>; +using i34 = nall::Integer<34>; using s34 = nall::IntegerPrimitive<34>; +using i35 = nall::Integer<35>; using s35 = nall::IntegerPrimitive<35>; +using i36 = nall::Integer<36>; using s36 = nall::IntegerPrimitive<36>; +using i37 = nall::Integer<37>; using s37 = nall::IntegerPrimitive<37>; +using i38 = nall::Integer<38>; using s38 = nall::IntegerPrimitive<38>; +using i39 = nall::Integer<39>; using s39 = nall::IntegerPrimitive<39>; +using i40 = nall::Integer<40>; using s40 = nall::IntegerPrimitive<40>; +using i41 = nall::Integer<41>; using s41 = nall::IntegerPrimitive<41>; +using i42 = nall::Integer<42>; using s42 = nall::IntegerPrimitive<42>; +using i43 = nall::Integer<43>; using s43 = nall::IntegerPrimitive<43>; +using i44 = nall::Integer<44>; using s44 = nall::IntegerPrimitive<44>; +using i45 = nall::Integer<45>; using s45 = nall::IntegerPrimitive<45>; +using i46 = nall::Integer<46>; using s46 = nall::IntegerPrimitive<46>; +using i47 = nall::Integer<47>; using s47 = nall::IntegerPrimitive<47>; +using i48 = nall::Integer<48>; using s48 = nall::IntegerPrimitive<48>; +using i49 = nall::Integer<49>; using s49 = nall::IntegerPrimitive<49>; +using i50 = nall::Integer<50>; using s50 = nall::IntegerPrimitive<50>; +using i51 = nall::Integer<51>; using s51 = 
nall::IntegerPrimitive<51>; +using i52 = nall::Integer<52>; using s52 = nall::IntegerPrimitive<52>; +using i53 = nall::Integer<53>; using s53 = nall::IntegerPrimitive<53>; +using i54 = nall::Integer<54>; using s54 = nall::IntegerPrimitive<54>; +using i55 = nall::Integer<55>; using s55 = nall::IntegerPrimitive<55>; +using i56 = nall::Integer<56>; using s56 = nall::IntegerPrimitive<56>; +using i57 = nall::Integer<57>; using s57 = nall::IntegerPrimitive<57>; +using i58 = nall::Integer<58>; using s58 = nall::IntegerPrimitive<58>; +using i59 = nall::Integer<59>; using s59 = nall::IntegerPrimitive<59>; +using i60 = nall::Integer<60>; using s60 = nall::IntegerPrimitive<60>; +using i61 = nall::Integer<61>; using s61 = nall::IntegerPrimitive<61>; +using i62 = nall::Integer<62>; using s62 = nall::IntegerPrimitive<62>; +using i63 = nall::Integer<63>; using s63 = nall::IntegerPrimitive<63>; +using i64 = nall::Integer<64>; + +using n1 = nall::Natural< 1>; using u1 = nall::NaturalPrimitive< 1>; +using n2 = nall::Natural< 2>; using u2 = nall::NaturalPrimitive< 2>; +using n3 = nall::Natural< 3>; using u3 = nall::NaturalPrimitive< 3>; +using n4 = nall::Natural< 4>; using u4 = nall::NaturalPrimitive< 4>; +using n5 = nall::Natural< 5>; using u5 = nall::NaturalPrimitive< 5>; +using n6 = nall::Natural< 6>; using u6 = nall::NaturalPrimitive< 6>; +using n7 = nall::Natural< 7>; using u7 = nall::NaturalPrimitive< 7>; +using n8 = nall::Natural< 8>; +using n9 = nall::Natural< 9>; using u9 = nall::NaturalPrimitive< 9>; +using n10 = nall::Natural<10>; using u10 = nall::NaturalPrimitive<10>; +using n11 = nall::Natural<11>; using u11 = nall::NaturalPrimitive<11>; +using n12 = nall::Natural<12>; using u12 = nall::NaturalPrimitive<12>; +using n13 = nall::Natural<13>; using u13 = nall::NaturalPrimitive<13>; +using n14 = nall::Natural<14>; using u14 = nall::NaturalPrimitive<14>; +using n15 = nall::Natural<15>; using u15 = nall::NaturalPrimitive<15>; +using n16 = nall::Natural<16>; +using n17 = 
nall::Natural<17>; using u17 = nall::NaturalPrimitive<17>; +using n18 = nall::Natural<18>; using u18 = nall::NaturalPrimitive<18>; +using n19 = nall::Natural<19>; using u19 = nall::NaturalPrimitive<19>; +using n20 = nall::Natural<20>; using u20 = nall::NaturalPrimitive<20>; +using n21 = nall::Natural<21>; using u21 = nall::NaturalPrimitive<21>; +using n22 = nall::Natural<22>; using u22 = nall::NaturalPrimitive<22>; +using n23 = nall::Natural<23>; using u23 = nall::NaturalPrimitive<23>; +using n24 = nall::Natural<24>; using u24 = nall::NaturalPrimitive<24>; +using n25 = nall::Natural<25>; using u25 = nall::NaturalPrimitive<25>; +using n26 = nall::Natural<26>; using u26 = nall::NaturalPrimitive<26>; +using n27 = nall::Natural<27>; using u27 = nall::NaturalPrimitive<27>; +using n28 = nall::Natural<28>; using u28 = nall::NaturalPrimitive<28>; +using n29 = nall::Natural<29>; using u29 = nall::NaturalPrimitive<29>; +using n30 = nall::Natural<30>; using u30 = nall::NaturalPrimitive<30>; +using n31 = nall::Natural<31>; using u31 = nall::NaturalPrimitive<31>; +using n32 = nall::Natural<32>; +using n33 = nall::Natural<33>; using u33 = nall::NaturalPrimitive<33>; +using n34 = nall::Natural<34>; using u34 = nall::NaturalPrimitive<34>; +using n35 = nall::Natural<35>; using u35 = nall::NaturalPrimitive<35>; +using n36 = nall::Natural<36>; using u36 = nall::NaturalPrimitive<36>; +using n37 = nall::Natural<37>; using u37 = nall::NaturalPrimitive<37>; +using n38 = nall::Natural<38>; using u38 = nall::NaturalPrimitive<38>; +using n39 = nall::Natural<39>; using u39 = nall::NaturalPrimitive<39>; +using n40 = nall::Natural<40>; using u40 = nall::NaturalPrimitive<40>; +using n41 = nall::Natural<41>; using u41 = nall::NaturalPrimitive<41>; +using n42 = nall::Natural<42>; using u42 = nall::NaturalPrimitive<42>; +using n43 = nall::Natural<43>; using u43 = nall::NaturalPrimitive<43>; +using n44 = nall::Natural<44>; using u44 = nall::NaturalPrimitive<44>; +using n45 = nall::Natural<45>; 
using u45 = nall::NaturalPrimitive<45>; +using n46 = nall::Natural<46>; using u46 = nall::NaturalPrimitive<46>; +using n47 = nall::Natural<47>; using u47 = nall::NaturalPrimitive<47>; +using n48 = nall::Natural<48>; using u48 = nall::NaturalPrimitive<48>; +using n49 = nall::Natural<49>; using u49 = nall::NaturalPrimitive<49>; +using n50 = nall::Natural<50>; using u50 = nall::NaturalPrimitive<50>; +using n51 = nall::Natural<51>; using u51 = nall::NaturalPrimitive<51>; +using n52 = nall::Natural<52>; using u52 = nall::NaturalPrimitive<52>; +using n53 = nall::Natural<53>; using u53 = nall::NaturalPrimitive<53>; +using n54 = nall::Natural<54>; using u54 = nall::NaturalPrimitive<54>; +using n55 = nall::Natural<55>; using u55 = nall::NaturalPrimitive<55>; +using n56 = nall::Natural<56>; using u56 = nall::NaturalPrimitive<56>; +using n57 = nall::Natural<57>; using u57 = nall::NaturalPrimitive<57>; +using n58 = nall::Natural<58>; using u58 = nall::NaturalPrimitive<58>; +using n59 = nall::Natural<59>; using u59 = nall::NaturalPrimitive<59>; +using n60 = nall::Natural<60>; using u60 = nall::NaturalPrimitive<60>; +using n61 = nall::Natural<61>; using u61 = nall::NaturalPrimitive<61>; +using n62 = nall::Natural<62>; using u62 = nall::NaturalPrimitive<62>; +using n63 = nall::Natural<63>; using u63 = nall::NaturalPrimitive<63>; +using n64 = nall::Natural<64>; + +using r32 = nall::Real<32>; +using r64 = nall::Real<64>; diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/disassembler.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/disassembler.cpp new file mode 100644 index 0000000000..5c84321eb3 --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/disassembler.cpp @@ -0,0 +1,89 @@ +auto SM5K::disassembleInstruction() -> string { + string s; + + n8 opcode = ROM[PC + 0 & sizeof(ROM) - 1]; + n8 operand = ROM[PC + 1 & sizeof(ROM) - 1]; + + string p2 = {"0x", hex(n2(opcode), 1L)}; + string p4 = {"0x", hex(n4(opcode), 1L)}; + string p5 = {"0x", 
hex(n5(opcode), 2L)}; + string p6 = {"0x", hex(n6(opcode), 2L)}; + string p8 = {"0x", hex(n8(operand), 2L)}; + string pc = {"0x", hex(n4(opcode) << 8 | operand, 3L)}; + + switch(opcode) { + case 0x00 ... 0x0f: s = {"adx ", p4}; break; + case 0x10 ... 0x1f: s = {"lax ", p4}; break; + case 0x20 ... 0x2f: s = {"lblx ", p4}; break; + case 0x30 ... 0x3f: s = {"lbmx ", p4}; break; + case 0x40 ... 0x43: s = {"rm ", p2}; break; + case 0x44 ... 0x47: s = {"sm ", p2}; break; + case 0x48 ... 0x4b: s = {"tm ", p2}; break; + case 0x4c ... 0x4f: s = {"tpb ", p2}; break; + case 0x50 ... 0x53: s = {"lda ", p2}; break; + case 0x54 ... 0x57: s = {"exc ", p2}; break; + case 0x58 ... 0x5b: s = {"exci ", p2}; break; + case 0x5c ... 0x5f: s = {"excd ", p2}; break; + case 0x60: s = {"rc " }; break; + case 0x61: s = {"sc " }; break; + case 0x62: s = {"id " }; break; + case 0x63: s = {"ie " }; break; + case 0x64: s = {"exax " }; break; + case 0x65: s = {"atx " }; break; + case 0x66: s = {"exbm " }; break; + case 0x67: s = {"exbl " }; break; + case 0x68: s = {"ex " }; break; + case 0x69: s = {"dta ", p8}; break; + case 0x6a: s = {"pat ", p8}; break; + case 0x6b: s = {"tabl " }; break; + case 0x6c: s = {"ta " }; break; + case 0x6d: s = {"tb " }; break; + case 0x6e: s = {"tc " }; break; + case 0x6f: s = {"tam " }; break; + case 0x70: s = {"inl " }; break; + case 0x71: s = {"outl " }; break; + case 0x72: s = {"anp " }; break; + case 0x73: s = {"orp " }; break; + case 0x74: s = {"in " }; break; + case 0x75: s = {"out " }; break; + case 0x76: s = {"stop " }; break; + case 0x77: s = {"halt " }; break; + case 0x78: s = {"incb " }; break; + case 0x79: s = {"coma " }; break; + case 0x7a: s = {"add " }; break; + case 0x7b: s = {"adc " }; break; + case 0x7c: s = {"decb " }; break; + case 0x7d: s = {"rtn " }; break; + case 0x7e: s = {"rtns " }; break; + case 0x7f: s = {"rtni " }; break; + case 0x80 ... 0xbf: s = {"tr ", p6}; break; + case 0xc0 ... 0xdf: s = {"trs ", p5}; break; + case 0xe0 ... 
0xef: s = {"tl ", pc}; break; + case 0xf0 ... 0xff: s = {"call ", pc}; break; + } + + while(s.size() < 10) s.append(" "); + return s; +} + +auto SM5K::disassembleContext() -> string { + string s; + s.append("A:", hex(A, 1L), " "); + s.append("X:", hex(X, 1L), " "); + s.append("B:", hex(B, 2L), " "); + s.append("C:", hex(C, 1L), " "); + s.append("P0:", hex(P0, 1L), " "); + s.append("P1:", hex(P1, 1L), " "); + s.append("P2:", hex(P2, 1L), " "); + s.append("P3:", hex(P3, 1L), " "); + s.append("P4:", hex(P4, 1L), " "); + s.append("P5:", hex(P5, 1L), " "); + s.append("SP:", hex(SP, 1L), " "); + s.append("SB:", hex(SB, 2L), " "); + s.append("IFA:", hex(IFA, 1L), " "); + s.append("IFB:", hex(IFB, 1L), " "); + s.append("IFT:", hex(IFT, 1L), " "); + s.append("IME:", hex(IME, 1L), " "); + s.append("SKIP:", hex(SKIP, 1L)); + return s; +} diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/instruction.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/instruction.cpp new file mode 100644 index 0000000000..dc38b50f3d --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/instruction.cpp @@ -0,0 +1,74 @@ +#define op(id, name, ...) \ + case id: \ + if(SKIP) { SKIP = 0; return; } \ + return instruction##name(__VA_ARGS__); \ + +auto SM5K::interrupt(n2 id) -> void { + SR[SP++] = PC; + PU = 2; + PL = id << 1; + HALT = 0; + STOP = 0; +} + +auto SM5K::instruction() -> void { + if(IFA & RE.bit(0) & IME) return interrupt(0); + if(IFB & RE.bit(1) & IME) return interrupt(1); + if(IFT & RE.bit(2) & IME) return interrupt(2); + if(HALT) return timerStep(); + if(STOP) return timerStep(); + + n8 opcode = fetch(); + switch(opcode) { + op(0x00 ... 0x0f, ADX, n4(opcode)); + op(0x10 ... 0x1f, LAX, n4(opcode)); + op(0x20 ... 0x2f, LBLX, n4(opcode)); + op(0x30 ... 0x3f, LBMX, n4(opcode)); + op(0x40 ... 0x43, RM, n2(opcode)); + op(0x44 ... 0x47, SM, n2(opcode)); + op(0x48 ... 0x4b, TM, n2(opcode)); + op(0x4c ... 0x4f, TPB, n2(opcode)); + op(0x50 ... 
0x53, LDA, n2(opcode)); + op(0x54 ... 0x57, EXC, n2(opcode)); + op(0x58 ... 0x5b, EXCI, n2(opcode)); + op(0x5c ... 0x5f, EXCD, n2(opcode)); + op(0x60, RC ); + op(0x61, SC ); + op(0x62, ID ); + op(0x63, IE ); + op(0x64, EXAX ); + op(0x65, ATX ); + op(0x66, EXBM ); + op(0x67, EXBL ); + op(0x68, EX ); + op(0x69, DTA, fetch()); + op(0x6a, PAT, fetch()); + op(0x6b, TABL ); + op(0x6c, TA ); + op(0x6d, TB ); + op(0x6e, TC ); + op(0x6f, TAM ); + op(0x70, INL ); + op(0x71, OUTL ); + op(0x72, ANP ); + op(0x73, ORP ); + op(0x74, IN ); + op(0x75, OUT ); + op(0x76, STOP ); + op(0x77, HALT ); + op(0x78, INCB ); + op(0x79, COMA ); + op(0x7a, ADD ); + op(0x7b, ADC ); + op(0x7c, DECB ); + op(0x7d, RTN ); + op(0x7e, RTNS ); + op(0x7f, RTNI ); + op(0x80 ... 0xbf, TR, n6(opcode)); + op(0xc0 ... 0xdf, TRS, n5(opcode)); + op(0xe0 ... 0xef, TL, n4(opcode) << 8 | fetch()); + op(0xf0 ... 0xff, CALL, n4(opcode) << 8 | fetch()); + } +} + +#undef op diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/instructions.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/instructions.cpp new file mode 100644 index 0000000000..9f43bf0631 --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/instructions.cpp @@ -0,0 +1,275 @@ +auto SM5K::instructionADC() -> void { + auto c = C; + C = A + RAM[B] + c >= 0x10; + A = A + RAM[B] + c; + if(C) SKIP = 1; +} + +auto SM5K::instructionADD() -> void { + A += RAM[B]; +} + +auto SM5K::instructionADX(n4 data) -> void { + if(A + data >= 0x10) SKIP = 1; + A += data; +} + +auto SM5K::instructionANP() -> void { + switch(BL) { + case 0x0: P0 &= A; break; + case 0x2: P2 &= A; break; + case 0x4: P4 &= A; break; + case 0x5: P5 &= A; break; + } +} + +auto SM5K::instructionATX() -> void { + X = A; +} + +auto SM5K::instructionCALL(n12 address) -> void { + SR[SP++] = PC; + PC = address; +} + +auto SM5K::instructionCOMA() -> void { + A = ~A; +} + +auto SM5K::instructionDECB() -> void { + if(!--BL) SKIP = 1; +} + +auto SM5K::instructionDR() -> 
void { + DIV = 0; +} + +auto SM5K::instructionDTA(n8 operand) -> void { + switch(operand) { + case 0x02: instructionTT(); break; + case 0x03: instructionDR(); break; + } + + static constexpr u8 rom[8] = {0xfc, 0xfc, 0xa5, 0x6c, 0x03, 0x8f, 0x1b, 0x9a}; + if(BM >= 4 && BM <= 7) { + SKIP = rom[BM << 1 | BL >> 3] >> n3(BL) & 1; + } +} + +auto SM5K::instructionEX() -> void { + swap(B, SB); +} + +auto SM5K::instructionEXAX() -> void { + swap(A, X); +} + +auto SM5K::instructionEXBL() -> void { + auto a = A; + A = BL; + BL = a; +} + +auto SM5K::instructionEXBM() -> void { + auto a = A; + A = BM; + BM = a; +} + +auto SM5K::instructionEXC(n2 data) -> void { + swap(A, RAM[B]); + BM ^= data; +} + +auto SM5K::instructionEXCD(n2 data) -> void { + swap(A, RAM[B]); + if(!--BL) SKIP = 1; + BM ^= data; +} + +auto SM5K::instructionEXCI(n2 data) -> void { + swap(A, RAM[B]); + if(!++BL) SKIP = 1; + BM ^= data; +} + +auto SM5K::instructionID() -> void { + IME = 0; +} + +auto SM5K::instructionIE() -> void { + IME = 1; +} + +auto SM5K::instructionHALT() -> void { + HALT = 1; +} + +auto SM5K::instructionIN() -> void { + switch(BL) { + case 0x1: A = P1; break; + case 0x2: A = P2; break; + case 0x3: A = P3; break; + case 0x4: A = P4; break; + case 0x5: A = P5; break; + case 0x8: A = R8 >> 0; X = R8 >> 4; break; + case 0x9: A = R9 >> 0; X = R9 >> 4; break; + case 0xa: A = RA >> 0; X = RA >> 4; break; + case 0xb: A = RB >> 0; X = RB >> 4; break; + case 0xc: A = RC; break; + case 0xe: A = RE; break; + case 0xf: A = RF; break; + } +} + +auto SM5K::instructionINCB() -> void { + if(!++BL) SKIP = 1; +} + +auto SM5K::instructionINL() -> void { + A = P1; +} + +auto SM5K::instructionLAX(n4 data) -> void { + A = data; +} + +auto SM5K::instructionLBLX(n4 data) -> void { + BL = data; +} + +auto SM5K::instructionLBMX(n4 data) -> void { + BM = data; +} + +auto SM5K::instructionLDA(n2 data) -> void { + A = RAM[B]; + BM ^= data; +} + +auto SM5K::instructionORP() -> void { + switch(BL) { + case 0x0: P0 |= A; 
break; + case 0x2: P2 |= A; break; + case 0x4: P4 |= A; break; + case 0x5: P5 |= A; break; + } +} + +auto SM5K::instructionOUT() -> void { + switch(BL) { + case 0x0: P0 = A; break; + case 0x2: P2 = A; break; + case 0x3: R3 = A; break; + case 0x4: P4 = A; break; + case 0x5: P5 = A; break; + case 0x8: R8 = A << 0 | X << 4; break; + case 0x9: R9 = A << 0 | X << 4; break; + case 0xa: RA = RB; break; + case 0xb: RB = A << 0 | X << 4; break; + case 0xc: RC = A; break; + case 0xe: RE = A; break; + case 0xf: RF = A; break; + } +} + +auto SM5K::instructionOUTL() -> void { + P0 = A; +} + +auto SM5K::instructionPAT(n8) -> void { + //should this actually modify the stack frame? + n6 pu = 4; + n6 pl = X << 4 | A; + n8 data = ROM[pu << 6 | pl]; + A = data >> 0; + X = data >> 4; +} + +auto SM5K::instructionRC() -> void { + C = 0; +} + +auto SM5K::instructionRM(n2 data) -> void { + RAM[B] &= ~(1 << data); +} + +auto SM5K::instructionRTN() -> void { + PC = SR[--SP]; +} + +auto SM5K::instructionRTNI() -> void { + PC = SR[--SP]; + IME = 1; +} + +auto SM5K::instructionRTNS() -> void { + PC = SR[--SP]; + SKIP = 1; +} + +auto SM5K::instructionSC() -> void { + C = 1; +} + +auto SM5K::instructionSM(n2 data) -> void { + RAM[B] |= 1 << data; +} + +auto SM5K::instructionSTOP() -> void { + STOP = 1; +} + +auto SM5K::instructionTA() -> void { + if(IFA) SKIP = 1; + IFA = 0; +} + +auto SM5K::instructionTABL() -> void { + if(A == BL) SKIP = 1; +} + +auto SM5K::instructionTAM() -> void { + if(A == RAM[B]) SKIP = 1; +} + +auto SM5K::instructionTB() -> void { + if(IFB) SKIP = 1; + IFB = 0; +} + +auto SM5K::instructionTC() -> void { + if(C == 1) SKIP = 1; +} + +auto SM5K::instructionTL(n12 address) -> void { + PC = address; +} + +auto SM5K::instructionTM(n2 data) -> void { + if(RAM[B] & 1 << data) SKIP = 1; +} + +auto SM5K::instructionTPB(n2 port) -> void { + switch(port) { + case 0: if(P0 == 1) SKIP = 1; break; + case 1: if(P1 == 1) SKIP = 1; break; + case 2: if(P2 == 1) SKIP = 1; break; + case 3: 
if(P3 == 1) SKIP = 1; break; + } +} + +auto SM5K::instructionTR(n6 address) -> void { + PL = address; +} + +auto SM5K::instructionTRS(n5 address) -> void { + SR[SP++] = PC; + PU = 1; + PL = address << 1; +} + +auto SM5K::instructionTT() -> void { + if(IFT) SKIP = 1; + IFT = 0; +} diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/memory.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/memory.cpp new file mode 100644 index 0000000000..10f10f0518 --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/memory.cpp @@ -0,0 +1,4 @@ +auto SM5K::fetch() -> n8 { + timerStep(); + return ROM[PC++]; +} diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/serialization.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/serialization.cpp new file mode 100644 index 0000000000..a58e39648e --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/serialization.cpp @@ -0,0 +1,33 @@ +auto SM5K::serialize(serializer& s) -> void { + s(RAM); + s(A); + s(X); + s(B); + s(C); + s(IFA); + s(IFB); + s(IFT); + s(IME); + s(P0); + s(P1); + s(P2); + s(P3); + s(P4); + s(P5); + s(PC); + s(SP); + s(SR); + s(R3); + s(R8); + s(R9); + s(RA); + s(RB); + s(RC); + s(RE); + s(RF); + s(SB); + s(SKIP); + s(STOP); + s(HALT); + s(DIV); +} diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.cpp new file mode 100644 index 0000000000..fa56229007 --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.cpp @@ -0,0 +1,54 @@ +#include +#include "sm5k.hpp" + +namespace ares { + +#include "timer.cpp" +#include "memory.cpp" +#include "instruction.cpp" +#include "instructions.cpp" +#include "serialization.cpp" +#include "disassembler.cpp" + +auto SM5K::setP1(n4 data) -> void { + if(P1.bit(0) && !data.bit(0)) IFA = 1; + if(P1.bit(1) && !data.bit(1)) IFB = 1; + if(P1.bit(1) && !data.bit(1) && RC == 3) timerIncrement(); + P1 = data; +} + +auto SM5K::power() -> 
void { + static const n8 Undefined = 0; + + PC = 0; + SP = 0; + SR[0] = 0; + SR[1] = 0; + SR[2] = 0; + SR[3] = 0; + A = Undefined; + X = Undefined; + P0 = 0; + P1 = 0; + P2 = 0; + P3 = 0; + P4 = 0; + P5 = 0; + IFA = 0; + IFB = 0; + IFT = 0; + IME = 0; + C = Undefined; + B = Undefined; + R3 = 0; + R8 = 0; + R9 = 0; + RA = 0; + RB = 0; + RC = 0; + RE = 0; + RF = 0; + SKIP = 0; +} + +} diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.hpp b/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.hpp new file mode 100644 index 0000000000..071be5f688 --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/sm5k.hpp @@ -0,0 +1,121 @@ +//Sharp SM5K + +#pragma once + +namespace ares { + +struct SM5K { + //sm5k.cpp + auto setP1(n4 data) -> void; + auto power() -> void; + + //timer.cpp + auto timerStep() -> void; + auto timerIncrement() -> void; + + //memory.cpp + auto fetch() -> n8; + + //instruction.cpp + auto interrupt(n2) -> void; + auto instruction() -> void; + + //instructions.cpp + auto instructionADC() -> void; + auto instructionADD() -> void; + auto instructionADX(n4 data) -> void; + auto instructionANP() -> void; + auto instructionATX() -> void; + auto instructionCALL(n12 address) -> void; + auto instructionCOMA() -> void; + auto instructionDECB() -> void; + auto instructionDR() -> void; + auto instructionDTA(n8) -> void; + auto instructionEX() -> void; + auto instructionEXAX() -> void; + auto instructionEXBL() -> void; + auto instructionEXBM() -> void; + auto instructionEXC(n2 data) -> void; + auto instructionEXCD(n2 data) -> void; + auto instructionEXCI(n2 data) -> void; + auto instructionHALT() -> void; + auto instructionID() -> void; + auto instructionIE() -> void; + auto instructionIN() -> void; + auto instructionINCB() -> void; + auto instructionINL() -> void; + auto instructionLAX(n4 data) -> void; + auto instructionLBLX(n4 data) -> void; + auto instructionLBMX(n4 data) -> void; + auto instructionLDA(n2 data) -> void; + auto 
instructionORP() -> void; + auto instructionOUT() -> void; + auto instructionOUTL() -> void; + auto instructionPAT(n8) -> void; + auto instructionRC() -> void; + auto instructionRM(n2 data) -> void; + auto instructionRTN() -> void; + auto instructionRTNI() -> void; + auto instructionRTNS() -> void; + auto instructionSC() -> void; + auto instructionSM(n2 data) -> void; + auto instructionSTOP() -> void; + auto instructionTA() -> void; + auto instructionTABL() -> void; + auto instructionTAM() -> void; + auto instructionTB() -> void; + auto instructionTC() -> void; + auto instructionTL(n12 address) -> void; + auto instructionTM(n2 data) -> void; + auto instructionTPB(n2 port) -> void; + auto instructionTR(n6 address) -> void; + auto instructionTRS(n5 address) -> void; + auto instructionTT() -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + n8 ROM[4096]; + n4 RAM[256]; + + n4 A; //accumulator + n4 X; //auxiliary accumulator + n8 B; //RAM bank register + BitRange<8,0,3> BL{&B}; + BitRange<8,4,7> BM{&B}; + n1 C; //carry flag + n1 IFA; //interrupt flag A + n1 IFB; //interrupt flag B + n1 IFT; //interrupt flag T + n1 IME; //interrupt mask enable + n4 P0; //CMOS inverting output port + n4 P1; //input port with pull-up resistor + n4 P2; //I/O port with pull-up resistor + n4 P3; //input port with pull-up resistor + n4 P4; //I/O port with pull-up resistor + n4 P5; //I/O port with pull-up resistor + n12 PC; //program counter + BitRange<12,0, 5> PL{&PC}; + BitRange<12,6,11> PU{&PC}; + n2 SP; //stack pointer + n12 SR[4]; //stack registers + n4 R3; //A/D pin selection register + n8 R8; //A/D conversion control and A/D data register + n8 R9; //A/D data register + n8 RA; //count register + n8 RB; //modulo register + n4 RC; //timer control + n4 RE; //interrupt mask flag + n4 RF; //P2 port direction register + n8 SB; //auxiliary RAM bank register + n1 SKIP; //skip next instruction flag + n1 STOP; //STOP instruction executed flag + n1 HALT; //HALT instruction 
executed flag + n16 DIV; //divider for timer + + //disassembler.cpp + auto disassembleInstruction() -> string; + auto disassembleContext() -> string; +}; + +} diff --git a/waterbox/ares64/ares/ares/component/processor/sm5k/timer.cpp b/waterbox/ares64/ares/ares/component/processor/sm5k/timer.cpp new file mode 100644 index 0000000000..52d30ebe2d --- /dev/null +++ b/waterbox/ares64/ares/ares/component/processor/sm5k/timer.cpp @@ -0,0 +1,15 @@ +auto SM5K::timerStep() -> void { + switch(RC & 3) { + case 0: timerIncrement(); return; + case 1: if(!n8 (++DIV)) timerIncrement(); return; + case 2: if(!n16(++DIV)) timerIncrement(); return; + case 3: return; //falling edge of P1.1 + } +} + +auto SM5K::timerIncrement() -> void { + if(!++RA) { + RA = RB; + IFT = 1; + } +} diff --git a/waterbox/ares64/ares/ares/n64/accuracy.hpp b/waterbox/ares64/ares/ares/n64/accuracy.hpp new file mode 100644 index 0000000000..85eda162f7 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/accuracy.hpp @@ -0,0 +1,25 @@ +struct Accuracy { + //enable all accuracy flags + static constexpr bool Reference = 1; + + struct CPU { + static constexpr bool Interpreter = 0 | Reference; + static constexpr bool Recompiler = !Interpreter; + + //exceptions when the CPU accesses unaligned memory addresses + static constexpr bool AddressErrors = 0 | Reference; + }; + + struct RSP { + static constexpr bool Interpreter = 0 | Reference; + static constexpr bool Recompiler = !Interpreter; + + //VU instructions + static constexpr bool SISD = 0 | Reference | !Architecture::amd64; + static constexpr bool SIMD = !SISD; + }; + + struct RDRAM { + static constexpr bool Broadcasting = 0; + }; +}; diff --git a/waterbox/ares64/ares/ares/n64/ai/ai.cpp b/waterbox/ares64/ares/ares/n64/ai/ai.cpp new file mode 100644 index 0000000000..f07bfb7cde --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ai/ai.cpp @@ -0,0 +1,66 @@ +#include + +namespace ares::Nintendo64 { + +AI ai; +#include "io.cpp" +#include "debugger.cpp" +#include 
"serialization.cpp" + +auto AI::load(Node::Object parent) -> void { + node = parent->append("AI"); + + stream = node->append("AI"); + stream->setChannels(2); + stream->setFrequency(44100.0); + + debugger.load(node); +} + +auto AI::unload() -> void { + debugger = {}; + node->remove(stream); + stream.reset(); + node.reset(); +} + +auto AI::main() -> void { + sample(); + step(dac.period); +} + +auto AI::sample() -> void { + if(io.dmaCount == 0) return stream->frame(0.0, 0.0); + + auto data = rdram.ram.read(io.dmaAddress[0]); + auto left = s16(data >> 16); + auto right = s16(data >> 0); + stream->frame(left / 32768.0, right / 32768.0); + + io.dmaAddress[0] += 4; + io.dmaLength [0] -= 4; + if(!io.dmaLength[0]) { + mi.raise(MI::IRQ::AI); + if(--io.dmaCount) { + io.dmaAddress[0] = io.dmaAddress[1]; + io.dmaLength [0] = io.dmaLength [1]; + } + } +} + +auto AI::step(u32 clocks) -> void { + Thread::clock += clocks; +} + +auto AI::power(bool reset) -> void { + Thread::reset(); + + fifo[0] = {}; + fifo[1] = {}; + io = {}; + dac.frequency = 44100; + dac.precision = 16; + dac.period = system.frequency() / dac.frequency; +} + +} diff --git a/waterbox/ares64/ares/ares/n64/ai/ai.hpp b/waterbox/ares64/ares/ares/n64/ai/ai.hpp new file mode 100644 index 0000000000..6cb8178ed8 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ai/ai.hpp @@ -0,0 +1,52 @@ +//Audio Interface + +struct AI : Thread, Memory::IO { + Node::Object node; + Node::Audio::Stream stream; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto io(bool mode, u32 address, u32 data) -> void; + + struct Tracer { + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //ai.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + auto main() -> void; + auto sample() -> void; + auto step(u32 clocks) -> void; + auto power(bool reset) -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto 
serialize(serializer&) -> void; + + struct FIFO { + n24 address; + } fifo[2]; + + struct IO { + n1 dmaEnable; + n24 dmaAddress[2]; + n18 dmaLength[2]; + n2 dmaCount; + n14 dacRate; + n4 bitRate; + } io; + + struct DAC { + u32 frequency; + u32 precision; + u32 period; + } dac; +}; + +extern AI ai; diff --git a/waterbox/ares64/ares/ares/n64/ai/debugger.cpp b/waterbox/ares64/ares/ares/n64/ai/debugger.cpp new file mode 100644 index 0000000000..cdf0920046 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ai/debugger.cpp @@ -0,0 +1,26 @@ +auto AI::Debugger::load(Node::Object parent) -> void { + tracer.io = parent->append("I/O", "AI"); +} + +auto AI::Debugger::io(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "AI_DRAM_ADDRESS", + "AI_LENGTH", + "AI_CONTROL", + "AI_STATUS", + "AI_DACRATE", + "AI_BITRATE", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "AI_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/ai/io.cpp b/waterbox/ares64/ares/ares/n64/ai/io.cpp new file mode 100644 index 0000000000..1e9f4845b2 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ai/io.cpp @@ -0,0 +1,69 @@ +auto AI::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data; + + if(address != 3) { + //AI_LENGTH (mirrored) + data.bit(0,17) = io.dmaLength[0]; + } + + if(address == 3) { + //AI_STATUS + data.bit( 0) = io.dmaCount > 1; + data.bit(20) = 1; + data.bit(24) = 1; + data.bit(30) = io.dmaCount > 0; + data.bit(31) = io.dmaCount > 1; + } + + debugger.io(Read, address, data); + return data; +} + +auto AI::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + if(address == 0) { + //AI_DRAM_ADDRESS + if(io.dmaCount < 2) { + 
io.dmaAddress[io.dmaCount] = data.bit(0,23) & ~7; + } + } + + if(address == 1) { + //AI_LENGTH + n18 length = data.bit(0,17) & ~7; + if(io.dmaCount < 2 && length) { + io.dmaLength[io.dmaCount] = length; + io.dmaCount++; + } + } + + if(address == 2) { + //AI_CONTROL + io.dmaEnable = data.bit(0); + } + + if(address == 3) { + //AI_STATUS + mi.lower(MI::IRQ::AI); + } + + if(address == 4) { + //AI_DACRATE + auto frequency = dac.frequency; + io.dacRate = data.bit(0,13); + dac.frequency = max(1, system.frequency() / 4 / (io.dacRate + 1)) * 1.037; + dac.period = system.frequency() / dac.frequency; + if(frequency != dac.frequency) stream->setFrequency(dac.frequency); + } + + if(address == 5) { + //AI_BITRATE + io.bitRate = data.bit(0,3); + dac.precision = io.bitRate + 1; + } + + debugger.io(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/ai/serialization.cpp b/waterbox/ares64/ares/ares/n64/ai/serialization.cpp new file mode 100644 index 0000000000..6f89e8d76b --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ai/serialization.cpp @@ -0,0 +1,17 @@ +auto AI::serialize(serializer& s) -> void { + Thread::serialize(s); + + s(fifo[0].address); + s(fifo[1].address); + + s(io.dmaEnable); + s(io.dmaAddress); + s(io.dmaLength); + s(io.dmaCount); + s(io.dacRate); + s(io.bitRate); + + s(dac.frequency); + s(dac.precision); + s(dac.period); +} diff --git a/waterbox/ares64/ares/ares/n64/cartridge/cartridge.cpp b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.cpp new file mode 100644 index 0000000000..de58f54f1f --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.cpp @@ -0,0 +1,90 @@ +#include + +namespace ares::Nintendo64 { + +Cartridge& cartridge = cartridgeSlot.cartridge; +#include "slot.cpp" +#include "flash.cpp" +#include "isviewer.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto Cartridge::allocate(Node::Port parent) -> Node::Peripheral { + return node = parent->append(string{system.name(), " Cartridge"}); +} + +auto 
Cartridge::connect() -> void { + if(!node->setPak(pak = platform->pak(node))) return; + + information = {}; + information.title = pak->attribute("title"); + information.region = pak->attribute("region"); + information.cic = pak->attribute("cic"); + + if(auto fp = pak->read("program.rom")) { + rom.allocate(fp->size()); + rom.load(fp); + } else { + rom.allocate(16); + } + + if(auto fp = pak->read("save.ram")) { + ram.allocate(fp->size()); + ram.load(fp); + } + + if(auto fp = pak->read("save.eeprom")) { + eeprom.allocate(fp->size()); + eeprom.load(fp); + } + + if(auto fp = pak->read("save.flash")) { + flash.allocate(fp->size()); + flash.load(fp); + } + + isviewer.ram.allocate(64_KiB); + + debugger.load(node); + + power(false); +} + +auto Cartridge::disconnect() -> void { + if(!node) return; + save(); + debugger.unload(node); + rom.reset(); + ram.reset(); + eeprom.reset(); + flash.reset(); + isviewer.ram.reset(); + pak.reset(); + node.reset(); +} + +auto Cartridge::save() -> void { + if(!node) return; + + if(auto fp = pak->write("save.ram")) { + ram.save(fp); + } + + if(auto fp = pak->write("save.eeprom")) { + eeprom.save(fp); + } + + if(auto fp = pak->write("save.flash")) { + flash.save(fp); + } +} + +auto Cartridge::power(bool reset) -> void { + flash.mode = Flash::Mode::Idle; + flash.status = 0; + flash.source = 0; + flash.offset = 0; + isviewer.ram.fill(0); +} + +} diff --git a/waterbox/ares64/ares/ares/n64/cartridge/cartridge.hpp b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.hpp new file mode 100644 index 0000000000..ec5ddc0636 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/cartridge.hpp @@ -0,0 +1,86 @@ +struct Cartridge { + Node::Peripheral node; + VFS::Pak pak; + Memory::Readable rom; + Memory::Writable ram; + Memory::Writable eeprom; + struct Flash : Memory::Writable { + template + auto read(u32 address) -> u64 { + if constexpr(Size == Byte) return readByte(address); + if constexpr(Size == Half) return readHalf(address); + if constexpr(Size 
== Word) return readWord(address); + if constexpr(Size == Dual) return readDual(address); + unreachable; + } + + template + auto write(u32 address, u64 data) -> void { + if constexpr(Size == Byte) return writeByte(address, data); + if constexpr(Size == Half) return writeHalf(address, data); + if constexpr(Size == Word) return writeWord(address, data); + if constexpr(Size == Dual) return writeDual(address, data); + } + + //flash.cpp + auto readByte(u32 adddres) -> u64; + auto readHalf(u32 address) -> u64; + auto readWord(u32 address) -> u64; + auto readDual(u32 address) -> u64; + auto writeByte(u32 address, u64 data) -> void; + auto writeHalf(u32 address, u64 data) -> void; + auto writeWord(u32 address, u64 data) -> void; + auto writeDual(u32 address, u64 data) -> void; + + enum class Mode : u32 { Idle, Erase, Write, Read, Status }; + Mode mode = Mode::Idle; + u64 status = 0; + u32 source = 0; + u32 offset = 0; + } flash; + struct ISViewer : Memory::IO { + Memory::Writable ram; //unserialized + + //isviewer.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + } isviewer; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto unload(Node::Object) -> void; + + struct Memory { + Node::Debugger::Memory rom; + Node::Debugger::Memory ram; + Node::Debugger::Memory eeprom; + Node::Debugger::Memory flash; + } memory; + } debugger; + + auto title() const -> string { return information.title; } + auto region() const -> string { return information.region; } + auto cic() const -> string { return information.cic; } + + //cartridge.cpp + auto allocate(Node::Port) -> Node::Peripheral; + auto connect() -> void; + auto disconnect() -> void; + + auto save() -> void; + auto power(bool reset) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + +private: + struct Information { + string title; + string region; + string cic; + } information; +}; + +#include "slot.hpp" +extern Cartridge& cartridge; diff 
--git a/waterbox/ares64/ares/ares/n64/cartridge/debugger.cpp b/waterbox/ares64/ares/ares/n64/cartridge/debugger.cpp new file mode 100644 index 0000000000..fc9c656aa9 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/debugger.cpp @@ -0,0 +1,54 @@ +auto Cartridge::Debugger::load(Node::Object parent) -> void { + memory.rom = parent->append("Cartridge ROM"); + memory.rom->setSize(cartridge.rom.size); + memory.rom->setRead([&](u32 address) -> u8 { + return cartridge.rom.read(address); + }); + memory.rom->setWrite([&](u32 address, u8 data) -> void { + return cartridge.rom.write(address, data); + }); + + if(cartridge.ram) { + memory.ram = parent->append("Cartridge SRAM"); + memory.ram->setSize(cartridge.ram.size); + memory.ram->setRead([&](u32 address) -> u8 { + return cartridge.ram.read(address); + }); + memory.ram->setWrite([&](u32 address, u8 data) -> void { + return cartridge.ram.write(address, data); + }); + } + + if(cartridge.eeprom) { + memory.eeprom = parent->append("Cartridge EEPROM"); + memory.eeprom->setSize(cartridge.eeprom.size); + memory.eeprom->setRead([&](u32 address) -> u8 { + return cartridge.eeprom.read(address); + }); + memory.eeprom->setWrite([&](u32 address, u8 data) -> void { + return cartridge.eeprom.write(address, data); + }); + } + + if(cartridge.flash) { + memory.flash = parent->append("Cartridge Flash"); + memory.flash->setSize(cartridge.flash.size); + memory.flash->setRead([&](u32 address) -> u8 { + return cartridge.flash.read(address); + }); + memory.flash->setWrite([&](u32 address, u8 data) -> void { + return cartridge.flash.write(address, data); + }); + } +} + +auto Cartridge::Debugger::unload(Node::Object parent) -> void { + parent->remove(memory.rom); + parent->remove(memory.ram); + parent->remove(memory.eeprom); + parent->remove(memory.flash); + memory.rom.reset(); + memory.ram.reset(); + memory.eeprom.reset(); + memory.flash.reset(); +} diff --git a/waterbox/ares64/ares/ares/n64/cartridge/flash.cpp 
b/waterbox/ares64/ares/ares/n64/cartridge/flash.cpp new file mode 100644 index 0000000000..cfcbe2bf72 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/flash.cpp @@ -0,0 +1,113 @@ +auto Cartridge::Flash::readByte(u32 address) -> u64 { + debug(unusual, "[Cartridge::Flash::readByte] mode=", (u32)mode); + return 0; +} + +auto Cartridge::Flash::readHalf(u32 address) -> u64 { + if(mode == Mode::Read) { + return Memory::Writable::read(address); + } + + if(mode == Mode::Status) { + switch(address & 6) { default: + case 0: return status >> 48; + case 2: return status >> 32; + case 4: return status >> 16; + case 6: return status >> 0; + } + } + + debug(unusual, "[Cartridge::Flash::readHalf] mode=", (u32)mode); + return 0; +} + +auto Cartridge::Flash::readWord(u32 address) -> u64 { + switch(address & 4) { default: + case 0: return status >> 32; + case 4: return status >> 0; + } +} + +auto Cartridge::Flash::readDual(u32 address) -> u64 { + debug(unusual, "[Cartridge::Flash::readDual] mode=", (u32)mode); + return 0; +} + +auto Cartridge::Flash::writeByte(u32 address, u64 data) -> void { + debug(unusual, "[Cartridge::Flash::writeByte] mode=", (u32)mode); + return; +} + +auto Cartridge::Flash::writeHalf(u32 address, u64 data) -> void { + if(mode == Mode::Write) { + //writes are deferred until the flash execute command is sent later + source = pi.io.dramAddress; + return; + } + + debug(unusual, "[Cartridge::Flash::writeHalf] mode=", (u32)mode); + return; +} + +auto Cartridge::Flash::writeWord(u32 address, u64 data) -> void { + address = (address & 0x7ff'ffff) >> 2; + + if(address == 0) { + debug(unusual, "[Cartridge::Flash::writeWord] ignoring write to status register"); + return; + } + + u8 command = data >> 24; + switch(command) { + case 0x4b: //set erase offset + offset = u16(data) * 128; + return; + + case 0x78: //erase + mode = Mode::Erase; + status = 0x1111'8008'00c2'001dull; + return; + + case 0xa5: //set write offset + offset = u16(data) * 128; + status = 
0x1111'8004'00c2'001dull; + return; + + case 0xb4: //write + mode = Mode::Write; + return; + + case 0xd2: //execute + if(mode == Mode::Erase) { + for(u32 index = 0; index < 128; index += 2) { + Memory::Writable::write(offset + index, 0xffff); + } + } + if(mode == Mode::Write) { + for(u32 index = 0; index < 128; index += 2) { + u16 half = rdram.ram.read(source + index); + Memory::Writable::write(offset + index, half); + } + } + return; + + case 0xe1: //status + mode = Mode::Status; + status = 0x1111'8001'00c2'001dull; + return; + + case 0xf0: //read + mode = Mode::Read; + status = 0x1111'8004'f000'001dull; + return; + + default: + debug(unusual, "[Cartridge::Flash::writeWord] command=", hex(command, 2L)); + return; + } +} + +auto Cartridge::Flash::writeDual(u32 address, u64 data) -> void { + debug(unusual, "[Cartridge::Flash::writeDual] mode=", (u32)mode); + return; +} diff --git a/waterbox/ares64/ares/ares/n64/cartridge/isviewer.cpp b/waterbox/ares64/ares/ares/n64/cartridge/isviewer.cpp new file mode 100644 index 0000000000..496ad4d00a --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/isviewer.cpp @@ -0,0 +1,22 @@ +auto Cartridge::ISViewer::readWord(u32 address) -> u32 { + u32 data = ram.read(address); + address = (address & 0xffff) >> 2; + + if(address == 0) { + data = 0x49533634; //'IS64' + } + + return data; +} + +auto Cartridge::ISViewer::writeWord(u32 address, u32 data) -> void { + ram.write(address, data); + address = (address & 0xffff) >> 2; + + if(address == 5) { + for(auto address : range(u16(data))) { + char c = ram.read(0x20 + address); + fputc(c, stdout); + } + } +} diff --git a/waterbox/ares64/ares/ares/n64/cartridge/serialization.cpp b/waterbox/ares64/ares/ares/n64/cartridge/serialization.cpp new file mode 100644 index 0000000000..49a70aac66 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/serialization.cpp @@ -0,0 +1,5 @@ +auto Cartridge::serialize(serializer& s) -> void { + s(ram); + s(eeprom); + s(flash); +} diff --git 
a/waterbox/ares64/ares/ares/n64/cartridge/slot.cpp b/waterbox/ares64/ares/ares/n64/cartridge/slot.cpp new file mode 100644 index 0000000000..733eb3ab47 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/slot.cpp @@ -0,0 +1,18 @@ +CartridgeSlot cartridgeSlot{"Cartridge Slot"}; + +CartridgeSlot::CartridgeSlot(string name) : name(name) { +} + +auto CartridgeSlot::load(Node::Object parent) -> void { + port = parent->append(name); + port->setFamily(system.name()); + port->setType("Cartridge"); + port->setAllocate([&](auto name) { return cartridge.allocate(port); }); + port->setConnect([&] { return cartridge.connect(); }); + port->setDisconnect([&] { return cartridge.disconnect(); }); +} + +auto CartridgeSlot::unload() -> void { + cartridge.disconnect(); + port = {}; +} diff --git a/waterbox/ares64/ares/ares/n64/cartridge/slot.hpp b/waterbox/ares64/ares/ares/n64/cartridge/slot.hpp new file mode 100644 index 0000000000..a0f5e9be0a --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cartridge/slot.hpp @@ -0,0 +1,13 @@ +struct CartridgeSlot { + Node::Port port; + Cartridge cartridge; + + //slot.cpp + CartridgeSlot(string name); + auto load(Node::Object) -> void; + auto unload() -> void; + + const string name; +}; + +extern CartridgeSlot cartridgeSlot; diff --git a/waterbox/ares64/ares/ares/n64/controller/controller.cpp b/waterbox/ares64/ares/ares/n64/controller/controller.cpp new file mode 100644 index 0000000000..949fcd161c --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/controller/controller.cpp @@ -0,0 +1,8 @@ +#include + +namespace ares::Nintendo64 { + +#include "port.cpp" +#include "gamepad/gamepad.cpp" + +} diff --git a/waterbox/ares64/ares/ares/n64/controller/controller.hpp b/waterbox/ares64/ares/ares/n64/controller/controller.hpp new file mode 100644 index 0000000000..ae2f9eaa14 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/controller/controller.hpp @@ -0,0 +1,11 @@ +struct Controller { + Node::Peripheral node; + + virtual ~Controller() = default; + 
virtual auto save() -> void {} + virtual auto read() -> n32 { return 0; } + virtual auto serialize(serializer&) -> void {} +}; + +#include "port.hpp" +#include "gamepad/gamepad.hpp" diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.cpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.cpp new file mode 100644 index 0000000000..4b33563b86 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.cpp @@ -0,0 +1,205 @@ +Gamepad::Gamepad(Node::Port parent) { + node = parent->append("Gamepad"); + + port = node->append("Pak"); + port->setFamily("Nintendo 64"); + port->setType("Pak"); + port->setHotSwappable(true); + port->setAllocate([&](auto name) { return allocate(name); }); + port->setConnect([&] { return connect(); }); + port->setDisconnect([&] { return disconnect(); }); + port->setSupported({"Controller Pak", "Rumble Pak"}); + + x = node->append ("X-Axis"); + y = node->append ("Y-Axis"); + up = node->append("Up"); + down = node->append("Down"); + left = node->append("Left"); + right = node->append("Right"); + b = node->append("B"); + a = node->append("A"); + cameraUp = node->append("C-Up"); + cameraDown = node->append("C-Down"); + cameraLeft = node->append("C-Left"); + cameraRight = node->append("C-Right"); + l = node->append("L"); + r = node->append("R"); + z = node->append("Z"); + start = node->append("Start"); +} + +Gamepad::~Gamepad() { + disconnect(); +} + +auto Gamepad::save() -> void { + if(!slot) return; + if(slot->name() == "Controller Pak") { + ram.save(pak->write("save.pak")); + } +} + +auto Gamepad::allocate(string name) -> Node::Peripheral { + if(name == "Controller Pak") return slot = port->append("Controller Pak"); + if(name == "Rumble Pak" ) return slot = port->append("Rumble Pak"); + return {}; +} + +auto Gamepad::connect() -> void { + if(!slot) return; + if(slot->name() == "Controller Pak") { + node->setPak(pak = platform->pak(node)); + ram.allocate(32_KiB); + formatControllerPak(); + if(auto fp = 
pak->read("save.pak")) { + if(fp->attribute("loaded").boolean()) { + ram.load(pak->read("save.pak")); + } + } + } + if(slot->name() == "Rumble Pak") { + motor = node->append("Rumble"); + } +} + +auto Gamepad::disconnect() -> void { + if(!slot) return; + if(slot->name() == "Controller Pak") { + save(); + ram.reset(); + } + if(slot->name() == "Rumble Pak") { + rumble(false); + node->remove(motor); + motor.reset(); + } + port->remove(slot); + slot.reset(); +} + +auto Gamepad::rumble(bool enable) -> void { + if(!motor) return; + motor->setEnable(enable); + platform->input(motor); +} + +auto Gamepad::read() -> n32 { + platform->input(x); + platform->input(y); + platform->input(up); + platform->input(down); + platform->input(left); + platform->input(right); + platform->input(b); + platform->input(a); + platform->input(cameraUp); + platform->input(cameraDown); + platform->input(cameraLeft); + platform->input(cameraRight); + platform->input(l); + platform->input(r); + platform->input(z); + platform->input(start); + + //scale {-32768 ... +32767} to {-84 ... +84} + auto ax = x->value() * 85.0 / 32767.0; + auto ay = y->value() * 85.0 / 32767.0; + + //create scaled circular dead-zone in range {-15 ... +15} + auto length = sqrt(ax * ax + ay * ay); + if(length < 16.0) { + length = 0.0; + } else if(length > 85.0) { + length = 85.0 / length; + } else { + length = (length - 16.0) * 85.0 / 69.0 / length; + } + ax *= length; + ay *= length; + + //bound diagonals to an octagonal range {-68 ... 
+68} + if(ax != 0.0 && ay != 0.0) { + auto slope = ay / ax; + auto edgex = copysign(85.0 / (abs(slope) + 16.0 / 69.0), ax); + auto edgey = copysign(min(abs(edgex * slope), 85.0 / (1.0 / abs(slope) + 16.0 / 69.0)), ay); + edgex = edgey / slope; + + auto scale = sqrt(edgex * edgex + edgey * edgey) / 85.0; + ax *= scale; + ay *= scale; + } + + n32 data; + data.byte(0) = -ay; + data.byte(1) = +ax; + data.bit(16) = cameraRight->value(); + data.bit(17) = cameraLeft->value(); + data.bit(18) = cameraDown->value(); + data.bit(19) = cameraUp->value(); + data.bit(20) = r->value(); + data.bit(21) = l->value(); + data.bit(22) = 0; //GND + data.bit(23) = 0; //RST + data.bit(24) = right->value() & !left->value(); + data.bit(25) = left->value() & !right->value(); + data.bit(26) = down->value() & !up->value(); + data.bit(27) = up->value() & !down->value(); + data.bit(28) = start->value(); + data.bit(29) = z->value(); + data.bit(30) = b->value(); + data.bit(31) = a->value(); + + //when L+R+Start are pressed: the X/Y axes are zeroed, RST is set, and Start is cleared + if(l->value() && r->value() && start->value()) { + data.byte(0) = 0; //Y-Axis + data.byte(1) = 0; //X-Axis + data.bit(23) = 1; //RST + data.bit(28) = 0; //Start + } + + return data; +} + +//controller paks contain 32KB of SRAM split into 128 pages of 256 bytes each. +//the first 5 pages are for storing system data, and the remaining 123 for game data. 
+auto Gamepad::formatControllerPak() -> void { + ram.fill(0x00); + + //page 0 (system area) + n6 fieldA = random(); + n19 fieldB = random(); + n27 fieldC = random(); + for(u32 area : array{1,3,4,6}) { + ram.write(area * 0x20 + 0x01, fieldA); //unknown + ram.write(area * 0x20 + 0x04, fieldB); //serial# hi + ram.write(area * 0x20 + 0x08, fieldC); //serial# lo + ram.write(area * 0x20 + 0x18, 0x0001); //device ID + ram.write(area * 0x20 + 0x1a, 0x01); //banks (0x01 = 32KB) + ram.write(area * 0x20 + 0x1b, 0x00); //version# + u16 checksum = 0; + u16 inverted = 0; + for(u32 half : range(14)) { + u16 data = ram.read(area * 0x20 + half * 2); + checksum += data; + inverted += ~data; + } + ram.write(area * 0x20 + 0x1c, checksum); + ram.write(area * 0x20 + 0x1e, inverted); + } + + //pages 1+2 (inode table) + for(u32 page : array{1,2}) { + ram.write(0x100 * page + 0x01, 0x71); //unknown + for(u32 slot : range(5,128)) { + ram.write(0x100 * page + slot * 2 + 0x01, 0x03); //0x01 = stop, 0x03 = empty + } + } + + //pages 3+4 (note table) + //pages 5-127 (game saves) +} + +auto Gamepad::serialize(serializer& s) -> void { + s(ram); + rumble(false); +} diff --git a/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.hpp b/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.hpp new file mode 100644 index 0000000000..a25f33e441 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/controller/gamepad/gamepad.hpp @@ -0,0 +1,35 @@ +struct Gamepad : Controller { + Node::Port port; + Node::Peripheral slot; + VFS::Pak pak; + Memory::Writable ram; //Toshiba TC55257DFL-85V + Node::Input::Rumble motor; + + Node::Input::Axis x; + Node::Input::Axis y; + Node::Input::Button up; + Node::Input::Button down; + Node::Input::Button left; + Node::Input::Button right; + Node::Input::Button b; + Node::Input::Button a; + Node::Input::Button cameraUp; + Node::Input::Button cameraDown; + Node::Input::Button cameraLeft; + Node::Input::Button cameraRight; + Node::Input::Button l; + Node::Input::Button r; + 
Node::Input::Button z; + Node::Input::Button start; + + Gamepad(Node::Port); + ~Gamepad(); + auto save() -> void override; + auto allocate(string name) -> Node::Peripheral; + auto connect() -> void; + auto disconnect() -> void; + auto rumble(bool enable) -> void; + auto read() -> n32 override; + auto formatControllerPak() -> void; + auto serialize(serializer&) -> void override; +}; diff --git a/waterbox/ares64/ares/ares/n64/controller/port.cpp b/waterbox/ares64/ares/ares/n64/controller/port.cpp new file mode 100644 index 0000000000..43ef7e5905 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/controller/port.cpp @@ -0,0 +1,35 @@ +ControllerPort controllerPort1{"Controller Port 1"}; +ControllerPort controllerPort2{"Controller Port 2"}; +ControllerPort controllerPort3{"Controller Port 3"}; +ControllerPort controllerPort4{"Controller Port 4"}; + +ControllerPort::ControllerPort(string name) : name(name) { +} + +auto ControllerPort::load(Node::Object parent) -> void { + port = parent->append(name); + port->setFamily("Nintendo 64"); + port->setType("Controller"); + port->setHotSwappable(true); + port->setAllocate([&](auto name) { return allocate(name); }); + port->setSupported({"Gamepad"}); +} + +auto ControllerPort::unload() -> void { + device = {}; + port = {}; +} + +auto ControllerPort::save() -> void { + if(device) device->save(); +} + +auto ControllerPort::allocate(string name) -> Node::Peripheral { + if(name == "Gamepad") device = new Gamepad(port); + if(device) return device->node; + return {}; +} + +auto ControllerPort::serialize(serializer& s) -> void { + if(device) s(*device); +} diff --git a/waterbox/ares64/ares/ares/n64/controller/port.hpp b/waterbox/ares64/ares/ares/n64/controller/port.hpp new file mode 100644 index 0000000000..c6c05c5445 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/controller/port.hpp @@ -0,0 +1,20 @@ +struct ControllerPort { + Node::Port port; + unique_pointer device; + + //port.cpp + ControllerPort(string name); + auto 
load(Node::Object) -> void; + auto unload() -> void; + auto save() -> void; + auto allocate(string name) -> Node::Peripheral; + + auto serialize(serializer&) -> void; + + const string name; +}; + +extern ControllerPort controllerPort1; +extern ControllerPort controllerPort2; +extern ControllerPort controllerPort3; +extern ControllerPort controllerPort4; diff --git a/waterbox/ares64/ares/ares/n64/cpu/context.cpp b/waterbox/ares64/ares/ares/n64/cpu/context.cpp new file mode 100644 index 0000000000..467b6acf1b --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/context.cpp @@ -0,0 +1,63 @@ +auto CPU::Context::setMode() -> void { + mode = min(2, self.scc.status.privilegeMode); + if(self.scc.status.exceptionLevel) mode = Mode::Kernel; + if(self.scc.status.errorLevel) mode = Mode::Kernel; + + switch(mode) { + case Mode::Kernel: + endian = self.scc.configuration.bigEndian; + bits = self.scc.status.kernelExtendedAddressing ? 64 : 32; + break; + case Mode::Supervisor: + endian = self.scc.configuration.bigEndian; + bits = self.scc.status.supervisorExtendedAddressing ? 64 : 32; + break; + case Mode::User: + endian = self.scc.configuration.bigEndian ^ self.scc.status.reverseEndian; + bits = self.scc.status.userExtendedAddressing ? 
64 : 32; + break; + } + + if(bits == 32 || bits == 64) { + segment[0] = Segment::Mapped; + segment[1] = Segment::Mapped; + segment[2] = Segment::Mapped; + segment[3] = Segment::Mapped; + switch(mode) { + case Mode::Kernel: + segment[4] = Segment::Cached; + segment[5] = Segment::Direct; + segment[6] = Segment::Mapped; + segment[7] = Segment::Mapped; + break; + case Mode::Supervisor: + segment[4] = Segment::Unused; + segment[5] = Segment::Unused; + segment[6] = Segment::Mapped; + segment[7] = Segment::Unused; + break; + case Mode::User: + segment[4] = Segment::Unused; + segment[5] = Segment::Unused; + segment[6] = Segment::Unused; + segment[7] = Segment::Unused; + break; + } + return; + } + + if(bits == 64) { + for(auto n : range(8)) + switch(mode) { + case Mode::Kernel: + segment[n] = Segment::Kernel64; + break; + case Mode::Supervisor: + segment[n] = Segment::Supervisor64; + break; + case Mode::User: + segment[n] = Segment::User64; + break; + } + } +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/cpu.cpp b/waterbox/ares64/ares/ares/n64/cpu/cpu.cpp new file mode 100644 index 0000000000..efdc133e8a --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/cpu.cpp @@ -0,0 +1,147 @@ +#include + +namespace ares::Nintendo64 { + +CPU cpu; +#include "context.cpp" +#include "icache.cpp" +#include "dcache.cpp" +#include "tlb.cpp" +#include "memory.cpp" +#include "exceptions.cpp" +#include "interpreter.cpp" +#include "interpreter-ipu.cpp" +#include "interpreter-scc.cpp" +#include "interpreter-fpu.cpp" +#include "recompiler.cpp" +#include "debugger.cpp" +#include "serialization.cpp" +#include "disassembler.cpp" + +auto CPU::load(Node::Object parent) -> void { + node = parent->append("CPU"); + debugger.load(node); +} + +auto CPU::unload() -> void { + debugger.unload(); + node.reset(); +} + +auto CPU::main() -> void { + instruction(); + synchronize(); +} + +auto CPU::step(u32 clocks) -> void { + Thread::clock += clocks; +} + +auto CPU::synchronize() -> void { + auto clocks = 
Thread::clock * 2; + Thread::clock = 0; + + vi.clock -= clocks; + ai.clock -= clocks; + rsp.clock -= clocks; + rdp.clock -= clocks; + while( vi.clock < 0) vi.main(); + while( ai.clock < 0) ai.main(); + while(rsp.clock < 0) rsp.main(); + while(rdp.clock < 0) rdp.main(); + + queue.step(clocks, [](u32 event) { + switch(event) { + case Queue::RSP_DMA: return rsp.dmaTransfer(); + case Queue::PI_DMA_Read: return pi.dmaRead(); + case Queue::PI_DMA_Write: return pi.dmaWrite(); + case Queue::SI_DMA_Read: return si.dmaRead(); + case Queue::SI_DMA_Write: return si.dmaWrite(); + } + }); + + clocks >>= 1; + if(scc.count < scc.compare && scc.count + clocks >= scc.compare) { + scc.cause.interruptPending.bit(Interrupt::Timer) = 1; + } + scc.count += clocks; +} + +auto CPU::instruction() -> void { + if(auto interrupts = scc.cause.interruptPending & scc.status.interruptMask) { + if(scc.status.interruptEnable && !scc.status.exceptionLevel && !scc.status.errorLevel) { + debugger.interrupt(scc.cause.interruptPending); + step(1); + return exception.interrupt(); + } + } + + if constexpr(Accuracy::CPU::Recompiler) { + auto address = devirtualize(ipu.pc)(0); + auto block = recompiler.block(address); + block->execute(*this); + } + + if constexpr(Accuracy::CPU::Interpreter) { + pipeline.address = ipu.pc; + pipeline.instruction = fetch(ipu.pc); + debugger.instruction(); + decoderEXECUTE(); + instructionEpilogue(); + } +} + +auto CPU::instructionEpilogue() -> s32 { + if constexpr(Accuracy::CPU::Recompiler) { + icache.step(ipu.pc); //simulates timings without performing actual icache loads + } + + ipu.r[0].u64 = 0; + + if(--scc.random.index < scc.wired.index) { + scc.random.index = 31; + } + + switch(branch.state) { + case Branch::Step: ipu.pc += 4; return 0; + case Branch::Take: ipu.pc += 4; branch.delaySlot(); return 0; + case Branch::DelaySlot: ipu.pc = branch.pc; branch.reset(); return 1; + case Branch::Exception: branch.reset(); return 1; + case Branch::Discard: ipu.pc += 8; 
branch.reset(); return 1; + } + + unreachable; +} + +auto CPU::power(bool reset) -> void { + Thread::reset(); + + pipeline = {}; + branch = {}; + context.endian = Context::Endian::Big; + context.mode = Context::Mode::Kernel; + context.bits = 64; + for(auto& segment : context.segment) segment = Context::Segment::Unused; + icache.power(reset); + dcache.power(reset); + for(auto& entry : tlb.entry) entry = {}; + tlb.physicalAddress = 0; + for(auto& r : ipu.r) r.u64 = 0; + ipu.lo.u64 = 0; + ipu.hi.u64 = 0; + ipu.r[29].u64 = u32(0xa400'1ff0); //stack pointer + ipu.pc = u32(0xbfc0'0000); + scc = {}; + for(auto& r : fpu.r) r.u64 = 0; + fpu.csr = {}; + fesetround(FE_TONEAREST); + context.setMode(); + + if constexpr(Accuracy::CPU::Recompiler) { + auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(64_MiB); + recompiler.allocator.resize(64_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer); + recompiler.reset(); + } +} + +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp b/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp new file mode 100644 index 0000000000..e7ce2c600a --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp @@ -0,0 +1,758 @@ +//NEC VR4300 + +struct CPU : Thread { + Node::Object node; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + auto instruction() -> void; + auto exception(u8 code) -> void; + auto interrupt(u8 mask) -> void; + auto tlbWrite(u32 index) -> void; + auto tlbModification(u64 address) -> void; + auto tlbLoad(u64 address, u64 physical) -> void; + auto tlbLoadInvalid(u64 address) -> void; + auto tlbLoadMiss(u64 address) -> void; + auto tlbStore(u64 address, u64 physical) -> void; + auto tlbStoreInvalid(u64 address) -> void; + auto tlbStoreMiss(u64 address) -> void; + + struct Tracer { + Node::Debugger::Tracer::Instruction instruction; + Node::Debugger::Tracer::Notification exception; + Node::Debugger::Tracer::Notification interrupt; + 
Node::Debugger::Tracer::Notification tlb; + } tracer; + } debugger; + + //cpu.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + + auto main() -> void; + auto step(u32 clocks) -> void; + auto synchronize() -> void; + + auto instruction() -> void; + auto instructionEpilogue() -> s32; + + auto power(bool reset) -> void; + + struct Pipeline { + u32 address; + u32 instruction; + + struct InstructionCache { + } ic; + + struct RegisterFile { + } rf; + + struct Execution { + } ex; + + struct DataCache { + } dc; + + struct WriteBack { + } wb; + } pipeline; + + struct Branch { + enum : u32 { Step, Take, DelaySlot, Exception, Discard }; + + auto inDelaySlot() const -> bool { return state == DelaySlot; } + auto reset() -> void { state = Step; } + auto take(u32 address) -> void { state = Take; pc = address; } + auto delaySlot() -> void { state = DelaySlot; } + auto exception() -> void { state = Exception; } + auto discard() -> void { state = Discard; } + + u64 pc = 0; + u32 state = Step; + } branch; + + //context.cpp + struct Context { + CPU& self; + Context(CPU& self) : self(self) {} + + enum Endian : bool { Little, Big }; + enum Mode : u32 { Kernel, Supervisor, User }; + enum Segment : u32 { Unused, Mapped, Cached, Direct, Kernel64, Supervisor64, User64 }; + + auto littleEndian() const -> bool { return endian == Endian::Little; } + auto bigEndian() const -> bool { return endian == Endian::Big; } + + auto kernelMode() const -> bool { return mode == Mode::Kernel; } + auto supervisorMode() const -> bool { return mode == Mode::Supervisor; } + auto userMode() const -> bool { return mode == Mode::User; } + + auto setMode() -> void; + + bool endian; + u32 mode; + u32 bits; + u32 segment[8]; //512_MiB chunks + } context{*this}; + + //icache.cpp + struct InstructionCache { + struct Line; + auto line(u32 address) -> Line&; + auto step(u32 address) -> void; + auto fetch(u32 address) -> u32; + auto read(u32 address) -> u32; + auto power(bool reset) -> void; + + //16KB + 
struct Line { + auto hit(u32 address) const -> bool; + auto fill(u32 address) -> void; + auto writeBack() -> void; + auto read(u32 address) const -> u32; + + bool valid; + u32 tag; + u16 index; + u32 words[8]; + } lines[512]; + } icache; + + //dcache.cpp + struct DataCache { + struct Line; + auto line(u32 address) -> Line&; + template auto read(u32 address) -> u64; + template auto write(u32 address, u64 data) -> void; + auto power(bool reset) -> void; + + //8KB + struct Line { + auto hit(u32 address) const -> bool; + template auto fill(u32 address, u64 data) -> void; + auto fill(u32 address) -> void; + auto writeBack() -> void; + template auto read(u32 address) const -> u64; + template auto write(u32 address, u64 data) -> void; + + bool valid; + bool dirty; + u32 tag; + u16 index; + union { + u8 bytes[16]; + u16 halfs[8]; + u32 words[4]; + }; + } lines[512]; + } dcache; + + //tlb.cpp: Translation Lookaside Buffer + struct TLB { + CPU& self; + TLB(CPU& self) : self(self) {} + static constexpr u32 Entries = 32; + + struct Match { + explicit operator bool() const { return found; } + + bool found; + bool cache; + u32 address; + }; + + //tlb.cpp + auto load(u32 address) -> Match; + auto store(u32 address) -> Match; + auto exception(u32 address) -> void; + + struct Entry { + //scc-tlb.cpp + auto synchronize() -> void; + + n1 global[2]; + n1 valid[2]; + n1 dirty[2]; + n3 cacheAlgorithm[2]; + n32 physicalAddress[2]; + n32 pageMask; + n40 virtualAddress; + n8 addressSpaceID; + n2 region; + //internal: + n1 globals; + n32 addressMaskHi; + n32 addressMaskLo; + n32 addressSelect; + n40 addressCompare; + } entry[TLB::Entries]; + + u32 physicalAddress; + } tlb{*this}; + + //memory.cpp + auto kernelSegment32(u32 address) const -> Context::Segment; + auto supervisorSegment32(u32 address) const -> Context::Segment; + auto userSegment32(u32 address) const -> Context::Segment; + + auto kernelSegment64(u64 address) const -> Context::Segment; + auto supervisorSegment64(u64 address) 
const -> Context::Segment; + auto userSegment64(u64 address) const -> Context::Segment; + + auto segment(u64 address) -> Context::Segment; + auto devirtualize(u64 address) -> maybe; + auto fetch(u64 address) -> u32; + template auto read(u64 address) -> maybe; + template auto write(u64 address, u64 data) -> bool; + + //serialization.cpp + auto serialize(serializer&) -> void; + + //exception.cpp + struct Exception { + CPU& self; + Exception(CPU& self) : self(self) {} + + auto trigger(u32 code, u32 coprocessor = 0, bool tlbMiss = 0) -> void; + + auto interrupt() -> void; + auto tlbModification() -> void; + auto tlbLoadInvalid() -> void; + auto tlbLoadMiss() -> void; + auto tlbStoreInvalid() -> void; + auto tlbStoreMiss() -> void; + auto addressLoad() -> void; + auto addressStore() -> void; + auto busInstruction() -> void; + auto busData() -> void; + auto systemCall() -> void; + auto breakpoint() -> void; + auto reservedInstruction() -> void; + auto coprocessor0() -> void; + auto coprocessor1() -> void; + auto coprocessor2() -> void; + auto coprocessor3() -> void; + auto arithmeticOverflow() -> void; + auto trap() -> void; + auto floatingPoint() -> void; + auto watchAddress() -> void; + } exception{*this}; + + enum Interrupt : u32 { + Software0 = 0, + Software1 = 1, + RCP = 2, + Cartridge = 3, + Reset = 4, + ReadRDB = 5, + WriteRDB = 6, + Timer = 7, + }; + + //ipu.cpp + union r64 { + struct { int32_t order_msb2(s32h, s32); }; + struct { uint32_t order_msb2(u32h, u32); }; + struct { float32_t order_msb2(f32h, f32); }; + struct { int64_t s64; }; + struct { uint64_t u64; }; + struct { float64_t f64; }; + auto s128() const -> int128_t { return (int128_t)s64; } + auto u128() const -> uint128_t { return (uint128_t)u64; } + }; + using cr64 = const r64; + + struct IPU { + enum Register : u32 { + R0, //zero (read-only) + AT, //assembler temporary + V0, V1, //arithmetic values + A0, A1, A2, A3, //subroutine parameters + T0, T1, T2, T3, T4, T5, T6, T7, //temporary registers + S0, 
S1, S2, S3, S4, S5, S6, S7, //saved registers + T8, T9, //temporary registers + K0, K1, //kernel registers + GP, //global pointer + SP, //stack pointer + S8, //saved register + RA, //return address + }; + + r64 r[32]; + r64 lo; + r64 hi; + u64 pc; //program counter + } ipu; + + //interpreter-ipu.cpp + auto ADD(r64& rd, cr64& rs, cr64& rt) -> void; + auto ADDI(r64& rt, cr64& rs, s16 imm) -> void; + auto ADDIU(r64& rt, cr64& rs, s16 imm) -> void; + auto ADDU(r64& rd, cr64& rs, cr64& rt) -> void; + auto AND(r64& rd, cr64& rs, cr64& rt) -> void; + auto ANDI(r64& rt, cr64& rs, u16 imm) -> void; + auto BEQ(cr64& rs, cr64& rt, s16 imm) -> void; + auto BEQL(cr64& rs, cr64& rt, s16 imm) -> void; + auto BGEZ(cr64& rs, s16 imm) -> void; + auto BGEZAL(cr64& rs, s16 imm) -> void; + auto BGEZALL(cr64& rs, s16 imm) -> void; + auto BGEZL(cr64& rs, s16 imm) -> void; + auto BGTZ(cr64& rs, s16 imm) -> void; + auto BGTZL(cr64& rs, s16 imm) -> void; + auto BLEZ(cr64& rs, s16 imm) -> void; + auto BLEZL(cr64& rs, s16 imm) -> void; + auto BLTZ(cr64& rs, s16 imm) -> void; + auto BLTZAL(cr64& rs, s16 imm) -> void; + auto BLTZALL(cr64& rs, s16 imm) -> void; + auto BLTZL(cr64& rs, s16 imm) -> void; + auto BNE(cr64& rs, cr64& rt, s16 imm) -> void; + auto BNEL(cr64& rs, cr64& rt, s16 imm) -> void; + auto BREAK() -> void; + auto CACHE(u8 operation, cr64& rs, s16 imm) -> void; + auto DADD(r64& rd, cr64& rs, cr64& rt) -> void; + auto DADDI(r64& rt, cr64& rs, s16 imm) -> void; + auto DADDIU(r64& rt, cr64& rs, s16 imm) -> void; + auto DADDU(r64& rd, cr64& rs, cr64& rt) -> void; + auto DDIV(cr64& rs, cr64& rt) -> void; + auto DDIVU(cr64& rs, cr64& rt) -> void; + auto DIV(cr64& rs, cr64& rt) -> void; + auto DIVU(cr64& rs, cr64& rt) -> void; + auto DMULT(cr64& rs, cr64& rt) -> void; + auto DMULTU(cr64& rs, cr64& rt) -> void; + auto DSLL(r64& rd, cr64& rt, u8 sa) -> void; + auto DSLLV(r64& rd, cr64& rt, cr64& rs) -> void; + auto DSRA(r64& rd, cr64& rt, u8 sa) -> void; + auto DSRAV(r64& rd, cr64& rt, 
cr64& rs) -> void; + auto DSRL(r64& rd, cr64& rt, u8 sa) -> void; + auto DSRLV(r64& rd, cr64& rt, cr64& rs) -> void; + auto DSUB(r64& rd, cr64& rs, cr64& rt) -> void; + auto DSUBU(r64& rd, cr64& rs, cr64& rt) -> void; + auto J(u32 imm) -> void; + auto JAL(u32 imm) -> void; + auto JALR(r64& rd, cr64& rs) -> void; + auto JR(cr64& rs) -> void; + auto LB(r64& rt, cr64& rs, s16 imm) -> void; + auto LBU(r64& rt, cr64& rs, s16 imm) -> void; + auto LD(r64& rt, cr64& rs, s16 imm) -> void; + auto LDL(r64& rt, cr64& rs, s16 imm) -> void; + auto LDR(r64& rt, cr64& rs, s16 imm) -> void; + auto LH(r64& rt, cr64& rs, s16 imm) -> void; + auto LHU(r64& rt, cr64& rs, s16 imm) -> void; + auto LUI(r64& rt, u16 imm) -> void; + auto LL(r64& rt, cr64& rs, s16 imm) -> void; + auto LLD(r64& rt, cr64& rs, s16 imm) -> void; + auto LW(r64& rt, cr64& rs, s16 imm) -> void; + auto LWL(r64& rt, cr64& rs, s16 imm) -> void; + auto LWR(r64& rt, cr64& rs, s16 imm) -> void; + auto LWU(r64& rt, cr64& rs, s16 imm) -> void; + auto MFHI(r64& rd) -> void; + auto MFLO(r64& rd) -> void; + auto MTHI(cr64& rs) -> void; + auto MTLO(cr64& rs) -> void; + auto MULT(cr64& rs, cr64& rt) -> void; + auto MULTU(cr64& rs, cr64& rt) -> void; + auto NOR(r64& rd, cr64& rs, cr64& rt) -> void; + auto OR(r64& rd, cr64& rs, cr64& rt) -> void; + auto ORI(r64& rt, cr64& rs, u16 imm) -> void; + auto SB(cr64& rt, cr64& rs, s16 imm) -> void; + auto SC(r64& rt, cr64& rs, s16 imm) -> void; + auto SD(cr64& rt, cr64& rs, s16 imm) -> void; + auto SCD(r64& rt, cr64& rs, s16 imm) -> void; + auto SDL(cr64& rt, cr64& rs, s16 imm) -> void; + auto SDR(cr64& rt, cr64& rs, s16 imm) -> void; + auto SH(cr64& rt, cr64& rs, s16 imm) -> void; + auto SLL(r64& rd, cr64& rt, u8 sa) -> void; + auto SLLV(r64& rd, cr64& rt, cr64& rs) -> void; + auto SLT(r64& rd, cr64& rs, cr64& rt) -> void; + auto SLTI(r64& rt, cr64& rs, s16 imm) -> void; + auto SLTIU(r64& rt, cr64& rs, s16 imm) -> void; + auto SLTU(r64& rd, cr64& rs, cr64& rt) -> void; + auto SRA(r64& 
rd, cr64& rt, u8 sa) -> void; + auto SRAV(r64& rd, cr64& rt, cr64& rs) -> void; + auto SRL(r64& rd, cr64& rt, u8 sa) -> void; + auto SRLV(r64& rd, cr64& rt, cr64& rs) -> void; + auto SUB(r64& rd, cr64& rs, cr64& rt) -> void; + auto SUBU(r64& rd, cr64& rs, cr64& rt) -> void; + auto SW(cr64& rt, cr64& rs, s16 imm) -> void; + auto SWL(cr64& rt, cr64& rs, s16 imm) -> void; + auto SWR(cr64& rt, cr64& rs, s16 imm) -> void; + auto SYNC() -> void; + auto SYSCALL() -> void; + auto TEQ(cr64& rs, cr64& rt) -> void; + auto TEQI(cr64& rs, s16 imm) -> void; + auto TGE(cr64& rs, cr64& rt) -> void; + auto TGEI(cr64& rs, s16 imm) -> void; + auto TGEIU(cr64& rs, s16 imm) -> void; + auto TGEU(cr64& rs, cr64& rt) -> void; + auto TLT(cr64& rs, cr64& rt) -> void; + auto TLTI(cr64& rs, s16 imm) -> void; + auto TLTIU(cr64& rs, s16 imm) -> void; + auto TLTU(cr64& rs, cr64& rt) -> void; + auto TNE(cr64& rs, cr64& rt) -> void; + auto TNEI(cr64& rs, s16 imm) -> void; + auto XOR(r64& rd, cr64& rs, cr64& rt) -> void; + auto XORI(r64& rt, cr64& rs, u16 imm) -> void; + + struct SCC { + //0 + struct Index { + n6 tlbEntry; + n1 probeFailure; + } index; + + //1 + struct Random { + n5 index = 31; + n1 unused; + } random; + + //2: EntryLo0 + //3: EntryLo1 + //5: PageMask + //10: EntryHi + TLB::Entry tlb; + + //4 + struct Context { + n19 badVirtualAddress; + n41 pageTableEntryBase; + } context; + + //6 + struct Wired { + n5 index; + n1 unused; + } wired; + + //8 + n64 badVirtualAddress; + + //9 + n33 count; //32-bit; +1 to count half-cycles + + //11 + n33 compare; + + //12 + struct Status { + n1 interruptEnable; + n1 exceptionLevel; + n1 errorLevel = 1; + n2 privilegeMode; + n1 userExtendedAddressing; + n1 supervisorExtendedAddressing; + n1 kernelExtendedAddressing; + n8 interruptMask = 0xff; + n1 de; //unused + n1 ce; //unused + n1 condition; + n1 softReset = 1; + n1 tlbShutdown; + n1 vectorLocation = 1; + n1 instructionTracing; + n1 reverseEndian; + n1 floatingPointMode = 1; + n1 lowPowerMode; + 
struct Enable { + n1 coprocessor0 = 1; + n1 coprocessor1 = 1; + n1 coprocessor2; + n1 coprocessor3; + } enable; + } status; + + //13 + struct Cause { + n5 exceptionCode; + n8 interruptPending; + n2 coprocessorError; + n1 branchDelay; + } cause; + + //14: Exception Program Counter + n64 epc; + + //15: Coprocessor Revision Identifier + struct Coprocessor { + static constexpr u8 revision = 0x22; + static constexpr u8 implementation = 0x0b; + } coprocessor; + + //16 + struct Configuration { + n2 coherencyAlgorithmKSEG0; + n2 cu; //reserved + n1 bigEndian = 1; + n2 sysadWritebackPattern; + n2 systemClockRatio = 6; + } configuration; + + //17: Load Linked Address + n64 ll; + n1 llbit; + + //18 + struct WatchLo { + n1 trapOnWrite; + n1 trapOnRead; + n32 physicalAddress; + } watchLo; + + //19 + struct WatchHi { + n4 physicalAddressExtended; //unused; for R4000 compatibility only + } watchHi; + + //20 + struct XContext { + n27 badVirtualAddress; + n2 region; + n31 pageTableEntryBase; + } xcontext; + + //26 + struct ParityError { + n8 diagnostic; //unused; for R4000 compatibility only + } parityError; + + //28 + struct TagLo { + n2 primaryCacheState; + n32 physicalAddress; + } tagLo; + + //30: Error Exception Program Counter + n64 epcError; + } scc; + + //interpreter-scc.cpp + auto getControlRegister(n5) -> u64; + auto setControlRegister(n5, n64) -> void; + + auto DMFC0(r64& rt, u8 rd) -> void; + auto DMTC0(cr64& rt, u8 rd) -> void; + auto ERET() -> void; + auto MFC0(r64& rt, u8 rd) -> void; + auto MTC0(cr64& rt, u8 rd) -> void; + auto TLBP() -> void; + auto TLBR() -> void; + auto TLBWI() -> void; + auto TLBWR() -> void; + + struct FPU { + auto setFloatingPointMode(bool) -> void; + + r64 r[32]; + + struct Coprocessor { + static constexpr u8 revision = 0x00; + static constexpr u8 implementation = 0x0b; + } coprocessor; + + struct ControlStatus { + n2 roundMode = 0; + struct Flag { + n1 inexact = 0; + n1 underflow = 0; + n1 overflow = 0; + n1 divisionByZero = 0; + n1 
invalidOperation = 0; + } flag; + struct Enable { + n1 inexact = 0; + n1 underflow = 0; + n1 overflow = 0; + n1 divisionByZero = 0; + n1 invalidOperation = 0; + } enable; + struct Cause { + n1 inexact = 0; + n1 underflow = 0; + n1 overflow = 0; + n1 divisionByZero = 0; + n1 invalidOperation = 0; + n1 unimplementedOperation = 0; + } cause; + n1 compare = 0; + n1 flushed = 0; + } csr; + } fpu; + + //interpreter-fpu.cpp + template auto fgr(u32) -> T&; + auto getControlRegisterFPU(n5) -> u32; + auto setControlRegisterFPU(n5, n32) -> void; + + auto BC1(bool value, bool likely, s16 imm) -> void; + auto CFC1(r64& rt, u8 rd) -> void; + auto CTC1(cr64& rt, u8 rd) -> void; + auto DMFC1(r64& rt, u8 fs) -> void; + auto DMTC1(cr64& rt, u8 fs) -> void; + auto FABS_S(u8 fd, u8 fs) -> void; + auto FABS_D(u8 fd, u8 fs) -> void; + auto FADD_S(u8 fd, u8 fs, u8 ft) -> void; + auto FADD_D(u8 fd, u8 fs, u8 ft) -> void; + auto FCEIL_L_S(u8 fd, u8 fs) -> void; + auto FCEIL_L_D(u8 fd, u8 fs) -> void; + auto FCEIL_W_S(u8 fd, u8 fs) -> void; + auto FCEIL_W_D(u8 fd, u8 fs) -> void; + auto FC_EQ_S(u8 fs, u8 ft) -> void; + auto FC_EQ_D(u8 fs, u8 ft) -> void; + auto FC_F_S(u8 fs, u8 ft) -> void; + auto FC_F_D(u8 fs, u8 ft) -> void; + auto FC_LE_S(u8 fs, u8 ft) -> void; + auto FC_LE_D(u8 fs, u8 ft) -> void; + auto FC_LT_S(u8 fs, u8 ft) -> void; + auto FC_LT_D(u8 fs, u8 ft) -> void; + auto FC_NGE_S(u8 fs, u8 ft) -> void; + auto FC_NGE_D(u8 fs, u8 ft) -> void; + auto FC_NGL_S(u8 fs, u8 ft) -> void; + auto FC_NGL_D(u8 fs, u8 ft) -> void; + auto FC_NGLE_S(u8 fs, u8 ft) -> void; + auto FC_NGLE_D(u8 fs, u8 ft) -> void; + auto FC_NGT_S(u8 fs, u8 ft) -> void; + auto FC_NGT_D(u8 fs, u8 ft) -> void; + auto FC_OLE_S(u8 fs, u8 ft) -> void; + auto FC_OLE_D(u8 fs, u8 ft) -> void; + auto FC_OLT_S(u8 fs, u8 ft) -> void; + auto FC_OLT_D(u8 fs, u8 ft) -> void; + auto FC_SEQ_S(u8 fs, u8 ft) -> void; + auto FC_SEQ_D(u8 fs, u8 ft) -> void; + auto FC_SF_S(u8 fs, u8 ft) -> void; + auto FC_SF_D(u8 fs, u8 ft) -> void; + 
auto FC_UEQ_S(u8 fs, u8 ft) -> void; + auto FC_UEQ_D(u8 fs, u8 ft) -> void; + auto FC_ULE_S(u8 fs, u8 ft) -> void; + auto FC_ULE_D(u8 fs, u8 ft) -> void; + auto FC_ULT_S(u8 fs, u8 ft) -> void; + auto FC_ULT_D(u8 fs, u8 ft) -> void; + auto FC_UN_S(u8 fs, u8 ft) -> void; + auto FC_UN_D(u8 fs, u8 ft) -> void; + auto FCVT_S_D(u8 fd, u8 fs) -> void; + auto FCVT_S_W(u8 fd, u8 fs) -> void; + auto FCVT_S_L(u8 fd, u8 fs) -> void; + auto FCVT_D_S(u8 fd, u8 fs) -> void; + auto FCVT_D_W(u8 fd, u8 fs) -> void; + auto FCVT_D_L(u8 fd, u8 fs) -> void; + auto FCVT_L_S(u8 fd, u8 fs) -> void; + auto FCVT_L_D(u8 fd, u8 fs) -> void; + auto FCVT_W_S(u8 fd, u8 fs) -> void; + auto FCVT_W_D(u8 fd, u8 fs) -> void; + auto FDIV_S(u8 fd, u8 fs, u8 ft) -> void; + auto FDIV_D(u8 fd, u8 fs, u8 ft) -> void; + auto FFLOOR_L_S(u8 fd, u8 fs) -> void; + auto FFLOOR_L_D(u8 fd, u8 fs) -> void; + auto FFLOOR_W_S(u8 fd, u8 fs) -> void; + auto FFLOOR_W_D(u8 fd, u8 fs) -> void; + auto FMOV_S(u8 fd, u8 fs) -> void; + auto FMOV_D(u8 fd, u8 fs) -> void; + auto FMUL_S(u8 fd, u8 fs, u8 ft) -> void; + auto FMUL_D(u8 fd, u8 fs, u8 ft) -> void; + auto FNEG_S(u8 fd, u8 fs) -> void; + auto FNEG_D(u8 fd, u8 fs) -> void; + auto FROUND_L_S(u8 fd, u8 fs) -> void; + auto FROUND_L_D(u8 fd, u8 fs) -> void; + auto FROUND_W_S(u8 fd, u8 fs) -> void; + auto FROUND_W_D(u8 fd, u8 fs) -> void; + auto FSQRT_S(u8 fd, u8 fs) -> void; + auto FSQRT_D(u8 fd, u8 fs) -> void; + auto FSUB_S(u8 fd, u8 fs, u8 ft) -> void; + auto FSUB_D(u8 fd, u8 fs, u8 ft) -> void; + auto FTRUNC_L_S(u8 fd, u8 fs) -> void; + auto FTRUNC_L_D(u8 fd, u8 fs) -> void; + auto FTRUNC_W_S(u8 fd, u8 fs) -> void; + auto FTRUNC_W_D(u8 fd, u8 fs) -> void; + auto LDC1(u8 ft, cr64& rs, s16 imm) -> void; + auto LWC1(u8 ft, cr64& rs, s16 imm) -> void; + auto MFC1(r64& rt, u8 fs) -> void; + auto MTC1(cr64& rt, u8 fs) -> void; + auto SDC1(u8 ft, cr64& rs, s16 imm) -> void; + auto SWC1(u8 ft, cr64& rs, s16 imm) -> void; + + //decoder.cpp + auto decoderEXECUTE() -> void; + auto 
decoderSPECIAL() -> void; + auto decoderREGIMM() -> void; + auto decoderSCC() -> void; + auto decoderFPU() -> void; + + auto COP2() -> void; + auto COP3() -> void; + auto INVALID() -> void; + + //recompiler.cpp + struct Recompiler : recompiler::generic { + CPU& self; + Recompiler(CPU& self) : self(self), generic(allocator) {} + + struct Block { + auto execute(CPU& self) -> void { + ((void (*)(CPU*, r64*, r64*))code)(&self, &self.ipu.r[16], &self.fpu.r[16]); + } + + u8* code; + }; + + struct Pool { + Block* blocks[1 << 6]; + }; + + auto reset() -> void { + for(u32 index : range(1 << 21)) pools[index] = nullptr; + } + + auto invalidate(u32 address) -> void { + pools[address >> 8 & 0x1fffff] = nullptr; + } + + auto pool(u32 address) -> Pool*; + auto block(u32 address) -> Block*; + + auto emit(u32 address) -> Block*; + auto emitEXECUTE(u32 instruction) -> bool; + auto emitSPECIAL(u32 instruction) -> bool; + auto emitREGIMM(u32 instruction) -> bool; + auto emitSCC(u32 instruction) -> bool; + auto emitFPU(u32 instruction) -> bool; + + bump_allocator allocator; + Pool* pools[1 << 21]; //2_MiB * sizeof(void*) == 16_MiB + } recompiler{*this}; + + struct Disassembler { + CPU& self; + Disassembler(CPU& self) : self(self) {} + + //disassembler.cpp + auto disassemble(u32 address, u32 instruction) -> string; + template auto hint(P&&... 
p) const -> string; + + bool showColors = true; + bool showValues = true; + + private: + auto EXECUTE() -> vector; + auto SPECIAL() -> vector; + auto REGIMM() -> vector; + auto SCC() -> vector; + auto FPU() -> vector; + auto immediate(s64 value, u32 bits = 0) const -> string; + auto ipuRegisterName(u32 index) const -> string; + auto ipuRegisterValue(u32 index) const -> string; + auto ipuRegisterIndex(u32 index, s16 offset) const -> string; + auto sccRegisterName(u32 index) const -> string; + auto sccRegisterValue(u32 index) const -> string; + auto fpuRegisterName(u32 index) const -> string; + auto fpuRegisterValue(u32 index) const -> string; + auto ccrRegisterName(u32 index) const -> string; + auto ccrRegisterValue(u32 index) const -> string; + + u32 address; + u32 instruction; + } disassembler{*this}; +}; + +extern CPU cpu; diff --git a/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp b/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp new file mode 100644 index 0000000000..bde7024ab1 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp @@ -0,0 +1,125 @@ +auto CPU::DataCache::Line::hit(u32 address) const -> bool { + return valid && tag == (address & ~0xfff); +} + +template auto CPU::DataCache::Line::fill(u32 address, u64 data) -> void { + cpu.step(40); + valid = 1; + dirty = 1; + tag = address & ~0xfff; + //read words according to critical doubleword first scheme + switch(address & 8) { + case 0: + if constexpr(Size != Dual) { + words[0] = bus.read(tag | index | 0x0); + words[1] = bus.read(tag | index | 0x4); + } + write(address, data); + words[2] = bus.read(tag | index | 0x8); + words[3] = bus.read(tag | index | 0xc); + break; + case 8: + if constexpr(Size != Dual) { + words[2] = bus.read(tag | index | 0x8); + words[3] = bus.read(tag | index | 0xc); + } + write(address, data); + words[0] = bus.read(tag | index | 0x0); + words[1] = bus.read(tag | index | 0x4); + break; + } +} + +auto CPU::DataCache::Line::fill(u32 address) -> void { + cpu.step(40); + valid = 1; 
+ dirty = 0; + tag = address & ~0xfff; + //read words according to critical doubleword first scheme + switch(address & 8) { + case 0: + words[0] = bus.read(tag | index | 0x0); + words[1] = bus.read(tag | index | 0x4); + words[2] = bus.read(tag | index | 0x8); + words[3] = bus.read(tag | index | 0xc); + break; + case 8: + words[2] = bus.read(tag | index | 0x8); + words[3] = bus.read(tag | index | 0xc); + words[0] = bus.read(tag | index | 0x0); + words[1] = bus.read(tag | index | 0x4); + break; + } +} + +auto CPU::DataCache::Line::writeBack() -> void { + cpu.step(40); + dirty = 0; + bus.write(tag | index | 0x0, words[0]); + bus.write(tag | index | 0x4, words[1]); + bus.write(tag | index | 0x8, words[2]); + bus.write(tag | index | 0xc, words[3]); +} + +auto CPU::DataCache::line(u32 address) -> Line& { + return lines[address >> 4 & 0x1ff]; +} + +template +auto CPU::DataCache::Line::read(u32 address) const -> u64 { + if constexpr(Size == Byte) { return bytes[address >> 0 & 15 ^ 3]; } + if constexpr(Size == Half) { return halfs[address >> 1 & 7 ^ 1]; } + if constexpr(Size == Word) { return words[address >> 2 & 3 ^ 0]; } + if constexpr(Size == Dual) { + u64 upper = words[address >> 2 & 2 | 0]; + u64 lower = words[address >> 2 & 2 | 1]; + return upper << 32 | lower << 0; + } +} + +template +auto CPU::DataCache::Line::write(u32 address, u64 data) -> void { + if constexpr(Size == Byte) { bytes[address >> 0 & 15 ^ 3] = data; } + if constexpr(Size == Half) { halfs[address >> 1 & 7 ^ 1] = data; } + if constexpr(Size == Word) { words[address >> 2 & 3 ^ 0] = data; } + if constexpr(Size == Dual) { + words[address >> 2 & 2 | 0] = data >> 32; + words[address >> 2 & 2 | 1] = data >> 0; + } + dirty = 1; +} + +template +auto CPU::DataCache::read(u32 address) -> u64 { + auto& line = this->line(address); + if(!line.hit(address)) { + if(line.valid && line.dirty) line.writeBack(); + line.fill(address); + } else { + cpu.step(1); + } + return line.read(address); +} + +template +auto 
CPU::DataCache::write(u32 address, u64 data) -> void { + auto& line = this->line(address); + if(!line.hit(address)) { + if(line.valid && line.dirty) line.writeBack(); + return line.fill(address, data); + } else { + cpu.step(1); + } + line.write(address, data); +} + +auto CPU::DataCache::power(bool reset) -> void { + u32 index = 0; + for(auto& line : lines) { + line.valid = 0; + line.dirty = 0; + line.tag = 0; + line.index = index++ << 4 & 0xff0; + for(auto& word : line.words) word = 0; + } +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/debugger.cpp b/waterbox/ares64/ares/ares/n64/cpu/debugger.cpp new file mode 100644 index 0000000000..91451f510d --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/debugger.cpp @@ -0,0 +1,123 @@ +auto CPU::Debugger::load(Node::Object parent) -> void { + tracer.instruction = parent->append("Instruction", "CPU"); + tracer.instruction->setAddressBits(32, 2); + + tracer.exception = parent->append("Exception", "CPU"); + tracer.interrupt = parent->append("Interrupt", "CPU"); + tracer.tlb = parent->append("TLB", "CPU"); +} + +auto CPU::Debugger::unload() -> void { + tracer.instruction.reset(); + tracer.exception.reset(); + tracer.interrupt.reset(); + tracer.tlb.reset(); +} + +auto CPU::Debugger::instruction() -> void { + if(unlikely(tracer.instruction->enabled())) { + u32 address = cpu.pipeline.address; + u32 instruction = cpu.pipeline.instruction; + if(tracer.instruction->address(address)) { + cpu.disassembler.showColors = 0; + tracer.instruction->notify(cpu.disassembler.disassemble(address, instruction), {}); + cpu.disassembler.showColors = 1; + } + } +} + +auto CPU::Debugger::exception(u8 code) -> void { + if(unlikely(tracer.exception->enabled())) { + if(code == 0) return; //ignore interrupt exceptions + string type = {"unknown(0x", hex(code, 2L), ")"}; + switch(code) { + case 0: type = "interrupt"; break; + case 1: type = "TLB modification"; break; + case 2: type = "TLB load"; break; + case 3: type = "TLB store"; break; + case 4: 
type = "address load"; break; + case 5: type = "address store"; break; + case 6: type = "bus instruction"; break; + case 7: type = "bus data"; break; + case 8: type = "system call"; break; + case 9: type = "breakpoint"; break; + case 10: type = "reserved instruction"; break; + case 11: type = "coprocessor"; break; + case 12: type = "arithmetic overflow"; break; + case 13: type = "trap"; break; + case 15: type = "floating point"; break; + case 23: type = "watch address"; break; + } + tracer.exception->notify(type); + } +} + +auto CPU::Debugger::interrupt(u8 mask) -> void { + if(unlikely(tracer.interrupt->enabled())) { + vector sources; + if(mask & 0x01) sources.append("software 0"); + if(mask & 0x02) sources.append("software 1"); + if(mask & 0x04) sources.append("RCP"); + if(mask & 0x08) sources.append("cartridge"); + if(mask & 0x10) sources.append("reset"); + if(mask & 0x20) sources.append("read RDB"); + if(mask & 0x40) sources.append("write RDB"); + if(mask & 0x80) sources.append("timer"); + tracer.interrupt->notify(sources.merge(",")); + } +} + +auto CPU::Debugger::tlbWrite(u32 index) -> void { + if(unlikely(tracer.tlb->enabled())) { + auto entry = cpu.tlb.entry[index & 31]; + tracer.tlb->notify({"write: ", index, " {"}); + tracer.tlb->notify({" global: ", entry.global[0], ",", entry.global[1]}); + tracer.tlb->notify({" physical address: 0x", hex(entry.physicalAddress[0]), ",0x", hex(entry.physicalAddress[1])}); + tracer.tlb->notify({" page mask: 0x", hex(entry.pageMask)}); + tracer.tlb->notify({" virtual address: 0x", hex(entry.virtualAddress)}); + tracer.tlb->notify({" address space ID: 0x", hex(entry.addressSpaceID)}); + tracer.tlb->notify({"}"}); + } +} + +auto CPU::Debugger::tlbModification(u64 address) -> void { + if(unlikely(tracer.tlb->enabled())) { + tracer.tlb->notify({"modification: 0x", hex(address)}); + } +} + +auto CPU::Debugger::tlbLoad(u64 address, u64 physical) -> void { + if(unlikely(tracer.tlb->enabled())) { + tracer.tlb->notify({"load: 0x", 
hex(address), " => 0x", hex(physical)}); + } +} + +auto CPU::Debugger::tlbLoadInvalid(u64 address) -> void { + if(unlikely(tracer.tlb->enabled())) { + tracer.tlb->notify({"load invalid: 0x", hex(address)}); + } +} + +auto CPU::Debugger::tlbLoadMiss(u64 address) -> void { + if(unlikely(tracer.tlb->enabled())) { + tracer.tlb->notify({"load miss: 0x", hex(address)}); + } +} + +auto CPU::Debugger::tlbStore(u64 address, u64 physical) -> void { + if(unlikely(tracer.tlb->enabled())) { + tracer.tlb->notify({"store: 0x", hex(address), " => 0x", hex(physical)}); + } +} + +auto CPU::Debugger::tlbStoreInvalid(u64 address) -> void { + if(unlikely(tracer.tlb->enabled())) { + tracer.tlb->notify({"store invalid: 0x", hex(address)}); + } +} + +auto CPU::Debugger::tlbStoreMiss(u64 address) -> void { + if(unlikely(tracer.tlb->enabled())) { + tracer.tlb->notify({"store miss: 0x", hex(address)}); + } +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/disassembler.cpp b/waterbox/ares64/ares/ares/n64/cpu/disassembler.cpp new file mode 100644 index 0000000000..e169b5da31 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/disassembler.cpp @@ -0,0 +1,481 @@ +auto CPU::Disassembler::disassemble(u32 address, u32 instruction) -> string { + this->address = address; + this->instruction = instruction; + + auto v = EXECUTE(); + if(!v) v.append("invalid", string{"$", hex(instruction, 8L)}); + if(!instruction) v = {"nop"}; + auto s = pad(v.takeFirst(), -8L); + return {s, v.merge(",")}; +} + +auto CPU::Disassembler::EXECUTE() -> vector { + auto rtName = [&] { return ipuRegisterName (instruction >> 16 & 31); }; + auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto rsValue = [&] { return ipuRegisterValue(instruction >> 21 & 31); }; + auto ftName = [&] { return fpuRegisterName (instruction >> 16 & 31); }; + auto ftValue = [&] { return fpuRegisterValue(instruction >> 16 & 31); }; + auto imm16i = [&] { return immediate(s16(instruction)); }; + auto imm16u = [&] { return 
immediate(u16(instruction), 16L); }; + auto jump = [&] { return immediate(address + 4 & 0xf000'0000 | (instruction & 0x03ff'ffff) << 2); }; + auto branch = [&] { return immediate(address + 4 + (s16(instruction) << 2)); }; + auto offset = [&] { return ipuRegisterIndex(instruction >> 21 & 31, s16(instruction)); }; + + auto ALU = [&](string_view name) -> vector { + return {name, rtName(), rsValue(), immediate(u16(instruction))}; + }; + + auto ADDI = [&](string_view add, string_view sub, string_view mov) -> vector { + if(!(instruction >> 21 & 31)) return {mov, rtName(), immediate(s16(instruction), 32L)}; + return {s16(instruction) >= 0 ? add : sub, rtName(), rsValue(), immediate(abs(s16(instruction)))}; + }; + + auto BRANCH1 = [&](string_view name) -> vector { + return {name, rsValue(), branch()}; + }; + + auto BRANCH2 = [&](string_view name) -> vector { + return {name, rsValue(), rtValue(), branch()}; + }; + + auto CACHE = [&](string_view name) -> vector { + auto operation = instruction >> 16 & 31; + string type = "reserved"; + switch(operation) { + case 0x00: type = "code(IndexInvalidate)"; break; + case 0x04: type = "code(IndexLoadTag)"; break; + case 0x08: type = "code(IndexStoreTag)"; break; + case 0x10: type = "code(HitInvalidate)"; break; + case 0x14: type = "code(Fill)"; break; + case 0x18: type = "code(HitWriteBack)"; break; + case 0x01: type = "data(IndexWriteBackInvalidate)"; break; + case 0x05: type = "data(IndexLoadTag)"; break; + case 0x09: type = "data(IndexStoreTag)"; break; + case 0x0d: type = "data(CreateDirtyExclusive)"; break; + case 0x11: type = "data(HitInvalidate)"; break; + case 0x15: type = "data(HitWriteBackInvalidate)"; break; + case 0x19: type = "data(HitWriteBack)"; break; + default: type ={"reserved(0x", hex(operation, 2L), ")"}; break; + } + return {name, type, offset()}; + }; + + auto JUMP = [&](string_view name) -> vector { + return {name, jump()}; + }; + + auto LOAD = [&](string_view name) -> vector { + return {name, rtName(), 
offset()}; + }; + + auto STORE = [&](string_view name) -> vector { + return {name, rtValue(), offset()}; + }; + + switch(instruction >> 26) { + case 0x00: return SPECIAL(); + case 0x01: return REGIMM(); + case 0x02: return JUMP("j"); + case 0x03: return JUMP("jal"); + case 0x04: return BRANCH2("beq"); + case 0x05: return BRANCH2("bne"); + case 0x06: return BRANCH1("blez"); + case 0x07: return BRANCH1("bgtz"); + case 0x08: return ADDI("addi", "subi", "li"); + case 0x09: return ADDI("addiu", "subiu", "liu"); + case 0x0a: return ALU("slti"); + case 0x0b: return ALU("sltiu"); + case 0x0c: return ALU("andi"); + case 0x0d: return ALU("ori"); + case 0x0e: return ALU("xori"); + case 0x0f: return {"lui", rtName(), imm16u()}; + case 0x10: return SCC(); + case 0x11: return FPU(); + case 0x12: break; //COP2 + case 0x13: break; //COP3 + case 0x14: return BRANCH2("beql"); + case 0x15: return BRANCH2("bnel"); + case 0x16: return BRANCH1("blezl"); + case 0x17: return BRANCH1("bgtzl"); + case 0x18: return ADDI("daddi", "dsubi", "dli"); + case 0x19: return ADDI("daddiu", "dsubiu", "dliu"); + case 0x1a: return LOAD("ldl"); + case 0x1b: return LOAD("ldr"); + case 0x1c: break; + case 0x1d: break; + case 0x1e: break; + case 0x1f: break; + case 0x20: return LOAD("lb"); + case 0x21: return LOAD("lh"); + case 0x22: return LOAD("lwl"); + case 0x23: return LOAD("lw"); + case 0x24: return LOAD("lbu"); + case 0x25: return LOAD("lhu"); + case 0x26: return LOAD("lwr"); + case 0x27: return LOAD("lwu"); + case 0x28: return STORE("sb"); + case 0x29: return STORE("sh"); + case 0x2a: return STORE("swl"); + case 0x2b: return STORE("sw"); + case 0x2c: return STORE("sdl"); + case 0x2d: return STORE("sdr"); + case 0x2e: return STORE("swr"); + case 0x2f: return CACHE("cache"); + case 0x30: return LOAD("ll"); + case 0x31: return {"lwc1", ftName(), offset()}; + case 0x32: break; //LWC2 + case 0x33: break; //LWC3 + case 0x34: return LOAD("lld"); + case 0x35: return {"ldc1", ftName(), offset()}; + case 0x36: 
break; //LDC2 + case 0x37: return LOAD("ld"); + case 0x38: return STORE("sc"); + case 0x39: return {"swc1", ftValue(), offset()}; + case 0x3a: break; //SWC2 + case 0x3b: break; //SWC3 + case 0x3c: return STORE("scd"); + case 0x3d: return {"sdc1", ftValue(), offset()}; + case 0x3e: break; //SDC2 + case 0x3f: return STORE("sd"); + } + return {}; +} + +auto CPU::Disassembler::SPECIAL() -> vector { + auto shift = [&] { return string{instruction >> 6 & 31}; }; + auto rdName = [&] { return ipuRegisterName (instruction >> 11 & 31); }; + auto rdValue = [&] { return ipuRegisterValue(instruction >> 11 & 31); }; + auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto rsValue = [&] { return ipuRegisterValue(instruction >> 21 & 31); }; + + auto ALU = [&](string_view name, string_view by) -> vector { + return {name, rdName(), rtValue(), by}; + }; + + auto JALR = [&](string_view name) -> vector { + if((instruction >> 11 & 31) == 31) return {name, rsValue()}; + return {name, rdName(), rsValue()}; + }; + + auto REG = [&](string_view name) -> vector { + return {name, rdName(), rsValue(), rtValue()}; + }; + + auto ST = [&](string_view name) -> vector { + return {name, rsValue(), rtValue()}; + }; + + switch(instruction & 0x3f) { + case 0x00: return ALU("sll", shift()); + case 0x01: break; + case 0x02: return ALU("srl", shift()); + case 0x03: return ALU("sra", shift()); + case 0x04: return ALU("sllv", rsValue()); + case 0x05: break; + case 0x06: return ALU("srlv", rsValue()); + case 0x07: return ALU("srav", rsValue()); + case 0x08: return {"jr", rsValue()}; + case 0x09: return JALR("jalr"); + case 0x0a: break; + case 0x0b: break; + case 0x0c: return {"syscall"}; + case 0x0d: return {"break"}; + case 0x0e: break; + case 0x0f: return {"sync"}; + case 0x10: return {"mfhi", rdName(), {"hi", hint("{$", hex(self.ipu.hi.u64, 8L), "}")}}; + case 0x11: return {"mthi", rsValue(), "hi"}; + case 0x12: return {"mflo", rdName(), {"lo", hint("{$", hex(self.ipu.lo.u64, 8L), 
"}")}}; + case 0x13: return {"mtlo", rsValue(), "lo"}; + case 0x14: return ALU("dsllv", rsValue()); + case 0x15: break; + case 0x16: return ALU("dsrlv", rsValue()); + case 0x17: return ALU("dsrav", rsValue()); + case 0x18: return ST("mult"); + case 0x19: return ST("multu"); + case 0x1a: return ST("div"); + case 0x1b: return ST("divu"); + case 0x1c: return ST("dmult"); + case 0x1d: return ST("dmultu"); + case 0x1e: return ST("ddiv"); + case 0x1f: return ST("ddivu"); + case 0x20: return REG("add"); + case 0x21: return REG("addu"); + case 0x22: return REG("sub"); + case 0x23: return REG("subu"); + case 0x24: return REG("and"); + case 0x25: return REG("or"); + case 0x26: return REG("xor"); + case 0x27: return REG("nor"); + case 0x28: break; + case 0x29: break; + case 0x2a: return REG("slt"); + case 0x2b: return REG("sltu"); + case 0x2c: return REG("dadd"); + case 0x2d: return REG("daddu"); + case 0x2e: return REG("dsub"); + case 0x2f: return REG("dsubu"); + case 0x30: return ST("tge"); + case 0x31: return ST("tgeu"); + case 0x32: return ST("tlt"); + case 0x33: return ST("tltu"); + case 0x34: return ST("teq"); + case 0x35: break; + case 0x36: return ST("tne"); + case 0x37: break; + case 0x38: return ALU("dsll", shift()); + case 0x39: break; + case 0x3a: return ALU("dsrl", shift()); + case 0x3b: return ALU("dsra", shift()); + case 0x3c: return ALU("dsll32", shift()); + case 0x3d: break; + case 0x3e: return ALU("dsrl32", shift()); + case 0x3f: return ALU("dsra32", shift()); + } + + return {}; +} + +auto CPU::Disassembler::REGIMM() -> vector { + auto rsValue = [&] { return ipuRegisterValue(instruction >> 21 & 31); }; + auto imm16i = [&] { return immediate(s16(instruction)); }; + auto branch = [&] { return immediate(address + 4 + (s16(instruction) << 2)); }; + + auto BRANCH = [&](string_view name) -> vector { + return {name, rsValue(), branch()}; + }; + + auto TRAP = [&](string_view name) -> vector { + return {name, rsValue(), imm16i()}; + }; + + switch(instruction >> 16 & 
0x1f) { + case 0x00: return BRANCH("bltz"); + case 0x01: return BRANCH("bgez"); + case 0x02: return BRANCH("bltzl"); + case 0x03: return BRANCH("bgezl"); + case 0x04: break; + case 0x05: break; + case 0x06: break; + case 0x07: break; + case 0x08: return TRAP("tgei"); + case 0x09: return TRAP("tgeiu"); + case 0x0a: return TRAP("tlti"); + case 0x0b: return TRAP("tltiu"); + case 0x0c: return TRAP("teqi"); + case 0x0d: break; + case 0x0e: return TRAP("tnei"); + case 0x0f: break; + case 0x10: return BRANCH("bltzal"); + case 0x11: return BRANCH("bgezal"); + case 0x12: return BRANCH("bltzall"); + case 0x13: return BRANCH("bgezall"); + case 0x14: break; + case 0x15: break; + case 0x16: break; + case 0x17: break; + case 0x18: break; + case 0x19: break; + case 0x1a: break; + case 0x1b: break; + case 0x1c: break; + case 0x1d: break; + case 0x1e: break; + case 0x1f: break; + } + + return {}; +} + +auto CPU::Disassembler::SCC() -> vector { + auto rtName = [&] { return ipuRegisterName (instruction >> 16 & 31); }; + auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto sdName = [&] { return sccRegisterName (instruction >> 11 & 31); }; + auto sdValue = [&] { return sccRegisterValue(instruction >> 11 & 31); }; + auto branch = [&] { return immediate(address + 4 + (s16(instruction) << 2)); }; + + switch(instruction >> 21 & 0x1f) { + case 0x00: return {"mfc0", rtName(), sdValue()}; + case 0x01: return {"dmfc0", rtName(), sdValue()}; + case 0x02: break; //CFC0 + case 0x04: return {"mtc0", rtValue(), sdName()}; + case 0x05: return {"dmtc0", rtValue(), sdName()}; + case 0x06: break; //CTC0 + case 0x08: break; //BC0 + } + if(!(instruction >> 25 & 1)) return {}; + switch(instruction & 0x3f) { + case 0x01: return {"tlbr"}; + case 0x02: return {"tlbwi"}; + case 0x06: return {"tlbwr"}; + case 0x08: return {"tlbp"}; + case 0x18: return {"eret"}; + } + + return {}; +} + +auto CPU::Disassembler::FPU() -> vector { + auto rtName = [&] { return ipuRegisterName (instruction 
>> 16 & 31); }; + auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto rdName = [&] { return fpuRegisterName (instruction >> 11 & 31); }; + auto rdValue = [&] { return fpuRegisterValue(instruction >> 11 & 31); }; + auto cdName = [&] { return ccrRegisterName (instruction >> 11 & 31); }; + auto cdValue = [&] { return ccrRegisterValue(instruction >> 11 & 31); }; //todo + auto branch = [&] { return immediate(address + 4 + (s16(instruction) << 2)); }; + + switch(instruction >> 21 & 0x1f) { + case 0x00: return {"mfc1", rtName(), rdValue()}; + case 0x01: return {"dmfc1", rtName(), rdValue()}; + case 0x02: return {"cfc1", rtName(), cdValue()}; + case 0x04: return {"mtc1", rtValue(), rdName()}; + case 0x05: return {"dmtc1", rtValue(), rdName()}; + case 0x06: return {"ctc1", rtValue(), cdName()}; + case 0x08: switch(instruction >> 16 & 3) { + case 0x00: return {"bc1f", branch()}; + case 0x01: return {"bc1t", branch()}; + case 0x02: return {"bc1fl", branch()}; + case 0x03: return {"bc1tl", branch()}; + } + } + if(!(instruction >> 25 & 1)) return {}; + + auto fdName = [&] { return fpuRegisterName (instruction >> 6 & 31); }; + auto fsValue = [&] { return fpuRegisterValue(instruction >> 11 & 31); }; + auto ftValue = [&] { return fpuRegisterValue(instruction >> 16 & 31); }; + + auto DS = [&](string_view name) -> vector { + return {name, fdName(), fsValue()}; + }; + + auto DST = [&](string_view name) -> vector { + return {name, fdName(), fsValue(), ftValue()}; + }; + + auto ST = [&](string_view name) -> vector { + return {name, fsValue(), ftValue()}; + }; + + bool s = (instruction & 1 << 21) == 0; + bool i = (instruction & 1 << 23) != 0; + + switch(instruction & 0x3f) { + case 0x00: return DST(s ? "adds" : "addd" ); + case 0x01: return DST(s ? "subs" : "subd" ); + case 0x02: return DST(s ? "muls" : "muld" ); + case 0x03: return DST(s ? "divs" : "divd" ); + case 0x04: return DS (s ? "sqrts" : "sqrtd" ); + case 0x05: return DS (s ? 
"abss" : "absd" ); + case 0x06: return DS (s ? "movs" : "movd" ); + case 0x07: return DS (s ? "negs" : "negd" ); + case 0x08: return DS (s ? "roundls" : "roundld"); + case 0x09: return DS (s ? "truncls" : "truncld"); + case 0x0a: return DS (s ? "ceills" : "ceilld" ); + case 0x0b: return DS (s ? "floorls" : "floorld"); + case 0x0c: return DS (s ? "roundws" : "roundwd"); + case 0x0d: return DS (s ? "truncws" : "truncwd"); + case 0x0e: return DS (s ? "ceilws" : "ceilwd" ); + case 0x0f: return DS (s ? "floorws" : "floorwd"); + case 0x20: return DS (i ? (s ? "cvtsw" : "cvtsl") : "cvtsd"); + case 0x21: return DS (i ? (s ? "cvtdw" : "cvtdl") : "cvtds"); + case 0x24: return DS (s ? "cvtws" : "cvtwd" ); + case 0x25: return DS (s ? "cvtls" : "cvtld" ); + case 0x30: return ST(s ? "cfs" : "cfd" ); + case 0x31: return ST(s ? "cuns" : "cund" ); + case 0x32: return ST(s ? "ceqs" : "ceqd" ); + case 0x33: return ST(s ? "cueqs" : "cueqd" ); + case 0x34: return ST(s ? "colts" : "coltd" ); + case 0x35: return ST(s ? "cults" : "cultd" ); + case 0x36: return ST(s ? "coles" : "coled" ); + case 0x37: return ST(s ? "cules" : "culed" ); + case 0x38: return ST(s ? "csfs" : "csfd" ); + case 0x39: return ST(s ? "cngles" : "cngled"); + case 0x3a: return ST(s ? "cseqs" : "cseqd" ); + case 0x3b: return ST(s ? "cngls" : "cngld" ); + case 0x3c: return ST(s ? "clts" : "cltd" ); + case 0x3d: return ST(s ? "cnges" : "cnged" ); + case 0x3e: return ST(s ? "cles" : "cled" ); + case 0x3f: return ST(s ? 
"cngts" : "cngtd" ); + } + + return {}; +} + +auto CPU::Disassembler::immediate(s64 value, u32 bits) const -> string { + if(value < 0) return {"-$", hex(-value, bits >> 2)}; + return {"$", hex(value, bits >> 2)}; +}; + +auto CPU::Disassembler::ipuRegisterName(u32 index) const -> string { + static const string registers[32] = { + "0", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", + }; + return registers[index]; +} + +auto CPU::Disassembler::ipuRegisterValue(u32 index) const -> string { + if(index && showValues) return {ipuRegisterName(index), hint("{$", hex(self.ipu.r[index].u64, 8L), "}")}; + return ipuRegisterName(index); +} + +auto CPU::Disassembler::ipuRegisterIndex(u32 index, s16 offset) const -> string { + string adjust; + if(offset >= 0) adjust = {"+$", hex( offset)}; + if(offset < 0) adjust = {"-$", hex(-offset)}; + if(index && showValues) return {ipuRegisterName(index), adjust, hint("{$", hex(self.ipu.r[index].u64 + offset, 8L), "}")}; + return {ipuRegisterName(index), adjust}; +} + +auto CPU::Disassembler::sccRegisterName(u32 index) const -> string { + static const string registers[32] = { + "Index", "Random", "EntryLo0", "EntryLo1", + "Context", "PageMask", "Wired", "Unused7", + "BadVAddr", "Count", "EntryHi", "Compare", + "Status", "Cause", "EPC", "PrID", + "Config", "LLAddr", "WatchLo", "WatchHi", + "XContext", "Unused21", "Unused22", "Unused23", + "Unused24", "Unused25", "ParityError", "CacheError", + "TagLo", "TagHi", "ErrorEPC", "Unused31", + }; + return registers[index]; +} + +auto CPU::Disassembler::sccRegisterValue(u32 index) const -> string { + if(showValues) return {sccRegisterName(index), hint("{$", hex(self.getControlRegister(index), 8L), "}")}; + return sccRegisterName(index); +} + +auto CPU::Disassembler::fpuRegisterName(u32 index) const -> string { + static const string registers[32] = { + "f0", 
"f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + return registers[index]; +} + +auto CPU::Disassembler::fpuRegisterValue(u32 index) const -> string { + bool f32 = (instruction & 1 << 21) == 0; + bool f64 = (instruction & 1 << 21) != 0; + if(f32 && showValues) return {fpuRegisterName(index), hint("{", self.fpu.r[index].f32, "}")}; + if(f64 && showValues) return {fpuRegisterName(index), hint("{", self.fpu.r[index].f64, "}")}; + return fpuRegisterName(index); +} + +auto CPU::Disassembler::ccrRegisterName(u32 index) const -> string { + return {"ccr", index}; +} + +auto CPU::Disassembler::ccrRegisterValue(u32 index) const -> string { + if(showValues) return {ccrRegisterName(index), hint("{$", hex(self.getControlRegisterFPU(index)), "}")}; + return ccrRegisterName(index); +} + +template +auto CPU::Disassembler::hint(P&&... p) const -> string { + if(showColors) return {"\e[0m\e[37m", forward

(p)..., "\e[0m"}; + return {forward

(p)...}; +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/exceptions.cpp b/waterbox/ares64/ares/ares/n64/cpu/exceptions.cpp new file mode 100644 index 0000000000..74fa72272f --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/exceptions.cpp @@ -0,0 +1,52 @@ +auto CPU::Exception::trigger(u32 code, u32 coprocessor, bool tlbMiss) -> void { + self.debugger.exception(code); + + u64 vectorBase = !self.scc.status.vectorLocation ? 0x8000'0000 : 0xbfc0'0200; + if(self.context.bits == 64) vectorBase = (s32)vectorBase; + + u16 vectorOffset = 0x0180; + if(tlbMiss) { + //use special vector offset for TLB load/store miss exceptions when EXL=0 + if(!self.scc.status.exceptionLevel) { + if(self.context.bits == 32) vectorOffset = 0x0000; + if(self.context.bits == 64) vectorOffset = 0x0080; + } + } + + if(!self.scc.status.exceptionLevel) { + self.scc.epc = self.ipu.pc; + self.scc.status.exceptionLevel = 1; + self.scc.cause.exceptionCode = code; + self.scc.cause.coprocessorError = coprocessor; + if(self.scc.cause.branchDelay = self.branch.inDelaySlot()) self.scc.epc -= 4; + } else { + self.scc.cause.exceptionCode = code; + self.scc.cause.coprocessorError = coprocessor; + } + + self.ipu.pc = vectorBase + vectorOffset; + self.branch.exception(); + self.context.setMode(); +} + +auto CPU::Exception::interrupt() -> void { trigger( 0); } +auto CPU::Exception::tlbModification() -> void { trigger( 1); } +auto CPU::Exception::tlbLoadInvalid() -> void { trigger( 2, 0, 0); } +auto CPU::Exception::tlbLoadMiss() -> void { trigger( 2, 0, 1); } +auto CPU::Exception::tlbStoreInvalid() -> void { trigger( 3, 0, 0); } +auto CPU::Exception::tlbStoreMiss() -> void { trigger( 3, 0, 1); } +auto CPU::Exception::addressLoad() -> void { trigger( 4); } +auto CPU::Exception::addressStore() -> void { trigger( 5); } +auto CPU::Exception::busInstruction() -> void { trigger( 6); } +auto CPU::Exception::busData() -> void { trigger( 7); } +auto CPU::Exception::systemCall() -> void { trigger( 8); } +auto 
CPU::Exception::breakpoint() -> void { trigger( 9); } +auto CPU::Exception::reservedInstruction() -> void { trigger(10); } +auto CPU::Exception::coprocessor0() -> void { trigger(11, 0); } +auto CPU::Exception::coprocessor1() -> void { trigger(11, 1); } +auto CPU::Exception::coprocessor2() -> void { trigger(11, 2); } +auto CPU::Exception::coprocessor3() -> void { trigger(11, 3); } +auto CPU::Exception::arithmeticOverflow() -> void { trigger(12); } +auto CPU::Exception::trap() -> void { trigger(13); } +auto CPU::Exception::floatingPoint() -> void { trigger(15); } +auto CPU::Exception::watchAddress() -> void { trigger(23); } diff --git a/waterbox/ares64/ares/ares/n64/cpu/icache.cpp b/waterbox/ares64/ares/ares/n64/cpu/icache.cpp new file mode 100644 index 0000000000..42b5a20574 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/icache.cpp @@ -0,0 +1,70 @@ +auto CPU::InstructionCache::Line::hit(u32 address) const -> bool { + return valid && tag == (address & ~0xfff); +} + +auto CPU::InstructionCache::Line::fill(u32 address) -> void { + cpu.step(48); + valid = 1; + tag = address & ~0xfff; + words[0] = bus.read(tag | index | 0x00); + words[1] = bus.read(tag | index | 0x04); + words[2] = bus.read(tag | index | 0x08); + words[3] = bus.read(tag | index | 0x0c); + words[4] = bus.read(tag | index | 0x10); + words[5] = bus.read(tag | index | 0x14); + words[6] = bus.read(tag | index | 0x18); + words[7] = bus.read(tag | index | 0x1c); +} + +auto CPU::InstructionCache::Line::writeBack() -> void { + cpu.step(48); + bus.write(tag | index | 0x00, words[0]); + bus.write(tag | index | 0x04, words[1]); + bus.write(tag | index | 0x08, words[2]); + bus.write(tag | index | 0x0c, words[3]); + bus.write(tag | index | 0x10, words[4]); + bus.write(tag | index | 0x14, words[5]); + bus.write(tag | index | 0x18, words[6]); + bus.write(tag | index | 0x1c, words[7]); +} + +auto CPU::InstructionCache::Line::read(u32 address) const -> u32 { + return words[address >> 2 & 7]; +} + +auto 
CPU::InstructionCache::line(u32 address) -> Line& { + return lines[address >> 5 & 0x1ff]; +} + +//used by the recompiler to simulate instruction cache fetch timing +auto CPU::InstructionCache::step(u32 address) -> void { + auto& line = this->line(address); + if(!line.hit(address)) { + cpu.step(48); + line.valid = 1; + line.tag = address & ~0xfff; + } else { + cpu.step(2); + } +} + +//used by the interpreter to fully emulate the instruction cache +auto CPU::InstructionCache::fetch(u32 address) -> u32 { + auto& line = this->line(address); + if(!line.hit(address)) { + line.fill(address); + } else { + cpu.step(2); + } + return line.read(address); +} + +auto CPU::InstructionCache::power(bool reset) -> void { + u32 index = 0; + for(auto& line : lines) { + line.valid = 0; + line.tag = 0; + line.index = index++ << 5 & 0xfe0; + for(auto& word : line.words) word = 0; + } +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-fpu.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-fpu.cpp new file mode 100644 index 0000000000..54123738e2 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/interpreter-fpu.cpp @@ -0,0 +1,588 @@ +auto CPU::FPU::setFloatingPointMode(bool mode) -> void { + if(mode == 0) { + //32x64-bit -> 16x64-bit + } else { + //16x64-bit -> 32x64-bit + } +} + +template<> auto CPU::fgr(u32 index) -> s32& { + if(scc.status.floatingPointMode) { + return fpu.r[index].s32; + } else if(index & 1) { + return fpu.r[index & ~1].s32h; + } else { + return fpu.r[index & ~1].s32; + } +} + +template<> auto CPU::fgr(u32 index) -> u32& { + return (u32&)fgr(index); +} + +template<> auto CPU::fgr(u32 index) -> f32& { + if(scc.status.floatingPointMode) { + return fpu.r[index].f32; + } else if(index & 1) { + return fpu.r[index & ~1].f32h; + } else { + return fpu.r[index & ~1].f32; + } +} + +template<> auto CPU::fgr(u32 index) -> s64& { + if(scc.status.floatingPointMode) { + return fpu.r[index].s64; + } else { + return fpu.r[index & ~1].s64; + } +} + +template<> auto 
CPU::fgr(u32 index) -> u64& { + return (u64&)fgr(index); +} + +template<> auto CPU::fgr(u32 index) -> f64& { + if(scc.status.floatingPointMode) { + return fpu.r[index].f64; + } else { + return fpu.r[index & ~1].f64; + } +} + +auto CPU::getControlRegisterFPU(n5 index) -> u32 { + n32 data; + switch(index) { + case 0: //coprocessor revision identifier + data.bit(0, 7) = fpu.coprocessor.revision; + data.bit(8,15) = fpu.coprocessor.implementation; + break; + case 31: //control / status register + data.bit( 0) = fpu.csr.roundMode.bit(0); + data.bit( 1) = fpu.csr.roundMode.bit(1); + data.bit( 2) = fpu.csr.flag.inexact; + data.bit( 3) = fpu.csr.flag.underflow; + data.bit( 4) = fpu.csr.flag.overflow; + data.bit( 5) = fpu.csr.flag.divisionByZero; + data.bit( 6) = fpu.csr.flag.invalidOperation; + data.bit( 7) = fpu.csr.enable.inexact; + data.bit( 8) = fpu.csr.enable.underflow; + data.bit( 9) = fpu.csr.enable.overflow; + data.bit(10) = fpu.csr.enable.divisionByZero; + data.bit(11) = fpu.csr.enable.invalidOperation; + data.bit(12) = fpu.csr.cause.inexact; + data.bit(13) = fpu.csr.cause.underflow; + data.bit(14) = fpu.csr.cause.overflow; + data.bit(15) = fpu.csr.cause.divisionByZero; + data.bit(16) = fpu.csr.cause.invalidOperation; + data.bit(17) = fpu.csr.cause.unimplementedOperation; + data.bit(23) = fpu.csr.compare; + data.bit(24) = fpu.csr.flushed; + break; + } + return data; +} + +auto CPU::setControlRegisterFPU(n5 index, n32 data) -> void { + //read-only variables are defined but commented out for documentation purposes + switch(index) { + case 0: //coprocessor revision identifier + //fpu.coprocessor.revision = data.bit(0, 7); + //fpu.coprocessor.implementation = data.bit(8,15); + break; + case 31: {//control / status register + u32 roundModePrevious = fpu.csr.roundMode; + fpu.csr.roundMode.bit(0) = data.bit( 0); + fpu.csr.roundMode.bit(1) = data.bit( 1); + fpu.csr.flag.inexact = data.bit( 2); + fpu.csr.flag.underflow = data.bit( 3); + fpu.csr.flag.overflow = data.bit( 4); 
+ fpu.csr.flag.divisionByZero = data.bit( 5); + fpu.csr.flag.invalidOperation = data.bit( 6); + fpu.csr.enable.inexact = data.bit( 7); + fpu.csr.enable.underflow = data.bit( 8); + fpu.csr.enable.overflow = data.bit( 9); + fpu.csr.enable.divisionByZero = data.bit(10); + fpu.csr.enable.invalidOperation = data.bit(11); + fpu.csr.cause.inexact = data.bit(12); + fpu.csr.cause.underflow = data.bit(13); + fpu.csr.cause.overflow = data.bit(14); + fpu.csr.cause.divisionByZero = data.bit(15); + fpu.csr.cause.invalidOperation = data.bit(16); + fpu.csr.cause.unimplementedOperation = data.bit(17); + fpu.csr.compare = data.bit(23); + fpu.csr.flushed = data.bit(24); + + if(fpu.csr.roundMode != roundModePrevious) { + switch(fpu.csr.roundMode) { + case 0: fesetround(FE_TONEAREST); break; + case 1: fesetround(FE_TOWARDZERO); break; + case 2: fesetround(FE_UPWARD); break; + case 3: fesetround(FE_DOWNWARD); break; + } + } + } break; + } +} + +#define CF fpu.csr.compare +#define FD(type) fgr(fd) +#define FS(type) fgr(fs) +#define FT(type) fgr(ft) + +auto CPU::BC1(bool value, bool likely, s16 imm) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + if(CF == value) branch.take(ipu.pc + 4 + (imm << 2)); + else if(likely) branch.discard(); +} + +auto CPU::CFC1(r64& rt, u8 rd) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + rt.u64 = s32(getControlRegisterFPU(rd)); +} + +auto CPU::CTC1(cr64& rt, u8 rd) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + setControlRegisterFPU(rd, rt.u32); +} + +auto CPU::DMFC1(r64& rt, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + rt.u64 = FS(u64); +} + +auto CPU::DMTC1(cr64& rt, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FS(u64) = rt.u64; +} + +auto CPU::FABS_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = 
fabs(FS(f32)); +} + +auto CPU::FABS_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = fabs(FS(f64)); +} + +auto CPU::FADD_S(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = FS(f32) + FT(f32); +} + +auto CPU::FADD_D(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = FS(f64) + FT(f64); +} + +auto CPU::FCEIL_L_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = ceil(FS(f32)); +} + +auto CPU::FCEIL_L_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = ceil(FS(f64)); +} + +auto CPU::FCEIL_W_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = ceil(FS(f32)); +} + +auto CPU::FCEIL_W_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = ceil(FS(f64)); +} + +#define XORDERED(type, value, quiet) \ + if(isnan(FS(type)) || isnan(FT(type))) { \ + if constexpr(!quiet) { \ + fpu.csr.cause.invalidOperation = 1; \ + if(fpu.csr.enable.invalidOperation) return exception.floatingPoint(); \ + fpu.csr.flag.invalidOperation = 1; \ + } \ + CF = value; \ + return; \ + } +#define ORDERED(type, value) XORDERED(type, value, 0) +#define UNORDERED(type, value) XORDERED(type, value, 1) + +auto CPU::FC_EQ_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 0); CF = FS(f32) == FT(f32); +} + +auto CPU::FC_EQ_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 0); CF = FS(f64) == FT(f64); +} + +auto CPU::FC_F_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 0); CF = 0; +} + +auto CPU::FC_F_D(u8 fs, u8 ft) -> 
void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 0); CF = 0; +} + +auto CPU::FC_LE_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 0); CF = FS(f32) <= FT(f32); +} + +auto CPU::FC_LE_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f64, 0); CF = FS(f64) <= FT(f64); +} + +auto CPU::FC_LT_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 0); CF = FS(f32) < FT(f32); +} + +auto CPU::FC_LT_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f64, 0); CF = FS(f64) < FT(f64); +} + +auto CPU::FC_NGE_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 1); CF = FS(f32) < FT(f32); +} + +auto CPU::FC_NGE_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f64, 1); CF = FS(f64) < FT(f64); +} + +auto CPU::FC_NGL_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 1); CF = FS(f32) == FT(f32); +} + +auto CPU::FC_NGL_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f64, 1); CF = FS(f64) == FT(f64); +} + +auto CPU::FC_NGLE_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 1); CF = 0; +} + +auto CPU::FC_NGLE_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f64, 1); CF = 0; +} + +auto CPU::FC_NGT_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 1); CF = FS(f32) <= FT(f32); +} + +auto CPU::FC_NGT_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + 
ORDERED(f64, 1); CF = FS(f64) <= FT(f64); +} + +auto CPU::FC_OLE_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 0); CF = FS(f32) <= FT(f32); +} + +auto CPU::FC_OLE_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 0); CF = FS(f64) <= FT(f64); +} + +auto CPU::FC_OLT_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 0); CF = FS(f32) < FT(f32); +} + +auto CPU::FC_OLT_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 0); CF = FS(f64) < FT(f64); +} + +auto CPU::FC_SEQ_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 0); CF = FS(f32) == FT(f32); +} + +auto CPU::FC_SEQ_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f64, 0); CF = FS(f64) == FT(f64); +} + +auto CPU::FC_SF_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f32, 0); CF = 0; +} + +auto CPU::FC_SF_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + ORDERED(f64, 0); CF = 0; +} + +auto CPU::FC_UEQ_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 1); CF = FS(f32) == FT(f32); +} + +auto CPU::FC_UEQ_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 1); CF = FS(f64) == FT(f64); +} + +auto CPU::FC_ULE_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 1); CF = FS(f32) <= FT(f32); +} + +auto CPU::FC_ULE_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 1); CF = FS(f64) <= FT(f64); +} + +auto 
CPU::FC_ULT_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 1); CF = FS(f32) < FT(f32); +} + +auto CPU::FC_ULT_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 1); CF = FS(f64) < FT(f64); +} + +auto CPU::FC_UN_S(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f32, 1); CF = 0; +} + +auto CPU::FC_UN_D(u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + UNORDERED(f64, 1); CF = 0; +} + +#undef ORDERED +#undef UNORDERED + +auto CPU::FCVT_S_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = FS(f64); +} + +auto CPU::FCVT_S_W(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = FS(s32); +} + +auto CPU::FCVT_S_L(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = FS(s64); +} + +auto CPU::FCVT_D_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = FS(f32); +} + +auto CPU::FCVT_D_W(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = FS(s32); +} + +auto CPU::FCVT_D_L(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = FS(s64); +} + +auto CPU::FCVT_L_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = FS(f32); +} + +auto CPU::FCVT_L_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = FS(f64); +} + +auto CPU::FCVT_W_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = FS(f32); +} + +auto CPU::FCVT_W_D(u8 fd, u8 fs) -> void { + 
if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = FS(f64); +} + +auto CPU::FDIV_S(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + if(!FT(f32)) { + fpu.csr.cause.divisionByZero = 1; + if(fpu.csr.enable.divisionByZero) return exception.floatingPoint(); + fpu.csr.flag.divisionByZero = 1; + } + FD(f32) = FS(f32) / FT(f32); +} + +auto CPU::FDIV_D(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + if(!FT(f64)) { + fpu.csr.cause.divisionByZero = 1; + if(fpu.csr.enable.divisionByZero) return exception.floatingPoint(); + fpu.csr.flag.divisionByZero = 1; + } + FD(f64) = FS(f64) / FT(f64); +} + +auto CPU::FFLOOR_L_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = floor(FS(f32)); +} + +auto CPU::FFLOOR_L_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = floor(FS(f64)); +} + +auto CPU::FFLOOR_W_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = floor(FS(f32)); +} + +auto CPU::FFLOOR_W_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = floor(FS(f64)); +} + +auto CPU::FMOV_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = FS(f32); +} + +auto CPU::FMOV_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = FS(f64); +} + +auto CPU::FMUL_S(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = FS(f32) * FT(f32); +} + +auto CPU::FMUL_D(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = FS(f64) * FT(f64); +} + +auto CPU::FNEG_S(u8 fd, u8 fs) -> void { + 
if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = -FS(f32); +} + +auto CPU::FNEG_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = -FS(f64); +} + +auto CPU::FROUND_L_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = nearbyint(FS(f32)); +} + +auto CPU::FROUND_L_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = nearbyint(FS(f64)); +} + +auto CPU::FROUND_W_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = nearbyint(FS(f32)); +} + +auto CPU::FROUND_W_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = nearbyint(FS(f64)); +} + +auto CPU::FSQRT_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = sqrt(FS(f32)); +} + +auto CPU::FSQRT_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = sqrt(FS(f64)); +} + +auto CPU::FSUB_S(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f32) = FS(f32) - FT(f32); +} + +auto CPU::FSUB_D(u8 fd, u8 fs, u8 ft) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(f64) = FS(f64) - FT(f64); +} + +auto CPU::FTRUNC_L_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = FS(f32) < 0 ? ceil(FS(f32)) : floor(FS(f32)); +} + +auto CPU::FTRUNC_L_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s64) = FS(f64) < 0 ? ceil(FS(f64)) : floor(FS(f64)); +} + +auto CPU::FTRUNC_W_S(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = FS(f32) < 0 ? 
ceil(FS(f32)) : floor(FS(f32)); +} + +auto CPU::FTRUNC_W_D(u8 fd, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FD(s32) = FS(f64) < 0 ? ceil(FS(f64)) : floor(FS(f64)); +} + +auto CPU::LDC1(u8 ft, cr64& rs, s16 imm) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + if(auto data = read(rs.u32 + imm)) FT(u64) = *data; +} + +auto CPU::LWC1(u8 ft, cr64& rs, s16 imm) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + if(auto data = read(rs.u32 + imm)) FT(u32) = *data; +} + +auto CPU::MFC1(r64& rt, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + rt.u64 = FS(s32); +} + +auto CPU::MTC1(cr64& rt, u8 fs) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + FS(s32) = rt.u32; +} + +auto CPU::SDC1(u8 ft, cr64& rs, s16 imm) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + write(rs.u32 + imm, FT(u64)); +} + +auto CPU::SWC1(u8 ft, cr64& rs, s16 imm) -> void { + if(!scc.status.enable.coprocessor1) return exception.coprocessor1(); + write(rs.u32 + imm, FT(u32)); +} + +#undef CF +#undef FD +#undef FS +#undef FT diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp new file mode 100644 index 0000000000..b5567a53e7 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp @@ -0,0 +1,1122 @@ +#define PC ipu.pc +#define RA ipu.r[31] +#define LO ipu.lo +#define HI ipu.hi + +auto CPU::ADD(r64& rd, cr64& rs, cr64& rt) -> void { + if(~(rs.u32 ^ rt.u32) & (rs.u32 ^ rs.u32 + rt.u32) & 1 << 31) return exception.arithmeticOverflow(); + rd.u64 = s32(rs.u32 + rt.u32); +} + +auto CPU::ADDI(r64& rt, cr64& rs, s16 imm) -> void { + if(~(rs.u32 ^ imm) & (rs.u32 ^ rs.u32 + imm) & 1 << 31) return exception.arithmeticOverflow(); + rt.u64 = s32(rs.s32 + imm); +} + +auto CPU::ADDIU(r64& rt, cr64& rs, s16 imm) -> 
void { + rt.u64 = s32(rs.s32 + imm); +} + +auto CPU::ADDU(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = s32(rs.u32 + rt.u32); +} + +auto CPU::AND(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = rs.u64 & rt.u64; +} + +auto CPU::ANDI(r64& rt, cr64& rs, u16 imm) -> void { + rt.u64 = rs.u64 & imm; +} + +auto CPU::BEQ(cr64& rs, cr64& rt, s16 imm) -> void { + if(rs.u64 == rt.u64) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BEQL(cr64& rs, cr64& rt, s16 imm) -> void { + if(rs.u64 == rt.u64) branch.take(PC + 4 + (imm << 2)); + else branch.discard(); +} + +auto CPU::BGEZ(cr64& rs, s16 imm) -> void { + if(rs.s64 >= 0) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BGEZAL(cr64& rs, s16 imm) -> void { + RA.u64 = s32(PC + 8); + if(rs.s64 >= 0) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BGEZALL(cr64& rs, s16 imm) -> void { + RA.u64 = s32(PC + 8); + if(rs.s64 >= 0) branch.take(PC + 4 + (imm << 2)); + else branch.discard(); +} + +auto CPU::BGEZL(cr64& rs, s16 imm) -> void { + if(rs.s64 >= 0) branch.take(PC + 4 + (imm << 2)); + else branch.discard(); +} + +auto CPU::BGTZ(cr64& rs, s16 imm) -> void { + if(rs.s64 > 0) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BGTZL(cr64& rs, s16 imm) -> void { + if(rs.s64 > 0) branch.take(PC + 4 + (imm << 2)); + else branch.discard(); +} + +auto CPU::BLEZ(cr64& rs, s16 imm) -> void { + if(rs.s64 <= 0) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BLEZL(cr64& rs, s16 imm) -> void { + if(rs.s64 <= 0) branch.take(PC + 4 + (imm << 2)); + else branch.discard(); +} + +auto CPU::BLTZ(cr64& rs, s16 imm) -> void { + if(rs.s64 < 0) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BLTZAL(cr64& rs, s16 imm) -> void { + RA.u64 = s32(PC + 8); + if(rs.s64 < 0) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BLTZALL(cr64& rs, s16 imm) -> void { + RA.u64 = s32(PC + 8); + if(rs.s64 < 0) branch.take(PC + 4 + (imm << 2)); + else branch.discard(); +} + +auto CPU::BLTZL(cr64& rs, s16 imm) -> void { + if(rs.s64 < 0) branch.take(PC + 4 + 
(imm << 2)); + else branch.discard(); +} + +auto CPU::BNE(cr64& rs, cr64& rt, s16 imm) -> void { + if(rs.u64 != rt.u64) branch.take(PC + 4 + (imm << 2)); +} + +auto CPU::BNEL(cr64& rs, cr64& rt, s16 imm) -> void { + if(rs.u64 != rt.u64) branch.take(PC + 4 + (imm << 2)); + else branch.discard(); +} + +auto CPU::BREAK() -> void { + exception.breakpoint(); +} + +auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void { + u32 address = rs.u64 + imm; + + switch(operation) { + + case 0x00: { //icache index invalidate + auto& line = icache.line(address); + line.valid = 0; + break; + } + + case 0x04: { //icache load tag + auto& line = icache.line(address); + scc.tagLo.primaryCacheState = line.valid << 1; + scc.tagLo.physicalAddress = line.tag; + break; + } + + case 0x08: { //icache store tag + auto& line = icache.line(address); + line.valid = scc.tagLo.primaryCacheState.bit(1); + line.tag = scc.tagLo.physicalAddress; + if(scc.tagLo.primaryCacheState == 0b01) debug(unusual, "[CPU] CACHE CPCS=1"); + if(scc.tagLo.primaryCacheState == 0b11) debug(unusual, "[CPU] CACHE CPCS=3"); + break; + } + + case 0x10: { //icache hit invalidate + auto& line = icache.line(address); + if(line.hit(address)) line.valid = 0; + break; + } + + case 0x14: { //icache fill + auto& line = icache.line(address); + line.fill(address); + break; + } + + case 0x18: { //icache hit write back + auto& line = icache.line(address); + if(line.hit(address)) line.writeBack(); + break; + } + + case 0x01: { //dcache index write back invalidate + auto& line = dcache.line(address); + if(line.valid && line.dirty) line.writeBack(); + line.valid = 0; + break; + } + + case 0x05: { //dcache index load tag + auto& line = dcache.line(address); + scc.tagLo.primaryCacheState = line.valid << 1 | line.dirty << 0; + scc.tagLo.physicalAddress = line.tag; + break; + } + + case 0x09: { //dcache index store tag + auto& line = dcache.line(address); + line.valid = scc.tagLo.primaryCacheState.bit(1); + line.dirty = 
scc.tagLo.primaryCacheState.bit(0); + line.tag = scc.tagLo.physicalAddress; + if(scc.tagLo.primaryCacheState == 0b01) debug(unusual, "[CPU] CACHE DPCS=1"); + if(scc.tagLo.primaryCacheState == 0b10) debug(unusual, "[CPU] CACHE DPCS=2"); + break; + } + + case 0x0d: { //dcache create dirty exclusive + auto& line = dcache.line(address); + if(!line.hit(address) && line.dirty) line.writeBack(); + line.tag = address & ~0xfff; + line.valid = 1; + line.dirty = 1; + break; + } + + case 0x11: { //dcache hit invalidate + auto& line = dcache.line(address); + if(line.hit(address)) { + line.valid = 0; + line.dirty = 0; + } + break; + } + + case 0x15: { //dcache hit write back invalidate + auto& line = dcache.line(address); + if(line.hit(address)) { + if(line.dirty) line.writeBack(); + line.valid = 0; + } + break; + } + + case 0x19: { //dcache hit write back + auto& line = dcache.line(address); + if(line.hit(address)) { + if(line.dirty) line.writeBack(); + } + break; + } + + } +} + +auto CPU::DADD(r64& rd, cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(~(rs.u64 ^ rt.u64) & (rs.u64 ^ rs.u64 + rt.u64) & 1ull << 63) return exception.arithmeticOverflow(); + rd.u64 = rs.u64 + rt.u64; +} + +auto CPU::DADDI(r64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(~(rs.u64 ^ imm) & (rs.u64 ^ rs.u64 + imm) & 1ull << 63) return exception.arithmeticOverflow(); + rt.u64 = rs.u64 + imm; +} + +auto CPU::DADDIU(r64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rt.u64 = rs.u64 + imm; +} + +auto CPU::DADDU(r64& rd, cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rd.u64 = rs.u64 + rt.u64; +} + +auto CPU::DDIV(cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return 
exception.reservedInstruction(); + if(rt.s64) { + //cast to i128 to prevent exception on INT64_MIN / -1 + LO.u64 = s128(rs.s64) / s128(rt.s64); + HI.u64 = s128(rs.s64) % s128(rt.s64); + } else { + LO.u64 = rs.s64 < 0 ? +1 : -1; + HI.u64 = rs.s64; + } + step(69); +} + +auto CPU::DDIVU(cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(rt.u64) { + LO.u64 = rs.u64 / rt.u64; + HI.u64 = rs.u64 % rt.u64; + } else { + LO.u64 = -1; + HI.u64 = rs.u64; + } + step(69); +} + +auto CPU::DIV(cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(rt.s32) { + //cast to s64 to prevent exception on INT32_MIN / -1 + LO.u64 = s32(s64(rs.s32) / s64(rt.s32)); + HI.u64 = s32(s64(rs.s32) % s64(rt.s32)); + } else { + LO.u64 = rs.s32 < 0 ? +1 : -1; + HI.u64 = rs.s32; + } + step(37); +} + +auto CPU::DIVU(cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(rt.u32) { + LO.u64 = s32(rs.u32 / rt.u32); + HI.u64 = s32(rs.u32 % rt.u32); + } else { + LO.u64 = -1; + HI.u64 = rs.s32; + } + step(37); +} + +auto CPU::DMULT(cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + u128 result = rs.s128() * rt.s128(); + LO.u64 = result >> 0; + HI.u64 = result >> 64; + step(8); +} + +auto CPU::DMULTU(cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + u128 result = rs.u128() * rt.u128(); + LO.u64 = result >> 0; + HI.u64 = result >> 64; + step(8); +} + +auto CPU::DSLL(r64& rd, cr64& rt, u8 sa) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rd.u64 = rt.u64 << sa; +} + +auto CPU::DSLLV(r64& rd, cr64& rt, cr64& rs) -> void { + if(!context.kernelMode() && context.bits == 32) return 
exception.reservedInstruction(); + rd.u64 = rt.u64 << (rs.u32 & 63); +} + +auto CPU::DSRA(r64& rd, cr64& rt, u8 sa) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rd.u64 = rt.s64 >> sa; +} + +auto CPU::DSRAV(r64& rd, cr64& rt, cr64& rs) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rd.u64 = rt.s64 >> (rs.u32 & 63); +} + +auto CPU::DSRL(r64& rd, cr64& rt, u8 sa) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rd.u64 = rt.u64 >> sa; +} + +auto CPU::DSRLV(r64& rd, cr64& rt, cr64& rs) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rd.u64 = rt.u64 >> (rs.u32 & 63); +} + +auto CPU::DSUB(r64& rd, cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if((rs.u64 ^ rt.u64) & (rs.u64 ^ rs.u64 - rt.u64) & 1ull << 63) return exception.arithmeticOverflow(); + rd.u64 = rs.u64 - rt.u64; +} + +auto CPU::DSUBU(r64& rd, cr64& rs, cr64& rt) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + rd.u64 = rs.u64 - rt.u64; +} + +auto CPU::J(u32 imm) -> void { + branch.take((PC + 4 & 0xf000'0000) | (imm << 2)); +} + +auto CPU::JAL(u32 imm) -> void { + RA.u64 = s32(PC + 8); + branch.take((PC + 4 & 0xf000'0000) | (imm << 2)); +} + +auto CPU::JALR(r64& rd, cr64& rs) -> void { + rd.u64 = s32(PC + 8); + branch.take(rs.u32); +} + +auto CPU::JR(cr64& rs) -> void { + branch.take(rs.u32); +} + +auto CPU::LB(r64& rt, cr64& rs, s16 imm) -> void { + if(auto data = read(rs.u32 + imm)) rt.u64 = s8(*data); +} + +auto CPU::LBU(r64& rt, cr64& rs, s16 imm) -> void { + if(auto data = read(rs.u32 + imm)) rt.u64 = u8(*data); +} + +auto CPU::LD(r64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(auto 
data = read(rs.u32 + imm)) rt.u64 = *data; +} + +auto CPU::LDL(r64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + u64 address = rs.u64 + imm; + u64 data = rt.u64; + + if(context.littleEndian()) + switch(address & 7) { + case 0: + data &= 0x00ffffffffffffffull; + if(auto byte = read(address & ~7 | 7)) data |= byte() << 56; else return; + break; + case 1: + data &= 0x0000ffffffffffffull; + if(auto half = read(address & ~7 | 6)) data |= half() << 48; else return; + break; + case 2: + data &= 0x000000ffffffffffull; + if(auto byte = read(address & ~7 | 5)) data |= byte() << 56; else return; + if(auto half = read(address & ~7 | 6)) data |= half() << 40; else return; + break; + case 3: + data &= 0x00000000ffffffffull; + if(auto word = read(address & ~7 | 4)) data |= word() << 32; else return; + break; + case 4: + data &= 0x0000000000ffffffull; + if(auto byte = read(address & ~7 | 3)) data |= byte() << 56; else return; + if(auto word = read(address & ~7 | 4)) data |= word() << 24; else return; + break; + case 5: + data &= 0x000000000000ffffull; + if(auto half = read(address & ~7 | 2)) data |= half() << 48; else return; + if(auto word = read(address & ~7 | 4)) data |= word() << 16; else return; + break; + case 6: + data &= 0x00000000000000ffull; + if(auto byte = read(address & ~7 | 1)) data |= byte() << 56; else return; + if(auto half = read(address & ~7 | 2)) data |= half() << 40; else return; + if(auto word = read(address & ~7 | 4)) data |= word() << 8; else return; + break; + case 7: + data &= 0x0000000000000000ull; + if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + break; + } + + if(context.bigEndian()) + switch(address & 7) { + case 0: + data &= 0x0000000000000000ull; + if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + break; + case 1: + data &= 0x00000000000000ffull; + if(auto byte = read(address & ~7 | 1)) data |= byte() << 56; else 
return; + if(auto half = read(address & ~7 | 2)) data |= half() << 40; else return; + if(auto word = read(address & ~7 | 4)) data |= word() << 8; else return; + break; + case 2: + data &= 0x000000000000ffffull; + if(auto half = read(address & ~7 | 2)) data |= half() << 48; else return; + if(auto word = read(address & ~7 | 4)) data |= word() << 16; else return; + break; + case 3: + data &= 0x0000000000ffffffull; + if(auto byte = read(address & ~7 | 3)) data |= byte() << 56; else return; + if(auto word = read(address & ~7 | 4)) data |= word() << 24; else return; + break; + case 4: + data &= 0x00000000ffffffffull; + if(auto word = read(address & ~7 | 4)) data |= word() << 32; else return; + break; + case 5: + data &= 0x000000ffffffffffull; + if(auto byte = read(address & ~7 | 5)) data |= byte() << 56; else return; + if(auto half = read(address & ~7 | 6)) data |= half() << 40; else return; + break; + case 6: + data &= 0x0000ffffffffffffull; + if(auto half = read(address & ~7 | 6)) data |= half() << 48; else return; + break; + case 7: + data &= 0x00ffffffffffffffull; + if(auto byte = read(address & ~7 | 7)) data |= byte() << 56; else return; + break; + } + + rt.u64 = data; +} + +auto CPU::LDR(r64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + u64 address = rs.u64 + imm; + u64 data = rt.u64; + + if(context.littleEndian()) + switch(address & 7) { + case 0: + data &= 0x0000000000000000ull; + if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + break; + case 1: + data &= 0xff00000000000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 24; else return; + if(auto half = read(address & ~7 | 4)) data |= half() << 8; else return; + if(auto byte = read(address & ~7 | 6)) data |= byte() << 0; else return; + break; + case 2: + data &= 0xffff000000000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 16; else return; + if(auto half = read(address & ~7 
| 4)) data |= half() << 0; else return; + break; + case 3: + data &= 0xffffff0000000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 8; else return; + if(auto byte = read(address & ~7 | 4)) data |= byte() << 0; else return; + break; + case 4: + data &= 0xffffffff00000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 0; else return; + break; + case 5: + data &= 0xffffffffff000000ull; + if(auto half = read(address & ~7 | 0)) data |= half() << 8; else return; + if(auto byte = read(address & ~7 | 2)) data |= byte() << 0; else return; + break; + case 6: + data &= 0xffffffffffff0000ull; + if(auto half = read(address & ~7 | 0)) data |= half() << 0; else return; + break; + case 7: + data &= 0xffffffffffffff00ull; + if(auto byte = read(address & ~7 | 0)) data |= byte() << 0; else return; + break; + } + + if(context.bigEndian()) + switch(address & 7) { + case 0: + data &= 0xffffffffffffff00ull; + if(auto byte = read(address & ~7 | 0)) data |= byte() << 0; else return; + break; + case 1: + data &= 0xffffffffffff0000ull; + if(auto half = read(address & ~7 | 0)) data |= half() << 0; else return; + break; + case 2: + data &= 0xffffffffff000000ull; + if(auto half = read(address & ~7 | 0)) data |= half() << 8; else return; + if(auto byte = read(address & ~7 | 2)) data |= byte() << 0; else return; + break; + case 3: + data &= 0xffffffff00000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 0; else return; + break; + case 4: + data &= 0xffffff0000000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 8; else return; + if(auto byte = read(address & ~7 | 4)) data |= byte() << 0; else return; + break; + case 5: + data &= 0xffff000000000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 16; else return; + if(auto half = read(address & ~7 | 4)) data |= half() << 0; else return; + break; + case 6: + data &= 0xff00000000000000ull; + if(auto word = read(address & ~7 | 0)) data |= word() << 24; else return; + 
if(auto half = read(address & ~7 | 4)) data |= half() << 8; else return; + if(auto byte = read(address & ~7 | 6)) data |= byte() << 0; else return; + break; + case 7: + data &= 0x0000000000000000ull; + if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + break; + } + + rt.u64 = data; +} + +auto CPU::LH(r64& rt, cr64& rs, s16 imm) -> void { + if(auto data = read(rs.u32 + imm)) rt.u64 = s16(*data); +} + +auto CPU::LHU(r64& rt, cr64& rs, s16 imm) -> void { + if(auto data = read(rs.u32 + imm)) rt.u64 = u16(*data); +} + +auto CPU::LL(r64& rt, cr64& rs, s16 imm) -> void { + if(auto data = read(rs.u32 + imm)) { + rt.u64 = s32(*data); + scc.ll = tlb.physicalAddress >> 4; + scc.llbit = 1; + } +} + +auto CPU::LLD(r64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(auto data = read(rs.u32 + imm)) { + rt.u64 = *data; + scc.ll = tlb.physicalAddress >> 4; + scc.llbit = 1; + } +} + +auto CPU::LUI(r64& rt, u16 imm) -> void { + rt.u64 = s32(imm << 16); +} + +auto CPU::LW(r64& rt, cr64& rs, s16 imm) -> void { + if(auto data = read(rs.u32 + imm)) rt.u64 = s32(*data); +} + +auto CPU::LWL(r64& rt, cr64& rs, s16 imm) -> void { + u64 address = rs.u64 + imm; + u32 data = rt.u32; + + if(context.littleEndian()) + switch(address & 3) { + case 0: + data &= 0x00ffffff; + if(auto byte = read(address & ~3 | 3)) data |= byte() << 24; else return; + break; + case 1: + data &= 0x0000ffff; + if(auto half = read(address & ~3 | 2)) data |= half() << 16; else return; + break; + case 2: + data &= 0x000000ff; + if(auto byte = read(address & ~3 | 1)) data |= byte() << 24; else return; + if(auto half = read(address & ~3 | 2)) data |= half() << 8; else return; + break; + case 3: + data &= 0x00000000; + if(auto word = read(address & ~3 | 0)) data |= word() << 0; else return; + break; + } + + if(context.bigEndian()) + switch(address & 3) { + case 0: + data &= 0x00000000; + if(auto word = read(address & ~3 | 
0)) data |= word() << 0; else return; + break; + case 1: + data &= 0x000000ff; + if(auto byte = read(address & ~3 | 1)) data |= byte() << 24; else return; + if(auto half = read(address & ~3 | 2)) data |= half() << 8; else return; + break; + case 2: + data &= 0x0000ffff; + if(auto half = read(address & ~3 | 2)) data |= half() << 16; else return; + break; + case 3: + data &= 0x00ffffff; + if(auto byte = read(address & ~3 | 3)) data |= byte() << 24; else return; + break; + } + + rt.s64 = (s32)data; +} + +auto CPU::LWR(r64& rt, cr64& rs, s16 imm) -> void { + u64 address = rs.u64 + imm; + u32 data = rt.u32; + + if(context.littleEndian()) + switch(address & 3) { + case 0: + data &= 0x00000000; + if(auto word = read(address & ~3 | 0)) data |= word() << 0; else return; + rt.s64 = (s32)data; + break; + case 1: + data &= 0xff000000; + if(auto half = read(address & ~3 | 0)) data |= half() << 8; else return; + if(auto byte = read(address & ~3 | 2)) data |= byte() << 0; else return; + if(context.bits == 32) rt.u32 = data; + if(context.bits == 64) rt.s64 = (s32)data; + break; + case 2: + data &= 0xffff0000; + if(auto half = read(address & ~3 | 0)) data |= half() << 0; else return; + if(context.bits == 32) rt.u32 = data; + if(context.bits == 64) rt.s64 = (s32)data; + break; + case 3: + data &= 0xffffff00; + if(auto byte = read(address & ~3 | 0)) data |= byte() << 0; else return; + if(context.bits == 32) rt.u32 = data; + if(context.bits == 64) rt.s64 = (s32)data; + break; + } + + if(context.bigEndian()) + switch(address & 3) { + case 0: + data &= 0xffffff00; + if(auto byte = read(address & ~3 | 0)) data |= byte() << 0; else return; + if(context.bits == 32) rt.u32 = data; + if(context.bits == 64) rt.s64 = (s32)data; + break; + case 1: + data &= 0xffff0000; + if(auto half = read(address & ~3 | 0)) data |= half() << 0; else return; + if(context.bits == 32) rt.u32 = data; + if(context.bits == 64) rt.s64 = (s32)data; + break; + case 2: + data &= 0xff000000; + if(auto half = 
read(address & ~3 | 0)) data |= half() << 8; else return; + if(auto byte = read(address & ~3 | 2)) data |= byte() << 0; else return; + if(context.bits == 32) rt.u32 = data; + if(context.bits == 64) rt.s64 = (s32)data; + break; + case 3: + data &= 0x00000000; + if(auto word = read(address & ~3 | 0)) data |= word() << 0; else return; + rt.s64 = (s32)data; + break; + } +} + +auto CPU::LWU(r64& rt, cr64& rs, s16 imm) -> void { + if(auto data = read(rs.u32 + imm)) rt.u64 = u32(*data); +} + +auto CPU::MFHI(r64& rd) -> void { + rd.u64 = HI.u64; +} + +auto CPU::MFLO(r64& rd) -> void { + rd.u64 = LO.u64; +} + +auto CPU::MTHI(cr64& rs) -> void { + HI.u64 = rs.u64; +} + +auto CPU::MTLO(cr64& rs) -> void { + LO.u64 = rs.u64; +} + +auto CPU::MULT(cr64& rs, cr64& rt) -> void { + u64 result = s64(rs.s32) * s64(rt.s32); + LO.u64 = s32(result >> 0); + HI.u64 = s32(result >> 32); + step(5); +} + +auto CPU::MULTU(cr64& rs, cr64& rt) -> void { + u64 result = u64(rs.u32) * u64(rt.u32); + LO.u64 = s32(result >> 0); + HI.u64 = s32(result >> 32); + step(5); +} + +auto CPU::NOR(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = ~(rs.u64 | rt.u64); +} + +auto CPU::OR(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = rs.u64 | rt.u64; +} + +auto CPU::ORI(r64& rt, cr64& rs, u16 imm) -> void { + rt.u64 = rs.u64 | imm; +} + +auto CPU::SB(cr64& rt, cr64& rs, s16 imm) -> void { + write(rs.u32 + imm, rt.u32); +} + +auto CPU::SC(r64& rt, cr64& rs, s16 imm) -> void { + if(scc.llbit) { + scc.llbit = 0; + rt.u64 = write(rs.u32 + imm, rt.u32); + } else { + rt.u64 = 0; + } +} + +auto CPU::SCD(r64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + if(scc.llbit) { + scc.llbit = 0; + rt.u64 = write(rs.u32 + imm, rt.u64); + } else { + rt.u64 = 0; + } +} + +auto CPU::SD(cr64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + write(rs.u32 + imm, rt.u64); +} + 
+auto CPU::SDL(cr64& rt, cr64& rs, s16 imm) -> void { + if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + u64 address = rs.u64 + imm; + u64 data = rt.u64; + + if(context.littleEndian()) + switch(address & 7) { + case 0: + if(!write(address & ~7 | 7, data >> 56)) return; + break; + case 1: + if(!write(address & ~7 | 6, data >> 48)) return; + break; + case 2: + if(!write(address & ~7 | 5, data >> 56)) return; + if(!write(address & ~7 | 6, data >> 40)) return; + break; + case 3: + if(!write(address & ~7 | 4, data >> 32)) return; + break; + case 4: + if(!write(address & ~7 | 3, data >> 56)) return; + if(!write(address & ~7 | 4, data >> 24)) return; + break; + case 5: + if(!write(address & ~7 | 2, data >> 48)) return; + if(!write(address & ~7 | 4, data >> 16)) return; + break; + case 6: + if(!write(address & ~7 | 1, data >> 56)) return; + if(!write(address & ~7 | 2, data >> 40)) return; + if(!write(address & ~7 | 4, data >> 8)) return; + break; + case 7: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + } + + if(context.bigEndian()) + switch(address & 7) { + case 0: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + case 1: + if(!write(address & ~7 | 1, data >> 56)) return; + if(!write(address & ~7 | 2, data >> 40)) return; + if(!write(address & ~7 | 4, data >> 8)) return; + break; + case 2: + if(!write(address & ~7 | 2, data >> 48)) return; + if(!write(address & ~7 | 4, data >> 16)) return; + break; + case 3: + if(!write(address & ~7 | 3, data >> 56)) return; + if(!write(address & ~7 | 4, data >> 24)) return; + break; + case 4: + if(!write(address & ~7 | 4, data >> 32)) return; + break; + case 5: + if(!write(address & ~7 | 5, data >> 56)) return; + if(!write(address & ~7 | 6, data >> 40)) return; + break; + case 6: + if(!write(address & ~7 | 6, data >> 48)) return; + break; + case 7: + if(!write(address & ~7 | 7, data >> 56)) return; + break; + } +} + +auto CPU::SDR(cr64& rt, cr64& rs, s16 imm) -> void { + 
if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); + u64 address = rs.u64 + imm; + u64 data = rt.u64; + + if(context.littleEndian()) + switch(address & 7) { + case 0: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + case 1: + if(!write(address & ~7 | 0, data >> 24)) return; + if(!write(address & ~7 | 4, data >> 8)) return; + if(!write(address & ~7 | 6, data >> 0)) return; + break; + case 2: + if(!write(address & ~7 | 0, data >> 16)) return; + if(!write(address & ~7 | 4, data >> 0)) return; + break; + case 3: + if(!write(address & ~7 | 0, data >> 8)) return; + if(!write(address & ~7 | 4, data >> 0)) return; + break; + case 4: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + case 5: + if(!write(address & ~7 | 0, data >> 8)) return; + if(!write(address & ~7 | 2, data >> 0)) return; + break; + case 6: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + case 7: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + } + + if(context.bigEndian()) + switch(address & 7) { + case 0: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + case 1: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + case 2: + if(!write(address & ~7 | 0, data >> 8)) return; + if(!write(address & ~7 | 2, data >> 0)) return; + break; + case 3: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + case 4: + if(!write(address & ~7 | 0, data >> 8)) return; + if(!write(address & ~7 | 4, data >> 0)) return; + break; + case 5: + if(!write(address & ~7 | 0, data >> 16)) return; + if(!write(address & ~7 | 4, data >> 0)) return; + break; + case 6: + if(!write(address & ~7 | 0, data >> 24)) return; + if(!write(address & ~7 | 4, data >> 8)) return; + if(!write(address & ~7 | 6, data >> 0)) return; + break; + case 7: + if(!write(address & ~7 | 0, data >> 0)) return; + break; + } +} + +auto CPU::SH(cr64& rt, cr64& rs, s16 imm) -> void { + write(rs.u32 + imm, rt.u32); +} + +auto CPU::SLL(r64& rd, cr64& rt, u8 sa) -> 
void { + rd.u64 = s32(rt.u32 << sa); +} + +auto CPU::SLLV(r64& rd, cr64& rt, cr64& rs) -> void { + rd.u64 = s32(rt.u32 << (rs.u32 & 31)); +} + +auto CPU::SLT(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = rs.s64 < rt.s64; +} + +auto CPU::SLTI(r64& rt, cr64& rs, s16 imm) -> void { + rt.u64 = rs.s64 < imm; +} + +auto CPU::SLTIU(r64& rt, cr64& rs, s16 imm) -> void { + rt.u64 = rs.u64 < imm; +} + +auto CPU::SLTU(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = rs.u64 < rt.u64; +} + +auto CPU::SRA(r64& rd, cr64& rt, u8 sa) -> void { + rd.u64 = s32(rt.s64 >> sa); +} + +auto CPU::SRAV(r64& rd, cr64& rt, cr64& rs) -> void { + rd.u64 = s32(rt.s64 >> (rs.u32 & 31)); +} + +auto CPU::SRL(r64& rd, cr64& rt, u8 sa) -> void { + rd.u64 = s32(rt.u32 >> sa); +} + +auto CPU::SRLV(r64& rd, cr64& rt, cr64& rs) -> void { + rd.u64 = s32(rt.u32 >> (rs.u32 & 31)); +} + +auto CPU::SUB(r64& rd, cr64& rs, cr64& rt) -> void { + if((rs.u32 ^ rt.u32) & (rs.u32 ^ rs.u32 - rt.u32) & 1 << 31) return exception.arithmeticOverflow(); + rd.u64 = s32(rs.u32 - rt.u32); +} + +auto CPU::SUBU(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = s32(rs.u32 - rt.u32); +} + +auto CPU::SW(cr64& rt, cr64& rs, s16 imm) -> void { + write(rs.u32 + imm, rt.u32); +} + +auto CPU::SWL(cr64& rt, cr64& rs, s16 imm) -> void { + u64 address = rs.u64 + imm; + u32 data = rt.u32; + + if(context.littleEndian()) + switch(address & 3) { + case 0: + if(!write(address & ~3 | 3, data >> 24)) return; + break; + case 1: + if(!write(address & ~3 | 2, data >> 16)) return; + break; + case 2: + if(!write(address & ~3 | 1, data >> 24)) return; + if(!write(address & ~3 | 2, data >> 8)) return; + break; + case 3: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + } + + if(context.bigEndian()) + switch(address & 3) { + case 0: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + case 1: + if(!write(address & ~3 | 1, data >> 24)) return; + if(!write(address & ~3 | 2, data >> 8)) return; + break; + case 2: + 
if(!write(address & ~3 | 2, data >> 16)) return; + break; + case 3: + if(!write(address & ~3 | 3, data >> 24)) return; + break; + } +} + +auto CPU::SWR(cr64& rt, cr64& rs, s16 imm) -> void { + u64 address = rs.u64 + imm; + u32 data = rt.u32; + + if(context.littleEndian()) + switch(address & 3) { + case 0: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + case 1: + if(!write(address & ~3 | 0, data >> 8)) return; + if(!write(address & ~3 | 2, data >> 0)) return; + break; + case 2: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + case 3: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + } + + if(context.bigEndian()) + switch(address & 3) { + case 0: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + case 1: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + case 2: + if(!write(address & ~3 | 0, data >> 8)) return; + if(!write(address & ~3 | 2, data >> 0)) return; + break; + case 3: + if(!write(address & ~3 | 0, data >> 0)) return; + break; + } +} + +auto CPU::SYNC() -> void { + //no operation; for compatibility with R4000-series code +} + +auto CPU::SYSCALL() -> void { + exception.systemCall(); +} + +auto CPU::TEQ(cr64& rs, cr64& rt) -> void { + if(rs.u64 == rt.u64) exception.trap(); +} + +auto CPU::TEQI(cr64& rs, s16 imm) -> void { + if(rs.s64 == imm) exception.trap(); +} + +auto CPU::TGE(cr64& rs, cr64& rt) -> void { + if(rs.s64 >= rt.s64) exception.trap(); +} + +auto CPU::TGEI(cr64& rs, s16 imm) -> void { + if(rs.s64 >= imm) exception.trap(); +} + +auto CPU::TGEIU(cr64& rs, s16 imm) -> void { + if(rs.u64 >= imm) exception.trap(); +} + +auto CPU::TGEU(cr64& rs, cr64& rt) -> void { + if(rs.u64 >= rt.u64) exception.trap(); +} + +auto CPU::TLT(cr64& rs, cr64& rt) -> void { + if(rs.s64 < rt.s64) exception.trap(); +} + +auto CPU::TLTI(cr64& rs, s16 imm) -> void { + if(rs.s64 < imm) exception.trap(); +} + +auto CPU::TLTIU(cr64& rs, s16 imm) -> void { + if(rs.u64 < imm) exception.trap(); +} + +auto CPU::TLTU(cr64& rs, 
cr64& rt) -> void { + if(rs.u64 < rt.u64) exception.trap(); +} + +auto CPU::TNE(cr64& rs, cr64& rt) -> void { + if(rs.u64 != rt.u64) exception.trap(); +} + +auto CPU::TNEI(cr64& rs, s16 imm) -> void { + if(rs.s64 != imm) exception.trap(); +} + +auto CPU::XOR(r64& rd, cr64& rs, cr64& rt) -> void { + rd.u64 = rs.u64 ^ rt.u64; +} + +auto CPU::XORI(r64& rt, cr64& rs, u16 imm) -> void { + rt.u64 = rs.u64 ^ imm; +} + +#undef PC +#undef RA +#undef LO +#undef HI diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp new file mode 100644 index 0000000000..4d7e65318b --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp @@ -0,0 +1,362 @@ +auto CPU::getControlRegister(n5 index) -> u64 { + n64 data; + switch(index) { + case 0: //index + data.bit( 0, 5) = scc.index.tlbEntry; + data.bit(31) = scc.index.probeFailure; + break; + case 1: //random + data.bit(0,4) = scc.random.index; + data.bit(5) = scc.random.unused; + break; + case 2: //entrylo0 + data.bit(0) = scc.tlb.global[0]; + data.bit(1) = scc.tlb.valid[0]; + data.bit(2) = scc.tlb.dirty[0]; + data.bit(3, 5) = scc.tlb.cacheAlgorithm[0]; + data.bit(6,29) = scc.tlb.physicalAddress[0].bit(12,35); + break; + case 3: //entrylo1 + data.bit(0) = scc.tlb.global[1]; + data.bit(1) = scc.tlb.valid[1]; + data.bit(2) = scc.tlb.dirty[1]; + data.bit(3, 5) = scc.tlb.cacheAlgorithm[1]; + data.bit(6,29) = scc.tlb.physicalAddress[1].bit(12,35); + break; + case 4: //context + data.bit( 4,22) = scc.context.badVirtualAddress; + data.bit(23,63) = scc.context.pageTableEntryBase; + break; + case 5: //pagemask + data.bit(13,24) = scc.tlb.pageMask.bit(13,24); + break; + case 6: //wired + data.bit(0,4) = scc.wired.index; + data.bit(5) = scc.wired.unused; + break; + case 8: //badvaddr + data = scc.badVirtualAddress; + break; + case 9: //count + data.bit(0,31) = scc.count >> 1; + break; + case 10: //entryhi + data.bit( 0, 7) = scc.tlb.addressSpaceID; + data.bit(13,39) = 
scc.tlb.virtualAddress.bit(13,39); + data.bit(40,61) = 0; + data.bit(62,63) = scc.tlb.region; + break; + case 11: //compare + data.bit(0,31) = scc.compare >> 1; + break; + case 12: //status + data.bit( 0) = scc.status.interruptEnable; + data.bit( 1) = scc.status.exceptionLevel; + data.bit( 2) = scc.status.errorLevel; + data.bit( 3, 4) = scc.status.privilegeMode; + data.bit( 5) = scc.status.userExtendedAddressing; + data.bit( 6) = scc.status.supervisorExtendedAddressing; + data.bit( 7) = scc.status.kernelExtendedAddressing; + data.bit( 8,15) = scc.status.interruptMask; + data.bit(16) = scc.status.de; + data.bit(17) = scc.status.ce; + data.bit(18) = scc.status.condition; + data.bit(20) = scc.status.softReset; + data.bit(21) = scc.status.tlbShutdown; + data.bit(22) = scc.status.vectorLocation; + data.bit(24) = scc.status.instructionTracing; + data.bit(25) = scc.status.reverseEndian; + data.bit(26) = scc.status.floatingPointMode; + data.bit(27) = scc.status.lowPowerMode; + data.bit(28) = scc.status.enable.coprocessor0; + data.bit(29) = scc.status.enable.coprocessor1; + data.bit(30) = scc.status.enable.coprocessor2; + data.bit(31) = scc.status.enable.coprocessor3; + context.setMode(); + break; + case 13: //cause + data.bit( 2, 6) = scc.cause.exceptionCode; + data.bit( 8,15) = scc.cause.interruptPending; + data.bit(28,29) = scc.cause.coprocessorError; + data.bit(31) = scc.cause.branchDelay; + break; + case 14: //exception program counter + data = scc.epc; + break; + case 15: //coprocessor revision identifier + data.bit(0, 7) = scc.coprocessor.revision; + data.bit(8,15) = scc.coprocessor.implementation; + break; + case 16: //configuration + data.bit( 0, 1) = scc.configuration.coherencyAlgorithmKSEG0; + data.bit( 2, 3) = scc.configuration.cu; + data.bit(15) = scc.configuration.bigEndian; + data.bit(24,27) = scc.configuration.sysadWritebackPattern; + data.bit(28,30) = scc.configuration.systemClockRatio; + break; + case 17: //load linked address + data = scc.ll; + break; + 
case 18: //watchlo + data.bit(0) = scc.watchLo.trapOnWrite; + data.bit(1) = scc.watchLo.trapOnRead; + data.bit(3,31) = scc.watchLo.physicalAddress.bit(3,31); + break; + case 19: //watchhi + data.bit(0,3) = scc.watchHi.physicalAddressExtended; + break; + case 20: //xcontext + data.bit( 4,30) = scc.xcontext.badVirtualAddress; + data.bit(31,32) = scc.xcontext.region; + data.bit(33,63) = scc.xcontext.pageTableEntryBase; + break; + case 26: //parity error + data.bit(0,7) = scc.parityError.diagnostic; + break; + case 27: //cache error (unused) + data.bit(0,31) = 0; + break; + case 28: //taglo + data.bit(6, 7) = scc.tagLo.primaryCacheState; + data.bit(8,27) = scc.tagLo.physicalAddress.bit(12,31); + break; + case 29: //taghi + data.bit(0,31) = 0; + break; + case 30: //error exception program counter + data = scc.epcError; + break; + } + return data; +} + +auto CPU::setControlRegister(n5 index, n64 data) -> void { + //read-only variables are defined but commented out for documentation purposes + switch(index) { + case 0: //index + scc.index.tlbEntry = data.bit( 0,5); + scc.index.probeFailure = data.bit(31); + break; + case 1: //random + //scc.random.index = data.bit(0,4); + scc.random.unused = data.bit(5); + break; + case 2: //entrylo0 + scc.tlb.global[0] = data.bit(0); + scc.tlb.valid[0] = data.bit(1); + scc.tlb.dirty[0] = data.bit(2); + scc.tlb.cacheAlgorithm[0] = data.bit(3, 5); + scc.tlb.physicalAddress[0].bit(12,35) = data.bit(6,29); + scc.tlb.synchronize(); + break; + case 3: //entrylo1 + scc.tlb.global[1] = data.bit(0); + scc.tlb.valid[1] = data.bit(1); + scc.tlb.dirty[1] = data.bit(2); + scc.tlb.cacheAlgorithm[1] = data.bit(3, 5); + scc.tlb.physicalAddress[1].bit(12,35) = data.bit(6,29); + scc.tlb.synchronize(); + break; + case 4: //context + scc.context.badVirtualAddress = data.bit( 4,22); + scc.context.pageTableEntryBase = data.bit(23,63); + break; + case 5: //pagemask + scc.tlb.pageMask.bit(13,24) = data.bit(13,24); + scc.tlb.synchronize(); + break; + case 6: 
//wired + scc.wired.index = data.bit(0,4); + scc.wired.unused = data.bit(5); + scc.random.index = 31; + break; + case 8: //badvaddr + //scc.badVirtualAddress = data; //read-only + break; + case 9: //count + scc.count = data.bit(0,31) << 1; + break; + case 10: //entryhi + scc.tlb.addressSpaceID = data.bit( 0, 7); + scc.tlb.virtualAddress.bit(13,39) = data.bit(13,39); + scc.tlb.region = data.bit(62,63); + scc.tlb.synchronize(); + break; + case 11: //compare + scc.compare = data.bit(0,31) << 1; + scc.cause.interruptPending.bit(Interrupt::Timer) = 0; + break; + case 12: {//status + bool floatingPointMode = scc.status.floatingPointMode; + scc.status.interruptEnable = data.bit( 0); + scc.status.exceptionLevel = data.bit( 1); + scc.status.errorLevel = data.bit( 2); + scc.status.privilegeMode = data.bit( 3, 4); + scc.status.userExtendedAddressing = data.bit( 5); + scc.status.supervisorExtendedAddressing = data.bit( 6); + scc.status.kernelExtendedAddressing = data.bit( 7); + scc.status.interruptMask = data.bit( 8,15); + scc.status.de = data.bit(16); + scc.status.ce = data.bit(17); + scc.status.condition = data.bit(18); + scc.status.softReset = data.bit(20); + //scc.status.tlbShutdown = data.bit(21); //read-only + scc.status.vectorLocation = data.bit(22); + scc.status.instructionTracing = data.bit(24); + scc.status.reverseEndian = data.bit(25); + scc.status.floatingPointMode = data.bit(26); + scc.status.lowPowerMode = data.bit(27); + scc.status.enable.coprocessor0 = data.bit(28); + scc.status.enable.coprocessor1 = data.bit(29); + scc.status.enable.coprocessor2 = data.bit(30); + scc.status.enable.coprocessor3 = data.bit(31); + if(floatingPointMode != scc.status.floatingPointMode) { + fpu.setFloatingPointMode(scc.status.floatingPointMode); + } + context.setMode(); + if(scc.status.instructionTracing) { + debug(unimplemented, "[CPU::setControlRegister] instructionTracing=1"); + } + } break; + case 13: //cause + scc.cause.interruptPending.bit(0) = data.bit(8); + 
scc.cause.interruptPending.bit(1) = data.bit(9); + break; + case 14: //exception program counter + scc.epc = data; + break; + case 15: //coprocessor revision identifier + //scc.coprocessor.revision = data.bit(0, 7); //read-only + //scc.coprocessor.implementation = data.bit(8,15); //read-only + break; + case 16: //configuration + scc.configuration.coherencyAlgorithmKSEG0 = data.bit( 0, 1); + scc.configuration.cu = data.bit( 2, 3); + scc.configuration.bigEndian = data.bit(15); + scc.configuration.sysadWritebackPattern = data.bit(24,27); + //scc.configuration.systemClockRatio = data.bit(28,30); //read-only + context.setMode(); + break; + case 17: //load linked address + scc.ll = data; + break; + case 18: //watchlo + scc.watchLo.trapOnWrite = data.bit(0); + scc.watchLo.trapOnRead = data.bit(1); + scc.watchLo.physicalAddress.bit(3,31) = data.bit(3,31); + break; + case 19: //watchhi + scc.watchHi.physicalAddressExtended = data.bit(0,3); + break; + case 20: //xcontext + scc.xcontext.badVirtualAddress = data.bit( 4,30); + scc.xcontext.region = data.bit(31,32); + scc.xcontext.pageTableEntryBase = data.bit(33,63); + break; + case 26: //parity error + scc.parityError.diagnostic = data.bit(0,7); + break; + case 27: //cache error (unused) + break; + case 28: //taglo + scc.tagLo.primaryCacheState = data.bit(6, 7); + scc.tagLo.physicalAddress.bit(12,31) = data.bit(8,27); + break; + case 29: //taghi + break; + case 30: //error exception program counter + scc.epcError = data; + break; + } +} + +auto CPU::DMFC0(r64& rt, u8 rd) -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + if(context.bits == 32) return exception.reservedInstruction(); + } + rt.u64 = getControlRegister(rd); +} + +auto CPU::DMTC0(cr64& rt, u8 rd) -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + if(context.bits == 32) return exception.reservedInstruction(); + } + setControlRegister(rd, 
rt.u64); +} + +auto CPU::ERET() -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + } + branch.exception(); + if(scc.status.errorLevel) { + ipu.pc = scc.epcError; + scc.status.errorLevel = 0; + } else { + ipu.pc = scc.epc; + scc.status.exceptionLevel = 0; + } + scc.llbit = 0; + context.setMode(); +} + +auto CPU::MFC0(r64& rt, u8 rd) -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + } + rt.u64 = s32(getControlRegister(rd)); +} + +auto CPU::MTC0(cr64& rt, u8 rd) -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + } + setControlRegister(rd, s32(rt.u32)); +} + +auto CPU::TLBP() -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + } + scc.index.tlbEntry = 0; //technically undefined + scc.index.probeFailure = 1; + for(u32 index : range(TLB::Entries)) { + auto& entry = tlb.entry[index]; + auto mask = ~entry.pageMask & ~0x1fff; + if((entry.virtualAddress & mask) != (scc.tlb.virtualAddress & mask)) continue; + if(!entry.global[0] || !entry.global[1]) { + if(entry.addressSpaceID != scc.tlb.addressSpaceID) continue; + } + scc.index.tlbEntry = index; + scc.index.probeFailure = 0; + break; + } +} + +auto CPU::TLBR() -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + } + if(scc.index.tlbEntry >= TLB::Entries) return; + scc.tlb = tlb.entry[scc.index.tlbEntry]; +} + +auto CPU::TLBWI() -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + } + if(scc.index.tlbEntry >= TLB::Entries) return; + tlb.entry[scc.index.tlbEntry] = scc.tlb; + debugger.tlbWrite(scc.index.tlbEntry); +} + +auto CPU::TLBWR() -> void { + if(!context.kernelMode()) { + if(!scc.status.enable.coprocessor0) return exception.coprocessor0(); + } + 
if(scc.random.index >= TLB::Entries) return; + tlb.entry[scc.random.index] = scc.tlb; + debugger.tlbWrite(scc.random.index); +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter.cpp new file mode 100644 index 0000000000..1cb4fa0112 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/interpreter.cpp @@ -0,0 +1,370 @@ +#define OP pipeline.instruction +#define RD ipu.r[RDn] +#define RT ipu.r[RTn] +#define RS ipu.r[RSn] + +#define jp(id, name, ...) case id: return decoder##name(__VA_ARGS__) +#define op(id, name, ...) case id: return name(__VA_ARGS__) +#define br(id, name, ...) case id: return name(__VA_ARGS__) + +#define SA (OP >> 6 & 31) +#define RDn (OP >> 11 & 31) +#define RTn (OP >> 16 & 31) +#define RSn (OP >> 21 & 31) +#define FD (OP >> 6 & 31) +#define FS (OP >> 11 & 31) +#define FT (OP >> 16 & 31) +#define IMMi16 s16(OP) +#define IMMu16 u16(OP) +#define IMMu26 (OP & 0x03ff'ffff) + +auto CPU::decoderEXECUTE() -> void { + switch(OP >> 26) { + jp(0x00, SPECIAL); + jp(0x01, REGIMM); + br(0x02, J, IMMu26); + br(0x03, JAL, IMMu26); + br(0x04, BEQ, RS, RT, IMMi16); + br(0x05, BNE, RS, RT, IMMi16); + br(0x06, BLEZ, RS, IMMi16); + br(0x07, BGTZ, RS, IMMi16); + op(0x08, ADDI, RT, RS, IMMi16); + op(0x09, ADDIU, RT, RS, IMMi16); + op(0x0a, SLTI, RT, RS, IMMi16); + op(0x0b, SLTIU, RT, RS, IMMi16); + op(0x0c, ANDI, RT, RS, IMMu16); + op(0x0d, ORI, RT, RS, IMMu16); + op(0x0e, XORI, RT, RS, IMMu16); + op(0x0f, LUI, RT, IMMu16); + jp(0x10, SCC); + jp(0x11, FPU); + br(0x12, COP2); + br(0x13, COP3); + br(0x14, BEQL, RS, RT, IMMi16); + br(0x15, BNEL, RS, RT, IMMi16); + br(0x16, BLEZL, RS, IMMi16); + br(0x17, BGTZL, RS, IMMi16); + op(0x18, DADDI, RT, RS, IMMi16); + op(0x19, DADDIU, RT, RS, IMMi16); + op(0x1a, LDL, RT, RS, IMMi16); + op(0x1b, LDR, RT, RS, IMMi16); + br(0x1c, INVALID); + br(0x1d, INVALID); + br(0x1e, INVALID); + br(0x1f, INVALID); + op(0x20, LB, RT, RS, IMMi16); + op(0x21, LH, RT, RS, IMMi16); + op(0x22, 
LWL, RT, RS, IMMi16); + op(0x23, LW, RT, RS, IMMi16); + op(0x24, LBU, RT, RS, IMMi16); + op(0x25, LHU, RT, RS, IMMi16); + op(0x26, LWR, RT, RS, IMMi16); + op(0x27, LWU, RT, RS, IMMi16); + op(0x28, SB, RT, RS, IMMi16); + op(0x29, SH, RT, RS, IMMi16); + op(0x2a, SWL, RT, RS, IMMi16); + op(0x2b, SW, RT, RS, IMMi16); + op(0x2c, SDL, RT, RS, IMMi16); + op(0x2d, SDR, RT, RS, IMMi16); + op(0x2e, SWR, RT, RS, IMMi16); + op(0x2f, CACHE, OP >> 16 & 31, RS, IMMi16); + op(0x30, LL, RT, RS, IMMi16); + op(0x31, LWC1, FT, RS, IMMi16); + br(0x32, COP2); //LWC2 + br(0x33, COP3); //LWC3 + op(0x34, LLD, RT, RS, IMMi16); + op(0x35, LDC1, FT, RS, IMMi16); + br(0x36, COP2); //LDC2 + op(0x37, LD, RT, RS, IMMi16); + op(0x38, SC, RT, RS, IMMi16); + op(0x39, SWC1, FT, RS, IMMi16); + br(0x3a, COP2); //SWC2 + br(0x3b, COP3); //SWC3 + op(0x3c, SCD, RT, RS, IMMi16); + op(0x3d, SDC1, FT, RS, IMMi16); + br(0x3e, COP2); //SDC2 + op(0x3f, SD, RT, RS, IMMi16); + } +} + +auto CPU::decoderSPECIAL() -> void { + switch(OP & 0x3f) { + op(0x00, SLL, RD, RT, SA); + br(0x01, INVALID); + op(0x02, SRL, RD, RT, SA); + op(0x03, SRA, RD, RT, SA); + op(0x04, SLLV, RD, RT, RS); + br(0x05, INVALID); + op(0x06, SRLV, RD, RT, RS); + op(0x07, SRAV, RD, RT, RS); + br(0x08, JR, RS); + br(0x09, JALR, RD, RS); + br(0x0a, INVALID); + br(0x0b, INVALID); + br(0x0c, SYSCALL); + br(0x0d, BREAK); + br(0x0e, INVALID); + op(0x0f, SYNC); + op(0x10, MFHI, RD); + op(0x11, MTHI, RS); + op(0x12, MFLO, RD); + op(0x13, MTLO, RS); + op(0x14, DSLLV, RD, RT, RS); + br(0x15, INVALID); + op(0x16, DSRLV, RD, RT, RS); + op(0x17, DSRAV, RD, RT, RS); + op(0x18, MULT, RS, RT); + op(0x19, MULTU, RS, RT); + op(0x1a, DIV, RS, RT); + op(0x1b, DIVU, RS, RT); + op(0x1c, DMULT, RS, RT); + op(0x1d, DMULTU, RS, RT); + op(0x1e, DDIV, RS, RT); + op(0x1f, DDIVU, RS, RT); + op(0x20, ADD, RD, RS, RT); + op(0x21, ADDU, RD, RS, RT); + op(0x22, SUB, RD, RS, RT); + op(0x23, SUBU, RD, RS, RT); + op(0x24, AND, RD, RS, RT); + op(0x25, OR, RD, RS, RT); + op(0x26, XOR, 
RD, RS, RT); + op(0x27, NOR, RD, RS, RT); + br(0x28, INVALID); + br(0x29, INVALID); + op(0x2a, SLT, RD, RS, RT); + op(0x2b, SLTU, RD, RS, RT); + op(0x2c, DADD, RD, RS, RT); + op(0x2d, DADDU, RD, RS, RT); + op(0x2e, DSUB, RD, RS, RT); + op(0x2f, DSUBU, RD, RS, RT); + op(0x30, TGE, RS, RT); + op(0x31, TGEU, RS, RT); + op(0x32, TLT, RS, RT); + op(0x33, TLTU, RS, RT); + op(0x34, TEQ, RS, RT); + br(0x35, INVALID); + op(0x36, TNE, RS, RT); + br(0x37, INVALID); + op(0x38, DSLL, RD, RT, SA); + br(0x39, INVALID); + op(0x3a, DSRL, RD, RT, SA); + op(0x3b, DSRA, RD, RT, SA); + op(0x3c, DSLL, RD, RT, SA + 32); + br(0x3d, INVALID); + op(0x3e, DSRL, RD, RT, SA + 32); + op(0x3f, DSRA, RD, RT, SA + 32); + } +} + +auto CPU::decoderREGIMM() -> void { + switch(OP >> 16 & 0x1f) { + br(0x00, BLTZ, RS, IMMi16); + br(0x01, BGEZ, RS, IMMi16); + br(0x02, BLTZL, RS, IMMi16); + br(0x03, BGEZL, RS, IMMi16); + br(0x04, INVALID); + br(0x05, INVALID); + br(0x06, INVALID); + br(0x07, INVALID); + op(0x08, TGEI, RS, IMMi16); + op(0x09, TGEIU, RS, IMMi16); + op(0x0a, TLTI, RS, IMMi16); + op(0x0b, TLTIU, RS, IMMi16); + op(0x0c, TEQI, RS, IMMi16); + br(0x0d, INVALID); + op(0x0e, TNEI, RS, IMMi16); + br(0x0f, INVALID); + br(0x10, BLTZAL, RS, IMMi16); + br(0x11, BGEZAL, RS, IMMi16); + br(0x12, BLTZALL, RS, IMMi16); + br(0x13, BGEZALL, RS, IMMi16); + br(0x14, INVALID); + br(0x15, INVALID); + br(0x16, INVALID); + br(0x17, INVALID); + br(0x18, INVALID); + br(0x19, INVALID); + br(0x1a, INVALID); + br(0x1b, INVALID); + br(0x1c, INVALID); + br(0x1d, INVALID); + br(0x1e, INVALID); + br(0x1f, INVALID); + } +} + +auto CPU::decoderSCC() -> void { + switch(OP >> 21 & 0x1f) { + op(0x00, MFC0, RT, RDn); + op(0x01, DMFC0, RT, RDn); + br(0x02, INVALID); //CFC0 + br(0x03, INVALID); + op(0x04, MTC0, RT, RDn); + op(0x05, DMTC0, RT, RDn); + br(0x06, INVALID); //CTC0 + br(0x07, INVALID); + br(0x08, INVALID); //BC0 + br(0x09, INVALID); + br(0x0a, INVALID); + br(0x0b, INVALID); + br(0x0c, INVALID); + br(0x0d, INVALID); + 
br(0x0e, INVALID); + br(0x0f, INVALID); + } + + switch(OP & 0x3f) { + op(0x01, TLBR); + op(0x02, TLBWI); + op(0x06, TLBWR); + op(0x08, TLBP); + br(0x18, ERET); + } + + //undefined instructions do not throw a reserved instruction exception +} + +auto CPU::decoderFPU() -> void { + switch(OP >> 21 & 0x1f) { + op(0x00, MFC1, RT, FS); + op(0x01, DMFC1, RT, FS); + op(0x02, CFC1, RT, RDn); + br(0x03, INVALID); + op(0x04, MTC1, RT, FS); + op(0x05, DMTC1, RT, FS); + op(0x06, CTC1, RT, RDn); + br(0x07, INVALID); + br(0x08, BC1, OP >> 16 & 1, OP >> 17 & 1, IMMi16); + br(0x09, INVALID); + br(0x0a, INVALID); + br(0x0b, INVALID); + br(0x0c, INVALID); + br(0x0d, INVALID); + br(0x0e, INVALID); + br(0x0f, INVALID); + } + + if((OP >> 21 & 31) == 16) + switch(OP & 0x3f) { + op(0x00, FADD_S, FD, FS, FT); + op(0x01, FSUB_S, FD, FS, FT); + op(0x02, FMUL_S, FD, FS, FT); + op(0x03, FDIV_S, FD, FS, FT); + op(0x04, FSQRT_S, FD, FS); + op(0x05, FABS_S, FD, FS); + op(0x06, FMOV_S, FD, FS); + op(0x07, FNEG_S, FD, FS); + op(0x08, FROUND_L_S, FD, FS); + op(0x09, FTRUNC_L_S, FD, FS); + op(0x0a, FCEIL_L_S, FD, FS); + op(0x0b, FFLOOR_L_S, FD, FS); + op(0x0c, FROUND_W_S, FD, FS); + op(0x0d, FTRUNC_W_S, FD, FS); + op(0x0e, FCEIL_W_S, FD, FS); + op(0x0f, FFLOOR_W_S, FD, FS); + op(0x21, FCVT_D_S, FD, FS); + op(0x24, FCVT_W_S, FD, FS); + op(0x25, FCVT_L_S, FD, FS); + op(0x30, FC_F_S, FS, FT); + op(0x31, FC_UN_S, FS, FT); + op(0x32, FC_EQ_S, FS, FT); + op(0x33, FC_UEQ_S, FS, FT); + op(0x34, FC_OLT_S, FS, FT); + op(0x35, FC_ULT_S, FS, FT); + op(0x36, FC_OLE_S, FS, FT); + op(0x37, FC_ULE_S, FS, FT); + op(0x38, FC_SF_S, FS, FT); + op(0x39, FC_NGLE_S, FS, FT); + op(0x3a, FC_SEQ_S, FS, FT); + op(0x3b, FC_NGL_S, FS, FT); + op(0x3c, FC_LT_S, FS, FT); + op(0x3d, FC_NGE_S, FS, FT); + op(0x3e, FC_LE_S, FS, FT); + op(0x3f, FC_NGT_S, FS, FT); + } + + if((OP >> 21 & 31) == 17) + switch(OP & 0x3f) { + op(0x00, FADD_D, FD, FS, FT); + op(0x01, FSUB_D, FD, FS, FT); + op(0x02, FMUL_D, FD, FS, FT); + op(0x03, FDIV_D, FD, 
FS, FT); + op(0x04, FSQRT_D, FD, FS); + op(0x05, FABS_D, FD, FS); + op(0x06, FMOV_D, FD, FS); + op(0x07, FNEG_D, FD, FS); + op(0x08, FROUND_L_D, FD, FS); + op(0x09, FTRUNC_L_D, FD, FS); + op(0x0a, FCEIL_L_D, FD, FS); + op(0x0b, FFLOOR_L_D, FD, FS); + op(0x0c, FROUND_W_D, FD, FS); + op(0x0d, FTRUNC_W_D, FD, FS); + op(0x0e, FCEIL_W_D, FD, FS); + op(0x0f, FFLOOR_W_D, FD, FS); + op(0x20, FCVT_S_D, FD, FS); + op(0x24, FCVT_W_D, FD, FS); + op(0x25, FCVT_L_D, FD, FS); + op(0x30, FC_F_D, FS, FT); + op(0x31, FC_UN_D, FS, FT); + op(0x32, FC_EQ_D, FS, FT); + op(0x33, FC_UEQ_D, FS, FT); + op(0x34, FC_OLT_D, FS, FT); + op(0x35, FC_ULT_D, FS, FT); + op(0x36, FC_OLE_D, FS, FT); + op(0x37, FC_ULE_D, FS, FT); + op(0x38, FC_SF_D, FS, FT); + op(0x39, FC_NGLE_D, FS, FT); + op(0x3a, FC_SEQ_D, FS, FT); + op(0x3b, FC_NGL_D, FS, FT); + op(0x3c, FC_LT_D, FS, FT); + op(0x3d, FC_NGE_D, FS, FT); + op(0x3e, FC_LE_D, FS, FT); + op(0x3f, FC_NGT_D, FS, FT); + } + + if((OP >> 21 & 31) == 20) + switch(OP & 0x3f) { + op(0x20, FCVT_S_W, FD, FS); + op(0x21, FCVT_D_W, FD, FS); + } + + if((OP >> 21 & 31) == 21) + switch(OP & 0x3f) { + op(0x20, FCVT_S_L, FD, FS); + op(0x21, FCVT_D_L, FD, FS); + } + + //undefined instructions do not throw a reserved instruction exception +} + +auto CPU::COP2() -> void { + exception.coprocessor2(); +} + +auto CPU::COP3() -> void { + exception.coprocessor3(); +} + +auto CPU::INVALID() -> void { + exception.reservedInstruction(); +} + +#undef SA +#undef RDn +#undef RTn +#undef RSn +#undef FD +#undef FS +#undef FT +#undef IMMi16 +#undef IMMu16 +#undef IMMu26 + +#undef jp +#undef op +#undef br + +#undef OP +#undef RD +#undef RT +#undef RS diff --git a/waterbox/ares64/ares/ares/n64/cpu/memory.cpp b/waterbox/ares64/ares/ares/n64/cpu/memory.cpp new file mode 100644 index 0000000000..6e9e28dfed --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/memory.cpp @@ -0,0 +1,198 @@ +//32-bit segments + +auto CPU::kernelSegment32(u32 address) const -> Context::Segment { + if(address <= 
0x7fff'ffff) return Context::Segment::Mapped; //kuseg + if(address <= 0x9fff'ffff) return Context::Segment::Cached; //kseg0 + if(address <= 0xbfff'ffff) return Context::Segment::Direct; //kseg1 + if(address <= 0xdfff'ffff) return Context::Segment::Mapped; //ksseg + if(address <= 0xffff'ffff) return Context::Segment::Mapped; //kseg3 + unreachable; +} + +auto CPU::supervisorSegment32(u32 address) const -> Context::Segment { + if(address <= 0x7fff'ffff) return Context::Segment::Mapped; //suseg + if(address <= 0xbfff'ffff) return Context::Segment::Unused; + if(address <= 0xdfff'ffff) return Context::Segment::Mapped; //sseg + if(address <= 0xffff'ffff) return Context::Segment::Unused; + unreachable; +} + +auto CPU::userSegment32(u32 address) const -> Context::Segment { + if(address <= 0x7fff'ffff) return Context::Segment::Mapped; //useg + if(address <= 0xffff'ffff) return Context::Segment::Unused; + unreachable; +} + +//64-bit segments + +auto CPU::kernelSegment64(u64 address) const -> Context::Segment { + if(address <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xkuseg + if(address <= 0x3fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0x4000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xksseg + if(address <= 0x7fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0x8000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(address <= 0x87ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0x8800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(address <= 0x8fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0x9000'0000'ffff'ffffull) return Context::Segment::Direct; //xkphys* + if(address <= 0x97ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0x9800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(address <= 0x9fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 
0xa000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(address <= 0xa7ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0xa800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(address <= 0xafff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0xb000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(address <= 0xb7ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0xb800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(address <= 0xbfff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0xc000'00ff'7fff'ffffull) return Context::Segment::Mapped; //xkseg + if(address <= 0xffff'ffff'7fff'ffffull) return Context::Segment::Unused; + if(address <= 0xffff'ffff'9fff'ffffull) return Context::Segment::Cached; //ckseg0 + if(address <= 0xffff'ffff'bfff'ffffull) return Context::Segment::Direct; //ckseg1 + if(address <= 0xffff'ffff'dfff'ffffull) return Context::Segment::Mapped; //ckseg2 + if(address <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Mapped; //ckseg3 + unreachable; +} + +auto CPU::supervisorSegment64(u64 address) const -> Context::Segment { + if(address <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xsuseg + if(address <= 0x3fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(address <= 0x4000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xsseg + if(address <= 0xffff'ffff'bfff'ffffull) return Context::Segment::Unused; + if(address <= 0xffff'ffff'dfff'ffffull) return Context::Segment::Mapped; //csseg + if(address <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Unused; + unreachable; +} + +auto CPU::userSegment64(u64 address) const -> Context::Segment { + if(address <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xuseg + if(address <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Unused; + unreachable; +} + +// + +auto CPU::segment(u64 address) -> 
Context::Segment { + auto segment = context.segment[address >> 29 & 7]; +//if(likely(context.bits == 32)) + return (Context::Segment)segment; + switch(segment) { + case Context::Segment::Kernel64: + return kernelSegment64(address); + case Context::Segment::Supervisor64: + return supervisorSegment64(address); + case Context::Segment::User64: + return userSegment64(address); + } + unreachable; +} + +auto CPU::devirtualize(u64 address) -> maybe { + switch(context.segment[address >> 29 & 7]) { + case Context::Segment::Unused: + exception.addressLoad(); + return nothing; + case Context::Segment::Mapped: + if(auto match = tlb.load(address)) return match.address; + tlb.exception(address); + return nothing; + case Context::Segment::Cached: + case Context::Segment::Direct: + return address; + } + unreachable; +} + +auto CPU::fetch(u64 address) -> u32 { + switch(segment(address)) { + case Context::Segment::Unused: + step(1); + exception.addressLoad(); + return 0; //nop + case Context::Segment::Mapped: + if(auto match = tlb.load(address)) { + if(match.cache) return icache.fetch(match.address); + step(1); + return bus.read(match.address); + } + step(1); + tlb.exception(address); + return 0; //nop + case Context::Segment::Cached: + return icache.fetch(address); + case Context::Segment::Direct: + step(1); + return bus.read(address); + } + + unreachable; +} + +template +auto CPU::read(u64 address) -> maybe { + if constexpr(Accuracy::CPU::AddressErrors) { + if(unlikely(address & Size - 1)) { + step(1); + exception.addressLoad(); + return nothing; + } + } + + switch(segment(address)) { + case Context::Segment::Unused: + step(1); + exception.addressLoad(); + return nothing; + case Context::Segment::Mapped: + if(auto match = tlb.load(address)) { + if(match.cache) return dcache.read(match.address); + step(1); + return bus.read(match.address); + } + step(1); + tlb.exception(address); + return nothing; + case Context::Segment::Cached: + return dcache.read(address); + case 
Context::Segment::Direct: + step(1); + return bus.read(address); + } + + unreachable; +} + +template +auto CPU::write(u64 address, u64 data) -> bool { + if constexpr(Accuracy::CPU::AddressErrors) { + if(unlikely(address & Size - 1)) { + step(1); + exception.addressStore(); + return false; + } + } + + switch(segment(address)) { + case Context::Segment::Unused: + step(1); + exception.addressStore(); + return false; + case Context::Segment::Mapped: + if(auto match = tlb.store(address)) { + if(match.cache) return dcache.write(match.address, data), true; + step(1); + return bus.write(match.address, data), true; + } + step(1); + tlb.exception(address); + return false; + case Context::Segment::Cached: + return dcache.write(address, data), true; + case Context::Segment::Direct: + step(1); + return bus.write(address, data), true; + } + + unreachable; +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/recompiler.cpp b/waterbox/ares64/ares/ares/n64/cpu/recompiler.cpp new file mode 100644 index 0000000000..6ccfc418a8 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/recompiler.cpp @@ -0,0 +1,2005 @@ +auto CPU::Recompiler::pool(u32 address) -> Pool* { + auto& pool = pools[address >> 8 & 0x1fffff]; + if(!pool) pool = (Pool*)allocator.acquire(sizeof(Pool)); + return pool; +} + +auto CPU::Recompiler::block(u32 address) -> Block* { + if(auto block = pool(address)->blocks[address >> 2 & 0x3f]) return block; + auto block = emit(address); + pool(address)->blocks[address >> 2 & 0x3f] = block; + memory::jitprotect(true); + return block; +} + +auto CPU::Recompiler::emit(u32 address) -> Block* { + if(unlikely(allocator.available() < 1_MiB)) { + print("CPU allocator flush\n"); + memory::jitprotect(false); + allocator.release(bump_allocator::zero_fill); + memory::jitprotect(true); + reset(); + } + + auto block = (Block*)allocator.acquire(sizeof(Block)); + beginFunction(3); + + bool hasBranched = 0; + while(true) { + u32 instruction = bus.read(address); + bool branched = 
emitEXECUTE(instruction); + if(unlikely(instruction == 0x1000'ffff)) { + //accelerate idle loops + mov32(reg(1), imm(64)); + call(&CPU::step); + } + call(&CPU::instructionEpilogue); + address += 4; + if(hasBranched || (address & 0xfc) == 0) break; //block boundary + hasBranched = branched; + testJumpEpilog(); + } + jumpEpilog(); + + memory::jitprotect(false); + block->code = endFunction(); + +//print(hex(PC, 8L), " ", instructions, " ", size(), "\n"); + return block; +} + +#define Sa (instruction >> 6 & 31) +#define Rdn (instruction >> 11 & 31) +#define Rtn (instruction >> 16 & 31) +#define Rsn (instruction >> 21 & 31) +#define Fdn (instruction >> 6 & 31) +#define Fsn (instruction >> 11 & 31) +#define Ftn (instruction >> 16 & 31) + +#define IpuBase offsetof(IPU, r[16]) +#define IpuReg(r) sreg(1), offsetof(IPU, r) - IpuBase +#define Rd IpuReg(r[0]) + Rdn * sizeof(r64) +#define Rt IpuReg(r[0]) + Rtn * sizeof(r64) +#define Rt32 IpuReg(r[0].u32) + Rtn * sizeof(r64) +#define Rs IpuReg(r[0]) + Rsn * sizeof(r64) +#define Rs32 IpuReg(r[0].u32) + Rsn * sizeof(r64) +#define Lo IpuReg(lo) +#define Hi IpuReg(hi) + +#define FpuBase offsetof(FPU, r[16]) +#define FpuReg(r) sreg(2), offsetof(FPU, r) - FpuBase +#define Fd FpuReg(r[0]) + Fdn * sizeof(r64) +#define Fs FpuReg(r[0]) + Fsn * sizeof(r64) +#define Ft FpuReg(r[0]) + Ftn * sizeof(r64) + +#define i16 s16(instruction) +#define n16 u16(instruction) +#define n26 u32(instruction & 0x03ff'ffff) + +auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool { + switch(instruction >> 26) { + + //SPECIAL + case 0x00: { + return emitSPECIAL(instruction); + } + + //REGIMM + case 0x01: { + return emitREGIMM(instruction); + } + + //J n26 + case 0x02: { + mov32(reg(1), imm(n26)); + call(&CPU::J); + return 1; + } + + //JAL n26 + case 0x03: { + mov32(reg(1), imm(n26)); + call(&CPU::JAL); + return 1; + } + + //BEQ Rs,Rt,i16 + case 0x04: { + lea(reg(1), Rs); + lea(reg(2), Rt); + mov32(reg(3), imm(i16)); + call(&CPU::BEQ); + return 1; + } + + 
//BNE Rs,Rt,i16 + case 0x05: { + lea(reg(1), Rs); + lea(reg(2), Rt); + mov32(reg(3), imm(i16)); + call(&CPU::BNE); + return 1; + } + + //BLEZ Rs,i16 + case 0x06: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BLEZ); + return 1; + } + + //BGTZ Rs,i16 + case 0x07: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BGTZ); + return 1; + } + + //ADDI Rt,Rs,i16 + case 0x08: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::ADDI); + return 0; + } + + //ADDIU Rt,Rs,i16 + case 0x09: { + add32(reg(0), mem(Rs32), imm(i16)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rt), reg(0)); + return 0; + } + + //SLTI Rt,Rs,i16 + case 0x0a: { + cmp64(mem(Rs), imm(i16), set_slt); + mov64_f(mem(Rt), flag_slt); + return 0; + } + + //SLTIU Rt,Rs,i16 + case 0x0b: { + cmp64(mem(Rs), imm(i16), set_ult); + mov64_f(mem(Rt), flag_ult); + return 0; + } + + //ANDI Rt,Rs,n16 + case 0x0c: { + and64(mem(Rt), mem(Rs), imm(n16)); + return 0; + } + + //ORI Rt,Rs,n16 + case 0x0d: { + or64(mem(Rt), mem(Rs), imm(n16)); + return 0; + } + + //XORI Rt,Rs,n16 + case 0x0e: { + xor64(mem(Rt), mem(Rs), imm(n16)); + return 0; + } + + //LUI Rt,n16 + case 0x0f: { + mov64(mem(Rt), imm(s32(n16 << 16))); + return 0; + } + + //SCC + case 0x10: { + return emitSCC(instruction); + } + + //FPU + case 0x11: { + return emitFPU(instruction); + } + + //COP2 + case 0x12: { + call(&CPU::COP2); + return 1; + } + + //COP3 + case 0x13: { + call(&CPU::COP3); + return 1; + } + + //BEQL Rs,Rt,i16 + case 0x14: { + lea(reg(1), Rs); + lea(reg(2), Rt); + mov32(reg(3), imm(i16)); + call(&CPU::BEQL); + return 1; + } + + //BNEL Rs,Rt,i16 + case 0x15: { + lea(reg(1), Rs); + lea(reg(2), Rt); + mov32(reg(3), imm(i16)); + call(&CPU::BNEL); + return 1; + } + + //BLEZL Rs,i16 + case 0x16: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BLEZL); + return 1; + } + + //BGTZL Rs,i16 + case 0x17: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BGTZL); + return 1; + } + + //DADDI 
Rt,Rs,i16 + case 0x18: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::DADDI); + return 0; + } + + //DADDIU Rt,Rs,i16 + case 0x19: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::DADDIU); + return 0; + } + + //LDL Rt,Rs,i16 + case 0x1a: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LDL); + return 0; + } + + //LDR Rt,Rs,i16 + case 0x1b: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LDR); + return 0; + } + + //INVALID + case 0x1c ... 0x1f: { + call(&CPU::INVALID); + return 1; + } + + //LB Rt,Rs,i16 + case 0x20: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LB); + return 0; + } + + //LH Rt,Rs,i16 + case 0x21: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LH); + return 0; + } + + //LWL Rt,Rs,i16 + case 0x22: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LWL); + return 0; + } + + //LW Rt,Rs,i16 + case 0x23: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LW); + return 0; + } + + //LBU Rt,Rs,i16 + case 0x24: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LBU); + return 0; + } + + //LHU Rt,Rs,i16 + case 0x25: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LHU); + return 0; + } + + //LWR Rt,Rs,i16 + case 0x26: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LWR); + return 0; + } + + //LWU Rt,Rs,i16 + case 0x27: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LWU); + return 0; + } + + //SB Rt,Rs,i16 + case 0x28: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SB); + return 0; + } + + //SH Rt,Rs,i16 + case 0x29: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SH); + return 0; + } + + //SWL Rt,Rs,i16 + case 0x2a: { + 
lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SWL); + return 0; + } + + //SW Rt,Rs,i16 + case 0x2b: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SW); + return 0; + } + + //SDL Rt,Rs,i16 + case 0x2c: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SDL); + return 0; + } + + //SDR Rt,Rs,i16 + case 0x2d: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SDR); + return 0; + } + + //SWR Rt,Rs,i16 + case 0x2e: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SWR); + return 0; + } + + //CACHE op(offset),base + case 0x2f: { + mov32(reg(1), imm(instruction >> 16 & 31)); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::CACHE); + return 0; + } + + //LL Rt,Rs,i16 + case 0x30: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LL); + return 0; + } + + //LWC1 Ft,Rs,i16 + case 0x31: { + mov32(reg(1), imm(Ftn)); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LWC1); + return 0; + } + + //LWC2 + case 0x32: { + call(&CPU::COP2); + return 1; + } + + //LWC3 + case 0x33: { + call(&CPU::COP3); + return 1; + } + + //LLD Rt,Rs,i16 + case 0x34: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LLD); + return 0; + } + + //LDC1 Ft,Rs,i16 + case 0x35: { + mov32(reg(1), imm(Ftn)); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LDC1); + return 0; + } + + //LDC2 + case 0x36: { + call(&CPU::COP2); + return 1; + } + + //LD Rt,Rs,i16 + case 0x37: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::LD); + return 0; + } + + //SC Rt,Rs,i16 + case 0x38: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SC); + return 0; + } + + //SWC1 Ft,Rs,i16 + case 0x39: { + mov32(reg(1), imm(Ftn)); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SWC1); + return 0; + } + + //SWC2 + case 0x3a: { + 
call(&CPU::COP2); + return 1; + } + + //SWC3 + case 0x3b: { + call(&CPU::COP3); + return 1; + } + + //SCD Rt,Rs,i16 + case 0x3c: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SCD); + return 0; + } + + //SDC1 Ft,Rs,i16 + case 0x3d: { + mov32(reg(1), imm(Ftn)); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SDC1); + return 0; + } + + //SDC2 + case 0x3e: { + call(&CPU::COP2); + return 1; + } + + //SD Rt,Rs,i16 + case 0x3f: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&CPU::SD); + return 0; + } + + } + + return 0; +} + +auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { + switch(instruction & 0x3f) { + + //SLL Rd,Rt,Sa + case 0x00: { + shl32(reg(0), mem(Rt32), imm(Sa)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //INVALID + case 0x01: { + call(&CPU::INVALID); + return 1; + } + + //SRL Rd,Rt,Sa + case 0x02: { + lshr32(reg(0), mem(Rt32), imm(Sa)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //SRA Rd,Rt,Sa + case 0x03: { + ashr64(reg(0), mem(Rt), imm(Sa)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //SLLV Rd,Rt,Rs + case 0x04: { + mov32(reg(0), mem(Rt32)); + and32(reg(1), mem(Rs32), imm(31)); + shl32(reg(0), reg(0), reg(1)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //INVALID + case 0x05: { + call(&CPU::INVALID); + return 1; + } + + //SRLV Rd,Rt,RS + case 0x06: { + mov32(reg(0), mem(Rt32)); + and32(reg(1), mem(Rs32), imm(31)); + lshr32(reg(0), reg(0), reg(1)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //SRAV Rd,Rt,Rs + case 0x07: { + and32(reg(1), mem(Rs32), imm(31)); + ashr64(reg(0), mem(Rt), reg(1)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //JR Rs + case 0x08: { + lea(reg(1), Rs); + call(&CPU::JR); + return 1; + } + + //JALR Rd,Rs + case 0x09: { + lea(reg(1), Rd); + lea(reg(2), Rs); + 
call(&CPU::JALR); + return 1; + } + + //INVALID + case 0x0a ... 0x0b: { + call(&CPU::INVALID); + return 1; + } + + //SYSCALL + case 0x0c: { + call(&CPU::SYSCALL); + return 1; + } + + //BREAK + case 0x0d: { + call(&CPU::BREAK); + return 1; + } + + //INVALID + case 0x0e: { + call(&CPU::INVALID); + return 1; + } + + //SYNC + case 0x0f: { + call(&CPU::SYNC); + return 0; + } + + //MFHI Rd + case 0x10: { + mov64(mem(Rd), mem(Hi)); + return 0; + } + + //MTHI Rs + case 0x11: { + mov64(mem(Hi), mem(Rs)); + return 0; + } + + //MFLO Rd + case 0x12: { + mov64(mem(Rd), mem(Lo)); + return 0; + } + + //MTLO Rs + case 0x13: { + mov64(mem(Lo), mem(Rs)); + return 0; + } + + //DSLLV Rd,Rt,Rs + case 0x14: { + lea(reg(1), Rd); + lea(reg(2), Rt); + lea(reg(3), Rs); + call(&CPU::DSLLV); + return 0; + } + + //INVALID + case 0x15: { + call(&CPU::INVALID); + return 1; + } + + //DSRLV Rd,Rt,Rs + case 0x16: { + lea(reg(1), Rd); + lea(reg(2), Rt); + lea(reg(3), Rs); + call(&CPU::DSRLV); + return 0; + } + + //DSRAV Rd,Rt,Rs + case 0x17: { + lea(reg(1), Rd); + lea(reg(2), Rt); + lea(reg(3), Rs); + call(&CPU::DSRAV); + return 0; + } + + //MULT Rs,Rt + case 0x18: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::MULT); + return 0; + } + + //MULTU Rs,Rt + case 0x19: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::MULTU); + return 0; + } + + //DIV Rs,Rt + case 0x1a: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::DIV); + return 0; + } + + //DIVU Rs,Rt + case 0x1b: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::DIVU); + return 0; + } + + //DMULT Rs,Rt + case 0x1c: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::DMULT); + return 0; + } + + //DMULTU Rs,Rt + case 0x1d: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::DMULTU); + return 0; + } + + //DDIV Rs,Rt + case 0x1e: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::DDIV); + return 0; + } + + //DDIVU Rs,Rt + case 0x1f: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::DDIVU); + return 0; + } + + //ADD Rd,Rs,Rt + 
case 0x20: { + lea(reg(1), Rd); + lea(reg(2), Rs); + lea(reg(3), Rt); + call(&CPU::ADD); + return 0; + } + + //ADDU Rd,Rs,Rt + case 0x21: { + add32(reg(0), mem(Rs32), mem(Rt32)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //SUB Rd,Rs,Rt + case 0x22: { + lea(reg(1), Rd); + lea(reg(2), Rs); + lea(reg(3), Rt); + call(&CPU::SUB); + return 0; + } + + //SUBU Rd,Rs,Rt + case 0x23: { + sub32(reg(0), mem(Rs32), mem(Rt32)); + mov64_s32(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //AND Rd,Rs,Rt + case 0x24: { + and64(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //OR Rd,Rs,Rt + case 0x25: { + or64(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //XOR Rd,Rs,Rt + case 0x26: { + xor64(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //NOR Rd,Rs,Rt + case 0x27: { + or64(reg(0), mem(Rs), mem(Rt)); + not64(reg(0), reg(0)); + mov64(mem(Rd), reg(0)); + return 0; + } + + //INVALID + case 0x28 ... 0x29: { + call(&CPU::INVALID); + return 1; + } + + //SLT Rd,Rs,Rt + case 0x2a: { + cmp64(mem(Rs), mem(Rt), set_slt); + mov64_f(mem(Rd), flag_slt); + return 0; + } + + //SLTU Rd,Rs,Rt + case 0x2b: { + cmp64(mem(Rs), mem(Rt), set_ult); + mov64_f(mem(Rd), flag_ult); + return 0; + } + + //DADD Rd,Rs,Rt + case 0x2c: { + lea(reg(1), Rd); + lea(reg(2), Rs); + lea(reg(3), Rt); + call(&CPU::DADD); + return 0; + } + + //DADDU Rd,Rs,Rt + case 0x2d: { + lea(reg(1), Rd); + lea(reg(2), Rs); + lea(reg(3), Rt); + call(&CPU::DADDU); + return 0; + } + + //DSUB Rd,Rs,Rt + case 0x2e: { + lea(reg(1), Rd); + lea(reg(2), Rs); + lea(reg(3), Rt); + call(&CPU::DSUB); + return 0; + } + + //DSUBU Rd,Rs,Rt + case 0x2f: { + lea(reg(1), Rd); + lea(reg(2), Rs); + lea(reg(3), Rt); + call(&CPU::DSUBU); + return 0; + } + + //TGE Rs,Rt + case 0x30: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::TGE); + return 0; + } + + //TGEU Rs,Rt + case 0x31: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::TGEU); + return 0; + } + + //TLT Rs,Rt + case 0x32: { + lea(reg(1), Rs); + 
lea(reg(2), Rt); + call(&CPU::TLT); + return 0; + } + + //TLTU Rs,Rt + case 0x33: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::TLTU); + return 0; + } + + //TEQ Rs,Rt + case 0x34: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::TEQ); + return 0; + } + + //INVALID + case 0x35: { + call(&CPU::INVALID); + return 1; + } + + //TNE Rs,Rt + case 0x36: { + lea(reg(1), Rs); + lea(reg(2), Rt); + call(&CPU::TNE); + return 0; + } + + //INVALID + case 0x37: { + call(&CPU::INVALID); + return 1; + } + + //DSLL Rd,Rt,Sa + case 0x38: { + lea(reg(1), Rd); + lea(reg(2), Rt); + mov32(reg(3), imm(Sa)); + call(&CPU::DSLL); + return 0; + } + + //INVALID + case 0x39: { + call(&CPU::INVALID); + return 1; + } + + //DSRL Rd,Rt,Sa + case 0x3a: { + lea(reg(1), Rd); + lea(reg(2), Rt); + mov32(reg(3), imm(Sa)); + call(&CPU::DSRL); + return 0; + } + + //DSRA Rd,Rt,Sa + case 0x3b: { + lea(reg(1), Rd); + lea(reg(2), Rt); + mov32(reg(3), imm(Sa)); + call(&CPU::DSRA); + return 0; + } + + //DSLL32 Rd,Rt,Sa + case 0x3c: { + lea(reg(1), Rd); + lea(reg(2), Rt); + mov32(reg(3), imm(Sa+32)); + call(&CPU::DSLL); + return 0; + } + + //INVALID + case 0x3d: { + call(&CPU::INVALID); + return 1; + } + + //DSRL32 Rd,Rt,Sa + case 0x3e: { + lea(reg(1), Rd); + lea(reg(2), Rt); + mov32(reg(3), imm(Sa+32)); + call(&CPU::DSRL); + return 0; + } + + //DSRA32 Rd,Rt,Sa + case 0x3f: { + lea(reg(1), Rd); + lea(reg(2), Rt); + mov32(reg(3), imm(Sa+32)); + call(&CPU::DSRA); + return 0; + } + + } + + return 0; +} + +auto CPU::Recompiler::emitREGIMM(u32 instruction) -> bool { + switch(instruction >> 16 & 0x1f) { + + //BLTZ Rs,i16 + case 0x00: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BLTZ); + return 0; + } + + //BGEZ Rs,i16 + case 0x01: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BGEZ); + return 0; + } + + //BLTZL Rs,i16 + case 0x02: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BLTZL); + return 0; + } + + //BGEZL Rs,i16 + case 0x03: { + lea(reg(1), Rs); + 
mov32(reg(2), imm(i16)); + call(&CPU::BGEZL); + return 0; + } + + //INVALID + case 0x04 ... 0x07: { + call(&CPU::INVALID); + return 1; + } + + //TGEI Rs,i16 + case 0x08: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::TGEI); + return 0; + } + + //TGEIU Rs,i16 + case 0x09: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::TGEIU); + return 0; + } + + //TLTI Rs,i16 + case 0x0a: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::TLTI); + return 0; + } + + //TLTIU Rs,i16 + case 0x0b: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::TLTIU); + return 0; + } + + //TEQI Rs,i16 + case 0x0c: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::TEQI); + return 0; + } + + //INVALID + case 0x0d: { + call(&CPU::INVALID); + return 1; + } + + //TNEI Rs,i16 + case 0x0e: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::TNEI); + return 0; + } + + //INVALID + case 0x0f: { + call(&CPU::INVALID); + return 1; + } + + //BLTZAL Rs,i16 + case 0x10: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BLTZAL); + return 0; + } + + //BGEZAL Rs,i16 + case 0x11: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BGEZAL); + return 0; + } + + //BLTZALL Rs,i16 + case 0x12: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BLTZALL); + return 0; + } + + //BGEZALL Rs,i16 + case 0x13: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&CPU::BGEZALL); + return 0; + } + + //INVALID + case 0x14 ... 0x1f: { + call(&CPU::INVALID); + return 1; + } + + } + + return 0; +} + +auto CPU::Recompiler::emitSCC(u32 instruction) -> bool { + switch(instruction >> 21 & 0x1f) { + + //MFC0 Rt,Rd + case 0x00: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&CPU::MFC0); + return 0; + } + + //DMFC0 Rt,Rd + case 0x01: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&CPU::DMFC0); + return 0; + } + + //INVALID + case 0x02 ... 
0x03: { + call(&CPU::INVALID); + return 1; + } + + //MTC0 Rt,Rd + case 0x04: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&CPU::MTC0); + return 0; + } + + //DMTC0 Rt,Rd + case 0x05: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&CPU::DMTC0); + return 0; + } + + //INVALID + case 0x06 ... 0x0f: { + call(&CPU::INVALID); + return 1; + } + + } + + switch(instruction & 0x3f) { + + //TLBR + case 0x01: { + call(&CPU::TLBR); + return 0; + } + + //TLBWI + case 0x02: { + call(&CPU::TLBWI); + return 0; + } + + //TLBWR + case 0x06: { + call(&CPU::TLBWR); + return 0; + } + + //TLBP + case 0x08: { + call(&CPU::TLBP); + return 0; + } + + //ERET + case 0x18: { + call(&CPU::ERET); + return 1; + } + + } + + return 0; +} + +auto CPU::Recompiler::emitFPU(u32 instruction) -> bool { + switch(instruction >> 21 & 0x1f) { + + //MFC1 Rt,Fs + case 0x00: { + lea(reg(1), Rt); + mov32(reg(2), imm(Fsn)); + call(&CPU::MFC1); + return 0; + } + + //DMFC1 Rt,Fs + case 0x01: { + lea(reg(1), Rt); + mov32(reg(2), imm(Fsn)); + call(&CPU::DMFC1); + return 0; + } + + //CFC1 Rt,Rd + case 0x02: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&CPU::CFC1); + return 0; + } + + //INVALID + case 0x03: { + call(&CPU::INVALID); + return 1; + } + + //MTC1 Rt,Fs + case 0x04: { + lea(reg(1), Rt); + mov32(reg(2), imm(Fsn)); + call(&CPU::MTC1); + return 0; + } + + //DMTC1 Rt,Fs + case 0x05: { + lea(reg(1), Rt); + mov32(reg(2), imm(Fsn)); + call(&CPU::DMTC1); + return 0; + } + + //CTC1 Rt,Rd + case 0x06: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&CPU::CTC1); + return 0; + } + + //INVALID + case 0x07: { + call(&CPU::INVALID); + return 1; + } + + //BC1 offset + case 0x08: { + mov32(reg(1), imm(instruction >> 16 & 1)); + mov32(reg(2), imm(instruction >> 17 & 1)); + mov32(reg(3), imm(i16)); + call(&CPU::BC1); + return 1; + } + + //INVALID + case 0x09 ... 
0x0f: { + call(&CPU::INVALID); + return 1; + } + + } + + if((instruction >> 21 & 31) == 16) + switch(instruction & 0x3f) { + + //FADD.S Fd,Fs,Ft + case 0x00: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FADD_S); + return 0; + } + + //FSUB.S Fd,Fs,Ft + case 0x01: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FSUB_S); + return 0; + } + + //FMUL.S Fd,Fs,Ft + case 0x02: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FMUL_S); + return 0; + } + + //FDIV.S Fd,Fs,Ft + case 0x03: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FDIV_S); + return 0; + } + + //FSQRT.S Fd,Fs + case 0x04: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FSQRT_S); + return 0; + } + + //FABS.S Fd,Fs + case 0x05: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FABS_S); + return 0; + } + + //FMOV.S Fd,Fs + case 0x06: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FMOV_S); + return 0; + } + + //FNEG.S Fd,Fs + case 0x07: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FNEG_S); + return 0; + } + + //FROUND.L.S Fd,Fs + case 0x08: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FROUND_L_S); + return 0; + } + + //FTRUNC.L.S Fd,Fs + case 0x09: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FTRUNC_L_S); + return 0; + } + + //FCEIL.L.S Fd,Fs + case 0x0a: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCEIL_L_S); + return 0; + } + + //FFLOOR.L.S Fd,Fs + case 0x0b: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FFLOOR_L_S); + return 0; + } + + //FROUND.W.S Fd,Fs + case 0x0c: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FROUND_W_S); + return 0; + } + + //FTRUNC.W.S Fd,Fs + case 0x0d: { + mov32(reg(1), imm(Fdn)); + 
mov32(reg(2), imm(Fsn)); + call(&CPU::FTRUNC_W_S); + return 0; + } + + //FCEIL.W.S Fd,Fs + case 0x0e: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCEIL_W_S); + return 0; + } + + //FFLOOR.W.S Fd,Fs + case 0x0f: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FFLOOR_W_S); + return 0; + } + + //FCVT.D.S Fd,Fs + case 0x21: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_D_S); + return 0; + } + + //FCVT.W.S Fd,Fs + case 0x24: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_W_S); + return 0; + } + + //FCVT.L.S Fd,Fs + case 0x25: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_L_S); + return 0; + } + + //FC.F.S Fs,Ft + case 0x30: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_F_S); + return 0; + } + + //FC.UN.S Fs,Ft + case 0x31: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_UN_S); + return 0; + } + + //FC.EQ.S Fs,Ft + case 0x32: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_EQ_S); + return 0; + } + + //FC.UEQ.S Fs,Ft + case 0x33: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_UEQ_S); + return 0; + } + + //FC.OLT.S Fs,Ft + case 0x34: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_OLT_S); + return 0; + } + + //FC.ULT.S Fs,Ft + case 0x35: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_ULT_S); + return 0; + } + + //FC.OLE.S Fs,Ft + case 0x36: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_OLE_S); + return 0; + } + + //FC.ULE.S Fs,Ft + case 0x37: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_ULE_S); + return 0; + } + + //FC.SF.S Fs,Ft + case 0x38: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_SF_S); + return 0; + } + + //FC.NGLE.S Fs,Ft + case 0x39: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGLE_S); 
+ return 0; + } + + //FC.SEQ.S Fs,Ft + case 0x3a: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_SEQ_S); + return 0; + } + + //FC.NGL.S Fs,Ft + case 0x3b: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGL_S); + return 0; + } + + //FC.LT.S Fs,Ft + case 0x3c: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_LT_S); + return 0; + } + + //FC.NGE.S Fs,Ft + case 0x3d: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGE_S); + return 0; + } + + //FC.LE.S Fs,Ft + case 0x3e: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_LE_S); + return 0; + } + + //FC.NGT.S Fs,Ft + case 0x3f: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGT_S); + return 0; + } + + } + + if((instruction >> 21 & 31) == 17) + switch(instruction & 0x3f) { + + //FADD.D Fd,Fs,Ft + case 0x00: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FADD_D); + return 0; + } + + //FSUB.D Fd,Fs,Ft + case 0x01: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FSUB_D); + return 0; + } + + //FMUL.D Fd,Fs,Ft + case 0x02: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FMUL_D); + return 0; + } + + //FDIV.D Fd,Fs,Ft + case 0x03: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + mov32(reg(3), imm(Ftn)); + call(&CPU::FDIV_D); + return 0; + } + + //FSQRT.D Fd,Fs + case 0x04: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FSQRT_D); + return 0; + } + + //FABS.D Fd,Fs + case 0x05: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FABS_D); + return 0; + } + + //FMOV.D Fd,Fs + case 0x06: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FMOV_D); + return 0; + } + + //FNEG.D Fd,Fs + case 0x07: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FNEG_D); + return 0; + } + 
+ //FROUND.L.D Fd,Fs + case 0x08: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FROUND_L_D); + return 0; + } + + //FTRUNC.L.D Fd,Fs + case 0x09: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FTRUNC_L_D); + return 0; + } + + //FCEIL.L.D Fd,Fs + case 0x0a: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCEIL_L_D); + return 0; + } + + //FFLOOR.L.D Fd,Fs + case 0x0b: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FFLOOR_L_D); + return 0; + } + + //FROUND.W.D Fd,Fs + case 0x0c: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FROUND_W_D); + return 0; + } + + //FTRUNC.W.D Fd,Fs + case 0x0d: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FTRUNC_W_D); + return 0; + } + + //FCEIL.W.D Fd,Fs + case 0x0e: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCEIL_W_D); + return 0; + } + + //FFLOOR.W.D Fd,Fs + case 0x0f: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FFLOOR_W_D); + return 0; + } + + //FCVT.S.D Fd,Fs + case 0x20: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_S_D); + return 0; + } + + //FCVT.W.D Fd,Fs + case 0x24: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_W_D); + return 0; + } + + //FCVT.L.D Fd,Fs + case 0x25: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_L_D); + return 0; + } + + //FC.F.D Fs,Ft + case 0x30: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_F_D); + return 0; + } + + //FC.UN.D Fs,Ft + case 0x31: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_UN_D); + return 0; + } + + //FC.EQ.D Fs,Ft + case 0x32: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_EQ_D); + return 0; + } + + //FC.UEQ.D Fs,Ft + case 0x33: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_UEQ_D); + return 0; + } + + //FC.OLT.D Fs,Ft + case 
0x34: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_OLT_D); + return 0; + } + + //FC.ULT.D Fs,Ft + case 0x35: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_ULT_D); + return 0; + } + + //FC.OLE.D Fs,Ft + case 0x36: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_OLE_D); + return 0; + } + + //FC.ULE.D Fs,Ft + case 0x37: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_ULE_D); + return 0; + } + + //FC.SF.D Fs,Ft + case 0x38: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_SF_D); + return 0; + } + + //FC.NGLE.D Fs,Ft + case 0x39: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGLE_D); + return 0; + } + + //FC.SEQ.D Fs,Ft + case 0x3a: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_SEQ_D); + return 0; + } + + //FC.NGL.D Fs,Ft + case 0x3b: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGL_D); + return 0; + } + + //FC.LT.D Fs,Ft + case 0x3c: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_LT_D); + return 0; + } + + //FC.NGE.D Fs,Ft + case 0x3d: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGE_D); + return 0; + } + + //FC.LE.D Fs,Ft + case 0x3e: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_LE_D); + return 0; + } + + //FC.NGT.D Fs,Ft + case 0x3f: { + mov32(reg(1), imm(Fsn)); + mov32(reg(2), imm(Ftn)); + call(&CPU::FC_NGT_D); + return 0; + } + + } + + if((instruction >> 21 & 31) == 20) + switch(instruction & 0x3f) { + + //FCVT.S.W Fd,Fs + case 0x20: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_S_W); + return 0; + } + + //FCVT.D.W Fd,Fs + case 0x21: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_D_W); + return 0; + } + + } + + if((instruction >> 21 & 31) == 21) + switch(instruction & 0x3f) { + + //FCVT.S.L + case 0x20: { + mov32(reg(1), imm(Fdn)); + 
mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_S_L); + return 0; + } + + //FCVT.D.L + case 0x21: { + mov32(reg(1), imm(Fdn)); + mov32(reg(2), imm(Fsn)); + call(&CPU::FCVT_D_L); + return 0; + } + + } + + return 0; +} + +#undef Sa +#undef Rdn +#undef Rtn +#undef Rsn +#undef Fdn +#undef Fsn +#undef Ftn +#undef IpuBase +#undef IpuReg +#undef Rd +#undef Rt +#undef Rt32 +#undef Rs +#undef Rs32 +#undef Lo +#undef Hi +#undef FpuBase +#undef FpuReg +#undef Fd +#undef Fs +#undef Ft +#undef i16 +#undef n16 +#undef n26 diff --git a/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp b/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp new file mode 100644 index 0000000000..603e1ce179 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp @@ -0,0 +1,147 @@ +auto CPU::serialize(serializer& s) -> void { + Thread::serialize(s); + + s(pipeline.address); + s(pipeline.instruction); + + s(branch.pc); + s(branch.state); + + s(context.endian); + s(context.mode); + s(context.bits); + s(context.segment); + + for(auto& line : icache.lines) { + s(line.valid); + s(line.tag); + s(line.index); + s(line.words); + } + + for(auto& line : dcache.lines) { + s(line.valid); + s(line.dirty); + s(line.tag); + s(line.index); + s(line.words); + } + + for(auto& e : tlb.entry) { + s(e.global); + s(e.valid); + s(e.dirty); + s(e.cacheAlgorithm); + s(e.physicalAddress); + s(e.pageMask); + s(e.virtualAddress); + s(e.addressSpaceID); + s(e.region); + s(e.globals); + s(e.addressMaskHi); + s(e.addressMaskLo); + s(e.addressSelect); + s(e.addressCompare); + } + s(tlb.physicalAddress); + + for(auto& r : ipu.r) s(r.u64); + s(ipu.lo.u64); + s(ipu.hi.u64); + s(ipu.pc); + + s(scc.index.tlbEntry); + s(scc.index.probeFailure); + s(scc.random.index); + s(scc.random.unused); + s(scc.tlb.global); + s(scc.tlb.valid); + s(scc.tlb.dirty); + s(scc.tlb.cacheAlgorithm); + s(scc.tlb.physicalAddress); + s(scc.tlb.pageMask); + s(scc.tlb.virtualAddress); + s(scc.tlb.addressSpaceID); + s(scc.tlb.region); + 
s(scc.tlb.globals); + s(scc.tlb.addressMaskHi); + s(scc.tlb.addressMaskLo); + s(scc.tlb.addressSelect); + s(scc.tlb.addressCompare); + s(scc.context.badVirtualAddress); + s(scc.context.pageTableEntryBase); + s(scc.wired.index); + s(scc.wired.unused); + s(scc.badVirtualAddress); + s(scc.count); + s(scc.compare); + s(scc.status.interruptEnable); + s(scc.status.exceptionLevel); + s(scc.status.errorLevel); + s(scc.status.privilegeMode); + s(scc.status.userExtendedAddressing); + s(scc.status.kernelExtendedAddressing); + s(scc.status.interruptMask); + s(scc.status.de); + s(scc.status.ce); + s(scc.status.condition); + s(scc.status.softReset); + s(scc.status.tlbShutdown); + s(scc.status.vectorLocation); + s(scc.status.instructionTracing); + s(scc.status.reverseEndian); + s(scc.status.floatingPointMode); + s(scc.status.lowPowerMode); + s(scc.status.enable.coprocessor0); + s(scc.status.enable.coprocessor1); + s(scc.status.enable.coprocessor2); + s(scc.status.enable.coprocessor3); + s(scc.cause.exceptionCode); + s(scc.cause.interruptPending); + s(scc.cause.coprocessorError); + s(scc.cause.branchDelay); + s(scc.epc); + s(scc.configuration.coherencyAlgorithmKSEG0); + s(scc.configuration.cu); + s(scc.configuration.bigEndian); + s(scc.configuration.sysadWritebackPattern); + s(scc.configuration.systemClockRatio); + s(scc.ll); + s(scc.llbit); + s(scc.watchLo.trapOnWrite); + s(scc.watchLo.trapOnRead); + s(scc.watchLo.physicalAddress); + s(scc.watchHi.physicalAddressExtended); + s(scc.xcontext.badVirtualAddress); + s(scc.xcontext.region); + s(scc.xcontext.pageTableEntryBase); + s(scc.parityError.diagnostic); + s(scc.tagLo.primaryCacheState); + s(scc.tagLo.physicalAddress); + s(scc.epcError); + + for(auto& r : fpu.r) s(r.u64); + s(fpu.csr.roundMode); + s(fpu.csr.flag.inexact); + s(fpu.csr.flag.underflow); + s(fpu.csr.flag.overflow); + s(fpu.csr.flag.divisionByZero); + s(fpu.csr.flag.invalidOperation); + s(fpu.csr.enable.inexact); + s(fpu.csr.enable.underflow); + 
s(fpu.csr.enable.overflow); + s(fpu.csr.enable.divisionByZero); + s(fpu.csr.enable.invalidOperation); + s(fpu.csr.cause.inexact); + s(fpu.csr.cause.underflow); + s(fpu.csr.cause.overflow); + s(fpu.csr.cause.divisionByZero); + s(fpu.csr.cause.invalidOperation); + s(fpu.csr.cause.unimplementedOperation); + s(fpu.csr.compare); + s(fpu.csr.flushed); + + if constexpr(Accuracy::CPU::Recompiler) { + recompiler.reset(); + } +} diff --git a/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp b/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp new file mode 100644 index 0000000000..dcd4f2de5c --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp @@ -0,0 +1,65 @@ +//the N64 TLB is 32-bit only: only the 64-bit XTLB exception vector is used. + +auto CPU::TLB::load(u32 address) -> Match { + for(auto& entry : this->entry) { + if(!entry.globals || entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue; + if((address & entry.addressMaskHi) != (u32)entry.addressCompare) continue; + bool lo = address & entry.addressSelect; + if(!entry.valid[lo]) { + exception(address); + self.debugger.tlbLoadInvalid(address); + self.exception.tlbLoadInvalid(); + return {false}; + } + physicalAddress = entry.physicalAddress[lo] + (address & entry.addressMaskLo); + self.debugger.tlbLoad(address, physicalAddress); + return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; + } + exception(address); + self.debugger.tlbLoadMiss(address); + self.exception.tlbLoadMiss(); + return {false}; +} + +auto CPU::TLB::store(u32 address) -> Match { + for(auto& entry : this->entry) { + if(!entry.globals || entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue; + if((address & entry.addressMaskHi) != (u32)entry.addressCompare) continue; + bool lo = address & entry.addressSelect; + if(!entry.valid[lo]) { + exception(address); + self.debugger.tlbStoreInvalid(address); + self.exception.tlbStoreInvalid(); + return {false}; + } + if(!entry.dirty[lo]) { + exception(address); + 
self.debugger.tlbModification(address); + self.exception.tlbModification(); + return {false}; + } + physicalAddress = entry.physicalAddress[lo] + (address & entry.addressMaskLo); + self.debugger.tlbStore(address, physicalAddress); + return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; + } + exception(address); + self.debugger.tlbStoreMiss(address); + self.exception.tlbStoreMiss(); + return {false}; +} + +auto CPU::TLB::exception(u32 address) -> void { + self.scc.badVirtualAddress = address; + self.scc.tlb.virtualAddress.bit(13,39) = address >> 13; + self.scc.context.badVirtualAddress = address >> 13; + self.scc.xcontext.badVirtualAddress = address >> 13; + self.scc.xcontext.region = 0; +} + +auto CPU::TLB::Entry::synchronize() -> void { + globals = global[0] && global[1]; + addressMaskHi = ~(pageMask | 0x1fff); + addressMaskLo = (pageMask | 0x1fff) >> 1; + addressSelect = addressMaskLo + 1; + addressCompare = virtualAddress & addressMaskHi; +} diff --git a/waterbox/ares64/ares/ares/n64/dd/dd.cpp b/waterbox/ares64/ares/ares/n64/dd/dd.cpp new file mode 100644 index 0000000000..b45ac683d0 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/dd/dd.cpp @@ -0,0 +1,43 @@ +#include + +namespace ares::Nintendo64 { + +DD dd; +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto DD::load(Node::Object parent) -> void { + node = parent->append("Disk Drive"); + + iplrom.allocate(4_MiB); + c2s.allocate(0x400); + ds.allocate(0x100); + ms.allocate(0x40); + + if(node->setPak(pak = platform->pak(node))) { + if(auto fp = pak->read("64dd.ipl.rom")) { + iplrom.load(fp); + } + } + + debugger.load(node); +} + +auto DD::unload() -> void { + debugger = {}; + iplrom.reset(); + c2s.reset(); + ds.reset(); + ms.reset(); + pak.reset(); + node.reset(); +} + +auto DD::power(bool reset) -> void { + c2s.fill(); + ds.fill(); + ms.fill(); +} + +} diff --git a/waterbox/ares64/ares/ares/n64/dd/dd.hpp b/waterbox/ares64/ares/ares/n64/dd/dd.hpp new file mode 100644 index 
0000000000..e3ca8eb851 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/dd/dd.hpp @@ -0,0 +1,34 @@ +//Disk Drive + +struct DD : Memory::IO

{ + Node::Object node; + VFS::Pak pak; + Memory::Readable iplrom; + Memory::Writable c2s; + Memory::Writable ds; + Memory::Writable ms; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto io(bool mode, u32 address, u32 data) -> void; + + struct Tracer { + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //dd.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + auto power(bool reset) -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; +}; + +extern DD dd; diff --git a/waterbox/ares64/ares/ares/n64/dd/debugger.cpp b/waterbox/ares64/ares/ares/n64/dd/debugger.cpp new file mode 100644 index 0000000000..63e51e9662 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/dd/debugger.cpp @@ -0,0 +1,39 @@ +auto DD::Debugger::load(Node::Object parent) -> void { + tracer.io = parent->append("I/O", "DD"); +} + +auto DD::Debugger::io(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "ASIC_DATA", + "ASIC_MISC_REG", + "ASIC_STATUS|ASIC_CMD", + "ASIC_CUR_TK", + "ASIC_BM_STATUS|ASIC_BM_CTL", + "ASIC_ERR_SECTOR", + "ASIC_SEQ_STATUS|ASIC_SEQ_CTL", + "ASIC_CUR_SECTOR", + "ASIC_HARD_RESET", + "ASIC_C1_S0", + "ASIC_HOST_SECBYE", + "ASIC_C1_S2", + "ASIC_SEC_BYTE", + "ASIC_C1_S4", + "ASIC_C1_S6", + "ASIC_CUR_ADDRESS", + "ASIC_ID_REG", + "ASIC_TEST_REG", + "ASIC_TEST_PIN_SEL", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "ASIC_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/dd/io.cpp b/waterbox/ares64/ares/ares/n64/dd/io.cpp new file mode 100644 index 0000000000..32864a870d --- /dev/null +++ 
b/waterbox/ares64/ares/ares/n64/dd/io.cpp @@ -0,0 +1,168 @@ +auto DD::readWord(u32 address) -> u32 { + address = (address & 0x7f) >> 2; + n32 data; + + //ASIC_DATA + if(address == 0) { + } + + //ASIC_MISC_REG + if(address == 1) { + } + + //ASIC_STATUS + if(address == 2) { + //required to indicate the 64DD is missing + data = 0xffff'ffff; + } + + //ASIC_CUR_TK + if(address == 3) { + } + + //ASIC_BM_STATUS + if(address == 4) { + } + + //ASIC_ERR_SECTOR + if(address == 5) { + } + + //ASIC_SEQ_STATUS + if(address == 6) { + } + + //ASIC_CUR_SECTOR + if(address == 7) { + } + + //ASIC_HARD_RESET + if(address == 8) { + } + + //ASIC_C1_S0 + if(address == 9) { + } + + //ASIC_HOST_SECBYTE + if(address == 10) { + } + + //ASIC_C1_S2 + if(address == 11) { + } + + //ASIC_SEC_BYTE + if(address == 12) { + } + + //ASIC_C1_S4 + if(address == 13) { + } + + //ASIC_C1_S6 + if(address == 14) { + } + + //ASIC_CUR_ADDRESS + if(address == 15) { + } + + //ASIC_ID_REG + if(address == 16) { + } + + //ASIC_TEST_REG + if(address == 17) { + } + + //ASIC_TEST_PIN_SEL + if(address == 18) { + } + + debugger.io(Read, address, data); + return data; +} + +auto DD::writeWord(u32 address, u32 data_) -> void { + address = (address & 0x7f) >> 2; + n32 data = data_; + + //ASIC_DATA + if(address == 0) { + } + + //ASIC_MISC_REG + if(address == 1) { + } + + //ASIC_CMD + if(address == 2) { + } + + //ASIC_CUR_TK + if(address == 3) { + } + + //ASIC_BM_CTL + if(address == 4) { + } + + //ASIC_ERR_SECTOR + if(address == 5) { + } + + //ASIC_SEQ_CTL + if(address == 6) { + } + + //ASIC_CUR_SECTOR + if(address == 7) { + } + + //ASIC_HARD_RESET + if(address == 8) { + } + + //ASIC_C1_S0 + if(address == 9) { + } + + //ASIC_HOST_SECBYTE + if(address == 10) { + } + + //ASIC_C1_S2 + if(address == 11) { + } + + //ASIC_SEC_BYTE + if(address == 12) { + } + + //ASIC_C1_S4 + if(address == 13) { + } + + //ASIC_C1_S6 + if(address == 14) { + } + + //ASIC_CUR_ADDRESS + if(address == 15) { + } + + //ASIC_ID_REG + if(address == 16) { + 
} + + //ASIC_TEST_REG + if(address == 17) { + } + + //ASIC_TEST_PIN_SEL + if(address == 18) { + } + + debugger.io(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/dd/serialization.cpp b/waterbox/ares64/ares/ares/n64/dd/serialization.cpp new file mode 100644 index 0000000000..7c33499215 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/dd/serialization.cpp @@ -0,0 +1,2 @@ +auto DD::serialize(serializer& s) -> void { +} diff --git a/waterbox/ares64/ares/ares/n64/memory/bus.hpp b/waterbox/ares64/ares/ares/n64/memory/bus.hpp new file mode 100644 index 0000000000..da208e6b42 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/bus.hpp @@ -0,0 +1,96 @@ +template +inline auto Bus::read(u32 address) -> u64 { + static constexpr u64 unmapped = 0; + address &= 0x1fff'ffff - (Size - 1); + + if(address <= 0x007f'ffff) return rdram.ram.read(address); + if(address <= 0x03ef'ffff) return unmapped; + if(address <= 0x03ff'ffff) return rdram.read(address); + if(address <= 0x0400'0fff) return rsp.dmem.read(address); + if(address <= 0x0400'1fff) return rsp.imem.read(address); + if(address <= 0x0403'ffff) return unmapped; + if(address <= 0x0407'ffff) return rsp.read(address); + if(address <= 0x040f'ffff) return rsp.status.read(address); + if(address <= 0x041f'ffff) return rdp.read(address); + if(address <= 0x042f'ffff) return rdp.io.read(address); + if(address <= 0x043f'ffff) return mi.read(address); + if(address <= 0x044f'ffff) return vi.read(address); + if(address <= 0x045f'ffff) return ai.read(address); + if(address <= 0x046f'ffff) return pi.read(address); + if(address <= 0x047f'ffff) return ri.read(address); + if(address <= 0x048f'ffff) return si.read(address); + if(address <= 0x04ff'ffff) return unmapped; + if(address <= 0x0500'03ff) return dd.c2s.read(address); + if(address <= 0x0500'04ff) return dd.ds.read(address); + if(address <= 0x0500'057f) return dd.read(address); + if(address <= 0x0500'05bf) return dd.ms.read(address); + if(address <= 0x05ff'ffff) 
return unmapped; + if(address <= 0x063f'ffff) return dd.iplrom.read(address); + if(address <= 0x07ff'ffff) return unmapped; + if(address <= 0x0fff'ffff) { + if(cartridge.ram ) return cartridge.ram.read(address); + if(cartridge.flash) return cartridge.flash.read(address); + return unmapped; + } + if(address <= 0x1fbf'ffff) { + if(address >= 0x13ff'0000 && address <= 0x13ff'ffff) { + return cartridge.isviewer.read(address); + } + return cartridge.rom.read(address); + } + if(address <= 0x1fc0'07bf) { + if(pi.io.romLockout) return unmapped; + return pi.rom.read(address); + } + if(address <= 0x1fc0'07ff) return pi.ram.read(address); + return unmapped; +} + +template +inline auto Bus::write(u32 address, u64 data) -> void { + address &= 0x1fff'ffff - (Size - 1); + cpu.recompiler.invalidate(address + 0); if constexpr(Size == Dual) + cpu.recompiler.invalidate(address + 4); + + if(address <= 0x007f'ffff) return rdram.ram.write(address, data); + if(address <= 0x03ef'ffff) return; + if(address <= 0x03ff'ffff) return rdram.write(address, data); + if(address <= 0x0400'0fff) return rsp.dmem.write(address, data); + if(address <= 0x0400'1fff) return rsp.recompiler.invalidate(), rsp.imem.write(address, data); + if(address <= 0x0403'ffff) return; + if(address <= 0x0407'ffff) return rsp.write(address, data); + if(address <= 0x040f'ffff) return rsp.status.write(address, data); + if(address <= 0x041f'ffff) return rdp.write(address, data); + if(address <= 0x042f'ffff) return rdp.io.write(address, data); + if(address <= 0x043f'ffff) return mi.write(address, data); + if(address <= 0x044f'ffff) return vi.write(address, data); + if(address <= 0x045f'ffff) return ai.write(address, data); + if(address <= 0x046f'ffff) return pi.write(address, data); + if(address <= 0x047f'ffff) return ri.write(address, data); + if(address <= 0x048f'ffff) return si.write(address, data); + if(address <= 0x04ff'ffff) return; + if(address <= 0x0500'03ff) return dd.c2s.write(address, data); + if(address <= 
0x0500'04ff) return dd.ds.write(address, data); + if(address <= 0x0500'057f) return dd.write(address, data); + if(address <= 0x0500'05bf) return dd.ms.write(address, data); + if(address <= 0x05ff'ffff) return; + if(address <= 0x063f'ffff) return dd.iplrom.write(address, data); + if(address <= 0x07ff'ffff) return; + if(address <= 0x0fff'ffff) { + if(cartridge.ram ) return cartridge.ram.write(address, data); + if(cartridge.flash) return cartridge.flash.write(address, data); + return; + } + if(address <= 0x1fbf'ffff) { + if(address >= 0x13ff'0000 && address <= 0x13ff'ffff) { + cartridge.isviewer.write(address, data); + } + return cartridge.rom.write(address, data); + } + if(address <= 0x1fc0'07bf) { + if(pi.io.romLockout) return; + return pi.rom.write(address, data); + } + if(address <= 0x1fc0'07ff) return pi.ram.write(address, data); + return; +} diff --git a/waterbox/ares64/ares/ares/n64/memory/io.hpp b/waterbox/ares64/ares/ares/n64/memory/io.hpp new file mode 100644 index 0000000000..7aacb44cb8 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/io.hpp @@ -0,0 +1,55 @@ +template +struct IO { + template + auto read(u32 address) -> u64 { + if constexpr(Size == Byte) { + auto data = ((T*)this)->readWord(address); + switch(address & 3) { + case 0: return data >> 24; + case 1: return data >> 16; + case 2: return data >> 8; + case 3: return data >> 0; + } + } + if constexpr(Size == Half) { + auto data = ((T*)this)->readWord(address); + switch(address & 2) { + case 0: return data >> 16; + case 2: return data >> 0; + } + } + if constexpr(Size == Word) { + return ((T*)this)->readWord(address); + } + if constexpr(Size == Dual) { + u64 data = ((T*)this)->readWord(address); + return data << 32 | ((T*)this)->readWord(address + 4); + } + unreachable; + } + + template + auto write(u32 address, u64 data) -> void { + if constexpr(Size == Byte) { + switch(address & 3) { + case 0: return ((T*)this)->writeWord(address, data << 24); + case 1: return ((T*)this)->writeWord(address, 
data << 16); + case 2: return ((T*)this)->writeWord(address, data << 8); + case 3: return ((T*)this)->writeWord(address, data << 0); + } + } + if constexpr(Size == Half) { + switch(address & 2) { + case 0: return ((T*)this)->writeWord(address, data << 16); + case 2: return ((T*)this)->writeWord(address, data << 0); + } + } + if constexpr(Size == Word) { + ((T*)this)->writeWord(address, data); + } + if constexpr(Size == Dual) { + ((T*)this)->writeWord(address + 0, data >> 32); + ((T*)this)->writeWord(address + 4, data >> 0); + } + } +}; diff --git a/waterbox/ares64/ares/ares/n64/memory/lsb/readable.hpp b/waterbox/ares64/ares/ares/n64/memory/lsb/readable.hpp new file mode 100644 index 0000000000..b6bfd8f566 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/lsb/readable.hpp @@ -0,0 +1,101 @@ +struct Readable { + explicit operator bool() const { + return size > 0; + } + + auto reset() -> void { + memory::free(data); + data = nullptr; + size = 0; + maskByte = 0; + maskHalf = 0; + maskWord = 0; + maskDual = 0; + } + + auto allocate(u32 capacity, u32 fillWith = ~0) -> void { + reset(); + size = capacity & ~7; + u32 mask = bit::round(size) - 1; + maskByte = mask & ~0; + maskHalf = mask & ~1; + maskWord = mask & ~3; + maskDual = mask & ~7; + data = memory::allocate(mask + 1); + fill(fillWith); + } + + auto fill(u32 value = 0) -> void { + for(u32 address = 0; address < size; address += 4) { + *(u32*)&data[address & maskWord] = value; + } + } + + auto load(VFS::File fp) -> void { + if(!size) allocate(fp->size()); + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + *(u32*)&data[address & maskWord] = fp->readm(4L); + } + } + + auto save(VFS::File fp) -> void { + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + fp->writem(*(u32*)&data[address & maskWord], 4L); + } + } + + template + auto read(u32 address) -> u64 { + if constexpr(Size == Byte) return *(u8* )&data[address & maskByte ^ 3]; + if constexpr(Size == Half) return 
*(u16*)&data[address & maskHalf ^ 2]; + if constexpr(Size == Word) return *(u32*)&data[address & maskWord ^ 0]; + if constexpr(Size == Dual) { + u64 upper = read(address + 0); + u64 lower = read(address + 4); + return upper << 32 | lower << 0; + } + unreachable; + } + + template + auto write(u32 address, u64 value) -> void { + } + + template + auto readUnaligned(u32 address) -> u64 { + static_assert(Size == Half || Size == Word || Size == Dual); + if constexpr(Size == Half) { + u16 upper = read(address + 0); + u16 lower = read(address + 1); + return upper << 8 | lower << 0; + } + if constexpr(Size == Word) { + u32 upper = readUnaligned(address + 0); + u32 lower = readUnaligned(address + 2); + return upper << 16 | lower << 0; + } + if constexpr(Size == Dual) { + u64 upper = readUnaligned(address + 0); + u64 lower = readUnaligned(address + 4); + return upper << 32 | lower << 0; + } + unreachable; + } + + template + auto writeUnaligned(u32 address, u64 value) -> void { + static_assert(Size == Half || Size == Word || Size == Dual); + } + + auto serialize(serializer& s) -> void { + //s(array_span{data, size}); + } + +//private: + u8* data = nullptr; + u32 size = 0; + u32 maskByte = 0; + u32 maskHalf = 0; + u32 maskWord = 0; + u32 maskDual = 0; +}; diff --git a/waterbox/ares64/ares/ares/n64/memory/lsb/writable.hpp b/waterbox/ares64/ares/ares/n64/memory/lsb/writable.hpp new file mode 100644 index 0000000000..ece79ba989 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/lsb/writable.hpp @@ -0,0 +1,120 @@ +struct Writable { + explicit operator bool() const { + return size > 0; + } + + auto reset() -> void { + memory::free(data); + data = nullptr; + size = 0; + maskByte = 0; + maskHalf = 0; + maskWord = 0; + maskDual = 0; + } + + auto allocate(u32 capacity, u32 fillWith = ~0) -> void { + reset(); + size = capacity & ~7; + u32 mask = bit::round(size) - 1; + maskByte = mask & ~0; + maskHalf = mask & ~1; + maskWord = mask & ~3; + maskDual = mask & ~7; + data = 
memory::allocate(mask + 1); + fill(fillWith); + } + + auto fill(u32 value = 0) -> void { + for(u32 address = 0; address < size; address += 4) { + *(u32*)&data[address & maskWord] = value; + } + } + + auto load(VFS::File fp) -> void { + if(!size) allocate(fp->size()); + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + *(u32*)&data[address & maskWord] = fp->readm(4L); + } + } + + auto save(VFS::File fp) -> void { + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + fp->writem(*(u32*)&data[address & maskWord], 4L); + } + } + + template + auto read(u32 address) -> u64 { + if constexpr(Size == Byte) return *(u8* )&data[address & maskByte ^ 3]; + if constexpr(Size == Half) return *(u16*)&data[address & maskHalf ^ 2]; + if constexpr(Size == Word) return *(u32*)&data[address & maskWord ^ 0]; + if constexpr(Size == Dual) { + u64 upper = read(address + 0); + u64 lower = read(address + 4); + return upper << 32 | lower << 0; + } + unreachable; + } + + template + auto write(u32 address, u64 value) -> void { + if constexpr(Size == Byte) *(u8* )&data[address & maskByte ^ 3] = value; + if constexpr(Size == Half) *(u16*)&data[address & maskHalf ^ 2] = value; + if constexpr(Size == Word) *(u32*)&data[address & maskWord ^ 0] = value; + if constexpr(Size == Dual) { + write(address + 0, value >> 32); + write(address + 4, value >> 0); + } + } + + template + auto readUnaligned(u32 address) -> u64 { + static_assert(Size == Half || Size == Word || Size == Dual); + if constexpr(Size == Half) { + u16 upper = read(address + 0); + u16 lower = read(address + 1); + return upper << 8 | lower << 0; + } + if constexpr(Size == Word) { + u32 upper = readUnaligned(address + 0); + u32 lower = readUnaligned(address + 2); + return upper << 16 | lower << 0; + } + if constexpr(Size == Dual) { + u64 upper = readUnaligned(address + 0); + u64 lower = readUnaligned(address + 4); + return upper << 32 | lower << 0; + } + unreachable; + } + + template + auto 
writeUnaligned(u32 address, u64 value) -> void { + static_assert(Size == Half || Size == Word || Size == Dual); + if constexpr(Size == Half) { + write(address + 0, value >> 8); + write(address + 1, value >> 0); + } + if constexpr(Size == Word) { + writeUnaligned(address + 0, value >> 16); + writeUnaligned(address + 2, value >> 0); + } + if constexpr(Size == Dual) { + writeUnaligned(address + 0, value >> 32); + writeUnaligned(address + 4, value >> 0); + } + } + + auto serialize(serializer& s) -> void { + s(array_span{data, size}); + } + +//private: + u8* data = nullptr; + u32 size = 0; + u32 maskByte = 0; + u32 maskHalf = 0; + u32 maskWord = 0; + u32 maskDual = 0; +}; diff --git a/waterbox/ares64/ares/ares/n64/memory/memory.cpp b/waterbox/ares64/ares/ares/n64/memory/memory.cpp new file mode 100644 index 0000000000..7aca403723 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/memory.cpp @@ -0,0 +1,7 @@ +#include + +namespace ares::Nintendo64 { + +Bus bus; + +} diff --git a/waterbox/ares64/ares/ares/n64/memory/memory.hpp b/waterbox/ares64/ares/ares/n64/memory/memory.hpp new file mode 100644 index 0000000000..a0de2560f8 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/memory.hpp @@ -0,0 +1,13 @@ +namespace Memory { + #include "lsb/readable.hpp" + #include "lsb/writable.hpp" + #include "io.hpp" +} + +struct Bus { + //bus.hpp + template auto read(u32 address) -> u64; + template auto write(u32 address, u64 data) -> void; +}; + +extern Bus bus; diff --git a/waterbox/ares64/ares/ares/n64/memory/msb/readable.hpp b/waterbox/ares64/ares/ares/n64/memory/msb/readable.hpp new file mode 100644 index 0000000000..231c2f0227 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/msb/readable.hpp @@ -0,0 +1,85 @@ +struct Readable { + explicit operator bool() const { + return size > 0; + } + + auto reset() -> void { + memory::free(data); + data = nullptr; + size = 0; + maskByte = 0; + maskHalf = 0; + maskWord = 0; + maskDual = 0; + } + + auto allocate(u32 capacity, u32 
fillWith = ~0) -> void { + reset(); + size = capacity & ~7; + u32 mask = bit::round(size) - 1; + maskByte = mask & ~0; + maskHalf = mask & ~1; + maskWord = mask & ~3; + maskDual = mask & ~7; + data = memory::allocate(mask + 1); + fill(fillWith); + } + + auto fill(u32 value = 0) -> void { + for(u32 address = 0; address < size; address += 4) { + *(u32*)&data[address & maskWord] = bswap32(value); + } + } + + auto load(VFS::File fp) -> void { + if(!size) allocate(fp->size()); + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + *(u32*)&data[address & maskWord] = bswap32(fp->readm(4L)); + } + } + + auto save(VFS::File fp) -> void { + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + fp->writem(bswap32(*(u32*)&data[address & maskWord]), 4L); + } + } + + template + auto read(u32 address) -> u64 { + if constexpr(Size == Byte) return (*(u8* )&data[address & maskByte]); + if constexpr(Size == Half) return bswap16(*(u16*)&data[address & maskHalf]); + if constexpr(Size == Word) return bswap32(*(u32*)&data[address & maskWord]); + if constexpr(Size == Dual) return bswap64(*(u64*)&data[address & maskDual]); + unreachable; + } + + template + auto write(u32 address, u64 value) -> void { + } + + template + auto readUnaligned(u32 address) -> u64 { + static_assert(Size == Half || Size == Word || Size == Dual); + if constexpr(Size == Half) return bswap16(*(u16*)&data[address & maskByte]); + if constexpr(Size == Word) return bswap32(*(u32*)&data[address & maskByte]); + if constexpr(Size == Dual) return bswap64(*(u64*)&data[address & maskByte]); + unreachable; + } + + template + auto writeUnaligned(u32 address, u64 value) -> void { + static_assert(Size == Half || Size == Word || Size == Dual); + } + + auto serialize(serializer& s) -> void { + //s(array_span{data, size}); + } + +//private: + u8* data = nullptr; + u32 size = 0; + u32 maskByte = 0; + u32 maskHalf = 0; + u32 maskWord = 0; + u32 maskDual = 0; +}; diff --git 
a/waterbox/ares64/ares/ares/n64/memory/msb/writable.hpp b/waterbox/ares64/ares/ares/n64/memory/msb/writable.hpp new file mode 100644 index 0000000000..8a930062ea --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/memory/msb/writable.hpp @@ -0,0 +1,91 @@ +struct Writable { + explicit operator bool() const { + return size > 0; + } + + auto reset() -> void { + memory::free(data); + data = nullptr; + size = 0; + maskByte = 0; + maskHalf = 0; + maskWord = 0; + maskDual = 0; + } + + auto allocate(u32 capacity, u32 fillWith = ~0) -> void { + reset(); + size = capacity & ~7; + u32 mask = bit::round(size) - 1; + maskByte = mask & ~0; + maskHalf = mask & ~1; + maskWord = mask & ~3; + maskDual = mask & ~7; + data = memory::allocate(mask + 1); + fill(fillWith); + } + + auto fill(u32 value = 0) -> void { + for(u32 address = 0; address < size; address += 4) { + *(u32*)&data[address & maskWord] = bswap32(value); + } + } + + auto load(VFS::File fp) -> void { + if(!size) allocate(fp->size()); + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + *(u32*)&data[address & maskWord] = bswap32(fp->readm(4L)); + } + } + + auto save(VFS::File fp) -> void { + for(u32 address = 0; address < min(size, fp->size()); address += 4) { + fp->writem(bswap32(*(u32*)&data[address & maskWord]), 4L); + } + } + + template + auto read(u32 address) -> u64 { + if constexpr(Size == Byte) return (*(u8* )&data[address & maskByte]); + if constexpr(Size == Half) return bswap16(*(u16*)&data[address & maskHalf]); + if constexpr(Size == Word) return bswap32(*(u32*)&data[address & maskWord]); + if constexpr(Size == Dual) return bswap64(*(u64*)&data[address & maskDual]); + unreachable; + } + template + auto write(u32 address, u64 value) -> void { + if constexpr(Size == Byte) *(u8* )&data[address & maskByte] = (value); + if constexpr(Size == Half) *(u16*)&data[address & maskHalf] = bswap16(value); + if constexpr(Size == Word) *(u32*)&data[address & maskWord] = bswap32(value); + if constexpr(Size == 
Dual) *(u64*)&data[address & maskDual] = bswap64(value); + } + + template + auto readUnaligned(u32 address) -> u64 { + static_assert(Size == Half || Size == Word || Size == Dual); + if constexpr(Size == Half) return bswap16(*(u16*)&data[address & maskByte]); + if constexpr(Size == Word) return bswap32(*(u32*)&data[address & maskByte]); + if constexpr(Size == Dual) return bswap64(*(u64*)&data[address & maskByte]); + unreachable; + } + + template + auto writeUnaligned(u32 address, u64 value) -> void { + static_assert(Size == Half || Size == Word || Size == Dual); + if constexpr(Size == Half) *(u16*)&data[address & maskByte] = bswap16(value); + if constexpr(Size == Word) *(u32*)&data[address & maskByte] = bswap32(value); + if constexpr(Size == Dual) *(u64*)&data[address & maskByte] = bswap64(value); + } + + auto serialize(serializer& s) -> void { + s(array_span{data, size}); + } + +//private: + u8* data = nullptr; + u32 size = 0; + u32 maskByte = 0; + u32 maskHalf = 0; + u32 maskWord = 0; + u32 maskDual = 0; +}; diff --git a/waterbox/ares64/ares/ares/n64/mi/debugger.cpp b/waterbox/ares64/ares/ares/n64/mi/debugger.cpp new file mode 100644 index 0000000000..48f50ed3c0 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/mi/debugger.cpp @@ -0,0 +1,38 @@ +auto MI::Debugger::load(Node::Object parent) -> void { + tracer.interrupt = parent->append("Interrupt", "RCP"); + tracer.io = parent->append("I/O", "MI"); +} + +auto MI::Debugger::interrupt(u8 source) -> void { + if(unlikely(tracer.interrupt->enabled())) { + string type = "unknown"; + if(source == (u32)MI::IRQ::SP) type = "SP"; + if(source == (u32)MI::IRQ::SI) type = "SI"; + if(source == (u32)MI::IRQ::AI) type = "AI"; + if(source == (u32)MI::IRQ::VI) type = "VI"; + if(source == (u32)MI::IRQ::PI) type = "PI"; + if(source == (u32)MI::IRQ::DP) type = "DP"; + tracer.interrupt->notify(type); + } +} + +auto MI::Debugger::io(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "MI_INIT_MODE", + 
"MI_VERSION", + "MI_INTR", + "MI_INTR_MASK", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "MI_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/mi/io.cpp b/waterbox/ares64/ares/ares/n64/mi/io.cpp new file mode 100644 index 0000000000..aac5e31753 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/mi/io.cpp @@ -0,0 +1,90 @@ +auto MI::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data; + + if(address == 0) { + //MI_INIT_MODE + data.bit(0,6) = io.initializeLength; + data.bit(7) = io.initializeMode; + data.bit(8) = io.ebusTestMode; + data.bit(9) = io.rdramRegisterSelect; + } + + if(address == 1) { + //MI_VERSION + data.byte(0) = revision.io; + data.byte(1) = revision.rac; + data.byte(2) = revision.rdp; + data.byte(3) = revision.rsp; + } + + if(address == 2) { + //MI_INTR + data.bit(0) = irq.sp.line; + data.bit(1) = irq.si.line; + data.bit(2) = irq.ai.line; + data.bit(3) = irq.vi.line; + data.bit(4) = irq.pi.line; + data.bit(5) = irq.dp.line; + } + + if(address == 3) { + //MI_INTR_MASK + data.bit(0) = irq.sp.mask; + data.bit(1) = irq.si.mask; + data.bit(2) = irq.ai.mask; + data.bit(3) = irq.vi.mask; + data.bit(4) = irq.pi.mask; + data.bit(5) = irq.dp.mask; + } + + debugger.io(Read, address, data); + return data; +} + +auto MI::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + if(address == 0) { + //MI_INIT_MODE + io.initializeLength = data.bit(0,6); + if(data.bit( 7)) io.initializeMode = 0; + if(data.bit( 8)) io.initializeMode = 1; + if(data.bit( 9)) io.ebusTestMode = 0; + if(data.bit(10)) io.ebusTestMode = 1; + if(data.bit(11)) mi.lower(MI::IRQ::DP); + if(data.bit(12)) io.rdramRegisterSelect = 0; + if(data.bit(13)) 
io.rdramRegisterSelect = 1; + + if(io.initializeMode) debug(unimplemented, "[MI::writeWord] initializeMode=1"); + if(io.ebusTestMode ) debug(unimplemented, "[MI::writeWord] ebusTestMode=1"); + } + + if(address == 1) { + //MI_VERSION (read-only) + } + + if(address == 2) { + //MI_INTR (read-only) + } + + if(address == 3) { + //MI_INTR_MASK + if(data.bit( 0)) irq.sp.mask = 0; + if(data.bit( 1)) irq.sp.mask = 1; + if(data.bit( 2)) irq.si.mask = 0; + if(data.bit( 3)) irq.si.mask = 1; + if(data.bit( 4)) irq.ai.mask = 0; + if(data.bit( 5)) irq.ai.mask = 1; + if(data.bit( 6)) irq.vi.mask = 0; + if(data.bit( 7)) irq.vi.mask = 1; + if(data.bit( 8)) irq.pi.mask = 0; + if(data.bit( 9)) irq.pi.mask = 1; + if(data.bit(10)) irq.dp.mask = 0; + if(data.bit(11)) irq.dp.mask = 1; + poll(); + } + + debugger.io(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/mi/mi.cpp b/waterbox/ares64/ares/ares/n64/mi/mi.cpp new file mode 100644 index 0000000000..526d53fde8 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/mi/mi.cpp @@ -0,0 +1,62 @@ +#include + +namespace ares::Nintendo64 { + +MI mi; +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto MI::load(Node::Object parent) -> void { + node = parent->append("MI"); + + debugger.load(node); +} + +auto MI::unload() -> void { + node.reset(); + debugger = {}; +} + +auto MI::raise(IRQ source) -> void { + debugger.interrupt((u32)source); + switch(source) { + case IRQ::SP: irq.sp.line = 1; break; + case IRQ::SI: irq.si.line = 1; break; + case IRQ::AI: irq.ai.line = 1; break; + case IRQ::VI: irq.vi.line = 1; break; + case IRQ::PI: irq.pi.line = 1; break; + case IRQ::DP: irq.dp.line = 1; break; + } + poll(); +} + +auto MI::lower(IRQ source) -> void { + switch(source) { + case IRQ::SP: irq.sp.line = 0; break; + case IRQ::SI: irq.si.line = 0; break; + case IRQ::AI: irq.ai.line = 0; break; + case IRQ::VI: irq.vi.line = 0; break; + case IRQ::PI: irq.pi.line = 0; break; + case IRQ::DP: irq.dp.line = 0; break; 
+ } + poll(); +} + +auto MI::poll() -> void { + bool line = 0; + line |= irq.sp.line & irq.sp.mask; + line |= irq.si.line & irq.si.mask; + line |= irq.ai.line & irq.ai.mask; + line |= irq.vi.line & irq.vi.mask; + line |= irq.pi.line & irq.pi.mask; + line |= irq.dp.line & irq.dp.mask; + cpu.scc.cause.interruptPending.bit(2) = line; +} + +auto MI::power(bool reset) -> void { + irq = {}; + io = {}; +} + +} diff --git a/waterbox/ares64/ares/ares/n64/mi/mi.hpp b/waterbox/ares64/ares/ares/n64/mi/mi.hpp new file mode 100644 index 0000000000..7402933874 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/mi/mi.hpp @@ -0,0 +1,66 @@ +//MIPS Interface + +struct MI : Memory::IO { + Node::Object node; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto interrupt(u8 source) -> void; + auto io(bool mode, u32 address, u32 data) -> void; + + struct Tracer { + Node::Debugger::Tracer::Notification interrupt; + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //mi.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + + enum class IRQ : u32 { SP, SI, AI, VI, PI, DP }; + auto raise(IRQ) -> void; + auto lower(IRQ) -> void; + auto poll() -> void; + + auto power(bool reset) -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + +private: + struct Interrupt { + b1 line = 1; + b1 mask; + }; + + struct IRQs { + Interrupt sp; + Interrupt si; + Interrupt ai; + Interrupt vi; + Interrupt pi; + Interrupt dp; + } irq; + + struct IO { + n7 initializeLength; + n1 initializeMode; + n1 ebusTestMode; + n1 rdramRegisterSelect; + } io; + + struct Revision { + static constexpr u8 io = 0x02; //I/O interface + static constexpr u8 rac = 0x01; //RAMBUS ASIC cell + static constexpr u8 rdp = 0x02; //Reality Display Processor + static constexpr u8 rsp = 0x02; //Reality Signal Processor + } revision; +}; + +extern MI mi; diff --git 
a/waterbox/ares64/ares/ares/n64/mi/serialization.cpp b/waterbox/ares64/ares/ares/n64/mi/serialization.cpp new file mode 100644 index 0000000000..40b501d294 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/mi/serialization.cpp @@ -0,0 +1,19 @@ +auto MI::serialize(serializer& s) -> void { + s(irq.sp.line); + s(irq.sp.mask); + s(irq.si.line); + s(irq.si.mask); + s(irq.ai.line); + s(irq.ai.mask); + s(irq.vi.line); + s(irq.vi.mask); + s(irq.pi.line); + s(irq.pi.mask); + s(irq.dp.line); + s(irq.dp.mask); + + s(io.initializeLength); + s(io.initializeMode); + s(io.ebusTestMode); + s(io.rdramRegisterSelect); +} diff --git a/waterbox/ares64/ares/ares/n64/n64.hpp b/waterbox/ares64/ares/ares/n64/n64.hpp new file mode 100644 index 0000000000..c02de37595 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/n64.hpp @@ -0,0 +1,75 @@ +#pragma once +//started: 2020-04-28 + +#include +#include +#include +#include + +#if defined(ARCHITECTURE_AMD64) +#include +using v128 = __m128i; +#endif + +#if defined(VULKAN) + #include +#endif + +#if defined(MAME_RDP) +class n64_state; +#endif + +namespace ares::Nintendo64 { + auto enumerate() -> vector; + auto load(Node::System& node, string name) -> bool; + auto option(string name, string value) -> bool; + + enum : u32 { Read, Write }; + enum : u32 { Byte = 1, Half = 2, Word = 4, Dual = 8 }; + + struct Region { + static inline auto NTSC() -> bool; + static inline auto PAL() -> bool; + }; + + struct Thread { + auto reset() -> void { + clock = 0; + } + + auto serialize(serializer& s) -> void { + s(clock); + } + + s64 clock; + }; + + struct Queue : priority_queue { + enum : u32 { + RSP_DMA, + PI_DMA_Read, + PI_DMA_Write, + SI_DMA_Read, + SI_DMA_Write, + }; + }; + extern Queue queue; + + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include +} diff --git a/waterbox/ares64/ares/ares/n64/pi/debugger.cpp 
b/waterbox/ares64/ares/ares/n64/pi/debugger.cpp new file mode 100644 index 0000000000..b453b7f658 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/pi/debugger.cpp @@ -0,0 +1,42 @@ +auto PI::Debugger::load(Node::Object parent) -> void { + memory.ram = parent->append("PI RAM"); + memory.ram->setSize(64); + memory.ram->setRead([&](u32 address) -> u8 { + return pi.ram.read(address); + }); + memory.ram->setWrite([&](u32 address, u8 data) -> void { + return pi.ram.write(address, data); + }); + + tracer.io = parent->append("I/O", "PI"); +} + +auto PI::Debugger::io(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "PI_DRAM_ADDRESS", + "PI_PBUS_ADDRESS", + "PI_READ_LENGTH", + "PI_WRITE_LENGTH", + "PI_STATUS", + "PI_BSD_DOM1_LAT", + "PI_BSD_DOM1_PWD", + "PI_BSD_DOM1_PGS", + "PI_BSD_DOM1_RLS", + "PI_BSD_DOM2_LAT", + "PI_BSD_DOM2_PWD", + "PI_BSD_DOM2_PGS", + "PI_BSD_DOM2_RLS", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "PI_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/pi/dma.cpp b/waterbox/ares64/ares/ares/n64/pi/dma.cpp new file mode 100644 index 0000000000..007af9342b --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/pi/dma.cpp @@ -0,0 +1,48 @@ +auto PI::dmaRead() -> void { + io.readLength = (io.readLength | 1) + 1; + for(u32 address = 0; address < io.readLength; address += 2) { + u16 data = bus.read(io.dramAddress + address); + bus.write(io.pbusAddress + address, data); + } + io.dmaBusy = 0; + io.interrupt = 1; + mi.raise(MI::IRQ::PI); +} + +auto PI::dmaWrite() -> void { + u8 mem[128]; + bool first_block = true; + i32 length = io.writeLength+1; + + io.writeLength = 0x7F; + if (length <= 8) io.writeLength -= io.dramAddress&7; + + while (length > 0) { + u32 dest = 
io.dramAddress & 0x7FFFFE; + i32 misalign = dest & 7; + i32 block_len = 128 - misalign; + i32 cur_len = min(length, block_len); + + length -= cur_len; + if (length.bit(0)) length += 1; + + i32 rom_len = (cur_len + 1) & ~1; + for (u32 i = 0; i < rom_len; i++) + mem[i] = bus.read(io.pbusAddress++); + + if (first_block) { + if (cur_len == block_len-1) cur_len++; + cur_len = max(cur_len-misalign, 0); + } + + for (u32 i = 0; i < cur_len; i++) + bus.write(io.dramAddress++, mem[i]); + io.dramAddress = (io.dramAddress + 7) & ~7; + + first_block = false; + } + + io.dmaBusy = 0; + io.interrupt = 1; + mi.raise(MI::IRQ::PI); +} diff --git a/waterbox/ares64/ares/ares/n64/pi/io.cpp b/waterbox/ares64/ares/ares/n64/pi/io.cpp new file mode 100644 index 0000000000..12405ed6b2 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/pi/io.cpp @@ -0,0 +1,166 @@ +auto PI::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data; + + if(address == 0) { + //PI_DRAM_ADDRESS + data = io.dramAddress; + } + + if(address == 1) { + //PI_CART_ADDRESS + data = io.pbusAddress; + } + + if(address == 2) { + //PI_READ_LENGTH + data = io.readLength; + } + + if(address == 3) { + //PI_WRITE_LENGTH + data = io.writeLength; + } + + if(address == 4) { + //PI_STATUS + data.bit(0) = io.dmaBusy; + data.bit(1) = io.ioBusy; + data.bit(2) = io.error; + data.bit(3) = io.interrupt; + } + + if(address == 5) { + //PI_BSD_DOM1_LAT + data.bit(0,7) = bsd1.latency; + } + + if(address == 6) { + //PI_BSD_DOM1_PWD + data.bit(0,7) = bsd1.pulseWidth; + } + + if(address == 7) { + //PI_BSD_DOM1_PGS + data.bit(0,7) = bsd1.pageSize; + } + + if(address == 8) { + //PI_BSD_DOM1_RLS + data.bit(0,7) = bsd1.releaseDuration; + } + + if(address == 9) { + //PI_BSD_DOM2_LAT + data.bit(0,7) = bsd2.latency; + } + + if(address == 10) { + //PI_BSD_DOM2_PWD + data.bit(0,7) = bsd2.pulseWidth; + } + + if(address == 11) { + //PI_BSD_DOM2_PGS + data.bit(0,7) = bsd2.pageSize; + } + + if(address == 12) { + //PI_BSD_DOM2_RLS + 
data.bit(0,7) = bsd2.releaseDuration; + } + + debugger.io(Read, address, data); + return data; +} + +auto PI::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + //only PI_STATUS can be written while PI is busy + if(address != 4 && (io.dmaBusy || io.ioBusy)) { + io.error = 1; + return; + } + + if(address == 0) { + //PI_DRAM_ADDRESS + io.dramAddress = n24(data) & ~1; + } + + if(address == 1) { + //PI_PBUS_ADDRESS + io.pbusAddress = n29(data) & ~1; + } + + if(address == 2) { + //PI_READ_LENGTH + io.readLength = n24(data); + io.dmaBusy = 1; + queue.insert(Queue::PI_DMA_Read, io.readLength * 9); + } + + if(address == 3) { + //PI_WRITE_LENGTH + io.writeLength = n24(data); + io.dmaBusy = 1; + queue.insert(Queue::PI_DMA_Write, io.writeLength * 9); + } + + if(address == 4) { + //PI_STATUS + if(data.bit(0)) { + io.dmaBusy = 0; + io.error = 0; + queue.remove(Queue::PI_DMA_Read); + queue.remove(Queue::PI_DMA_Write); + } + if(data.bit(1)) { + io.interrupt = 0; + mi.lower(MI::IRQ::PI); + } + } + + if(address == 5) { + //PI_BSD_DOM1_LAT + bsd1.latency = data.bit(0,7); + } + + if(address == 6) { + //PI_BSD_DOM1_PWD + bsd1.pulseWidth = data.bit(0,7); + } + + if(address == 7) { + //PI_BSD_DOM1_PGS + bsd1.pageSize = data.bit(0,7); + } + + if(address == 8) { + //PI_BSD_DOM1_RLS + bsd1.releaseDuration = data.bit(0,7); + } + + if(address == 9) { + //PI_BSD_DOM2_LAT + bsd2.latency = data.bit(0,7); + } + + if(address == 10) { + //PI_BSD_DOM2_PWD + bsd2.pulseWidth = data.bit(0,7); + } + + if(address == 11) { + //PI_BSD_DOM2_PGS + bsd2.pageSize = data.bit(0,7); + } + + if(address == 12) { + //PI_BSD_DOM2_RLS + bsd2.releaseDuration = data.bit(0,7); + } + + debugger.io(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/pi/pi.cpp b/waterbox/ares64/ares/ares/n64/pi/pi.cpp new file mode 100644 index 0000000000..6a513f02e1 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/pi/pi.cpp @@ -0,0 +1,56 @@ +#include + +namespace 
ares::Nintendo64 { + +PI pi; +#include "dma.cpp" +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto PI::load(Node::Object parent) -> void { + node = parent->append("PI"); + rom.allocate(0x7c0); + ram.allocate(0x040); + + debugger.load(node); +} + +auto PI::unload() -> void { + debugger = {}; + rom.reset(); + ram.reset(); + node.reset(); +} + +auto PI::power(bool reset) -> void { + string pifrom = cartridge.region() == "NTSC" ? "pif.ntsc.rom" : "pif.pal.rom"; + if(auto fp = system.pak->read(pifrom)) { + rom.load(fp); + } + + ram.fill(); + io = {}; + bsd1 = {}; + bsd2 = {}; + + //write CIC seeds into PIF RAM so that cartridge checksum function passes + string cic = cartridge.cic(); + n8 seed = 0x3f; + n1 version = 0; + if(cic == "CIC-NUS-6101" || cic == "CIC-NUS-7102") seed = 0x3f, version = 1; + if(cic == "CIC-NUS-6102" || cic == "CIC-NUS-7101") seed = 0x3f; + if(cic == "CIC-NUS-6103" || cic == "CIC-NUS-7103") seed = 0x78; + if(cic == "CIC-NUS-6105" || cic == "CIC-NUS-7105") seed = 0x91; + if(cic == "CIC-NUS-6106" || cic == "CIC-NUS-7106") seed = 0x85; + + n32 data; + data.bit(0, 7) = 0x3f; //CIC IPL2 seed + data.bit(8,15) = seed; //CIC IPL3 seed + data.bit(17) = reset; //osResetType (0 = power; 1 = reset (NMI)) + data.bit(18) = version; //osVersion + data.bit(19) = 0; //osRomType (0 = Gamepak; 1 = 64DD) + ram.write(0x24, data); +} + +} diff --git a/waterbox/ares64/ares/ares/n64/pi/pi.hpp b/waterbox/ares64/ares/ares/n64/pi/pi.hpp new file mode 100644 index 0000000000..42ab2ee0bd --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/pi/pi.hpp @@ -0,0 +1,58 @@ +//Peripheral Interface + +struct PI : Memory::IO { + Node::Object node; + Memory::Readable rom; + Memory::Writable ram; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto io(bool mode, u32 address, u32 data) -> void; + + struct Memory { + Node::Debugger::Memory ram; + } memory; + + struct Tracer { + Node::Debugger::Tracer::Notification io; + } tracer; + } 
debugger; + + //pi.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + auto power(bool reset) -> void; + + //dma.cpp + auto dmaRead() -> void; + auto dmaWrite() -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct IO { + n1 dmaBusy; + n1 ioBusy; + n1 error; + n1 interrupt; + n32 dramAddress; + n32 pbusAddress; + n32 readLength; + n32 writeLength; + n1 romLockout; + } io; + + struct BSD { + n8 latency; + n8 pulseWidth; + n8 pageSize; + n8 releaseDuration; + } bsd1, bsd2; +}; + +extern PI pi; diff --git a/waterbox/ares64/ares/ares/n64/pi/serialization.cpp b/waterbox/ares64/ares/ares/n64/pi/serialization.cpp new file mode 100644 index 0000000000..0cd001d533 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/pi/serialization.cpp @@ -0,0 +1,23 @@ +auto PI::serialize(serializer& s) -> void { + s(ram); + + s(io.dmaBusy); + s(io.ioBusy); + s(io.error); + s(io.interrupt); + s(io.dramAddress); + s(io.pbusAddress); + s(io.readLength); + s(io.writeLength); + s(io.romLockout); + + s(bsd1.latency); + s(bsd1.pulseWidth); + s(bsd1.pageSize); + s(bsd1.releaseDuration); + + s(bsd2.latency); + s(bsd2.pulseWidth); + s(bsd2.pageSize); + s(bsd2.releaseDuration); +} diff --git a/waterbox/ares64/ares/ares/n64/rdp/debugger.cpp b/waterbox/ares64/ares/ares/n64/rdp/debugger.cpp new file mode 100644 index 0000000000..77290a687f --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdp/debugger.cpp @@ -0,0 +1,56 @@ +auto RDP::Debugger::load(Node::Object parent) -> void { + tracer.command = parent->append("Command", "RDP"); + tracer.io = parent->append("I/O", "RDP"); +} + +auto RDP::Debugger::command(string_view message) -> void { + if(unlikely(tracer.command->enabled())) { + tracer.command->notify(message); + } +} + +auto RDP::Debugger::ioDPC(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "DPC_START", + "DPC_END", + 
"DPC_CURRENT", + "DPC_STATUS", + "DPC_CLOCK", + "DPC_BUSY", + "DPC_PIPE_BUSY", + "DPC_TMEM_BUSY", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "DPC_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} + +auto RDP::Debugger::ioDPS(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "DPS_TBIST", + "DPS_TEST_MODE", + "DPS_BUFTEST_ADDR", + "DPS_BUFTEST_DATA", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "DPS_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/rdp/io.cpp b/waterbox/ares64/ares/ares/n64/rdp/io.cpp new file mode 100644 index 0000000000..2f9ec6b7b9 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdp/io.cpp @@ -0,0 +1,174 @@ +auto RDP::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data; + + if(address == 0) { + //DPC_START + data.bit(0,23) = command.start; + } + + if(address == 1) { + //DPC_END + data.bit(0,23) = command.end; + } + + if(address == 2) { + //DPC_CURRENT + data.bit(0,23) = command.current; + } + + if(address == 3) { + //DPC_STATUS + data.bit( 0) = command.source; + data.bit( 1) = command.freeze; + data.bit( 2) = command.flush; + data.bit( 3) = 0; //start gclk? 
+ data.bit( 4) = command.tmemBusy > 0; + data.bit( 5) = command.pipeBusy > 0; + data.bit( 6) = command.bufferBusy > 0; + data.bit( 7) = command.ready; + data.bit( 8) = 0; //DMA busy + data.bit( 9) = 0; //end valid + data.bit(10) = 0; //start valid + } + + if(address == 4) { + //DPC_CLOCK + data.bit(0,23) = command.clock; + } + + if(address == 5) { + //DPC_BUSY + data.bit(0,23) = command.bufferBusy; + } + + if(address == 6) { + //DPC_PIPE_BUSY + data.bit(0,23) = command.pipeBusy; + } + + if(data == 7) { + //DPC_TMEM_BUSY + data.bit(0,23) = command.tmemBusy; + } + + debugger.ioDPC(Read, address, data); + return data; +} + +auto RDP::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + if(address == 0) { + //DPC_START + command.start = data.bit(0,23) & ~7; + command.current = command.start; + } + + if(address == 1) { + //DPC_END + command.end = data.bit(0,23) & ~7; + if(command.end > command.current) { + command.freeze = 0; + render(); + command.ready = 1; + } + } + + if(address == 2) { + //DPC_CURRENT (read-only) + } + + if(address == 3) { + //DPC_STATUS + if(data.bit(0)) command.source = 0; + if(data.bit(1)) command.source = 1; + if(data.bit(2)) command.freeze = 0; + //if(data.bit(3)) command.freeze = 1; + if(data.bit(4)) command.flush = 0; + if(data.bit(5)) command.flush = 1; + if(data.bit(6)) command.tmemBusy = 0; + if(data.bit(7)) command.pipeBusy = 0; + if(data.bit(8)) command.bufferBusy = 0; + if(data.bit(9)) command.clock = 0; + } + + if(address == 4) { + //DPC_CLOCK (read-only) + } + + if(address == 5) { + //DPC_BUSY (read-only) + } + + if(address == 6) { + //DPC_PIPE_BUSY (read-only) + } + + if(address == 7) { + //DPC_TMEM_BUSY (read-only) + } + + debugger.ioDPC(Write, address, data); +} + +auto RDP::IO::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data; + + if(address == 0) { + //DPS_TBIST + data.bit(0) = bist.check; + data.bit(1) = bist.go; + data.bit(2) = bist.done; + 
data.bit(3,10) = bist.fail; + } + + if(address == 1) { + //DPS_TEST_MODE + data.bit(0) = test.enable; + } + + if(address == 2) { + //DPS_BUFTEST_ADDR + data.bit(0,6) = test.address; + } + + if(address == 3) { + //DPS_BUFTEST_DATA + data.bit(0,31) = test.data; + } + + self.debugger.ioDPS(Read, address, data); + return data; +} + +auto RDP::IO::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + if(address == 0) { + //DPS_TBIST + bist.check = data.bit(0); + bist.go = data.bit(1); + if(data.bit(2)) bist.done = 0; + } + + if(address == 1) { + //DPS_TEST_MODE + test.enable = data.bit(0); + } + + if(address == 2) { + //DPS_BUFTEST_ADDR + test.address = data.bit(0,6); + } + + if(address == 3) { + //DPS_BUFTEST_DATA + test.data = data.bit(0,31); + } + + self.debugger.ioDPS(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/rdp/rdp.cpp b/waterbox/ares64/ares/ares/n64/rdp/rdp.cpp new file mode 100644 index 0000000000..e9dd892507 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdp/rdp.cpp @@ -0,0 +1,83 @@ +#include + +#if defined(MAME_RDP) +#include "emu.h" +#include "includes/n64.h" + +struct n64_periphs_impl : public n64_periphs { + auto dp_full_sync() -> void override { + ares::Nintendo64::rdp.syncFull(); + } + + static auto instance() -> n64_periphs_impl* { + static n64_periphs_impl* inst = new n64_periphs_impl(); + return inst; + } +}; +#endif + +namespace ares::Nintendo64 { + +RDP rdp; +#include "render.cpp" +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto RDP::load(Node::Object parent) -> void { + node = parent->append("RDP"); + debugger.load(node); + + #if defined(MAME_RDP) + state = new n64_state((u32*)rdram.ram.data, (u32*)rsp.dmem.data, n64_periphs_impl::instance()); + puts("starting RDP video"); + state->video_start(); + #endif +} + +auto RDP::unload() -> void { + debugger = {}; + node.reset(); + + #if defined(MAME_RDP) + state.reset(); + #endif +} + +auto 
RDP::main() -> void { + step(system.frequency()); +} + +auto RDP::step(u32 clocks) -> void { + Thread::clock += clocks; +} + +auto RDP::power(bool reset) -> void { + Thread::reset(); + command = {}; + edge = {}; + shade = {}; + texture = {}; + zbuffer = {}; + rectangle = {}; + other = {}; + fog = {}; + blend = {}; + primitive = {}; + environment = {}; + combine = {}; + tlut = {}; + load_ = {}; + tileSize = {}; + tile = {}; + set = {}; + primitiveDepth = {}; + scissor = {}; + convert = {}; + key = {}; + fillRectangle_ = {}; + io.bist = {}; + io.test = {}; +} + +} diff --git a/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp b/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp new file mode 100644 index 0000000000..bebf4216ff --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp @@ -0,0 +1,356 @@ +//Reality Display Processor + +struct RDP : Thread, Memory::IO { + Node::Object node; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto command(string_view) -> void; + auto ioDPC(bool mode, u32 address, u32 data) -> void; + auto ioDPS(bool mode, u32 address, u32 data) -> void; + + struct Tracer { + Node::Debugger::Tracer::Notification command; + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //rdp.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + + auto main() -> void; + auto step(u32 clocks) -> void; + auto power(bool reset) -> void; + + //render.cpp + auto render() -> void; + auto noOperation() -> void; + auto invalidOperation() -> void; + auto unshadedTriangle() -> void; + auto unshadedZbufferTriangle() -> void; + auto textureTriangle() -> void; + auto textureZbufferTriangle() -> void; + auto shadedTriangle() -> void; + auto shadedZbufferTriangle() -> void; + auto shadedTextureTriangle() -> void; + auto shadedTextureZbufferTriangle() -> void; + auto syncLoad() -> void; + auto syncPipe() -> void; + auto syncTile() -> void; + auto syncFull() -> void; + auto setKeyGB() -> void; + auto setKeyR() -> void; + auto 
setConvert() -> void; + auto setScissor() -> void; + auto setPrimitiveDepth() -> void; + auto setOtherModes() -> void; + auto textureRectangle() -> void; + auto textureRectangleFlip() -> void; + auto loadTLUT() -> void; + auto setTileSize() -> void; + auto loadBlock() -> void; + auto loadTile() -> void; + auto setTile() -> void; + auto fillRectangle() -> void; + auto setFillColor() -> void; + auto setFogColor() -> void; + auto setBlendColor() -> void; + auto setPrimitiveColor() -> void; + auto setEnvironmentColor() -> void; + auto setCombineMode() -> void; + auto setTextureImage() -> void; + auto setMaskImage() -> void; + auto setColorImage() -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct Command { + n24 start; + n24 end; + n24 current; + n24 clock; + n24 bufferBusy; + n24 pipeBusy; + n24 tmemBusy; + n1 source; //0 = RDRAM, 1 = DMEM + n1 freeze; + n1 flush; + n1 ready = 1; + } command; + + struct Point { + n16 i; //integer + n16 f; //fraction + }; + + struct Edge { + n1 lmajor; + n3 level; + n3 tile; + struct Y { + n14 hi; + n14 md; + n14 lo; + } y; + struct X { + struct { + Point c; //coordinate + Point s; //inverse slope + } hi, md, lo; + } x; + } edge; + + struct Shade { + struct Channel { + Point c; //color + Point x; //change per X coordinate + Point y; //change per Y coordinate + Point e; //change along edge + } r, g, b, a; + } shade; + + struct Texture { + struct { + Point c; //coordinate + Point x; //change per X coordinate + Point y; //change per Y coordinate + Point e; //change along edge + } s, t, w; + } texture; + + struct Zbuffer { + Point d; //inverse depth + Point x; //change per X coordinate + Point y; //change per Y coordinate + Point e; //change along edge + } zbuffer; + + struct TextureRectangle { + n3 tile; + struct { + n12 hi; + n12 lo; + } x, y; + Point s; + Point t; + } rectangle; + + struct OtherModes { + n1 
atomicPrimitive; + n1 reserved1; + n2 cycleType; + n1 perspective; + n1 detailTexture; + n1 sharpenTexture; + n1 lodTexture; + n1 tlut; + n1 tlutType; + n1 sampleType; + n1 midTexel; + n1 bilerp[2]; + n1 convertOne; + n1 colorKey; + n2 colorDitherMode; + n2 alphaDitherMode; + n4 reserved2; + n2 blend1a[2]; + n2 blend1b[2]; + n2 blend2a[2]; + n2 blend2b[2]; + n1 reserved3; + n1 forceBlend; + n1 alphaCoverage; + n1 coverageXalpha; + n2 zMode; + n2 coverageMode; + n1 colorOnCoverage; + n1 imageRead; + n1 zUpdate; + n1 zCompare; + n1 antialias; + n1 zSource; + n1 ditherAlpha; + n1 alphaCompare; + } other; + + struct FogColor { + n8 red; + n8 green; + n8 blue; + n8 alpha; + } fog; + + struct Blend { + n8 red; + n8 green; + n8 blue; + n8 alpha; + } blend; + + struct PrimitiveColor { + n5 minimum; + n8 fraction; + n8 red; + n8 green; + n8 blue; + n8 alpha; + } primitive; + + struct EnvironmentColor { + n8 red; + n8 green; + n8 blue; + n8 alpha; + } environment; + + struct CombineMode { + struct MUL { + n5 color[2]; + n3 alpha[2]; + } mul; + struct ADD { + n3 color[2]; + n3 alpha[2]; + } add; + struct SUB { + n4 color[2]; + n3 alpha[2]; + } sba, sbb; + } combine; + + struct TLUT { + n3 index; + struct { + n12 lo; + n12 hi; + } s, t; + } tlut; + + struct Load { + struct Block { + n3 index; + struct { + n12 lo; + n12 hi; + } s, t; + } block; + struct Tile { + n3 index; + struct { + n12 lo; + n12 hi; + } s, t; + } tile; + } load_; + + struct TileSize { + n3 index; + struct { + n12 lo; + n12 hi; + } s, t; + } tileSize; + + struct Tile { + n3 format; + n2 size; + n9 line; + n9 address; + n3 index; + n4 palette; + struct { + n1 clamp; + n1 mirror; + n4 mask; + n4 shift; + } s, t; + } tile; + + struct Set { + struct Fill { + n32 color = 0; + } fill; + struct Texture { + n3 format = 0; + n2 size = 0; + n10 width = 0; + n26 dramAddress = 0; + } texture; + struct Mask { + n26 dramAddress = 0; + } mask; + struct Color { + n3 format = 0; + n2 size = 0; + n10 width = 0; + n26 
dramAddress = 0; + } color; + } set; + + struct PrimitiveDepth { + n16 z; + n16 deltaZ; + } primitiveDepth; + + struct Scissor { + n1 field; + n1 odd; + struct { + n12 lo; + n12 hi; + } x, y; + } scissor; + + struct Convert { + n9 k[6]; + } convert; + + struct Key { + struct { + n12 width; + n8 center; + n8 scale; + } r, g, b; + } key; + + struct FillRectangle { + struct { + n12 lo; + n12 hi; + } x, y; + } fillRectangle_; + + struct IO : Memory::IO { + RDP& self; + IO(RDP& self) : self(self) {} + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + struct BIST { + n1 check; + n1 go; + n1 done; + n8 fail; + } bist; + struct Test { + n1 enable; + n7 address; + n32 data; + } test; + } io{*this}; + + #if defined(MAME_RDP) + unique_pointer state; + #endif +}; + +extern RDP rdp; diff --git a/waterbox/ares64/ares/ares/n64/rdp/render.cpp b/waterbox/ares64/ares/ares/n64/rdp/render.cpp new file mode 100644 index 0000000000..94a961b7f7 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdp/render.cpp @@ -0,0 +1,706 @@ +static const vector commandNames = { + "No_Operation", "Invalid_01", "Invalid_02", "Invalid_03", + "Invalid_04", "Invalid_05", "Invalid_06", "Invalid_07", + "Unshaded_Triangle", + "Unshaded_Zbuffer_Triangle", + "Texture_Triangle", + "Texture_Zbuffer_Triangle", + "Shaded_Triangle", + "Shaded_Zbuffer_Triangle", + "Shaded_Texture_Triangle", + "Shaded_Texture_Zbuffer_Triangle", + "Invalid_10", "Invalid_11", "Invalid_12", "Invalid_13", + "Invalid_14", "Invalid_15", "Invalid_16", "Invalid_17", + "Invalid_18", "Invalid_19", "Invalid_1a", "Invalid_1b", + "Invalid_1c", "Invalid_1d", "Invalid_1e", "Invalid_1f", + "Invalid_20", "Invalid_21", "Invalid_22", "Invalid_23", + "Texture_Rectangle", + "Texture_Rectangle_Flip", + "Sync_Load", + "Sync_Pipe", + "Sync_Tile", + "Sync_Full", + "Set_Key_GB", + "Set_Key_R", + "Set_Convert", + "Set_Scissor", + "Set_Primitive_Depth", + "Set_Other_Modes", + "Load_Texture_LUT", + "Invalid_31", + 
"Set_Tile_Size", + "Load_Block", + "Load_Tile", + "Set_Tile", + "Fill_Rectangle", + "Set_Fill_Color", + "Set_Fog_Color", + "Set_Blend_Color", + "Set_Primitive_Color", + "Set_Environment_Color", + "Set_Combine_Mode", + "Set_Texture_Image", + "Set_Mask_Image", + "Set_Color_Image", +}; + +auto RDP::render() -> void { + #if defined(VULKAN) + if(vulkan.enable && vulkan.render()) return; + #endif + + #if defined(MAME_RDP) + auto rdp = state->rdp(); + rdp->set_current(command.current); + rdp->set_end(command.end); + rdp->set_status(command.source ? DP_STATUS_XBUS_DMA : 0); + rdp->process_command_list(); + command.current = rdp->get_current(); + return; + #else + auto& memory = !command.source ? rdram.ram : rsp.dmem; + + auto fetch = [&]() -> u64 { + u64 op = memory.readUnaligned(command.current); + command.current += 8; + return op; + }; + + auto fetchEdge = [&](u64 op) { + edge.lmajor = n1 (op >> 55); + edge.level = n3 (op >> 51); + edge.tile = n3 (op >> 48); + edge.y.lo = n14(op >> 32); + edge.y.md = n14(op >> 16); + edge.y.hi = n14(op >> 0); + op = fetch(); + edge.x.lo.c.i = n16(op >> 48); + edge.x.lo.c.f = n16(op >> 32); + edge.x.lo.s.i = n16(op >> 16); + edge.x.lo.s.f = n16(op >> 0); + op = fetch(); + edge.x.hi.c.i = n16(op >> 48); + edge.x.hi.c.f = n16(op >> 32); + edge.x.hi.s.i = n16(op >> 16); + edge.x.hi.s.f = n16(op >> 0); + op = fetch(); + edge.x.md.c.i = n16(op >> 48); + edge.x.md.c.f = n16(op >> 32); + edge.x.md.s.i = n16(op >> 16); + edge.x.md.s.f = n16(op >> 0); + }; + + auto fetchShade = [&](u64 op) { + op = fetch(); + shade.r.c.i = n16(op >> 48); + shade.g.c.i = n16(op >> 32); + shade.b.c.i = n16(op >> 16); + shade.a.c.i = n16(op >> 0); + op = fetch(); + shade.r.x.i = n16(op >> 48); + shade.g.x.i = n16(op >> 32); + shade.b.x.i = n16(op >> 16); + shade.a.x.i = n16(op >> 0); + op = fetch(); + shade.r.c.f = n16(op >> 48); + shade.g.c.f = n16(op >> 32); + shade.b.c.f = n16(op >> 16); + shade.a.c.f = n16(op >> 0); + op = fetch(); + shade.r.x.f = n16(op >> 48); 
+ shade.g.x.f = n16(op >> 32); + shade.b.x.f = n16(op >> 16); + shade.a.x.f = n16(op >> 0); + op = fetch(); + shade.r.e.i = n16(op >> 48); + shade.g.e.i = n16(op >> 32); + shade.b.e.i = n16(op >> 16); + shade.a.e.i = n16(op >> 0); + op = fetch(); + shade.r.y.i = n16(op >> 48); + shade.g.y.i = n16(op >> 32); + shade.b.y.i = n16(op >> 16); + shade.a.y.i = n16(op >> 0); + op = fetch(); + shade.r.e.f = n16(op >> 48); + shade.g.e.f = n16(op >> 32); + shade.b.e.f = n16(op >> 16); + shade.a.e.f = n16(op >> 0); + op = fetch(); + shade.r.y.f = n16(op >> 48); + shade.g.y.f = n16(op >> 32); + shade.b.y.f = n16(op >> 16); + shade.a.y.f = n16(op >> 0); + }; + + auto fetchTexture = [&](u64 op) { + op = fetch(); + texture.s.c.i = n16(op >> 48); + texture.t.c.i = n16(op >> 32); + texture.w.c.i = n16(op >> 16); + op = fetch(); + texture.s.x.i = n16(op >> 48); + texture.t.x.i = n16(op >> 32); + texture.w.x.i = n16(op >> 16); + op = fetch(); + texture.s.c.f = n16(op >> 48); + texture.t.c.f = n16(op >> 32); + texture.w.c.f = n16(op >> 16); + op = fetch(); + texture.s.x.f = n16(op >> 48); + texture.t.x.f = n16(op >> 32); + texture.w.x.f = n16(op >> 16); + op = fetch(); + texture.s.e.i = n16(op >> 48); + texture.t.e.i = n16(op >> 32); + texture.w.e.i = n16(op >> 16); + op = fetch(); + texture.s.y.i = n16(op >> 48); + texture.t.y.i = n16(op >> 32); + texture.w.y.i = n16(op >> 16); + op = fetch(); + texture.s.e.f = n16(op >> 48); + texture.t.e.f = n16(op >> 32); + texture.w.e.f = n16(op >> 16); + op = fetch(); + texture.s.y.f = n16(op >> 48); + texture.t.y.f = n16(op >> 32); + texture.w.y.f = n16(op >> 16); + }; + + auto fetchZBuffer = [&](u64 op) { + op = fetch(); + zbuffer.d.i = n16(op >> 48); + zbuffer.d.f = n16(op >> 32); + zbuffer.x.i = n16(op >> 16); + zbuffer.x.f = n16(op >> 0); + op = fetch(); + zbuffer.e.i = n16(op >> 48); + zbuffer.e.f = n16(op >> 32); + zbuffer.y.i = n16(op >> 16); + zbuffer.y.f = n16(op >> 0); + }; + + auto fetchRectangle = [&](u64 op) { + rectangle.x.lo = 
n12(op >> 44); + rectangle.y.lo = n12(op >> 32); + rectangle.tile = n3 (op >> 24); + rectangle.x.hi = n12(op >> 12); + rectangle.y.hi = n12(op >> 0); + op = fetch(); + rectangle.s.i = n16(op >> 48); + rectangle.t.i = n16(op >> 32); + rectangle.s.f = n16(op >> 16); + rectangle.t.f = n16(op >> 0); + }; + + while(command.current < command.end) { + u64 op = fetch(); + + if(debugger.tracer.command->enabled()) { + debugger.command({hex(op, 16L), " ", commandNames(op >> 56 & 0x3f, "Invalid")}); + } + + switch(op >> 56 & 0x3f) { + + case 0x00: { + noOperation(); + } break; + + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: { + invalidOperation(); + } break; + + case 0x08: { + fetchEdge(op); + unshadedTriangle(); + } break; + + case 0x09: { + fetchEdge(op); + fetchZBuffer(op); + unshadedZbufferTriangle(); + } break; + + case 0x0a: { + fetchEdge(op); + fetchTexture(op); + textureTriangle(); + } break; + + case 0x0b: { + fetchEdge(op); + fetchTexture(op); + fetchZBuffer(op); + textureZbufferTriangle(); + } break; + + case 0x0c: { + fetchEdge(op); + fetchShade(op); + shadedTriangle(); + } break; + + case 0x0d: { + fetchEdge(op); + fetchShade(op); + fetchZBuffer(op); + shadedZbufferTriangle(); + } break; + + case 0x0e: { + fetchEdge(op); + fetchShade(op); + fetchTexture(op); + shadedTextureTriangle(); + } break; + + case 0x0f: { + fetchEdge(op); + fetchShade(op); + fetchTexture(op); + fetchZBuffer(op); + shadedTextureZbufferTriangle(); + } break; + + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x20: + case 0x21: + case 0x22: + case 0x23: { + invalidOperation(); + } break; + + case 0x24: { + fetchRectangle(op); + textureRectangle(); + } break; + + case 0x25: { + fetchRectangle(op); + textureRectangleFlip(); + } break; + + case 0x26: { + syncLoad(); + } break; + + case 
0x27: { + syncPipe(); + } break; + + case 0x28: { + syncTile(); + } break; + + case 0x29: { + syncFull(); + } break; + + case 0x2a: { + key.g.width = n12(op >> 44); + key.b.width = n12(op >> 32); + key.g.center = n8 (op >> 24); + key.g.scale = n8 (op >> 16); + key.b.center = n8 (op >> 8); + key.b.scale = n8 (op >> 0); + setKeyGB(); + } break; + + case 0x2b: { + key.r.width = n12(op >> 16); + key.r.center = n8 (op >> 8); + key.r.scale = n8 (op >> 0); + setKeyR(); + } break; + + case 0x2c: { + convert.k[0] = n9(op >> 45); + convert.k[1] = n9(op >> 36); + convert.k[2] = n9(op >> 27); + convert.k[3] = n9(op >> 18); + convert.k[4] = n9(op >> 9); + convert.k[5] = n8(op >> 0); + setConvert(); + } break; + + case 0x2d: { + scissor.x.hi = n12(op >> 44); + scissor.y.hi = n12(op >> 32); + scissor.field = n1 (op >> 25); + scissor.odd = n1 (op >> 24); + scissor.x.lo = n12(op >> 12); + scissor.y.lo = n12(op >> 0); + setScissor(); + } break; + + case 0x2e: { + primitiveDepth.z = n16(op >> 16); + primitiveDepth.deltaZ = n16(op >> 0); + setPrimitiveDepth(); + } break; + + case 0x2f: { + other.atomicPrimitive = n1(op >> 55); + other.reserved1 = n1(op >> 54); + other.cycleType = n2(op >> 52); + other.perspective = n1(op >> 51); + other.detailTexture = n1(op >> 50); + other.sharpenTexture = n1(op >> 49); + other.lodTexture = n1(op >> 48); + other.tlut = n1(op >> 47); + other.tlutType = n1(op >> 46); + other.sampleType = n1(op >> 45); + other.midTexel = n1(op >> 44); + other.bilerp[0] = n1(op >> 43); + other.bilerp[1] = n1(op >> 42); + other.convertOne = n1(op >> 41); + other.colorKey = n1(op >> 40); + other.colorDitherMode = n2(op >> 38); + other.alphaDitherMode = n2(op >> 36); + other.reserved2 = n4(op >> 32); + other.blend1a[0] = n2(op >> 30); + other.blend1a[1] = n2(op >> 28); + other.blend1b[0] = n2(op >> 26); + other.blend1b[1] = n2(op >> 24); + other.blend2a[0] = n2(op >> 22); + other.blend2a[1] = n2(op >> 20); + other.blend2b[0] = n2(op >> 18); + other.blend2b[1] = n2(op >> 
16); + other.reserved3 = n1(op >> 15); + other.forceBlend = n1(op >> 14); + other.alphaCoverage = n1(op >> 13); + other.coverageXalpha = n1(op >> 12); + other.zMode = n2(op >> 10); + other.coverageMode = n2(op >> 8); + other.colorOnCoverage = n1(op >> 7); + other.imageRead = n1(op >> 6); + other.zUpdate = n1(op >> 5); + other.zCompare = n1(op >> 4); + other.antialias = n1(op >> 3); + other.zSource = n1(op >> 2); + other.ditherAlpha = n1(op >> 1); + other.alphaCompare = n1(op >> 0); + setOtherModes(); + } break; + + case 0x30: { + tlut.s.lo = n12(op >> 44); + tlut.t.lo = n12(op >> 32); + tlut.index = n3 (op >> 24); + tlut.s.hi = n12(op >> 12); + tlut.t.hi = n12(op >> 0); + loadTLUT(); + } break; + + case 0x31: { + invalidOperation(); + } break; + + case 0x32: { + tileSize.s.lo = n12(op >> 44); + tileSize.t.lo = n12(op >> 32); + tileSize.index = n3 (op >> 24); + tileSize.s.hi = n12(op >> 12); + tileSize.t.hi = n12(op >> 0); + setTileSize(); + } break; + + case 0x33: { + load_.block.s.lo = n12(op >> 44); + load_.block.t.lo = n12(op >> 32); + load_.block.index = n3 (op >> 24); + load_.block.s.hi = n12(op >> 12); + load_.block.t.hi = n12(op >> 0); + loadBlock(); + } break; + + case 0x34: { + load_.tile.s.lo = n12(op >> 44); + load_.tile.t.lo = n12(op >> 32); + load_.tile.index = n3 (op >> 24); + load_.tile.s.hi = n12(op >> 12); + load_.tile.t.hi = n12(op >> 0); + loadTile(); + } break; + + case 0x35: { + tile.format = n3(op >> 53); + tile.size = n2(op >> 51); + tile.line = n9(op >> 41); + tile.address = n9(op >> 32); + tile.index = n3(op >> 24); + tile.palette = n4(op >> 20); + tile.t.clamp = n1(op >> 19); + tile.t.mirror = n1(op >> 18); + tile.t.mask = n4(op >> 14); + tile.t.shift = n4(op >> 10); + tile.s.clamp = n1(op >> 9); + tile.s.mirror = n1(op >> 8); + tile.s.mask = n4(op >> 4); + tile.s.shift = n4(op >> 0); + setTile(); + } break; + + case 0x36: { + fillRectangle_.x.lo = n12(op >> 44); + fillRectangle_.y.lo = n12(op >> 32); + fillRectangle_.x.hi = n12(op >> 12); 
+ fillRectangle_.y.hi = n12(op >> 0); + fillRectangle(); + } break; + + case 0x37: { + set.fill.color = n32(op >> 0); + setFillColor(); + } break; + + case 0x38: { + fog.red = n8(op >> 24); + fog.green = n8(op >> 16); + fog.blue = n8(op >> 8); + fog.alpha = n8(op >> 0); + setFogColor(); + } break; + + case 0x39: { + blend.red = n8(op >> 24); + blend.green = n8(op >> 16); + blend.blue = n8(op >> 8); + blend.alpha = n8(op >> 0); + setBlendColor(); + } break; + + case 0x3a: { + primitive.minimum = n4(op >> 40); + primitive.fraction = n8(op >> 32); + primitive.red = n8(op >> 24); + primitive.green = n8(op >> 16); + primitive.blue = n8(op >> 8); + primitive.alpha = n8(op >> 0); + setPrimitiveColor(); + } break; + + case 0x3b: { + environment.red = n8(op >> 24); + environment.green = n8(op >> 16); + environment.blue = n8(op >> 8); + environment.alpha = n8(op >> 0); + setEnvironmentColor(); + } break; + + case 0x3c: { + combine.sba.color[0] = n4(op >> 52); + combine.mul.color[0] = n5(op >> 47); + combine.sba.alpha[0] = n3(op >> 44); + combine.mul.alpha[0] = n3(op >> 41); + combine.sba.color[1] = n4(op >> 37); + combine.mul.color[1] = n5(op >> 32); + combine.sbb.color[0] = n4(op >> 28); + combine.sbb.color[1] = n4(op >> 24); + combine.sba.alpha[1] = n3(op >> 21); + combine.mul.alpha[1] = n3(op >> 18); + combine.add.color[0] = n3(op >> 15); + combine.sbb.alpha[0] = n3(op >> 12); + combine.add.alpha[0] = n3(op >> 9); + combine.add.color[1] = n3(op >> 6); + combine.sbb.alpha[1] = n3(op >> 3); + combine.add.alpha[1] = n3(op >> 0); + setCombineMode(); + } break; + + case 0x3d: { + set.texture.format = n3 (op >> 53); + set.texture.size = n2 (op >> 51); + set.texture.width = n10(op >> 32); + set.texture.dramAddress = n26(op >> 0); + setTextureImage(); + } break; + + case 0x3e: { + set.mask.dramAddress = n26(op >> 0); + setMaskImage(); + } break; + + case 0x3f: { + set.color.format = n3 (op >> 53); + set.color.size = n2 (op >> 51); + set.color.width = n10(op >> 32); + 
set.color.dramAddress = n26(op >> 0); + setColorImage(); + } break; + + } + } +#endif +} + +//0x00 +auto RDP::noOperation() -> void { +} + +//0x01-0x07; 0x10-0x23; 0x31 +auto RDP::invalidOperation() -> void { +} + +//0x08 +auto RDP::unshadedTriangle() -> void { +} + +//0x09 +auto RDP::unshadedZbufferTriangle() -> void { +} + +//0x0a +auto RDP::textureTriangle() -> void { +} + +//0x0b +auto RDP::textureZbufferTriangle() -> void { +} + +//0x0c +auto RDP::shadedTriangle() -> void { +} + +//0x0d +auto RDP::shadedZbufferTriangle() -> void { +} + +//0x0e +auto RDP::shadedTextureTriangle() -> void { +} + +//0x0f +auto RDP::shadedTextureZbufferTriangle() -> void { +} + +//0x24 +auto RDP::textureRectangle() -> void { +} + +//0x25 +auto RDP::textureRectangleFlip() -> void { +} + +//0x26 +auto RDP::syncLoad() -> void { +} + +//0x27 +auto RDP::syncPipe() -> void { +} + +//0x28 +auto RDP::syncTile() -> void { +} + +//0x29 +auto RDP::syncFull() -> void { + mi.raise(MI::IRQ::DP); +} + +//0x2a +auto RDP::setKeyGB() -> void { +} + +//0x2b +auto RDP::setKeyR() -> void { +} + +//0x2c +auto RDP::setConvert() -> void { +} + +//0x2d +auto RDP::setScissor() -> void { +} + +//0x2e +auto RDP::setPrimitiveDepth() -> void { +} + +//0x2f +auto RDP::setOtherModes() -> void { +} + +//0x30 +auto RDP::loadTLUT() -> void { +} + +//0x32 +auto RDP::setTileSize() -> void { +} + +//0x33 +auto RDP::loadBlock() -> void { +} + +//0x34 +auto RDP::loadTile() -> void { +} + +//0x35 +auto RDP::setTile() -> void { +} + +//0x36 +auto RDP::fillRectangle() -> void { +} + +//0x37 +auto RDP::setFillColor() -> void { +} + +//0x38 +auto RDP::setFogColor() -> void { +} + +//0x39 +auto RDP::setBlendColor() -> void { +} + +//0x3a +auto RDP::setPrimitiveColor() -> void { +} + +//0x3b +auto RDP::setEnvironmentColor() -> void { +} + +//0x3c +auto RDP::setCombineMode() -> void { +} + +//0x3d +auto RDP::setTextureImage() -> void { +} + +//0x3e +auto RDP::setMaskImage() -> void { +} + +//0x3f +auto RDP::setColorImage() -> 
void { +} diff --git a/waterbox/ares64/ares/ares/n64/rdp/serialization.cpp b/waterbox/ares64/ares/ares/n64/rdp/serialization.cpp new file mode 100644 index 0000000000..d1b08be340 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdp/serialization.cpp @@ -0,0 +1,24 @@ +auto RDP::serialize(serializer& s) -> void { + Thread::serialize(s); + + s(command.start); + s(command.end); + s(command.current); + s(command.clock); + s(command.bufferBusy); + s(command.pipeBusy); + s(command.tmemBusy); + s(command.source); + s(command.freeze); + s(command.flush); + s(command.ready); + + s(io.bist.check); + s(io.bist.go); + s(io.bist.done); + s(io.bist.fail); + + s(io.test.enable); + s(io.test.address); + s(io.test.data); +} diff --git a/waterbox/ares64/ares/ares/n64/rdram/debugger.cpp b/waterbox/ares64/ares/ares/n64/rdram/debugger.cpp new file mode 100644 index 0000000000..3829628d02 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdram/debugger.cpp @@ -0,0 +1,40 @@ +auto RDRAM::Debugger::load(Node::Object parent) -> void { + memory.ram = parent->append("RDRAM"); + memory.ram->setSize(4_MiB + 4_MiB); + memory.ram->setRead([&](u32 address) -> u8 { + return rdram.ram.read(address); + }); + memory.ram->setWrite([&](u32 address, u8 data) -> void { + return rdram.ram.write(address, data); + }); + + tracer.io = parent->append("I/O", "RDRAM"); +} + +auto RDRAM::Debugger::io(bool mode, u32 chipID, u32 address, u32 data) -> void { + static const vector registerNames = { + "RDRAM_DEVICE_TYPE", + "RDRAM_DEVICE_ID", + "RDRAM_DELAY", + "RDRAM_MODE", + "RDRAM_REF_INTERVAL", + "RDRAM_REF_ROW", + "RDRAM_RAS_INTERVAL", + "RDRAM_MIN_INTERVAL", + "RDRAM_ADDRESS_SELECT", + "RDRAM_DEVICE_MANUFACTURER", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "RDRAM_UNKNOWN"); + name.append("[", chipID, "]"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " 
<= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/rdram/io.cpp b/waterbox/ares64/ares/ares/n64/rdram/io.cpp new file mode 100644 index 0000000000..d5dd561629 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdram/io.cpp @@ -0,0 +1,127 @@ +auto RDRAM::readWord(u32 address) -> u32 { + u32 chipID = address >> 13 & 3; + auto& chip = chips[chipID]; + address = (address & 0x3ff) >> 2; + u32 data = 0; + + if(address == 0) { + //RDRAM_DEVICE_TYPE + data = chip.deviceType; + } + + if(address == 1) { + //RDRAM_DEVICE_ID + data = chip.deviceID; + } + + if(address == 2) { + //RDRAM_DELAY + data = chip.delay; + } + + if(address == 3) { + //RDRAM_MODE + data = chip.mode ^ 0xc0c0c0c0; + } + + if(address == 4) { + //RDRAM_REF_INTERVAL + data = chip.refreshInterval; + } + + if(address == 5) { + //RDRAM_REF_ROW + data = chip.refreshRow; + } + + if(address == 6) { + //RDRAM_RAS_INTERVAL + data = chip.rasInterval; + } + + if(address == 7) { + //RDRAM_MIN_INTERVAL + data = chip.minInterval; + } + + if(address == 8) { + //RDRAM_ADDRESS_SELECT + data = chip.addressSelect; + } + + if(address == 9) { + //RDRAM_DEVICE_MANUFACTURER + data = chip.deviceManufacturer; + } + + if(address == 10) { + //RDRAM_CURRENT_CONTROL + data = chip.currentControl; + } + + debugger.io(Read, chipID, address, data); + return data; +} + +auto RDRAM::writeWord(u32 address, u32 data) -> void { + u32 chipID = address >> 13 & 3; + auto& chip = chips[chipID]; + address = (address & 0x3ff) >> 2; + + if(address == 0) { + //RDRAM_DEVICE_TYPE + chip.deviceType = data; + } + + if(address == 1) { + //RDRAM_DEVICE_ID + chip.deviceID = data; + } + + if(address == 2) { + //RDRAM_DELAY + chip.delay = data; + } + + if(address == 3) { + //RDRAM_MODE + chip.mode = data; + } + + if(address == 4) { + //RDRAM_REF_INTERVAL + chip.refreshInterval = data; + } + + if(address == 5) { + //RDRAM_REF_ROW + chip.refreshRow = data; + } + + if(address == 6) { + //RDRAM_RAS_INTERVAL + 
chip.rasInterval = data; + } + + if(address == 7) { + //RDRAM_MIN_INTERVAL + chip.minInterval = data; + } + + if(address == 8) { + //RDRAM_ADDRESS_SELECT + chip.addressSelect = data; + } + + if(address == 9) { + //RDRAM_DEVICE_MANUFACTURER + chip.deviceManufacturer = data; + } + + if(address == 10) { + //RDRAM_CURRENT_CONTROL + chip.currentControl = data; + } + + debugger.io(Write, chipID, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/rdram/rdram.cpp b/waterbox/ares64/ares/ares/n64/rdram/rdram.cpp new file mode 100644 index 0000000000..f17acd4843 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdram/rdram.cpp @@ -0,0 +1,33 @@ +#include + +namespace ares::Nintendo64 { + +RDRAM rdram; +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto RDRAM::load(Node::Object parent) -> void { + node = parent->append("RDRAM"); + + //4_MiB internal + //4_MiB expansion pak + ram.allocate(4_MiB + 4_MiB); + + debugger.load(node); +} + +auto RDRAM::unload() -> void { + debugger = {}; + ram.reset(); + node.reset(); +} + +auto RDRAM::power(bool reset) -> void { + if(!reset) { + ram.fill(); + for(auto& chip : chips) chip = {}; + } +} + +} diff --git a/waterbox/ares64/ares/ares/n64/rdram/rdram.hpp b/waterbox/ares64/ares/ares/n64/rdram/rdram.hpp new file mode 100644 index 0000000000..57cb39ba09 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdram/rdram.hpp @@ -0,0 +1,48 @@ +//RAMBUS RAM + +struct RDRAM : Memory::IO { + Node::Object node; + Memory::Writable ram; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto io(bool mode, u32 chipID, u32 address, u32 data) -> void; + + struct Memory { + Node::Debugger::Memory ram; + } memory; + + struct Tracer { + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //rdram.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + auto power(bool reset) -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) 
-> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct Chip { + n32 deviceType; + n32 deviceID; + n32 delay; + n32 mode; + n32 refreshInterval; + n32 refreshRow; + n32 rasInterval; + n32 minInterval; + n32 addressSelect; + n32 deviceManufacturer; + n32 currentControl; + } chips[4]; +}; + +extern RDRAM rdram; diff --git a/waterbox/ares64/ares/ares/n64/rdram/serialization.cpp b/waterbox/ares64/ares/ares/n64/rdram/serialization.cpp new file mode 100644 index 0000000000..e8343fb224 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rdram/serialization.cpp @@ -0,0 +1,16 @@ +auto RDRAM::serialize(serializer& s) -> void { + s(ram); + for(auto& chip : chips) { + s(chip.deviceType); + s(chip.deviceID); + s(chip.delay); + s(chip.mode); + s(chip.refreshInterval); + s(chip.refreshRow); + s(chip.rasInterval); + s(chip.minInterval); + s(chip.addressSelect); + s(chip.deviceManufacturer); + s(chip.currentControl); + } +} diff --git a/waterbox/ares64/ares/ares/n64/ri/debugger.cpp b/waterbox/ares64/ares/ares/n64/ri/debugger.cpp new file mode 100644 index 0000000000..148f5e30fe --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ri/debugger.cpp @@ -0,0 +1,28 @@ +auto RI::Debugger::load(Node::Object parent) -> void { + tracer.io = parent->append("I/O", "RI"); +} + +auto RI::Debugger::io(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "RI_MODE", + "RI_CONFIG", + "RI_CURRENT_LOAD", + "RI_SELECT", + "RI_REFRESH", + "RI_LATENCY", + "RI_RERROR", + "RI_WERROR", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "RI_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/ri/io.cpp b/waterbox/ares64/ares/ares/n64/ri/io.cpp new file mode 100644 index 0000000000..74cb89f8fd 
--- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ri/io.cpp @@ -0,0 +1,106 @@ +auto RI::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data = 0; + + if(address == 0) { + //RI_MODE + data = io.mode; + } + + if(address == 1) { + //RI_CONFIG + data = io.config; + } + + if(address == 2) { + //RI_CURRENT_LOAD + data = io.currentLoad; + } + + if(address == 3) { + //RI_SELECT + data = io.select; + if constexpr(!Accuracy::RDRAM::Broadcasting) { + //this register is read by IPL3 to check if RDRAM initialization should be + //skipped. if we are forcing it to be skipped, we should also consume + //enough cycles to not inadvertently speed up the boot process. + //Wave Race 64 Shindou Pak Taiou Version will freeze on the N64 logo if + //the SCC count register, which increments at half the CPU clock rate, has + //too small a value. + //after a cold boot on real hardware with no expansion pak and using the + //CIC-NUS-6102 IPL3, upon reaching the test ROM's entry point the count + //register was measured to be ~0x1184000. 
+ cpu.step(17'641'000); + } + } + + if(address == 4) { + //RI_REFRESH + data = io.refresh; + } + + if(address == 5) { + //RI_LATENCY + data = io.latency; + } + + if(address == 6) { + //RI_RERROR + data = io.readError; + } + + if(address == 7) { + //RI_WERROR + data = io.writeError; + } + + debugger.io(Read, address, data); + return data; +} + +auto RI::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + if(address == 0) { + //RI_MODE + io.mode = data; + } + + if(address == 1) { + //RI_CONFIG + io.config = data; + } + + if(address == 2) { + //RI_CURRENT_LOAD + io.currentLoad = data; + } + + if(address == 3) { + //RI_SELECT + io.select = data; + } + + if(address == 4) { + //RI_REFRESH + io.refresh = data; + } + + if(address == 5) { + //RI_LATENCY + io.latency = data; + } + + if(address == 6) { + //RI_RERROR + io.readError = data; + } + + if(address == 7) { + //RI_WERROR + io.writeError = data; + } + + debugger.io(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/ri/ri.cpp b/waterbox/ares64/ares/ares/n64/ri/ri.cpp new file mode 100644 index 0000000000..1ebca7025c --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ri/ri.cpp @@ -0,0 +1,35 @@ +#include + +namespace ares::Nintendo64 { + +RI ri; +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto RI::load(Node::Object parent) -> void { + node = parent->append("RI"); + debugger.load(node); +} + +auto RI::unload() -> void { + debugger = {}; + node.reset(); +} + +auto RI::power(bool reset) -> void { + io = {}; + if constexpr(!Accuracy::RDRAM::Broadcasting) { + //simulate PIF ROM RDRAM power-on self test + io.mode = 0x0e; + io.config = 0x40; + io.select = 0x14; + io.refresh = 0x0006'3634; + + //store RDRAM size result into memory + rdram.ram.write(0x318, rdram.ram.size); //CIC-NUS-6102 + rdram.ram.write(0x3f0, rdram.ram.size); //CIC-NUS-6105 + } +} + +} diff --git a/waterbox/ares64/ares/ares/n64/ri/ri.hpp 
b/waterbox/ares64/ares/ares/n64/ri/ri.hpp new file mode 100644 index 0000000000..9c13617d0d --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ri/ri.hpp @@ -0,0 +1,40 @@ +//RDRAM Interface + +struct RI : Memory::IO { + Node::Object node; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto io(bool mode, u32 address, u32 data) -> void; + + struct Tracer { + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //ri.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + auto power(bool reset) -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct IO { + n32 mode; + n32 config; + n32 currentLoad; + n32 select; + n32 refresh; + n32 latency; + n32 readError; + n32 writeError; + } io; +}; + +extern RI ri; diff --git a/waterbox/ares64/ares/ares/n64/ri/serialization.cpp b/waterbox/ares64/ares/ares/n64/ri/serialization.cpp new file mode 100644 index 0000000000..e27dc8a480 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/ri/serialization.cpp @@ -0,0 +1,10 @@ +auto RI::serialize(serializer& s) -> void { + s(io.mode); + s(io.config); + s(io.currentLoad); + s(io.select); + s(io.refresh); + s(io.latency); + s(io.readError); + s(io.writeError); +} diff --git a/waterbox/ares64/ares/ares/n64/rsp/debugger.cpp b/waterbox/ares64/ares/ares/n64/rsp/debugger.cpp new file mode 100644 index 0000000000..c5b81a5219 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/debugger.cpp @@ -0,0 +1,91 @@ +#define rsp Nintendo64::rsp + +auto RSP::Debugger::load(Node::Object parent) -> void { + memory.dmem = parent->append("RSP DMEM"); + memory.dmem->setSize(4_KiB); + memory.dmem->setRead([&](u32 address) -> u8 { + return rsp.dmem.read(address); + }); + memory.dmem->setWrite([&](u32 address, u8 data) -> void { + return rsp.dmem.write(address, data); + }); + + memory.imem = parent->append("RSP IMEM"); + 
memory.imem->setSize(4_KiB); + memory.imem->setRead([&](u32 address) -> u8 { + return rsp.imem.read(address); + }); + memory.imem->setWrite([&](u32 address, u8 data) -> void { + return rsp.imem.write(address, data); + }); + + tracer.instruction = parent->append("Instruction", "RSP"); + tracer.instruction->setAddressBits(12, 2); + + tracer.io = parent->append("I/O", "RSP"); +} + +auto RSP::Debugger::unload() -> void { + memory.dmem.reset(); + memory.imem.reset(); + tracer.instruction.reset(); + tracer.io.reset(); +} + +auto RSP::Debugger::instruction() -> void { + if(unlikely(tracer.instruction->enabled())) { + u32 address = rsp.pipeline.address & 0xfff; + u32 instruction = rsp.pipeline.instruction; + if(tracer.instruction->address(address)) { + rsp.disassembler.showColors = 0; + tracer.instruction->notify(rsp.disassembler.disassemble(address, instruction), {}); + rsp.disassembler.showColors = 1; + } + } +} + +auto RSP::Debugger::ioSCC(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "SP_PBUS_ADDRESS", + "SP_DRAM_ADDRESS", + "SP_READ_LENGTH", + "SP_WRITE_LENGTH", + "SP_STATUS", + "SP_DMA_FULL", + "SP_DMA_BUSY", + "SP_SEMAPHORE", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "SP_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} + +auto RSP::Debugger::ioStatus(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "SP_PC_REG", + "SP_IBIST", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "SP_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} + +#undef rsp 
diff --git a/waterbox/ares64/ares/ares/n64/rsp/disassembler.cpp b/waterbox/ares64/ares/ares/n64/rsp/disassembler.cpp new file mode 100644 index 0000000000..cb1f030acb --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/disassembler.cpp @@ -0,0 +1,530 @@ +auto RSP::Disassembler::disassemble(u32 address, u32 instruction) -> string { + this->address = address; + this->instruction = instruction; + + auto v = EXECUTE(); + if(!v) v.append("invalid", string{"$", hex(instruction, 8L)}); + if(!instruction) v = {"nop"}; + auto s = pad(v.takeFirst(), -8L); + return {s, v.merge(",")}; +} + +auto RSP::Disassembler::EXECUTE() -> vector { + auto rtName = [&] { return ipuRegisterName (instruction >> 16 & 31); }; + auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto rsValue = [&] { return ipuRegisterValue(instruction >> 21 & 31); }; + auto imm16i = [&] { return immediate(s16(instruction)); }; + auto imm16u = [&] { return immediate(u16(instruction), 16L); }; + auto jump = [&] { return immediate(n12(address + 4 & 0xf000'0000 | (instruction & 0x03ff'ffff) << 2)); }; + auto branch = [&] { return immediate(n12(address + 4 + (s16(instruction) << 2))); }; + auto offset = [&] { return ipuRegisterIndex(instruction >> 21 & 31, s16(instruction)); }; + + auto ADDI = [&](string_view add, string_view sub, string_view mov) -> vector { + if(!(instruction >> 21 & 31)) return {mov, rtName(), immediate(s16(instruction), 32L)}; + return {s16(instruction) >= 0 ? 
add : sub, rtName(), rsValue(), immediate(abs(s16(instruction)))}; + }; + + auto ALU = [&](string_view name) -> vector { + return {name, rtName(), rsValue(), immediate(u16(instruction))}; + }; + + auto BRANCH1 = [&](string_view name) -> vector { + return {name, rsValue(), branch()}; + }; + + auto BRANCH2 = [&](string_view name) -> vector { + return {name, rsValue(), rtValue(), branch()}; + }; + + auto CACHE = [&](string_view name) -> vector { + auto cache = instruction >> 16 & 3; + auto op = instruction >> 18 & 7; + string type = "reserved"; + if(cache == 0) switch(op) { + case 0: type = "code(IndexInvalidate)"; break; + case 1: type = "code(IndexLoadTag)"; break; + case 2: type = "code(IndexStoreTag)"; break; + case 4: type = "code(HitInvalidate)"; break; + case 5: type = "code(Fill)"; break; + case 6: type = "code(HitWriteBack)"; break; + } + if(cache == 1) switch(op) { + case 0: type = "data(IndexWriteBackInvalidate)"; break; + case 1: type = "data(IndexLoadTag)"; break; + case 2: type = "data(IndexStoreTag)"; break; + case 3: type = "data(CreateDirtyExclusive)"; break; + case 4: type = "data(HitInvalidate)"; break; + case 5: type = "data(HitWriteBackInvalidate)"; break; + case 6: type = "data(HitWriteBack)"; break; + } + return {name, type, offset()}; + }; + + auto JUMP = [&](string_view name) -> vector { + return {name, jump()}; + }; + + auto LOAD = [&](string_view name) -> vector { + return {name, rtName(), offset()}; + }; + + auto STORE = [&](string_view name) -> vector { + return {name, rtValue(), offset()}; + }; + + switch(instruction >> 26) { + case 0x00: return SPECIAL(); + case 0x01: return REGIMM(); + case 0x02: return JUMP("j"); + case 0x03: return JUMP("jal"); + case 0x04: return BRANCH2("beq"); + case 0x05: return BRANCH2("bne"); + case 0x06: return BRANCH1("blez"); + case 0x07: return BRANCH1("bgtz"); + case 0x08: return ADDI("addi", "subi", "li"); + case 0x09: return ADDI("addiu", "subiu", "liu"); + case 0x0a: return ALU("slti"); + case 0x0b: 
return ALU("sltiu"); + case 0x0c: return ALU("andi"); + case 0x0d: return ALU("ori"); + case 0x0e: return ALU("xori"); + case 0x0f: return {"lui", rtName(), imm16u()}; + case 0x10: return SCC(); + case 0x11: break; //COP1 + case 0x12: return VU(); + case 0x13: break; //COP3 + case 0x14: break; //BEQL + case 0x15: break; //BNEL + case 0x16: break; //BLEZL + case 0x17: break; //BGTZL + case 0x18: break; //DADDI + case 0x19: break; //DADDIU + case 0x1a: break; //LDL + case 0x1b: break; //LDR + case 0x1c: break; + case 0x1d: break; + case 0x1e: break; + case 0x1f: break; + case 0x20: return LOAD("lb"); + case 0x21: return LOAD("lh"); + case 0x22: break; //LWL + case 0x23: return LOAD("lw"); + case 0x24: return LOAD("lbu"); + case 0x25: return LOAD("lhu"); + case 0x26: break; //LWR + case 0x27: break; //LWU + case 0x28: return STORE("sb"); + case 0x29: return STORE("sh"); + case 0x2a: break; //SWL + case 0x2b: return STORE("sw"); + case 0x2c: break; //SDL + case 0x2d: break; //SDR + case 0x2e: break; //SWR + case 0x2f: return CACHE("cache"); + case 0x30: break; //LL + case 0x31: break; //LWC1 + case 0x32: return LWC2(); + case 0x33: break; //LWC3 + case 0x34: break; //LLD + case 0x35: break; //LDC1 + case 0x36: break; //LDC2 + case 0x37: break; //LD + case 0x38: break; //SC + case 0x39: break; //SWC1 + case 0x3a: return SWC2(); + case 0x3b: break; //SWC3 + case 0x3c: break; //SCD + case 0x3d: break; //SDC1 + case 0x3e: break; //SDC2 + case 0x3f: break; //SD + } + + return {}; +} + +auto RSP::Disassembler::SPECIAL() -> vector { + auto shift = [&] { return string{instruction >> 6 & 31}; }; + auto rdName = [&] { return ipuRegisterName (instruction >> 11 & 31); }; + auto rdValue = [&] { return ipuRegisterValue(instruction >> 11 & 31); }; + auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto rsValue = [&] { return ipuRegisterValue(instruction >> 21 & 31); }; + + auto ALU = [&](string_view name, string_view by) -> vector { + return {name, rdName(), 
rtValue(), by}; + }; + + auto JALR = [&](string_view name) -> vector { + if((instruction >> 11 & 31) == 31) return {name, rsValue()}; + return {name, rdName(), rsValue()}; + }; + + auto REG = [&](string_view name) -> vector { + return {name, rdName(), rsValue(), rtValue()}; + }; + + switch(instruction & 0x3f) { + case 0x00: return ALU("sll", shift()); + case 0x01: break; + case 0x02: return ALU("srl", shift()); + case 0x03: return ALU("sra", shift()); + case 0x04: return ALU("sllv", shift()); + case 0x05: break; + case 0x06: return ALU("srlv", rsValue()); + case 0x07: return ALU("srav", rsValue()); + case 0x08: return {"jr", rsValue()}; + case 0x09: return JALR("jalr"); + case 0x0a: break; + case 0x0b: break; + case 0x0c: break; //SYSCALL + case 0x0d: return {"break"}; + case 0x0e: break; + case 0x0f: break; //SYNC + case 0x10: break; //MFHI + case 0x11: break; //MTHI + case 0x12: break; //MFLO + case 0x13: break; //MTLO + case 0x14: break; //DSLLV + case 0x15: break; + case 0x16: break; //DSRLV + case 0x17: break; //DSRAV + case 0x18: break; //MULT + case 0x19: break; //MULTU + case 0x1a: break; //DIV + case 0x1b: break; //DIVU + case 0x1c: break; //DMULT + case 0x1d: break; //DMULTU + case 0x1e: break; //DDIV + case 0x1f: break; //DDIVU + case 0x20: return REG("add"); + case 0x21: return REG("addu"); + case 0x22: return REG("sub"); + case 0x23: return REG("subu"); + case 0x24: return REG("and"); + case 0x25: return REG("or"); + case 0x26: return REG("xor"); + case 0x27: return REG("nor"); + case 0x28: break; + case 0x29: break; + case 0x2a: return REG("slt"); + case 0x2b: return REG("sltu"); + case 0x2c: break; //DADD + case 0x2d: break; //DADDU + case 0x2e: break; //DSUB + case 0x2f: break; //DSUBU + case 0x30: break; //TGE + case 0x31: break; //TGEU + case 0x32: break; //TLT + case 0x33: break; //TLTU + case 0x34: break; //TEQ + case 0x35: break; + case 0x36: break; //TNE + case 0x37: break; + case 0x38: break; //DSLL + case 0x39: break; + case 0x3a: break; 
//DSRL + case 0x3b: break; //DSRA + case 0x3c: break; //DSLL32 + case 0x3d: break; + case 0x3e: break; //DSRL32 + case 0x3f: break; //DSRA32 + } + + return {}; +} + +auto RSP::Disassembler::REGIMM() -> vector { + auto rsValue = [&] { return ipuRegisterValue(instruction >> 21 & 31); }; + auto branch = [&] { return immediate(n12(address + 4 + (s16(instruction) << 2))); }; + + auto BRANCH = [&](string_view name) -> vector { + return {name, rsValue(), branch()}; + }; + + switch(instruction >> 16 & 0x1f) { + case 0x00: return BRANCH("bltz"); + case 0x01: return BRANCH("bgez"); + case 0x02: break; //BLTZL + case 0x03: break; //BGEZL + case 0x04: break; + case 0x05: break; + case 0x06: break; + case 0x07: break; + case 0x08: break; //TGEI + case 0x09: break; //TGEIU + case 0x0a: break; //TLTI + case 0x0b: break; //TLTIU + case 0x0c: break; //TEQI + case 0x0d: break; + case 0x0e: break; //TNEI + case 0x0f: break; + case 0x10: return BRANCH("bltzal"); + case 0x11: return BRANCH("bgezal"); + case 0x12: break; //BLTZALL + case 0x13: break; //BGEZALL + case 0x14: break; + case 0x15: break; + case 0x16: break; + case 0x17: break; + case 0x18: break; + case 0x19: break; + case 0x1a: break; + case 0x1b: break; + case 0x1c: break; + case 0x1d: break; + case 0x1e: break; + case 0x1f: break; + } + + return {}; +} + +auto RSP::Disassembler::SCC() -> vector { + auto rtName = [&] { return ipuRegisterName (instruction >> 16 & 31); }; + auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto sdName = [&] { return sccRegisterName (instruction >> 11 & 31); }; + auto sdValue = [&] { return sccRegisterValue(instruction >> 11 & 31); }; + + switch(instruction >> 21 & 0x1f) { + case 0x00: return {"mfc0", rtName(), sdValue()}; + case 0x04: return {"mtc0", sdName(), rtValue()}; + } + + return {}; +} + +auto RSP::Disassembler::LWC2() -> vector { + auto vtName = [&] { return vpuRegisterName (instruction >> 16 & 31, instruction >> 7 & 15); }; + auto vtValue = [&] { return 
vpuRegisterValue (instruction >> 16 & 31, instruction >> 7 & 15); }; + auto offset = [&](u32 multiplier) { return ipuRegisterIndex(instruction >> 21 & 31, i7(instruction) * multiplier); }; + + switch(instruction >> 11 & 31) { + case 0x00: return {"lbv", vtName(), offset( 1)}; + case 0x01: return {"lsv", vtName(), offset( 2)}; + case 0x02: return {"llv", vtName(), offset( 4)}; + case 0x03: return {"ldv", vtName(), offset( 8)}; + case 0x04: return {"lqv", vtName(), offset(16)}; + case 0x05: return {"lrv", vtName(), offset(16)}; + case 0x06: return {"lpv", vtName(), offset( 8)}; + case 0x07: return {"luv", vtName(), offset( 8)}; + case 0x08: return {"lhv", vtName(), offset(16)}; + case 0x09: return {"lfv", vtName(), offset(16)}; +//case 0x0a: return {"lwv", vtName(), offset(16)}; //not present on N64 RSP + case 0x0b: return {"ltv", vtName(), offset(16)}; + } + return {}; +} + +auto RSP::Disassembler::SWC2() -> vector { + auto vtName = [&] { return vpuRegisterName (instruction >> 16 & 31); }; + auto vtValue = [&] { return vpuRegisterValue (instruction >> 16 & 31); }; + auto offset = [&](u32 multiplier) { return ipuRegisterIndex(instruction >> 21 & 31, i7(instruction) * multiplier); }; + + switch(instruction >> 11 & 31) { + case 0x00: return {"sbv", vtValue(), offset( 1)}; + case 0x01: return {"ssv", vtValue(), offset( 2)}; + case 0x02: return {"slv", vtValue(), offset( 4)}; + case 0x03: return {"sdv", vtValue(), offset( 8)}; + case 0x04: return {"sqv", vtValue(), offset(16)}; + case 0x05: return {"srv", vtValue(), offset(16)}; + case 0x06: return {"spv", vtValue(), offset( 8)}; + case 0x07: return {"suv", vtValue(), offset( 8)}; + case 0x08: return {"shv", vtValue(), offset(16)}; + case 0x09: return {"sfv", vtValue(), offset(16)}; + case 0x0a: return {"swv", vtValue(), offset(16)}; + case 0x0b: return {"stv", vtValue(), offset(16)}; + } + return {}; +} + +auto RSP::Disassembler::VU() -> vector { + auto rtName = [&] { return ipuRegisterName (instruction >> 16 & 31); }; 
+ auto rtValue = [&] { return ipuRegisterValue(instruction >> 16 & 31); }; + auto rdName = [&] { return vpuRegisterName (instruction >> 11 & 31, instruction >> 7 & 15); }; + auto rdValue = [&] { return vpuRegisterValue (instruction >> 11 & 31, instruction >> 7 & 15); }; + auto cdName = [&] { return ccrRegisterName (instruction >> 11 & 31); }; + auto cdValue = [&] { return ccrRegisterValue(instruction >> 11 & 31); }; + + switch(instruction >> 21 & 0x1f) { + case 0x00: return {"mfc2", rtName(), rdValue()}; + case 0x02: return {"cfc2", rtName(), cdValue()}; + case 0x04: return {"mtc2", rtValue(), rdName()}; + case 0x06: return {"ctc2", rtValue(), cdName()}; + } + if(!(instruction >> 25 & 1)) return {}; + + auto vdName = [&] { return vpuRegisterName (instruction >> 6 & 31); }; + auto vdValue = [&] { return vpuRegisterValue(instruction >> 6 & 31); }; + auto vsName = [&] { return vpuRegisterName (instruction >> 11 & 31); }; + auto vsValue = [&] { return vpuRegisterValue(instruction >> 11 & 31); }; + auto vtName = [&] { return vpuRegisterName (instruction >> 16 & 31, instruction >> 21 & 15); }; + auto vtValue = [&] { return vpuRegisterValue(instruction >> 16 & 31, instruction >> 21 & 15); }; + auto vmName = [&] { return vpuRegisterName (instruction >> 6 & 31, instruction >> 11 & 31); }; + auto vmValue = [&] { return vpuRegisterValue(instruction >> 6 & 31, instruction >> 11 & 31); }; + + auto DST = [&](string_view name) -> vector { + return {name, vdName(), vsValue(), vtValue()}; + }; + + auto DSE = [&](string_view name) -> vector { + static const string registerNames[] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "acch", "accm", "accl", "r11", "r12", "r13", "r14", "r15", + }; + return {name, vdName(), vsValue(), registerNames[instruction >> 21 & 15]}; + }; + + auto DT = [&](string_view name) -> vector { + return {name, vmName(), vtValue()}; + }; + + auto D = [&](string_view name) -> vector { + return {name, vdName()}; + }; + + switch(instruction & 0x3f) { + 
case 0x00: return DST("vmulf"); + case 0x01: return DST("vmulu"); + case 0x02: return DST("vrndp"); + case 0x03: return DST("vmulq"); + case 0x04: return DST("vmudl"); + case 0x05: return DST("vmudm"); + case 0x06: return DST("vmudn"); + case 0x07: return DST("vmudh"); + case 0x08: return DST("vmacf"); + case 0x09: return DST("vmacu"); + case 0x0a: return DST("vrndn"); + case 0x0b: return D("vmacq"); + case 0x0c: return DST("vmadl"); + case 0x0d: return DST("vmadm"); + case 0x0e: return DST("vmadn"); + case 0x0f: return DST("vmadh"); + case 0x10: return DST("vadd"); + case 0x11: return DST("vsub"); + case 0x12: break; + case 0x13: return DST("vabs"); + case 0x14: return DST("vaddc"); + case 0x15: return DST("vsubc"); + case 0x16: break; + case 0x17: break; + case 0x18: break; + case 0x19: break; + case 0x1a: break; + case 0x1b: break; + case 0x1c: break; + case 0x1d: return DSE("vsar"); + case 0x1e: break; + case 0x1f: break; + case 0x20: return DST("vlt"); + case 0x21: return DST("veq"); + case 0x22: return DST("vne"); + case 0x23: return DST("vge"); + case 0x24: return DST("vcl"); + case 0x25: return DST("vch"); + case 0x26: return DST("vcr"); + case 0x27: return DST("vmrg"); + case 0x28: return DST("vand"); + case 0x29: return DST("vnand"); + case 0x2a: return DST("vor"); + case 0x2b: return DST("vnor"); + case 0x2c: return DST("vxor"); + case 0x2d: return DST("vnxor"); + case 0x2e: break; + case 0x2f: break; + case 0x30: return DT("vrcp"); + case 0x31: return DT("vrcpl"); + case 0x32: return DT("vrcph"); + case 0x33: return DT("vmov"); + case 0x34: return DT("vrsq"); + case 0x35: return DT("vrsql"); + case 0x36: return DT("vrsqh"); + case 0x37: return {"vnop"}; + case 0x38: break; + case 0x39: break; + case 0x3a: break; + case 0x3b: break; + case 0x3c: break; + case 0x3d: break; + case 0x3e: break; + case 0x3f: break; + } + + return {}; +} + +auto RSP::Disassembler::immediate(s64 value, u32 bits) const -> string { + if(value < 0) return {"-$", hex(-value, bits 
>> 2)}; + return {"$", hex(value, bits >> 2)}; +}; + +auto RSP::Disassembler::ipuRegisterName(u32 index) const -> string { + static const string registers[32] = { + "0", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", + }; + return registers[index]; +} + +auto RSP::Disassembler::ipuRegisterValue(u32 index) const -> string { + if(index && showValues) return {ipuRegisterName(index), hint("{$", hex(self.ipu.r[index].u32, 8L), "}")}; + return ipuRegisterName(index); +} + +auto RSP::Disassembler::ipuRegisterIndex(u32 index, s16 offset) const -> string { + string adjust; + if(offset >= 0) adjust = {"+$", hex( offset)}; + if(offset < 0) adjust = {"-$", hex(-offset)}; + if(index && showValues) return {ipuRegisterName(index), adjust, hint("{$", hex(self.ipu.r[index].u32 + offset, 8L), "}")}; + return {ipuRegisterName(index), adjust}; +} + +auto RSP::Disassembler::sccRegisterName(u32 index) const -> string { + static const string registers[32] = { + "SP_PBUS_ADDRESS", "SP_DRAM_ADDRESS", "SP_READ_LENGTH", "SP_WRITE_LENGTH", + "SP_STATUS", "SP_DMA_FULL", "SP_DMA_BUSY", "SP_SEMAPHORE", + "DPC_START", "DPC_END", "DPC_CURRENT", "DPC_STATUS", + "DPC_CLOCK", "DPC_BUSY", "DPC_PIPE_BUSY", "DPC_TMEM_BUSY", + }; + return registers[index & 15]; +} + +auto RSP::Disassembler::sccRegisterValue(u32 index) const -> string { + u32 value = 0; + if(index <= 6) value = rsp.readWord((index & 7) << 2); + if(index == 7) value = self.status.semaphore; //rsp.readSCC(7) has side-effects + if(index >= 8) value = rdp.readWord((index & 7) << 2); + if(showValues) return {sccRegisterName(index), hint("{$", hex(value, 8L), "}")}; + return sccRegisterName(index); +} + +auto RSP::Disassembler::vpuRegisterName(u32 index, u32 element) const -> string { + if(element) return {"v", index, "[", element, "]"}; + return {"v", index}; +} + +auto 
RSP::Disassembler::vpuRegisterValue(u32 index, u32 element) const -> string { + if(showValues) { + vector elements; + for(u32 e : range(8)) elements.append(hex(self.vpu.r[index].element(e), 4L)); + return {vpuRegisterName(index, element), hint("{$", elements.merge("|"), "}")}; + } + return vpuRegisterName(index, element); +} + +auto RSP::Disassembler::ccrRegisterName(u32 index) const -> string { + static const string registers[32] = {"vco", "vcc", "vce"}; + if(index < 3) return registers[index]; + return {"vc", index}; +} + +auto RSP::Disassembler::ccrRegisterValue(u32 index) const -> string { + if(showValues) return {ccrRegisterName(index)}; //todo + return ccrRegisterName(index); +} + +template +auto RSP::Disassembler::hint(P&&... p) const -> string { + if(showColors) return {"\e[0m\e[37m", forward

(p)..., "\e[0m"}; + return {forward

(p)...}; +} diff --git a/waterbox/ares64/ares/ares/n64/rsp/dma.cpp b/waterbox/ares64/ares/ares/n64/rsp/dma.cpp new file mode 100644 index 0000000000..b422a1cf87 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/dma.cpp @@ -0,0 +1,27 @@ +auto RSP::dmaTransfer() -> void { + if(dma.requests.empty()) return; + auto request = *dma.requests.read(); + auto region = !request.pbusRegion ? 0x0400'0000 : 0x0400'1000; + + if(request.type == DMA::Request::Type::Read) { + for(u32 block : range(request.count)) { + for(u32 offset = 0; offset < request.length; offset += 4) { + u32 data = bus.read(request.dramAddress + offset); + bus.write(region + request.pbusAddress + offset, data); + } + request.pbusAddress += request.length; + request.dramAddress += request.length + request.skip; + } + } + + if(request.type == DMA::Request::Type::Write) { + for(u32 block : range(request.count)) { + for(u32 offset = 0; offset < request.length; offset += 4) { + u32 data = bus.read(region + request.pbusAddress + offset); + bus.write(request.dramAddress + offset, data); + } + request.pbusAddress += request.length; + request.dramAddress += request.length + request.skip; + } + } +} diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter-ipu.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter-ipu.cpp new file mode 100644 index 0000000000..e0618c062e --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/interpreter-ipu.cpp @@ -0,0 +1,179 @@ +#define PC ipu.pc +#define RA ipu.r[31] + +auto RSP::ADDIU(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = s32(rs.u32 + imm); +} + +auto RSP::ADDU(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = s32(rs.u32 + rt.u32); +} + +auto RSP::AND(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = rs.u32 & rt.u32; +} + +auto RSP::ANDI(r32& rt, cr32& rs, u16 imm) -> void { + rt.u32 = rs.u32 & imm; +} + +auto RSP::BEQ(cr32& rs, cr32& rt, s16 imm) -> void { + if(rs.u32 == rt.u32) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BGEZ(cr32& rs, s16 imm) -> void { + 
if(rs.s32 >= 0) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BGEZAL(cr32& rs, s16 imm) -> void { + RA.u32 = s32(PC + 8); + if(rs.s32 >= 0) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BGTZ(cr32& rs, s16 imm) -> void { + if(rs.s32 > 0) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BLEZ(cr32& rs, s16 imm) -> void { + if(rs.s32 <= 0) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BLTZ(cr32& rs, s16 imm) -> void { + if(rs.s32 < 0) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BLTZAL(cr32& rs, s16 imm) -> void { + RA.u32 = s32(PC + 8); + if(rs.s32 < 0) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BNE(cr32& rs, cr32& rt, s16 imm) -> void { + if(rs.u32 != rt.u32) branch.take(PC + 4 + (imm << 2)); +} + +auto RSP::BREAK() -> void { + status.halted = 1; + status.broken = 1; + if(status.interruptOnBreak) mi.raise(MI::IRQ::SP); +} + +auto RSP::J(u32 imm) -> void { + branch.take((PC + 4 & 0xf000'0000) | (imm << 2)); +} + +auto RSP::JAL(u32 imm) -> void { + RA.u32 = s32(PC + 8); + branch.take((PC + 4 & 0xf000'0000) | (imm << 2)); +} + +auto RSP::JALR(r32& rd, cr32& rs) -> void { + rd.u32 = s32(PC + 8); + branch.take(rs.u32); +} + +auto RSP::JR(cr32& rs) -> void { + branch.take(rs.u32); +} + +auto RSP::LB(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = s8(dmem.read(rs.u32 + imm)); +} + +auto RSP::LBU(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = u8(dmem.read(rs.u32 + imm)); +} + +auto RSP::LH(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = s16(dmem.readUnaligned(rs.u32 + imm)); +} + +auto RSP::LHU(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = u16(dmem.readUnaligned(rs.u32 + imm)); +} + +auto RSP::LUI(r32& rt, u16 imm) -> void { + rt.u32 = s32(imm << 16); +} + +auto RSP::LW(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = s32(dmem.readUnaligned(rs.u32 + imm)); +} + +auto RSP::NOR(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = ~(rs.u32 | rt.u32); +} + +auto RSP::OR(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = rs.u32 | rt.u32; +} + +auto 
RSP::ORI(r32& rt, cr32& rs, u16 imm) -> void { + rt.u32 = rs.u32 | imm; +} + +auto RSP::SB(cr32& rt, cr32& rs, s16 imm) -> void { + dmem.write(rs.u32 + imm, rt.u32); +} + +auto RSP::SH(cr32& rt, cr32& rs, s16 imm) -> void { + dmem.writeUnaligned(rs.u32 + imm, rt.u32); +} + +auto RSP::SLL(r32& rd, cr32& rt, u8 sa) -> void { + rd.u32 = s32(rt.u32 << sa); +} + +auto RSP::SLLV(r32& rd, cr32& rt, cr32& rs) -> void { + rd.u32 = s32(rt.u32 << (rs.u32 & 31)); +} + +auto RSP::SLT(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = rs.s32 < rt.s32; +} + +auto RSP::SLTI(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = rs.s32 < imm; +} + +auto RSP::SLTIU(r32& rt, cr32& rs, s16 imm) -> void { + rt.u32 = rs.u32 < imm; +} + +auto RSP::SLTU(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = rs.u32 < rt.u32; +} + +auto RSP::SRA(r32& rd, cr32& rt, u8 sa) -> void { + rd.u32 = rt.s32 >> sa; +} + +auto RSP::SRAV(r32& rd, cr32& rt, cr32& rs) -> void { + rd.u32 = rt.s32 >> (rs.u32 & 31); +} + +auto RSP::SRL(r32& rd, cr32& rt, u8 sa) -> void { + rd.u32 = s32(rt.u32 >> sa); +} + +auto RSP::SRLV(r32& rd, cr32& rt, cr32& rs) -> void { + rd.u32 = s32(rt.u32 >> (rs.u32 & 31)); +} + +auto RSP::SUBU(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = s32(rs.u32 - rt.u32); +} + +auto RSP::SW(cr32& rt, cr32& rs, s16 imm) -> void { + dmem.writeUnaligned(rs.u32 + imm, rt.u32); +} + +auto RSP::XOR(r32& rd, cr32& rs, cr32& rt) -> void { + rd.u32 = rs.u32 ^ rt.u32; +} + +auto RSP::XORI(r32& rt, cr32& rs, u16 imm) -> void { + rt.u32 = rs.u32 ^ imm; +} + +#undef PC +#undef RA diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter-scc.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter-scc.cpp new file mode 100644 index 0000000000..fcdbcc5a0f --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/interpreter-scc.cpp @@ -0,0 +1,9 @@ +auto RSP::MFC0(r32& rt, u8 rd) -> void { + if((rd & 8) == 0) rt.u32 = Nintendo64::rsp.readWord((rd & 7) << 2); + if((rd & 8) != 0) rt.u32 = Nintendo64::rdp.readWord((rd & 7) << 
2); +} + +auto RSP::MTC0(cr32& rt, u8 rd) -> void { + if((rd & 8) == 0) Nintendo64::rsp.writeWord((rd & 7) << 2, rt.u32); + if((rd & 8) != 0) Nintendo64::rdp.writeWord((rd & 7) << 2, rt.u32); +} diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp new file mode 100644 index 0000000000..fddf7c4ad8 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp @@ -0,0 +1,1431 @@ +#define ACCH vpu.acch +#define ACCM vpu.accm +#define ACCL vpu.accl +#define VCOH vpu.vcoh +#define VCOL vpu.vcol +#define VCCH vpu.vcch +#define VCCL vpu.vccl +#define VCE vpu.vce + +#define DIVIN vpu.divin +#define DIVOUT vpu.divout +#define DIVDP vpu.divdp + +auto RSP::r128::operator()(u32 index) const -> r128 { + if constexpr(Accuracy::RSP::SISD) { + r128 v{*this}; + switch(index) { + case 0: break; + case 1: break; + case 2: v.u16(1) = v.u16(0); v.u16(3) = v.u16(2); v.u16(5) = v.u16(4); v.u16(7) = v.u16(6); break; + case 3: v.u16(0) = v.u16(1); v.u16(2) = v.u16(3); v.u16(4) = v.u16(5); v.u16(6) = v.u16(7); break; + case 4: v.u16(1) = v.u16(2) = v.u16(3) = v.u16(0); v.u16(5) = v.u16(6) = v.u16(7) = v.u16(4); break; + case 5: v.u16(0) = v.u16(2) = v.u16(3) = v.u16(1); v.u16(4) = v.u16(6) = v.u16(7) = v.u16(5); break; + case 6: v.u16(0) = v.u16(1) = v.u16(3) = v.u16(2); v.u16(4) = v.u16(5) = v.u16(7) = v.u16(6); break; + case 7: v.u16(0) = v.u16(1) = v.u16(2) = v.u16(3); v.u16(4) = v.u16(5) = v.u16(6) = v.u16(7); break; + case 8: for(u32 n : range(8)) v.u16(n) = v.u16(0); break; + case 9: for(u32 n : range(8)) v.u16(n) = v.u16(1); break; + case 10: for(u32 n : range(8)) v.u16(n) = v.u16(2); break; + case 11: for(u32 n : range(8)) v.u16(n) = v.u16(3); break; + case 12: for(u32 n : range(8)) v.u16(n) = v.u16(4); break; + case 13: for(u32 n : range(8)) v.u16(n) = v.u16(5); break; + case 14: for(u32 n : range(8)) v.u16(n) = v.u16(6); break; + case 15: for(u32 n : range(8)) v.u16(n) = v.u16(7); break; + } + return 
v; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + static const __m128i shuffle[16] = { + //vector + _mm_set_epi8(15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), //01234567 + _mm_set_epi8(15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), //01234567 + //scalar quarter + _mm_set_epi8(15,14,15,14,11,10,11,10, 7, 6, 7, 6, 3, 2, 3, 2), //00224466 + _mm_set_epi8(13,12,13,12, 9, 8, 9, 8, 5, 4, 5, 4, 1, 0, 1, 0), //11335577 + //scalar half + _mm_set_epi8(15,14,15,14,15,14,15,14, 7, 6, 7, 6, 7, 6, 7, 6), //00004444 + _mm_set_epi8(13,12,13,12,13,12,13,12, 5, 4, 5, 4, 5, 4, 5, 4), //11115555 + _mm_set_epi8(11,10,11,10,11,10,11,10, 3, 2, 3, 2, 3, 2, 3, 2), //22226666 + _mm_set_epi8( 9, 8, 9, 8, 9, 8, 9, 8, 1, 0, 1, 0, 1, 0, 1, 0), //33337777 + //scalar whole + _mm_set_epi8(15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14), //00000000 + _mm_set_epi8(13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12), //11111111 + _mm_set_epi8(11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10), //22222222 + _mm_set_epi8( 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8), //33333333 + _mm_set_epi8( 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6), //44444444 + _mm_set_epi8( 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4), //55555555 + _mm_set_epi8( 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2), //66666666 + _mm_set_epi8( 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), //77777777 + }; + //todo: benchmark to see if testing for cases 0&1 to return value directly is faster + return {uint128_t(_mm_shuffle_epi8(v128, shuffle[index]))}; +#endif + } +} + +auto RSP::accumulatorGet(u32 index) const -> u64 { + return (u64)ACCH.u16(index) << 32 | (u64)ACCM.u16(index) << 16 | (u64)ACCL.u16(index) << 0; +} + +auto RSP::accumulatorSet(u32 index, u64 value) -> void { + ACCH.u16(index) = value >> 32; + ACCM.u16(index) = value >> 16; + ACCL.u16(index) = value >> 0; +} + +auto RSP::accumulatorSaturate(u32 index, bool slice, u16 negative, u16 positive) const -> u16 { + if(ACCH.s16(index) 
< 0) { + if(ACCH.u16(index) != 0xffff) return negative; + if(ACCM.s16(index) >= 0) return negative; + } else { + if(ACCH.u16(index) != 0x0000) return positive; + if(ACCM.s16(index) < 0) return positive; + } + return !slice ? ACCL.u16(index) : ACCM.u16(index); +} + +auto RSP::CFC2(r32& rt, u8 rd) -> void { + r128 hi, lo; + switch(rd & 3) { + case 0x00: hi = VCOH; lo = VCOL; break; + case 0x01: hi = VCCH; lo = VCCL; break; + case 0x02: hi = zero; lo = VCE; break; + case 0x03: hi = zero; lo = VCE; break; //unverified + } + + if constexpr(Accuracy::RSP::SISD) { + rt.u32 = 0; + for(u32 n : range(8)) { + rt.u32 |= lo.get(n) << 0 + n; + rt.u32 |= hi.get(n) << 8 + n; + } + rt.u32 = s16(rt.u32); + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + static const v128 reverse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + rt.u32 = s16(_mm_movemask_epi8(_mm_shuffle_epi8(_mm_packs_epi16(hi, lo), reverse))); +#endif + } +} + +auto RSP::CTC2(cr32& rt, u8 rd) -> void { + maybe hi, lo; + r128 null; + switch(rd & 3) { + case 0x00: hi = VCOH; lo = VCOL; break; + case 0x01: hi = VCCH; lo = VCCL; break; + case 0x02: hi = null; lo = VCE; break; + case 0x03: hi = null; lo = VCE; break; //unverified + } + + if constexpr(Accuracy::RSP::SISD) { + for(u32 n : range(8)) { + lo->set(n, rt.u32 & 1 << 0 + n); + hi->set(n, rt.u32 & 1 << 8 + n); + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + static const v128 mask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080); + lo->v128 = _mm_cmpeq_epi8(_mm_and_si128(_mm_shuffle_epi8(r128{~rt.u32 >> 0}, zero), mask), zero); + hi->v128 = _mm_cmpeq_epi8(_mm_and_si128(_mm_shuffle_epi8(r128{~rt.u32 >> 8}, zero), mask), zero); +#endif + } +} + +template +auto RSP::LBV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm; + vt.byte(e) = dmem.read(address); +} + +template +auto RSP::LDV(r128& vt, cr32& rs, s8 imm) -> void { + auto 
address = rs.u32 + imm * 8; + auto start = e; + auto end = start + 8; + for(u32 offset = start; offset < end; offset++) { + vt.byte(offset & 15) = dmem.read(address++); + } +} + +template +auto RSP::LFV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = e >> 1; + auto end = start + 4; + for(u32 offset = start; offset < end; offset++) { + vt.element(offset & 7) = dmem.read(address) << 7; + address += 4; + } +} + +template +auto RSP::LHV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + for(u32 offset = 0; offset < 8; offset++) { + vt.element(offset) = dmem.read(address + (16 - e + offset * 2 & 15)) << 7; + } +} + +template +auto RSP::LLV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 4; + auto start = e; + auto end = start + 4; + for(u32 offset = start; offset < end; offset++) { + vt.byte(offset & 15) = dmem.read(address++); + } +} + +template +auto RSP::LPV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 8; + for(u32 offset = 0; offset < 8; offset++) { + vt.element(offset) = dmem.read(address + (16 - e + offset & 15)) << 8; + } +} + +template +auto RSP::LQV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = e; + auto end = 16 - (address & 15); + for(u32 offset = start; offset < end; offset++) { + vt.byte(offset & 15) = dmem.read(address++); + } +} + +template +auto RSP::LRV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto index = e; + auto start = 16 - ((address & 15) - index); + address &= ~15; + for(u32 offset = start; offset < 16; offset++) { + vt.byte(offset & 15) = dmem.read(address++); + } +} + +template +auto RSP::LSV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 2; + auto start = e; + auto end = start + 2; + for(u32 offset = start; offset < end; offset++) { + vt.byte(offset & 15) = dmem.read(address++); + } +} + +template +auto RSP::LTV(u8 vt, cr32& rs, s8 
imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = vt; + auto end = min(32, start + 8); + address = (address + 8 & ~15) + (e & 1); + for(u32 offset = start; offset < end; offset++) { + auto byte = (8 - (e >> 1) + (offset - start)) << 1; + vpu.r[offset].byte(byte + 0 & 15) = dmem.read(address++); + vpu.r[offset].byte(byte + 1 & 15) = dmem.read(address++); + } +} + +template +auto RSP::LUV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 8; + for(u32 offset = 0; offset < 8; offset++) { + vt.element(offset) = dmem.read(address + (16 - e + offset & 15)) << 7; + } +} + +template +auto RSP::LWV(r128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = 16 - e; + auto end = e + 16; + for(u32 offset = start; offset < end; offset++) { + vt.byte(offset & 15) = dmem.read(address); + address += 4; + } +} + +template +auto RSP::MFC2(r32& rt, cr128& vs) -> void { + auto hi = vs.byte(e + 0 & 15); + auto lo = vs.byte(e + 1 & 15); + rt.u32 = s16(hi << 8 | lo << 0); +} + +template +auto RSP::MTC2(cr32& rt, r128& vs) -> void { + vs.byte(e + 0 & 15) = rt.u32 >> 8; + vs.byte(e + 1 & 15) = rt.u32 >> 0; +} + +template +auto RSP::SBV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm; + dmem.write(address, vt.byte(e)); +} + +template +auto RSP::SDV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 8; + auto start = e; + auto end = start + 8; + for(u32 offset = start; offset < end; offset++) { + dmem.write(address++, vt.byte(offset & 15)); + } +} + +template +auto RSP::SFV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = e >> 1; + auto end = start + 4; + auto base = address & 15; + address &= ~15; + for(u32 offset = start; offset < end; offset++) { + dmem.write(address + (base & 15), vt.element(offset & 7) >> 7); + base += 4; + } +} + +template +auto RSP::SHV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + for(u32 
offset = 0; offset < 8; offset++) { + auto byte = e + offset * 2; + auto value = vt.byte(byte + 0 & 15) << 1 | vt.byte(byte + 1 & 15) >> 7; + dmem.write(address, value); + address += 2; + } +} + +template +auto RSP::SLV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 4; + auto start = e; + auto end = start + 4; + for(u32 offset = start; offset < end; offset++) { + dmem.write(address++, vt.byte(offset & 15)); + } +} + +template +auto RSP::SPV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 8; + auto start = e; + auto end = start + 8; + for(u32 offset = start; offset < end; offset++) { + if((offset & 15) < 8) { + dmem.write(address++, vt.byte((offset & 7) << 1)); + } else { + dmem.write(address++, vt.element(offset & 7) >> 7); + } + } +} + +template +auto RSP::SQV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = e; + auto end = start + (16 - (address & 15)); + for(u32 offset = start; offset < end; offset++) { + dmem.write(address++, vt.byte(offset & 15)); + } +} + +template +auto RSP::SRV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = e; + auto end = start + (address & 15); + auto base = 16 - (address & 15); + address &= ~15; + for(u32 offset = start; offset < end; offset++) { + dmem.write(address++, vt.byte(offset + base & 15)); + } +} + +template +auto RSP::SSV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 2; + auto start = e; + auto end = start + 2; + for(u32 offset = start; offset < end; offset++) { + dmem.write(address++, vt.byte(offset & 15)); + } +} + +template +auto RSP::STV(u8 vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = vt; + auto end = min(32, start + 8); + auto element = 8 - (e >> 1); + auto base = (address & 15) + (element << 1); + address &= ~15; + for(u32 offset = start; offset < end; offset++) { + dmem.writeUnaligned(address + (base & 15), 
vpu.r[offset].element(element++ & 7)); + base += 2; + } +} + +template +auto RSP::SUV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 8; + auto start = e; + auto end = start + 8; + for(u32 offset = start; offset < end; offset++) { + if((offset & 15) < 8) { + dmem.write(address++, vt.element(offset & 7) >> 7); + } else { + dmem.write(address++, vt.byte((offset & 7) << 1)); + } + } +} + +template +auto RSP::SWV(cr128& vt, cr32& rs, s8 imm) -> void { + auto address = rs.u32 + imm * 16; + auto start = e; + auto end = start + 16; + auto base = address & 15; + address &= ~15; + for(u32 offset = start; offset < end; offset++) { + dmem.write(address + (base++ & 15), vt.byte(offset & 15)); + } +} + +template +auto RSP::VABS(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + r128 vte = vt(e); + for(u32 n : range(8)) { + if(vs.s16(n) < 0) { + if(vte.s16(n) == -32768) vte.s16(n) = -32767; + ACCL.s16(n) = -vte.s16(n); + } else if(vs.s16(n) > 0) { + ACCL.s16(n) = +vte.s16(n); + } else { + ACCL.s16(n) = 0; + } + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vs0, slt; + vs0 = _mm_cmpeq_epi16(vs, zero); + slt = _mm_srai_epi16(vs, 15); + vd = _mm_andnot_si128(vs0, vt(e)); + vd = _mm_xor_si128(vd, slt); + ACCL = _mm_sub_epi16(vd, slt); + vd = _mm_subs_epi16(vd, slt); +#endif + } +} + +template +auto RSP::VADD(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + s32 result = vs.s16(n) + vte.s16(n) + VCOL.get(n); + ACCL.s16(n) = result; + vd.s16(n) = sclamp<16>(result); + } + VCOL = zero; + VCOH = zero; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), sum, min, max; + sum = _mm_add_epi16(vs, vte); + ACCL = _mm_sub_epi16(sum, VCOL); + min = _mm_min_epi16(vs, vte); + max = _mm_max_epi16(vs, vte); + min = _mm_subs_epi16(min, VCOL); + vd = _mm_adds_epi16(min, 
max); + VCOL = zero; + VCOH = zero; +#endif + } +} + +template +auto RSP::VADDC(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + u32 result = vs.u16(n) + vte.u16(n); + ACCL.u16(n) = result; + VCOL.set(n, result >> 16); + } + VCOH = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), sum; + sum = _mm_adds_epu16(vs, vte); + ACCL = _mm_add_epi16(vs, vte); + VCOL = _mm_cmpeq_epi16(sum, ACCL); + VCOL = _mm_cmpeq_epi16(VCOL, zero); + VCOH = zero; + vd = ACCL; +#endif + } +} + +template +auto RSP::VAND(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + r128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = vs.u16(n) & vte.u16(n); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_and_si128(vs, vt(e)); + vd = ACCL; +#endif + } +} + +template +auto RSP::VCH(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + if((vs.s16(n) ^ vte.s16(n)) < 0) { + s16 result = vs.s16(n) + vte.s16(n); + ACCL.s16(n) = (result <= 0 ? -vte.s16(n) : vs.s16(n)); + VCCL.set(n, result <= 0); + VCCH.set(n, vte.s16(n) < 0); + VCOL.set(n, 1); + VCOH.set(n, result != 0 && vs.u16(n) != (vte.u16(n) ^ 0xffff)); + VCE.set(n, result == -1); + } else { + s16 result = vs.s16(n) - vte.s16(n); + ACCL.s16(n) = (result >= 0 ? 
vte.s16(n) : vs.s16(n)); + VCCL.set(n, vte.s16(n) < 0); + VCCH.set(n, result >= 0); + VCOL.set(n, 0); + VCOH.set(n, result != 0 && vs.u16(n) != (vte.u16(n) ^ 0xffff)); + VCE.set(n, 0); + } + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), nvt, diff, diff0, vtn, dlez, dgez, mask; + VCOL = _mm_xor_si128(vs, vte); + VCOL = _mm_cmplt_epi16(VCOL, zero); + nvt = _mm_xor_si128(vte, VCOL); + nvt = _mm_sub_epi16(nvt, VCOL); + diff = _mm_sub_epi16(vs, nvt); + diff0 = _mm_cmpeq_epi16(diff, zero); + vtn = _mm_cmplt_epi16(vte, zero); + dlez = _mm_cmpgt_epi16(diff, zero); + dgez = _mm_or_si128(dlez, diff0); + dlez = _mm_cmpeq_epi16(zero, dlez); + VCCH = _mm_blendv_epi8(dgez, vtn, VCOL); + VCCL = _mm_blendv_epi8(vtn, dlez, VCOL); + VCE = _mm_cmpeq_epi16(diff, VCOL); + VCE = _mm_and_si128(VCE, VCOL); + VCOH = _mm_or_si128(diff0, VCE); + VCOH = _mm_cmpeq_epi16(VCOH, zero); + mask = _mm_blendv_epi8(VCCH, VCCL, VCOL); + ACCL = _mm_blendv_epi8(vs, nvt, mask); + vd = ACCL; +#endif + } +} + +template +auto RSP::VCL(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + if(VCOL.get(n)) { + if(VCOH.get(n)) { + ACCL.u16(n) = VCCL.get(n) ? -vte.u16(n) : vs.u16(n); + } else if(VCE.get(n)) { + ACCL.u16(n) = VCCL.set(n, vs.u16(n) + vte.u16(n) <= 0xffff) ? -vte.u16(n) : vs.u16(n); + } else { + ACCL.u16(n) = VCCL.set(n, vs.u16(n) + vte.u16(n) == 0) ? -vte.u16(n) : vs.u16(n); + } + } else { + if(VCOH.get(n)) { + ACCL.u16(n) = VCCH.get(n) ? vte.u16(n) : vs.u16(n); + } else { + ACCL.u16(n) = VCCH.set(n, (s32)vs.u16(n) - (s32)vte.u16(n) >= 0) ? 
vte.u16(n) : vs.u16(n); + } + } + } + VCOL = zero; + VCOH = zero; + VCE = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), nvt, diff, ncarry, nvce, diff0, lec1, lec2, leeq, geeq, le, ge, mask; + nvt = _mm_xor_si128(vte, VCOL); + nvt = _mm_sub_epi16(nvt, VCOL); + diff = _mm_sub_epi16(vs, nvt); + ncarry = _mm_adds_epu16(vs, vte); + ncarry = _mm_cmpeq_epi16(diff, ncarry); + nvce = _mm_cmpeq_epi16(VCE, zero); + diff0 = _mm_cmpeq_epi16(diff, zero); + lec1 = _mm_and_si128(diff0, ncarry); + lec1 = _mm_and_si128(nvce, lec1); + lec2 = _mm_or_si128(diff0, ncarry); + lec2 = _mm_and_si128(VCE, lec2); + leeq = _mm_or_si128(lec1, lec2); + geeq = _mm_subs_epu16(vte, vs); + geeq = _mm_cmpeq_epi16(geeq, zero); + le = _mm_andnot_si128(VCOH, VCOL); + le = _mm_blendv_epi8(VCCL, leeq, le); + ge = _mm_or_si128(VCOL, VCOH); + ge = _mm_blendv_epi8(geeq, VCCH, ge); + mask = _mm_blendv_epi8(ge, le, VCOL); + ACCL = _mm_blendv_epi8(vs, nvt, mask); + VCCH = ge; + VCCL = le; + VCOH = zero; + VCOL = zero; + VCE = zero; + vd = ACCL; +#endif + } +} + +template +auto RSP::VCR(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + if((vs.s16(n) ^ vte.s16(n)) < 0) { + VCCH.set(n, vte.s16(n) < 0); + ACCL.u16(n) = VCCL.set(n, vs.s16(n) + vte.s16(n) + 1 <= 0) ? ~vte.u16(n) : vs.u16(n); + } else { + VCCL.set(n, vte.s16(n) < 0); + ACCL.u16(n) = VCCH.set(n, vs.s16(n) - vte.s16(n) >= 0) ? 
vte.u16(n) : vs.u16(n); + } + } + VCOL = zero; + VCOH = zero; + VCE = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), sign, dlez, dgez, nvt, mask; + sign = _mm_xor_si128(vs, vte); + sign = _mm_srai_epi16(sign, 15); + dlez = _mm_and_si128(vs, sign); + dlez = _mm_add_epi16(dlez, vte); + VCCL = _mm_srai_epi16(dlez, 15); + dgez = _mm_or_si128(vs, sign); + dgez = _mm_min_epi16(dgez, vte); + VCCH = _mm_cmpeq_epi16(dgez, vte); + nvt = _mm_xor_si128(vte, sign); + mask = _mm_blendv_epi8(VCCH, VCCL, sign); + ACCL = _mm_blendv_epi8(vs, nvt, mask); + vd = ACCL; + VCOL = zero; + VCOH = zero; + VCE = zero; +#endif + } +} + +template +auto RSP::VEQ(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = VCCL.set(n, !VCE.get(n) && vs.u16(n) == vte.u16(n)) ? vs.u16(n) : vte.u16(n); + } + VCCH = zero; //unverified + VCOL = zero; + VCOH = zero; + VCE = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), eq; + eq = _mm_cmpeq_epi16(vs, vte); + VCCL = _mm_andnot_si128(VCOH, eq); + ACCL = _mm_blendv_epi8(vte, vs, VCCL); + VCCH = zero; //unverified + VCOH = zero; + VCOL = zero; + vd = ACCL; +#endif + } +} + +template +auto RSP::VGE(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = VCCL.set(n, vs.s16(n) > vte.s16(n) || (vs.s16(n) == vte.s16(n) && (!VCOL.get(n) || VCE.get(n)))) ? 
vs.u16(n) : vte.u16(n); + } + VCCH = zero; //unverified + VCOL = zero; + VCOH = zero; + VCE = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), eq, gt, es; + eq = _mm_cmpeq_epi16(vs, vte); + gt = _mm_cmpgt_epi16(vs, vte); + es = _mm_and_si128(VCOH, VCOL); + eq = _mm_andnot_si128(es, eq); + VCCL = _mm_or_si128(gt, eq); + ACCL = _mm_blendv_epi8(vte, vs, VCCL); + VCCH = zero; + VCOH = zero; + VCOL = zero; + vd = ACCL; +#endif + } +} + +template +auto RSP::VLT(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = VCCL.set(n, vs.s16(n) < vte.s16(n) || (vs.s16(n) == vte.s16(n) && VCOL.get(n) && !VCE.get(n))) ? vs.u16(n) : vte.u16(n); + } + VCCH = zero; //unverified + VCOL = zero; + VCOH = zero; + VCE = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), eq, lt; + eq = _mm_cmpeq_epi16(vs, vte); + lt = _mm_cmplt_epi16(vs, vte); + eq = _mm_and_si128(VCOH, eq); + eq = _mm_and_si128(VCOL, eq); + VCCL = _mm_or_si128(lt, eq); + ACCL = _mm_blendv_epi8(vte, vs, VCCL); + VCCH = zero; + VCOH = zero; + VCOL = zero; + vd = ACCL; +#endif + } +} + +template +auto RSP::VMACF(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, accumulatorGet(n) + vs.s16(n) * vte.s16(n) * 2); + if constexpr(U == 0) { + vd.u16(n) = accumulatorSaturate(n, 1, 0x8000, 0x7fff); + } + if constexpr(U == 1) { + vd.u16(n) = ACCH.s16(n) < 0 ? 0x0000 : ACCH.s16(n) || ACCM.s16(n) < 0 ? 
0xffff : ACCM.u16(n); + } + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), lo, md, hi, carry, omask; + lo = _mm_mullo_epi16(vs, vte); + hi = _mm_mulhi_epi16(vs, vte); + md = _mm_slli_epi16(hi, 1); + carry = _mm_srli_epi16(lo, 15); + hi = _mm_srai_epi16(hi, 15); + md = _mm_or_si128(md, carry); + lo = _mm_slli_epi16(lo, 1); + omask = _mm_adds_epu16(ACCL, lo); + ACCL = _mm_add_epi16(ACCL, lo); + omask = _mm_cmpeq_epi16(ACCL, omask); + omask = _mm_cmpeq_epi16(omask, zero); + md = _mm_sub_epi16(md, omask); + carry = _mm_cmpeq_epi16(md, zero); + carry = _mm_and_si128(carry, omask); + hi = _mm_sub_epi16(hi, carry); + omask = _mm_adds_epu16(ACCM, md); + ACCM = _mm_add_epi16(ACCM, md); + omask = _mm_cmpeq_epi16(ACCM, omask); + omask = _mm_cmpeq_epi16(omask, zero); + ACCH = _mm_add_epi16(ACCH, hi); + ACCH = _mm_sub_epi16(ACCH, omask); + if constexpr(!U) { + lo = _mm_unpacklo_epi16(ACCM, ACCH); + hi = _mm_unpackhi_epi16(ACCM, ACCH); + vd = _mm_packs_epi32(lo, hi); + } else { + r128 mmask, hmask; + mmask = _mm_srai_epi16(ACCM, 15); + hmask = _mm_srai_epi16(ACCH, 15); + md = _mm_or_si128(mmask, ACCM); + omask = _mm_cmpgt_epi16(ACCH, zero); + md = _mm_andnot_si128(hmask, md); + vd = _mm_or_si128(omask, md); + } +#endif + } +} + +auto RSP::VMACQ(r128& vd) -> void { + for(u32 n : range(8)) { + s32 product = ACCH.element(n) << 16 | ACCM.element(n) << 0; + if(product < 0 && !(product & 1 << 5)) product += 32; + else if(product > 0 && !(product & 1 << 5)) product -= 32; + ACCH.element(n) = product >> 16; + ACCM.element(n) = product >> 0; + ACCL.element(n) = 0; + vd.element(n) = sclamp<16>(product >> 1) & ~15; + } +} + +template +auto RSP::VMADH(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + s32 result = (accumulatorGet(n) >> 16) + vs.s16(n) * vte.s16(n); + ACCH.u16(n) = result >> 16; + ACCM.u16(n) = result >> 0; + vd.u16(n) = accumulatorSaturate(n, 1, 
0x8000, 0x7fff); + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), lo, hi, omask; + lo = _mm_mullo_epi16(vs, vte); + hi = _mm_mulhi_epi16(vs, vte); + omask = _mm_adds_epu16(ACCM, lo); + ACCM = _mm_add_epi16(ACCM, lo); + omask = _mm_cmpeq_epi16(ACCM, omask); + omask = _mm_cmpeq_epi16(omask, zero); + hi = _mm_sub_epi16(hi, omask); + ACCH = _mm_add_epi16(ACCH, hi); + lo = _mm_unpacklo_epi16(ACCM, ACCH); + hi = _mm_unpackhi_epi16(ACCM, ACCH); + vd = _mm_packs_epi32(lo, hi); +#endif + } +} + +template +auto RSP::VMADL(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, accumulatorGet(n) + (u32(vs.u16(n) * vte.u16(n)) >> 16)); + vd.u16(n) = accumulatorSaturate(n, 0, 0x0000, 0xffff); + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), hi, omask, nhi, nmd, shi, smd, cmask, cval; + hi = _mm_mulhi_epu16(vs, vte); + omask = _mm_adds_epu16(ACCL, hi); + ACCL = _mm_add_epi16(ACCL, hi); + omask = _mm_cmpeq_epi16(ACCL, omask); + omask = _mm_cmpeq_epi16(omask, zero); + hi = _mm_sub_epi16(zero, omask); + omask = _mm_adds_epu16(ACCM, hi); + ACCM = _mm_add_epi16(ACCM, hi); + omask = _mm_cmpeq_epi16(ACCM, omask); + omask = _mm_cmpeq_epi16(omask, zero); + ACCH = _mm_sub_epi16(ACCH, omask); + nhi = _mm_srai_epi16(ACCH, 15); + nmd = _mm_srai_epi16(ACCM, 15); + shi = _mm_cmpeq_epi16(nhi, ACCH); + smd = _mm_cmpeq_epi16(nhi, nmd); + cmask = _mm_and_si128(smd, shi); + cval = _mm_cmpeq_epi16(nhi, zero); + vd = _mm_blendv_epi8(cval, ACCL, cmask); +#endif + } +} + +template +auto RSP::VMADM(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, accumulatorGet(n) + vs.s16(n) * vte.u16(n)); + vd.u16(n) = accumulatorSaturate(n, 1, 0x8000, 0x7fff); + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if 
defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), lo, hi, sign, vta, omask; + lo = _mm_mullo_epi16(vs, vte); + hi = _mm_mulhi_epu16(vs, vte); + sign = _mm_srai_epi16(vs, 15); + vta = _mm_and_si128(vte, sign); + hi = _mm_sub_epi16(hi, vta); + omask = _mm_adds_epu16(ACCL, lo); + ACCL = _mm_add_epi16(ACCL, lo); + omask = _mm_cmpeq_epi16(ACCL, omask); + omask = _mm_cmpeq_epi16(omask, zero); + hi = _mm_sub_epi16(hi, omask); + omask = _mm_adds_epu16(ACCM, hi); + ACCM = _mm_add_epi16(ACCM, hi); + omask = _mm_cmpeq_epi16(ACCM, omask); + omask = _mm_cmpeq_epi16(omask, zero); + hi = _mm_srai_epi16(hi, 15); + ACCH = _mm_add_epi16(ACCH, hi); + ACCH = _mm_sub_epi16(ACCH, omask); + lo = _mm_unpacklo_epi16(ACCM, ACCH); + hi = _mm_unpackhi_epi16(ACCM, ACCH); + vd = _mm_packs_epi32(lo, hi); +#endif + } +} + +template +auto RSP::VMADN(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, accumulatorGet(n) + s64(vs.u16(n) * vte.s16(n))); + vd.u16(n) = accumulatorSaturate(n, 0, 0x0000, 0xffff); + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), lo, hi, sign, vsa, omask, nhi, nmd, shi, smd, cmask, cval; + lo = _mm_mullo_epi16(vs, vte); + hi = _mm_mulhi_epu16(vs, vte); + sign = _mm_srai_epi16(vte, 15); + vsa = _mm_and_si128(vs, sign); + hi = _mm_sub_epi16(hi, vsa); + omask = _mm_adds_epu16(ACCL, lo); + ACCL = _mm_add_epi16(ACCL, lo); + omask = _mm_cmpeq_epi16(ACCL, omask); + omask = _mm_cmpeq_epi16(omask, zero); + hi = _mm_sub_epi16(hi, omask); + omask = _mm_adds_epu16(ACCM, hi); + ACCM = _mm_add_epi16(ACCM, hi); + omask = _mm_cmpeq_epi16(ACCM, omask); + omask = _mm_cmpeq_epi16(omask, zero); + hi = _mm_srai_epi16(hi, 15); + ACCH = _mm_add_epi16(ACCH, hi); + ACCH = _mm_sub_epi16(ACCH, omask); + nhi = _mm_srai_epi16(ACCH, 15); + nmd = _mm_srai_epi16(ACCM, 15); + shi = _mm_cmpeq_epi16(nhi, ACCH); + smd = _mm_cmpeq_epi16(nhi, nmd); + cmask = 
_mm_and_si128(smd, shi); + cval = _mm_cmpeq_epi16(nhi, zero); + vd = _mm_blendv_epi8(cval, ACCL, cmask); +#endif + } +} + +template +auto RSP::VMOV(r128& vd, u8 de, cr128& vt) -> void { + u8 e = E; + switch(e) { + case 0x0 ... 0x1: e = e & 0b000 | de & 0b111; break; //hardware glitch + case 0x2 ... 0x3: e = e & 0b001 | de & 0b110; break; //hardware glitch + case 0x4 ... 0x7: e = e & 0b011 | de & 0b100; break; //hardware glitch + case 0x8 ... 0xf: e = e & 0b111 | de & 0b000; break; //normal behavior + } + vd.u16(de) = vt.u16(e); + ACCL = vt(e); +} + +template +auto RSP::VMRG(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = VCCL.get(n) ? vs.u16(n) : vte.u16(n); + } + VCOH = zero; + VCOL = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_blendv_epi8(vt(e), vs, VCCL); + VCOH = zero; + VCOL = zero; + vd = ACCL; +#endif + } +} + +template +auto RSP::VMUDH(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, s64(vs.s16(n) * vte.s16(n)) << 16); + vd.u16(n) = accumulatorSaturate(n, 1, 0x8000, 0x7fff); + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), lo, hi; + ACCL = zero; + ACCM = _mm_mullo_epi16(vs, vte); + ACCH = _mm_mulhi_epi16(vs, vte); + lo = _mm_unpacklo_epi16(ACCM, ACCH); + hi = _mm_unpackhi_epi16(ACCM, ACCH); + vd = _mm_packs_epi32(lo, hi); +#endif + } +} + +template +auto RSP::VMUDL(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, u16(vs.u16(n) * vte.u16(n) >> 16)); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_mulhi_epu16(vs, vt(e)); + ACCM = zero; + ACCH = zero; + vd = ACCL; +#endif + } +} + +template +auto 
RSP::VMUDM(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, s32(vs.s16(n) * vte.u16(n))); + } + vd = ACCM; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), sign, vta; + ACCL = _mm_mullo_epi16(vs, vte); + ACCM = _mm_mulhi_epu16(vs, vte); + sign = _mm_srai_epi16(vs, 15); + vta = _mm_and_si128(vte, sign); + ACCM = _mm_sub_epi16(ACCM, vta); + ACCH = _mm_srai_epi16(ACCM, 15); + vd = ACCM; +#endif + } +} + +template +auto RSP::VMUDN(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, s32(vs.u16(n) * vte.s16(n))); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), sign, vsa; + ACCL = _mm_mullo_epi16(vs, vte); + ACCM = _mm_mulhi_epu16(vs, vte); + sign = _mm_srai_epi16(vte, 15); + vsa = _mm_and_si128(vs, sign); + ACCM = _mm_sub_epi16(ACCM, vsa); + ACCH = _mm_srai_epi16(ACCM, 15); + vd = ACCL; +#endif + } +} + +template +auto RSP::VMULF(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + accumulatorSet(n, vs.s16(n) * vte.s16(n) * 2 + 0x8000); + if constexpr(U == 0) { + vd.u16(n) = ACCM.u16(n); + } + if constexpr(U == 1) { + vd.u16(n) = ACCH.s16(n) < 0 ? 0x0000 : (ACCH.s16(n) ^ ACCM.s16(n)) < 0 ? 
0xffff : ACCM.u16(n); + } + } + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), lo, hi, round, sign1, sign2, neq, eq, neg; + lo = _mm_mullo_epi16(vs, vte); + round = _mm_cmpeq_epi16(zero, zero); + sign1 = _mm_srli_epi16(lo, 15); + lo = _mm_add_epi16(lo, lo); + round = _mm_slli_epi16(round, 15); + hi = _mm_mulhi_epi16(vs, vte); + sign2 = _mm_srli_epi16(lo, 15); + ACCL = _mm_add_epi16(round, lo); + sign1 = _mm_add_epi16(sign1, sign2); + hi = _mm_slli_epi16(hi, 1); + neq = _mm_cmpeq_epi16(vs, vte); + ACCM = _mm_add_epi16(hi, sign1); + neg = _mm_srai_epi16(ACCM, 15); + if constexpr(!U) { + eq = _mm_and_si128(neq, neg); + ACCH = _mm_andnot_si128(neq, neg); + vd = _mm_add_epi16(ACCM, eq); + } else { + ACCH = _mm_andnot_si128(neq, neg); + hi = _mm_or_si128(ACCM, neg); + vd = _mm_andnot_si128(ACCH, hi); + } +#endif + } +} + +template +auto RSP::VMULQ(r128& vd, cr128& vs, cr128& vt) -> void { + cr128 vte = vt(e); + for(u32 n : range(8)) { + s32 product = (s16)vs.element(n) * (s16)vte.element(n); + if(product < 0) product += 31; //round + ACCH.element(n) = product >> 16; + ACCM.element(n) = product >> 0; + ACCL.element(n) = 0; + vd.element(n) = sclamp<16>(product >> 1) & ~15; + } +} + +template +auto RSP::VNAND(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = ~(vs.u16(n) & vte.u16(n)); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_and_si128(vs, vt(e)); + ACCL = _mm_xor_si128(ACCL, invert); + vd = ACCL; +#endif + } +} + +template +auto RSP::VNE(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = VCCL.set(n, vs.u16(n) != vte.u16(n) || VCE.get(n)) ? 
vs.u16(n) : vte.u16(n); + } + VCCH = zero; //unverified + VCOL = zero; + VCOH = zero; + VCE = zero; + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), eq, ne; + eq = _mm_cmpeq_epi16(vs, vte); + ne = _mm_cmpeq_epi16(eq, zero); + VCCL = _mm_and_si128(VCOH, eq); + VCCL = _mm_or_si128(VCCL, ne); + ACCL = _mm_blendv_epi8(vte, vs, VCCL); + VCCH = zero; + VCOH = zero; + VCOL = zero; + vd = ACCL; +#endif + } +} + +auto RSP::VNOP() -> void { +} + +template +auto RSP::VNOR(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = ~(vs.u16(n) | vte.u16(n)); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_or_si128(vs, vt(e)); + ACCL = _mm_xor_si128(ACCL, invert); + vd = ACCL; +#endif + } +} + +template +auto RSP::VNXOR(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = ~(vs.u16(n) ^ vte.u16(n)); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_xor_si128(vs, vt(e)); + ACCL = _mm_xor_si128(ACCL, invert); + vd = ACCL; +#endif + } +} + +template +auto RSP::VOR(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = vs.u16(n) | vte.u16(n); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_or_si128(vs, vt(e)); + vd = ACCL; +#endif + } +} + +template +auto RSP::VRCP(r128& vd, u8 de, cr128& vt) -> void { + s32 result = 0; + s32 input = L && DIVDP ? 
DIVIN << 16 | vt.element(e & 7) : s16(vt.element(e & 7)); + s32 mask = input >> 31; + s32 data = input ^ mask; + if(input > -32768) data -= mask; + if(data == 0) { + result = 0x7fff'ffff; + } else if(input == -32768) { + result = 0xffff'0000; + } else { + u32 shift = __builtin_clz(data); + u32 index = (u64(data) << shift & 0x7fc0'0000) >> 22; + result = reciprocals[index]; + result = (0x10000 | result) << 14; + result = result >> 31 - shift ^ mask; + } + DIVDP = 0; + DIVOUT = result >> 16; + ACCL = vt(e); + vd.element(de) = result; +} + +template +auto RSP::VRCPH(r128& vd, u8 de, cr128& vt) -> void { + ACCL = vt(e); + DIVDP = 1; + DIVIN = vt.element(e & 7); + vd.element(de) = DIVOUT; +} + +template +auto RSP::VRND(r128& vd, u8 vs, cr128& vt) -> void { + cr128 vte = vt(e); + for(u32 n : range(8)) { + s32 product = (s16)vte.element(n); + if(vs & 1) product <<= 16; + s64 acc = 0; + acc |= ACCH.element(n); acc <<= 16; + acc |= ACCM.element(n); acc <<= 16; + acc |= ACCL.element(n); acc <<= 16; + acc >>= 16; + if(D == 0 && acc < 0) acc += product; + if(D == 1 && acc >= 0) acc += product; + ACCH.element(n) = acc >> 32; + ACCM.element(n) = acc >> 16; + ACCL.element(n) = acc >> 0; + vd.element(n) = acc >> 16; + } +} + +template +auto RSP::VRSQ(r128& vd, u8 de, cr128& vt) -> void { + s32 result = 0; + s32 input = L && DIVDP ? 
DIVIN << 16 | vt.element(e & 7) : s16(vt.element(e & 7)); + s32 mask = input >> 31; + s32 data = input ^ mask; + if(input > -32768) data -= mask; + if(data == 0) { + result = 0x7fff'ffff; + } else if(input == -32768) { + result = 0xffff'0000; + } else { + u32 shift = __builtin_clz(data); + u32 index = (u64(data) << shift & 0x7fc0'0000) >> 22; + result = inverseSquareRoots[index & 0x1fe | shift & 1]; + result = (0x10000 | result) << 14; + result = result >> (31 - shift >> 1) ^ mask; + } + DIVDP = 0; + DIVOUT = result >> 16; + ACCL = vt(e); + vd.element(de) = result; +} + +template +auto RSP::VRSQH(r128& vd, u8 de, cr128& vt) -> void { + ACCL = vt(e); + DIVDP = 1; + DIVIN = vt.element(e & 7); + vd.element(de) = DIVOUT; +} + +template +auto RSP::VSAR(r128& vd, cr128& vs) -> void { + switch(e) { + case 0x8: vd = ACCH; break; + case 0x9: vd = ACCM; break; + case 0xa: vd = ACCL; break; + default: vd = zero; break; + } +} + +template +auto RSP::VSUB(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + s32 result = vs.s16(n) - vte.s16(n) - VCOL.get(n); + ACCL.s16(n) = result; + vd.s16(n) = sclamp<16>(result); + } + VCOL = zero; + VCOH = zero; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), udiff, sdiff, ov; + udiff = _mm_sub_epi16(vte, VCOL); + sdiff = _mm_subs_epi16(vte, VCOL); + ACCL = _mm_sub_epi16(vs, udiff); + ov = _mm_cmpgt_epi16(sdiff, udiff); + vd = _mm_subs_epi16(vs, sdiff); + vd = _mm_adds_epi16(vd, ov); + VCOL = zero; + VCOH = zero; +#endif + } +} + +template +auto RSP::VSUBC(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + u32 result = vs.u16(n) - vte.u16(n); + ACCL.u16(n) = result; + VCOL.set(n, result >> 16); + VCOH.set(n, result != 0); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + r128 vte = vt(e), equal, 
udiff, diff0; + udiff = _mm_subs_epu16(vs, vte); + equal = _mm_cmpeq_epi16(vs, vte); + diff0 = _mm_cmpeq_epi16(udiff, zero); + VCOH = _mm_cmpeq_epi16(equal, zero); + VCOL = _mm_andnot_si128(equal, diff0); + ACCL = _mm_sub_epi16(vs, vte); + vd = ACCL; +#endif + } +} + +template +auto RSP::VXOR(r128& vd, cr128& vs, cr128& vt) -> void { + if constexpr(Accuracy::RSP::SISD) { + cr128 vte = vt(e); + for(u32 n : range(8)) { + ACCL.u16(n) = vs.u16(n) ^ vte.u16(n); + } + vd = ACCL; + } + + if constexpr(Accuracy::RSP::SIMD) { +#if defined(ARCHITECTURE_AMD64) + ACCL = _mm_xor_si128(vs, vt(e)); + vd = ACCL; +#endif + } +} + +#undef ACCH +#undef ACCM +#undef ACCL +#undef VCOH +#undef VCOL +#undef VCCH +#undef VCCL +#undef VCE + +#undef DIVIN +#undef DIVOUT +#undef DIVDP diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter.cpp new file mode 100644 index 0000000000..18c71c97ec --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/interpreter.cpp @@ -0,0 +1,399 @@ +#define OP pipeline.instruction +#define RD ipu.r[RDn] +#define RT ipu.r[RTn] +#define RS ipu.r[RSn] +#define VD vpu.r[VDn] +#define VS vpu.r[VSn] +#define VT vpu.r[VTn] + +#define jp(id, name, ...) case id: return decoder##name(__VA_ARGS__) +#define op(id, name, ...) case id: return name(__VA_ARGS__) +#define br(id, name, ...) case id: return name(__VA_ARGS__) +#define vu(id, name, ...) 
case id: \ + switch(E) { \ + case 0x0: return name<0x0>(__VA_ARGS__); \ + case 0x1: return name<0x1>(__VA_ARGS__); \ + case 0x2: return name<0x2>(__VA_ARGS__); \ + case 0x3: return name<0x3>(__VA_ARGS__); \ + case 0x4: return name<0x4>(__VA_ARGS__); \ + case 0x5: return name<0x5>(__VA_ARGS__); \ + case 0x6: return name<0x6>(__VA_ARGS__); \ + case 0x7: return name<0x7>(__VA_ARGS__); \ + case 0x8: return name<0x8>(__VA_ARGS__); \ + case 0x9: return name<0x9>(__VA_ARGS__); \ + case 0xa: return name<0xa>(__VA_ARGS__); \ + case 0xb: return name<0xb>(__VA_ARGS__); \ + case 0xc: return name<0xc>(__VA_ARGS__); \ + case 0xd: return name<0xd>(__VA_ARGS__); \ + case 0xe: return name<0xe>(__VA_ARGS__); \ + case 0xf: return name<0xf>(__VA_ARGS__); \ + } + +#define SA (OP >> 6 & 31) +#define RDn (OP >> 11 & 31) +#define RTn (OP >> 16 & 31) +#define RSn (OP >> 21 & 31) +#define VDn (OP >> 6 & 31) +#define VSn (OP >> 11 & 31) +#define VTn (OP >> 16 & 31) +#define IMMi16 s16(OP) +#define IMMu16 u16(OP) +#define IMMu26 (OP & 0x03ff'ffff) + +auto RSP::decoderEXECUTE() -> void { + switch(OP >> 26) { + jp(0x00, SPECIAL); + jp(0x01, REGIMM); + br(0x02, J, IMMu26); + br(0x03, JAL, IMMu26); + br(0x04, BEQ, RS, RT, IMMi16); + br(0x05, BNE, RS, RT, IMMi16); + br(0x06, BLEZ, RS, IMMi16); + br(0x07, BGTZ, RS, IMMi16); + op(0x08, ADDIU, RT, RS, IMMi16); //ADDI + op(0x09, ADDIU, RT, RS, IMMi16); + op(0x0a, SLTI, RT, RS, IMMi16); + op(0x0b, SLTIU, RT, RS, IMMi16); + op(0x0c, ANDI, RT, RS, IMMu16); + op(0x0d, ORI, RT, RS, IMMu16); + op(0x0e, XORI, RT, RS, IMMu16); + op(0x0f, LUI, RT, IMMu16); + jp(0x10, SCC); + op(0x11, INVALID); //COP1 + jp(0x12, VU); + op(0x13, INVALID); //COP3 + op(0x14, INVALID); //BEQL + op(0x15, INVALID); //BNEL + op(0x16, INVALID); //BLEZL + op(0x17, INVALID); //BGTZL + op(0x18, INVALID); //DADDI + op(0x19, INVALID); //DADDIU + op(0x1a, INVALID); //LDL + op(0x1b, INVALID); //LDR + op(0x1c, INVALID); + op(0x1d, INVALID); + op(0x1e, INVALID); + op(0x1f, INVALID); + op(0x20, 
LB, RT, RS, IMMi16); + op(0x21, LH, RT, RS, IMMi16); + op(0x22, INVALID); //LWL + op(0x23, LW, RT, RS, IMMi16); + op(0x24, LBU, RT, RS, IMMi16); + op(0x25, LHU, RT, RS, IMMi16); + op(0x26, INVALID); //LWR + op(0x27, INVALID); //LWU + op(0x28, SB, RT, RS, IMMi16); + op(0x29, SH, RT, RS, IMMi16); + op(0x2a, INVALID); //SWL + op(0x2b, SW, RT, RS, IMMi16); + op(0x2c, INVALID); //SDL + op(0x2d, INVALID); //SDR + op(0x2e, INVALID); //SWR + op(0x2f, INVALID); //CACHE + op(0x30, INVALID); //LL + op(0x31, INVALID); //LWC1 + jp(0x32, LWC2); + op(0x33, INVALID); //LWC3 + op(0x34, INVALID); //LLD + op(0x35, INVALID); //LDC1 + op(0x36, INVALID); //LDC2 + op(0x37, INVALID); //LD + op(0x38, INVALID); //SC + op(0x39, INVALID); //SWC1 + jp(0x3a, SWC2); + op(0x3b, INVALID); //SWC3 + op(0x3c, INVALID); //SCD + op(0x3d, INVALID); //SDC1 + op(0x3e, INVALID); //SDC2 + op(0x3f, INVALID); //SD + } +} + +auto RSP::decoderSPECIAL() -> void { + switch(OP & 0x3f) { + op(0x00, SLL, RD, RT, SA); + op(0x01, INVALID); + op(0x02, SRL, RD, RT, SA); + op(0x03, SRA, RD, RT, SA); + op(0x04, SLLV, RD, RT, RS); + op(0x05, INVALID); + op(0x06, SRLV, RD, RT, RS); + op(0x07, SRAV, RD, RT, RS); + br(0x08, JR, RS); + br(0x09, JALR, RD, RS); + op(0x0a, INVALID); + op(0x0b, INVALID); + op(0x0c, INVALID); //SYSCALL + br(0x0d, BREAK); + op(0x0e, INVALID); + op(0x0f, INVALID); //SYNC + op(0x10, INVALID); //MFHI + op(0x11, INVALID); //MTHI + op(0x12, INVALID); //MFLO + op(0x13, INVALID); //MTLO + op(0x14, INVALID); //DSLLV + op(0x15, INVALID); + op(0x16, INVALID); //DSRLV + op(0x17, INVALID); //DSRAV + op(0x18, INVALID); //MULT + op(0x19, INVALID); //MULTU + op(0x1a, INVALID); //DIV + op(0x1b, INVALID); //DIVU + op(0x1c, INVALID); //DMULT + op(0x1d, INVALID); //DMULTU + op(0x1e, INVALID); //DDIV + op(0x1f, INVALID); //DDIVU + op(0x20, ADDU, RD, RS, RT); //ADD + op(0x21, ADDU, RD, RS, RT); + op(0x22, SUBU, RD, RS, RT); //SUB + op(0x23, SUBU, RD, RS, RT); + op(0x24, AND, RD, RS, RT); + op(0x25, OR, RD, RS, RT); + 
op(0x26, XOR, RD, RS, RT); + op(0x27, NOR, RD, RS, RT); + op(0x28, INVALID); + op(0x29, INVALID); + op(0x2a, SLT, RD, RS, RT); + op(0x2b, SLTU, RD, RS, RT); + op(0x2c, INVALID); //DADD + op(0x2d, INVALID); //DADDU + op(0x2e, INVALID); //DSUB + op(0x2f, INVALID); //DSUBU + op(0x30, INVALID); //TGE + op(0x31, INVALID); //TGEU + op(0x32, INVALID); //TLT + op(0x33, INVALID); //TLTU + op(0x34, INVALID); //TEQ + op(0x35, INVALID); + op(0x36, INVALID); //TNE + op(0x37, INVALID); + op(0x38, INVALID); //DSLL + op(0x39, INVALID); + op(0x3a, INVALID); //DSRL + op(0x3b, INVALID); //DSRA + op(0x3c, INVALID); //DSLL32 + op(0x3d, INVALID); + op(0x3e, INVALID); //DSRL32 + op(0x3f, INVALID); //DSRA32 + } +} + +auto RSP::decoderREGIMM() -> void { + switch(OP >> 16 & 0x1f) { + br(0x00, BLTZ, RS, IMMi16); + br(0x01, BGEZ, RS, IMMi16); + op(0x02, INVALID); //BLTZL + op(0x03, INVALID); //BGEZL + op(0x04, INVALID); + op(0x05, INVALID); + op(0x06, INVALID); + op(0x07, INVALID); + op(0x08, INVALID); //TGEI + op(0x09, INVALID); //TGEIU + op(0x0a, INVALID); //TLTI + op(0x0b, INVALID); //TLTIU + op(0x0c, INVALID); //TEQI + op(0x0d, INVALID); + op(0x0e, INVALID); //TNEI + op(0x0f, INVALID); + br(0x10, BLTZAL, RS, IMMi16); + br(0x11, BGEZAL, RS, IMMi16); + op(0x12, INVALID); //BLTZALL + op(0x13, INVALID); //BGEZALL + op(0x14, INVALID); + op(0x15, INVALID); + op(0x16, INVALID); + op(0x17, INVALID); + op(0x18, INVALID); + op(0x19, INVALID); + op(0x1a, INVALID); + op(0x1b, INVALID); + op(0x1c, INVALID); + op(0x1d, INVALID); + op(0x1e, INVALID); + op(0x1f, INVALID); + } +} + +auto RSP::decoderSCC() -> void { + switch(OP >> 21 & 0x1f) { + op(0x00, MFC0, RT, RDn); + op(0x01, INVALID); //DMFC0 + op(0x02, INVALID); //CFC0 + op(0x03, INVALID); + op(0x04, MTC0, RT, RDn); + op(0x05, INVALID); //DMTC0 + op(0x06, INVALID); //CTC0 + op(0x07, INVALID); + op(0x08, INVALID); //BC0 + op(0x09, INVALID); + op(0x0a, INVALID); + op(0x0b, INVALID); + op(0x0c, INVALID); + op(0x0d, INVALID); + op(0x0e, INVALID); + 
op(0x0f, INVALID); + } +} + +auto RSP::decoderVU() -> void { + #define E (OP >> 7 & 15) + switch(OP >> 21 & 0x1f) { + vu(0x00, MFC2, RT, VS); + op(0x01, INVALID); //DMFC2 + op(0x02, CFC2, RT, RDn); + op(0x03, INVALID); + vu(0x04, MTC2, RT, VS); + op(0x05, INVALID); //DMTC2 + op(0x06, CTC2, RT, RDn); + op(0x07, INVALID); + op(0x08, INVALID); //BC2 + op(0x09, INVALID); + op(0x0a, INVALID); + op(0x0b, INVALID); + op(0x0c, INVALID); + op(0x0d, INVALID); + op(0x0e, INVALID); + op(0x0f, INVALID); + } + #undef E + + #define E (OP >> 21 & 15) + #define DE (OP >> 11 & 7) + switch(OP & 0x3f) { + vu(0x00, VMULF, VD, VS, VT); + vu(0x01, VMULU, VD, VS, VT); + vu(0x02, VRNDP, VD, VSn, VT); + vu(0x03, VMULQ, VD, VS, VT); + vu(0x04, VMUDL, VD, VS, VT); + vu(0x05, VMUDM, VD, VS, VT); + vu(0x06, VMUDN, VD, VS, VT); + vu(0x07, VMUDH, VD, VS, VT); + vu(0x08, VMACF, VD, VS, VT); + vu(0x09, VMACU, VD, VS, VT); + vu(0x0a, VRNDN, VD, VSn, VT); + op(0x0b, VMACQ, VD); + vu(0x0c, VMADL, VD, VS, VT); + vu(0x0d, VMADM, VD, VS, VT); + vu(0x0e, VMADN, VD, VS, VT); + vu(0x0f, VMADH, VD, VS, VT); + vu(0x10, VADD, VD, VS, VT); + vu(0x11, VSUB, VD, VS, VT); + op(0x12, INVALID); + vu(0x13, VABS, VD, VS, VT); + vu(0x14, VADDC, VD, VS, VT); + vu(0x15, VSUBC, VD, VS, VT); + op(0x16, INVALID); + op(0x17, INVALID); + op(0x18, INVALID); + op(0x19, INVALID); + op(0x1a, INVALID); + op(0x1b, INVALID); + op(0x1c, INVALID); + vu(0x1d, VSAR, VD, VS); + op(0x1e, INVALID); + op(0x1f, INVALID); + vu(0x20, VLT, VD, VS, VT); + vu(0x21, VEQ, VD, VS, VT); + vu(0x22, VNE, VD, VS, VT); + vu(0x23, VGE, VD, VS, VT); + vu(0x24, VCL, VD, VS, VT); + vu(0x25, VCH, VD, VS, VT); + vu(0x26, VCR, VD, VS, VT); + vu(0x27, VMRG, VD, VS, VT); + vu(0x28, VAND, VD, VS, VT); + vu(0x29, VNAND, VD, VS, VT); + vu(0x2a, VOR, VD, VS, VT); + vu(0x2b, VNOR, VD, VS, VT); + vu(0x2c, VXOR, VD, VS, VT); + vu(0x2d, VNXOR, VD, VS, VT); + op(0x2e, INVALID); + op(0x2f, INVALID); + vu(0x30, VRCP, VD, DE, VT); + vu(0x31, VRCPL, VD, DE, VT); + vu(0x32, 
VRCPH, VD, DE, VT); + vu(0x33, VMOV, VD, DE, VT); + vu(0x34, VRSQ, VD, DE, VT); + vu(0x35, VRSQL, VD, DE, VT); + vu(0x36, VRSQH, VD, DE, VT); + op(0x37, VNOP); + op(0x38, INVALID); + op(0x39, INVALID); + op(0x3a, INVALID); + op(0x3b, INVALID); + op(0x3c, INVALID); + op(0x3d, INVALID); + op(0x3e, INVALID); + op(0x3f, INVALID); + } + #undef E + #undef DE +} + +auto RSP::decoderLWC2() -> void { + #define E (OP >> 7 & 15) + #define IMMi7 i7(OP) + switch(OP >> 11 & 0x1f) { + vu(0x00, LBV, VT, RS, IMMi7); + vu(0x01, LSV, VT, RS, IMMi7); + vu(0x02, LLV, VT, RS, IMMi7); + vu(0x03, LDV, VT, RS, IMMi7); + vu(0x04, LQV, VT, RS, IMMi7); + vu(0x05, LRV, VT, RS, IMMi7); + vu(0x06, LPV, VT, RS, IMMi7); + vu(0x07, LUV, VT, RS, IMMi7); + vu(0x08, LHV, VT, RS, IMMi7); + vu(0x09, LFV, VT, RS, IMMi7); +//vu(0x0a, LWV, VT, RS, IMMi7); //not present on N64 RSP + vu(0x0b, LTV, VTn, RS, IMMi7); + } + #undef E + #undef IMMi7 +} + +auto RSP::decoderSWC2() -> void { + #define E (OP >> 7 & 15) + #define IMMi7 i7(OP) + switch(OP >> 11 & 0x1f) { + vu(0x00, SBV, VT, RS, IMMi7); + vu(0x01, SSV, VT, RS, IMMi7); + vu(0x02, SLV, VT, RS, IMMi7); + vu(0x03, SDV, VT, RS, IMMi7); + vu(0x04, SQV, VT, RS, IMMi7); + vu(0x05, SRV, VT, RS, IMMi7); + vu(0x06, SPV, VT, RS, IMMi7); + vu(0x07, SUV, VT, RS, IMMi7); + vu(0x08, SHV, VT, RS, IMMi7); + vu(0x09, SFV, VT, RS, IMMi7); + vu(0x0a, SWV, VT, RS, IMMi7); + vu(0x0b, STV, VTn, RS, IMMi7); + } + #undef E + #undef IMMi7 +} + +auto RSP::INVALID() -> void { +} + +#undef SA +#undef RDn +#undef RTn +#undef RSn +#undef VDn +#undef VSn +#undef VTn +#undef IMMi16 +#undef IMMu16 +#undef IMMu26 + +#undef jp +#undef op +#undef br + +#undef OP +#undef RD +#undef RT +#undef RS +#undef VD +#undef VS +#undef VT diff --git a/waterbox/ares64/ares/ares/n64/rsp/io.cpp b/waterbox/ares64/ares/ares/n64/rsp/io.cpp new file mode 100644 index 0000000000..a6e6ab14e1 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/io.cpp @@ -0,0 +1,203 @@ +auto RSP::readWord(u32 address) -> u32 { + 
address = (address & 0x3ffff) >> 2; + n32 data; + + if(address == 0) { + //SP_PBUS_ADDRESS + data.bit( 0,11) = dma.pbusAddress; + data.bit(12) = dma.pbusRegion; + } + + if(address == 1) { + //SP_DRAM_ADDRESS + data.bit(0,23) = dma.dramAddress; + } + + if(address == 2) { + //SP_READ_LENGTH + data.bit( 0,11) = dma.read.length; + data.bit(12,19) = dma.read.count; + data.bit(20,31) = dma.read.skip; + } + + if(address == 3) { + //SP_WRITE_LENGTH + data.bit( 0,11) = dma.write.length; + data.bit(12,19) = dma.write.count; + data.bit(20,31) = dma.write.skip; + } + + if(address == 4) { + //SP_STATUS + data.bit( 0) = status.halted; + data.bit( 1) = status.broken; + data.bit( 2) = !dma.requests.empty(); + data.bit( 3) = dma.requests.full(); + data.bit( 4) = status.full; + data.bit( 5) = status.singleStep; + data.bit( 6) = status.interruptOnBreak; + data.bit( 7) = status.signal[0]; + data.bit( 8) = status.signal[1]; + data.bit( 9) = status.signal[2]; + data.bit(10) = status.signal[3]; + data.bit(11) = status.signal[4]; + data.bit(12) = status.signal[5]; + data.bit(13) = status.signal[6]; + data.bit(14) = status.signal[7]; + } + + if(address == 5) { + //SP_DMA_FULL + data.bit(0) = dma.requests.full(); + } + + if(address == 6) { + //SP_DMA_BUSY + data.bit(0) = !dma.requests.empty(); + } + + if(address == 7) { + //SP_SEMAPHORE + data.bit(0) = status.semaphore; + status.semaphore = 1; + } + + debugger.ioSCC(Read, address, data); + return data; +} + +auto RSP::writeWord(u32 address, u32 data_) -> void { + address = (address & 0x3ffff) >> 2; + n32 data = data_; + + if(address == 0) { + //SP_PBUS_ADDRESS + dma.pbusAddress = data.bit( 0,11); + dma.pbusRegion = data.bit(12); + } + + if(address == 1) { + //SP_DRAM_ADDRESS + dma.dramAddress = data.bit(0,23); + } + + if(address == 2) { + //SP_READ_LENGTH + dma.read.length = data.bit( 0,11); + dma.read.count = data.bit(12,19); + dma.read.skip = data.bit(20,31); + if(!dma.requests.full()) { + DMA::Request request; + request.type = 
DMA::Request::Type::Read; + request.pbusRegion = dma.pbusRegion; + request.pbusAddress = dma.pbusAddress & ~7; + request.dramAddress = dma.dramAddress & ~7; + request.length = 1 + (dma.read.length | 7); + request.count = 1 + (dma.read.count); + request.skip = dma.read.skip & ~7; + dma.requests.write(request); + queue.insert(Queue::RSP_DMA, request.length * request.count / 4); + } + } + + if(address == 3) { + //SP_WRITE_LENGTH + dma.write.length = data.bit( 0,11); + dma.write.count = data.bit(12,19); + dma.write.skip = data.bit(20,31); + if(!dma.requests.full()) { + DMA::Request request; + request.type = DMA::Request::Type::Write; + request.pbusRegion = dma.pbusRegion; + request.pbusAddress = dma.pbusAddress & ~7; + request.dramAddress = dma.dramAddress & ~7; + request.length = 1 + (dma.write.length | 7); + request.count = 1 + (dma.write.count); + request.skip = dma.write.skip & ~7; + dma.requests.write(request); + queue.insert(Queue::RSP_DMA, request.length * request.count / 4); + } + } + + if(address == 4) { + //SP_STATUS + if(data.bit( 0)) status.halted = 0; + if(data.bit( 1)) status.halted = 1; + if(data.bit( 2)) status.broken = 0; + if(data.bit( 3)) mi.lower(MI::IRQ::SP); + if(data.bit( 4)) mi.raise(MI::IRQ::SP); + if(data.bit( 5)) status.singleStep = 0; + if(data.bit( 6)) status.singleStep = 1; + if(data.bit( 7)) status.interruptOnBreak = 0; + if(data.bit( 8)) status.interruptOnBreak = 1; + if(data.bit( 9)) status.signal[0] = 0; + if(data.bit(10)) status.signal[0] = 1; + if(data.bit(11)) status.signal[1] = 0; + if(data.bit(12)) status.signal[1] = 1; + if(data.bit(13)) status.signal[2] = 0; + if(data.bit(14)) status.signal[2] = 1; + if(data.bit(15)) status.signal[3] = 0; + if(data.bit(16)) status.signal[3] = 1; + if(data.bit(17)) status.signal[4] = 0; + if(data.bit(18)) status.signal[4] = 1; + if(data.bit(19)) status.signal[5] = 0; + if(data.bit(20)) status.signal[5] = 1; + if(data.bit(21)) status.signal[6] = 0; + if(data.bit(22)) status.signal[6] = 1; + 
if(data.bit(23)) status.signal[7] = 0; + if(data.bit(24)) status.signal[7] = 1; + } + + if(address == 5) { + //SP_DMA_FULL (read-only) + } + + if(address == 6) { + //SP_DMA_BUSY (read-only) + } + + if(address == 7) { + //SP_SEMAPHORE + if(!data.bit(0)) status.semaphore = 0; + } + + debugger.ioSCC(Write, address, data); +} + +auto RSP::Status::readWord(u32 address) -> u32 { + address = (address & 0x7ffff) >> 2; + n32 data; + + if(address == 0) { + //SP_PC_REG + if(halted) { + data.bit(0,11) = self.ipu.pc; + } else { + data.bit(0,11) = random(); + } + } + + if(address == 1) { + //SP_IBIST + } + + self.debugger.ioStatus(Read, address, data); + return data; +} + +auto RSP::Status::writeWord(u32 address, u32 data_) -> void { + address = (address & 0x7ffff) >> 2; + n32 data = data_; + + if(address == 0) { + //SP_PC_REG + self.ipu.pc = data.bit(0,11) & ~3; + self.branch.reset(); + } + + if(address == 1) { + //SP_IBIST + } + + self.debugger.ioStatus(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/rsp/recompiler.cpp b/waterbox/ares64/ares/ares/n64/rsp/recompiler.cpp new file mode 100644 index 0000000000..0706221599 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/recompiler.cpp @@ -0,0 +1,1321 @@ +auto RSP::Recompiler::pool() -> Pool* { + if(context) return context; + + u32 hashcode = 0; + for(u32 offset : range(4096)) { + hashcode = (hashcode << 5) + hashcode + self.imem.read(offset); + } + + PoolHashPair pair; + pair.pool = (Pool*)allocator.acquire(); + pair.hashcode = hashcode; + if(auto result = pools.find(pair)) { + return context = result->pool; + } + + allocator.reserve(sizeof(Pool)); + if(auto result = pools.insert(pair)) { + return context = result->pool; + } + + throw; //should never occur +} + +auto RSP::Recompiler::block(u32 address) -> Block* { + if(auto block = pool()->blocks[address >> 2 & 0x3ff]) return block; + auto block = emit(address); + pool()->blocks[address >> 2 & 0x3ff] = block; + memory::jitprotect(true); + return block; +} 
+ +auto RSP::Recompiler::emit(u32 address) -> Block* { + if(unlikely(allocator.available() < 1_MiB)) { + print("RSP allocator flush\n"); + memory::jitprotect(false); + allocator.release(bump_allocator::zero_fill); + memory::jitprotect(true); + reset(); + } + + auto block = (Block*)allocator.acquire(sizeof(Block)); + beginFunction(3); + + bool hasBranched = 0; + while(true) { + u32 instruction = self.imem.read(address); + bool branched = emitEXECUTE(instruction); + call(&RSP::instructionEpilogue); + address += 4; + if(hasBranched || (address & 0xffc) == 0) break; //IMEM boundary + hasBranched = branched; + testJumpEpilog(); + } + jumpEpilog(); + + memory::jitprotect(false); + block->code = endFunction(); + +//print(hex(PC, 8L), " ", instructions, " ", size(), "\n"); + return block; +} + +#define Sa (instruction >> 6 & 31) +#define Rdn (instruction >> 11 & 31) +#define Rtn (instruction >> 16 & 31) +#define Rsn (instruction >> 21 & 31) +#define Vdn (instruction >> 6 & 31) +#define Vsn (instruction >> 11 & 31) +#define Vtn (instruction >> 16 & 31) +#define Rd sreg(1), offsetof(IPU, r) + Rdn * sizeof(r32) +#define Rt sreg(1), offsetof(IPU, r) + Rtn * sizeof(r32) +#define Rs sreg(1), offsetof(IPU, r) + Rsn * sizeof(r32) +#define Vd sreg(2), offsetof(VU, r) + Vdn * sizeof(r128) +#define Vs sreg(2), offsetof(VU, r) + Vsn * sizeof(r128) +#define Vt sreg(2), offsetof(VU, r) + Vtn * sizeof(r128) +#define i16 s16(instruction) +#define n16 u16(instruction) +#define n26 u32(instruction & 0x03ff'ffff) +#define callvu(name) \ + switch(E) { \ + case 0x0: call(name<0x0>); break; \ + case 0x1: call(name<0x1>); break; \ + case 0x2: call(name<0x2>); break; \ + case 0x3: call(name<0x3>); break; \ + case 0x4: call(name<0x4>); break; \ + case 0x5: call(name<0x5>); break; \ + case 0x6: call(name<0x6>); break; \ + case 0x7: call(name<0x7>); break; \ + case 0x8: call(name<0x8>); break; \ + case 0x9: call(name<0x9>); break; \ + case 0xa: call(name<0xa>); break; \ + case 0xb: call(name<0xb>); 
break; \ + case 0xc: call(name<0xc>); break; \ + case 0xd: call(name<0xd>); break; \ + case 0xe: call(name<0xe>); break; \ + case 0xf: call(name<0xf>); break; \ + } + +auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { + switch(instruction >> 26) { + + //SPECIAL + case 0x00: { + return emitSPECIAL(instruction); + } + + //REGIMM + case 0x01: { + return emitREGIMM(instruction); + } + + //J n26 + case 0x02: { + mov32(reg(1), imm(n26)); + call(&RSP::J); + return 1; + } + + //JAL n26 + case 0x03: { + mov32(reg(1), imm(n26)); + call(&RSP::JAL); + return 1; + } + + //BEQ Rs,Rt,i16 + case 0x04: { + lea(reg(1), Rs); + lea(reg(2), Rt); + mov32(reg(3), imm(i16)); + call(&RSP::BEQ); + return 1; + } + + //BNE Rs,Rt,i16 + case 0x05: { + lea(reg(1), Rs); + lea(reg(2), Rt); + mov32(reg(3), imm(i16)); + call(&RSP::BNE); + return 1; + } + + //BLEZ Rs,i16 + case 0x06: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&RSP::BLEZ); + return 1; + } + + //BGTZ Rs,i16 + case 0x07: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&RSP::BGTZ); + return 1; + } + + //ADDIU Rt,Rs,i16 + case 0x08 ... 0x09: { + add32(mem(Rt), mem(Rs), imm(i16)); + return 0; + } + + //SLTI Rt,Rs,i16 + case 0x0a: { + cmp32(mem(Rs), imm(i16), set_slt); + mov32_f(mem(Rt), flag_slt); + return 0; + } + + //SLTIU Rt,Rs,i16 + case 0x0b: { + cmp32(mem(Rs), imm(i16), set_ult); + mov32_f(mem(Rt), flag_ult); + return 0; + } + + //ANDI Rt,Rs,n16 + case 0x0c: { + and32(mem(Rt), mem(Rs), imm(n16)); + return 0; + } + + //ORI Rt,Rs,n16 + case 0x0d: { + or32(mem(Rt), mem(Rs), imm(n16)); + return 0; + } + + //XORI Rt,Rs,n16 + case 0x0e: { + xor32(mem(Rt), mem(Rs), imm(n16)); + return 0; + } + + //LUI Rt,n16 + case 0x0f: { + mov32(mem(Rt), imm(s32(n16 << 16))); + return 0; + } + + //SCC + case 0x10: { + return emitSCC(instruction); + } + + //INVALID + case 0x11: { + return 0; + } + + //VPU + case 0x12: { + return emitVU(instruction); + } + + //INVALID + case 0x13 ... 
0x1f: { + return 0; + } + + //LB Rt,Rs,i16 + case 0x20: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::LB); + return 0; + } + + //LH Rt,Rs,i16 + case 0x21: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::LH); + return 0; + } + + //INVALID + case 0x22: { + return 0; + } + + //LW Rt,Rs,i16 + case 0x23: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::LW); + return 0; + } + + //LBU Rt,Rs,i16 + case 0x24: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::LBU); + return 0; + } + + //LHU Rt,Rs,i16 + case 0x25: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::LHU); + return 0; + } + + //INVALID + case 0x26 ... 0x27: { + return 0; + } + + //SB Rt,Rs,i16 + case 0x28: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::SB); + return 0; + } + + //SH Rt,Rs,i16 + case 0x29: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::SH); + return 0; + } + + //INVALID + case 0x2a: { + return 0; + } + + //SW Rt,Rs,i16 + case 0x2b: { + lea(reg(1), Rt); + lea(reg(2), Rs); + mov32(reg(3), imm(i16)); + call(&RSP::SW); + return 0; + } + + //INVALID + case 0x2c ... 0x31: { + return 0; + } + + //LWC2 + case 0x32: { + return emitLWC2(instruction); + } + + //INVALID + case 0x33 ... 0x39: { + return 0; + } + + //SWC2 + case 0x3a: { + return emitSWC2(instruction); + } + + //INVALID + case 0x3b ... 
0x3f: { + return 0; + } + + } + + return 0; +} + +auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool { + switch(instruction & 0x3f) { + + //SLL Rd,Rt,Sa + case 0x00: { + shl32(mem(Rd), mem(Rt), imm(Sa)); + return 0; + } + + //INVALID + case 0x01: { + return 0; + } + + //SRL Rd,Rt,Sa + case 0x02: { + lshr32(mem(Rd), mem(Rt), imm(Sa)); + return 0; + } + + //SRA Rd,Rt,Sa + case 0x03: { + ashr32(mem(Rd), mem(Rt), imm(Sa)); + return 0; + } + + //SLLV Rd,Rt,Rs + case 0x04: { + and32(reg(0), mem(Rs), imm(31)); + shl32(mem(Rd), mem(Rt), reg(0)); + return 0; + } + + //INVALID + case 0x05: { + return 0; + } + + //SRLV Rd,Rt,Rs + case 0x06: { + and32(reg(0), mem(Rs), imm(31)); + lshr32(mem(Rd), mem(Rt), reg(0)); + return 0; + } + + //SRAV Rd,Rt,Rs + case 0x07: { + and32(reg(0), mem(Rs), imm(31)); + ashr32(mem(Rd), mem(Rt), reg(0)); + return 0; + } + + //JR Rs + case 0x08: { + lea(reg(1), Rs); + call(&RSP::JR); + return 1; + } + + //JALR Rd,Rs + case 0x09: { + lea(reg(1), Rd); + lea(reg(2), Rs); + call(&RSP::JALR); + return 1; + } + + //INVALID + case 0x0a ... 0x0c: { + return 0; + } + + //BREAK + case 0x0d: { + call(&RSP::BREAK); + return 1; + } + + //INVALID + case 0x0e ... 0x1f: { + return 0; + } + + //ADDU Rd,Rs,Rt + case 0x20 ... 0x21: { + add32(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //SUBU Rd,Rs,Rt + case 0x22 ... 0x23: { + sub32(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //AND Rd,Rs,Rt + case 0x24: { + and32(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //OR Rd,Rs,Rt + case 0x25: { + or32(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //XOR Rd,Rs,Rt + case 0x26: { + xor32(mem(Rd), mem(Rs), mem(Rt)); + return 0; + } + + //NOR Rd,Rs,Rt + case 0x27: { + or32(reg(0), mem(Rs), mem(Rt)); + not32(reg(0), reg(0)); + mov32(mem(Rd), reg(0)); + return 0; + } + + //INVALID + case 0x28 ... 
0x29: { + return 0; + } + + //SLT Rd,Rs,Rt + case 0x2a: { + cmp32(mem(Rs), mem(Rt), set_slt); + mov32_f(mem(Rd), flag_slt); + return 0; + } + + //SLTU Rd,Rs,Rt + case 0x2b: { + cmp32(mem(Rs), mem(Rt), set_ult); + mov32_f(mem(Rd), flag_ult); + return 0; + } + + //INVALID + case 0x2c ... 0x3f: { + return 0; + } + + } + + return 0; +} + +auto RSP::Recompiler::emitREGIMM(u32 instruction) -> bool { + switch(instruction >> 16 & 0x1f) { + + //BLTZ Rs,i16 + case 0x00: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&RSP::BLTZ); + return 1; + } + + //BGEZ Rs,i16 + case 0x01: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&RSP::BGEZ); + return 1; + } + + //INVALID + case 0x02 ... 0x0f: { + return 0; + } + + //BLTZAL Rs,i16 + case 0x10: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&RSP::BLTZAL); + return 1; + } + + //BGEZAL Rs,i16 + case 0x11: { + lea(reg(1), Rs); + mov32(reg(2), imm(i16)); + call(&RSP::BGEZAL); + return 1; + } + + //INVALID + case 0x12 ... 0x1f: { + return 0; + } + + } + + return 0; +} + +auto RSP::Recompiler::emitSCC(u32 instruction) -> bool { + switch(instruction >> 21 & 0x1f) { + + //MFC0 Rt,Rd + case 0x00: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&RSP::MFC0); + return 0; + } + + //INVALID + case 0x01 ... 0x03: { + return 0; + } + + //MTC0 Rt,Rd + case 0x04: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&RSP::MTC0); + return 0; + } + + //INVALID + case 0x05 ... 
0x1f: { + return 0; + } + + } + + return 0; +} + +auto RSP::Recompiler::emitVU(u32 instruction) -> bool { + #define E (instruction >> 7 & 15) + switch(instruction >> 21 & 0x1f) { + + //MFC2 Rt,Vs(e) + case 0x00: { + lea(reg(1), Rt); + lea(reg(2), Vs); + callvu(&RSP::MFC2); + return 0; + } + + //INVALID + case 0x01: { + return 0; + } + + //CFC2 Rt,Rd + case 0x02: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&RSP::CFC2); + return 0; + } + + //INVALID + case 0x03: { + return 0; + } + + //MTC2 Rt,Vs(e) + case 0x04: { + lea(reg(1), Rt); + lea(reg(2), Vs); + callvu(&RSP::MTC2); + return 0; + } + + //INVALID + case 0x05: { + return 0; + } + + //CTC2 Rt,Rd + case 0x06: { + lea(reg(1), Rt); + mov32(reg(2), imm(Rdn)); + call(&RSP::CTC2); + return 0; + } + + //INVALID + case 0x07 ... 0x0f: { + return 0; + } + + } + #undef E + + #define E (instruction >> 21 & 15) + #define DE (instruction >> 11 & 7) + switch(instruction & 0x3f) { + + //VMULF Vd,Vs,Vt(e) + case 0x00: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMULF); + return 0; + } + + //VMULU Vd,Vs,Vt(e) + case 0x01: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMULU); + return 0; + } + + //VRNDP Vd,Vs,Vt(e) + case 0x02: { + lea(reg(1), Vd); + mov32(reg(2), imm(Vsn)); + lea(reg(3), Vt); + callvu(&RSP::VRNDP); + return 0; + } + + //VMULQ Vd,Vs,Vt(e) + case 0x03: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMULQ); + return 0; + } + + //VMUDL Vd,Vs,Vt(e) + case 0x04: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMUDL); + return 0; + } + + //VMUDM Vd,Vs,Vt(e) + case 0x05: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMUDM); + return 0; + } + + //VMUDN Vd,Vs,Vt(e) + case 0x06: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMUDN); + return 0; + } + + //VMUDH Vd,Vs,Vt(e) + case 0x07: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + 
callvu(&RSP::VMUDH); + return 0; + } + + //VMACF Vd,Vs,Vt(e) + case 0x08: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMACF); + return 0; + } + + //VMACU Vd,Vs,Vt(e) + case 0x09: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMACU); + return 0; + } + + //VRNDN Vd,Vs,Vt(e) + case 0x0a: { + lea(reg(1), Vd); + mov32(reg(2), imm(Vsn)); + lea(reg(3), Vt); + callvu(&RSP::VRNDN); + return 0; + } + + //VMACQ Vd + case 0x0b: { + lea(reg(1), Vd); + call(&RSP::VMACQ); + return 0; + } + + //VMADL Vd,Vs,Vt(e) + case 0x0c: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMADL); + return 0; + } + + //VMADM Vd,Vs,Vt(e) + case 0x0d: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMADM); + return 0; + } + + //VMADN Vd,Vs,Vt(e) + case 0x0e: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMADN); + return 0; + } + + //VMADH Vd,Vs,Vt(e) + case 0x0f: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMADH); + return 0; + } + + //VADD Vd,Vs,Vt(e) + case 0x10: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VADD); + return 0; + } + + //VSUB Vd,Vs,Vt(e) + case 0x11: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VSUB); + return 0; + } + + //INVALID + case 0x12: { + return 0; + } + + //VABS Vd,Vs,Vt(e) + case 0x13: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VABS); + return 0; + } + + //VADDC Vd,Vs,Vt(e) + case 0x14: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VADDC); + return 0; + } + + //VSUBC Vd,Vs,Vt(e) + case 0x15: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VSUBC); + return 0; + } + + //INVALID + case 0x16 ... 0x1c: { + return 0; + } + + //VSAR Vd,Vs,E + case 0x1d: { + lea(reg(1), Vd); + lea(reg(2), Vs); + callvu(&RSP::VSAR); + return 0; + } + + //INVALID + case 0x1e ... 
0x1f: { + return 0; + } + + //VLT Vd,Vs,Vt(e) + case 0x20: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VLT); + return 0; + } + + //VEQ Vd,Vs,Vt(e) + case 0x21: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VEQ); + return 0; + } + + //VNE Vd,Vs,Vt(e) + case 0x22: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VNE); + return 0; + } + + //VGE Vd,Vs,Vt(e) + case 0x23: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VGE); + return 0; + } + + //VCL Vd,Vs,Vt(e) + case 0x24: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VCL); + return 0; + } + + //VCH Vd,Vs,Vt(e) + case 0x25: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VCH); + return 0; + } + + //VCR Vd,Vs,Vt(e) + case 0x26: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VCR); + return 0; + } + + //VMRG Vd,Vs,Vt(e) + case 0x27: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VMRG); + return 0; + } + + //VAND Vd,Vs,Vt(e) + case 0x28: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VAND); + return 0; + } + + //VNAND Vd,Vs,Vt(e) + case 0x29: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VNAND); + return 0; + } + + //VOR Vd,Vs,Vt(e) + case 0x2a: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VOR); + return 0; + } + + //VNOR Vd,Vs,Vt(e) + case 0x2b: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VNOR); + return 0; + } + + //VXOR Vd,Vs,Vt(e) + case 0x2c: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VXOR); + return 0; + } + + //VNXOR Vd,Vs,Vt(e) + case 0x2d: { + lea(reg(1), Vd); + lea(reg(2), Vs); + lea(reg(3), Vt); + callvu(&RSP::VNXOR); + return 0; + } + + //INVALID + case 0x2e ... 
0x2f: { + return 0; + } + + //VCRP Vd(de),Vt(e) + case 0x30: { + lea(reg(1), Vd); + mov32(reg(2), imm(DE)); + lea(reg(3), Vt); + callvu(&RSP::VRCP); + return 0; + } + + //VRCPL Vd(de),Vt(e) + case 0x31: { + lea(reg(1), Vd); + mov32(reg(2), imm(DE)); + lea(reg(3), Vt); + callvu(&RSP::VRCPL); + return 0; + } + + //VRCPH Vd(de),Vt(e) + case 0x32: { + lea(reg(1), Vd); + mov32(reg(2), imm(DE)); + lea(reg(3), Vt); + callvu(&RSP::VRCPH); + return 0; + } + + //VMOV Vd(de),Vt(e) + case 0x33: { + lea(reg(1), Vd); + mov32(reg(2), imm(DE)); + lea(reg(3), Vt); + callvu(&RSP::VMOV); + return 0; + } + + //VRSQ Vd(de),Vt(e) + case 0x34: { + lea(reg(1), Vd); + mov32(reg(2), imm(DE)); + lea(reg(3), Vt); + callvu(&RSP::VRSQ); + return 0; + } + + //VRSQL Vd(de),Vt(e) + case 0x35: { + lea(reg(1), Vd); + mov32(reg(2), imm(DE)); + lea(reg(3), Vt); + callvu(&RSP::VRSQL); + return 0; + } + + //VRSQH Vd(de),Vt(e) + case 0x36: { + lea(reg(1), Vd); + mov32(reg(2), imm(DE)); + lea(reg(3), Vt); + callvu(&RSP::VRSQH); + return 0; + } + + //VNOP + case 0x37: { + call(&RSP::VNOP); + } + + //INVALID + case 0x38 ... 
0x3f: { + return 0; + } + + } + #undef E + #undef DE + + return 0; +} + +auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { + #define E (instruction >> 7 & 15) + #define i7 (s8(instruction << 1) >> 1) + switch(instruction >> 11 & 0x1f) { + + //LBV Vt(e),Rs,i7 + case 0x00: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LBV); + return 0; + } + + //LSV Vt(e),Rs,i7 + case 0x01: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LSV); + return 0; + } + + //LLV Vt(e),Rs,i7 + case 0x02: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LLV); + return 0; + } + + //LDV Vt(e),Rs,i7 + case 0x03: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LDV); + return 0; + } + + //LQV Vt(e),Rs,i7 + case 0x04: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LQV); + return 0; + } + + //LRV Vt(e),Rs,i7 + case 0x05: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LRV); + return 0; + } + + //LPV Vt(e),Rs,i7 + case 0x06: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LPV); + return 0; + } + + //LUV Vt(e),Rs,i7 + case 0x07: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LUV); + return 0; + } + + //LHV Vt(e),Rs,i7 + case 0x08: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LHV); + return 0; + } + + //LFV Vt(e),Rs,i7 + case 0x09: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LFV); + return 0; + } + + //LWV (not present on N64 RSP) + case 0x0a: { + return 0; + } + + //LTV Vt(e),Rs,i7 + case 0x0b: { + mov32(reg(1), imm(Vtn)); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::LTV); + return 0; + } + + //INVALID + case 0x0c ... 
0x1f: { + return 0; + } + + } + #undef E + #undef i7 + + return 0; +} + +auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { + #define E (instruction >> 7 & 15) + #define i7 (s8(instruction << 1) >> 1) + switch(instruction >> 11 & 0x1f) { + + //SBV Vt(e),Rs,i7 + case 0x00: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SBV); + return 0; + } + + //SSV Vt(e),Rs,i7 + case 0x01: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SSV); + return 0; + } + + //SLV Vt(e),Rs,i7 + case 0x02: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SLV); + return 0; + } + + //SDV Vt(e),Rs,i7 + case 0x03: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SDV); + return 0; + } + + //SQV Vt(e),Rs,i7 + case 0x04: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SQV); + return 0; + } + + //SRV Vt(e),Rs,i7 + case 0x05: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SRV); + return 0; + } + + //SPV Vt(e),Rs,i7 + case 0x06: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SPV); + return 0; + } + + //SUV Vt(e),Rs,i7 + case 0x07: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SUV); + return 0; + } + + //SHV Vt(e),Rs,i7 + case 0x08: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SHV); + return 0; + } + + //SFV Vt(e),Rs,i7 + case 0x09: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SFV); + return 0; + } + + //SWV Vt(e),Rs,i7 + case 0x0a: { + lea(reg(1), Vt); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::SWV); + return 0; + } + + //STV Vt(e),Rs,i7 + case 0x0b: { + mov32(reg(1), imm(Vtn)); + lea(reg(2), Rs); + mov32(reg(3), imm(i7)); + callvu(&RSP::STV); + return 0; + } + + //INVALID + case 0x0c ... 
0x1f: { + return 0; + } + + } + #undef E + #undef i7 + + return 0; +} + +#undef Sa +#undef Rdn +#undef Rtn +#undef Rsn +#undef Vdn +#undef Vsn +#undef Vtn +#undef Rd +#undef Rt +#undef Rs +#undef Vd +#undef Vs +#undef Vt +#undef i16 +#undef n16 +#undef n26 +#undef callvu diff --git a/waterbox/ares64/ares/ares/n64/rsp/rsp.cpp b/waterbox/ares64/ares/ares/n64/rsp/rsp.cpp new file mode 100644 index 0000000000..fed7d53238 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/rsp.cpp @@ -0,0 +1,124 @@ +#include + +namespace ares::Nintendo64 { + +RSP rsp; +#include "dma.cpp" +#include "io.cpp" +#include "interpreter.cpp" +#include "interpreter-ipu.cpp" +#include "interpreter-scc.cpp" +#include "interpreter-vpu.cpp" +#include "recompiler.cpp" +#include "debugger.cpp" +#include "serialization.cpp" +#include "disassembler.cpp" + +auto RSP::load(Node::Object parent) -> void { + node = parent->append("RSP"); + dmem.allocate(4_KiB); + imem.allocate(4_KiB); + debugger.load(node); +} + +auto RSP::unload() -> void { + debugger.unload(); + dmem.reset(); + imem.reset(); + node.reset(); +} + +auto RSP::main() -> void { + if(status.halted) return step(128); + instruction(); +} + +auto RSP::step(u32 clocks) -> void { + Thread::clock += clocks; +} + +auto RSP::instruction() -> void { + if constexpr(Accuracy::RSP::Recompiler) { + auto block = recompiler.block(ipu.pc); + block->execute(*this); + } + + if constexpr(Accuracy::RSP::Interpreter) { + pipeline.address = ipu.pc; + pipeline.instruction = imem.read(pipeline.address); + debugger.instruction(); + decoderEXECUTE(); + instructionEpilogue(); + step(3); + } +} + +auto RSP::instructionEpilogue() -> s32 { + if constexpr(Accuracy::RSP::Recompiler) { + step(3); + } + + ipu.r[0].u32 = 0; + + switch(branch.state) { + case Branch::Step: ipu.pc += 4; return status.halted; + case Branch::Take: ipu.pc += 4; branch.delaySlot(); return status.halted; + case Branch::DelaySlot: ipu.pc = branch.pc; branch.reset(); return 1; + } + + unreachable; +} + 
+auto RSP::power(bool reset) -> void { + Thread::reset(); + dmem.fill(); + imem.fill(); + + pipeline = {}; + dma = {}; + status.semaphore = 0; + status.halted = 1; + status.broken = 0; + status.full = 0; + status.singleStep = 0; + status.interruptOnBreak = 0; + for(auto& signal : status.signal) signal = 0; + for(auto& r : ipu.r) r.u32 = 0; + ipu.pc = 0; + branch = {}; + for(auto& r : vpu.r) r.u128 = 0; + vpu.acch.u128 = 0; + vpu.accm.u128 = 0; + vpu.accl.u128 = 0; + vpu.vcoh.u128 = 0; + vpu.vcol.u128 = 0; + vpu.vcch.u128 = 0; + vpu.vccl.u128 = 0; + vpu.vce.u128 = 0; + vpu.divin = 0; + vpu.divout = 0; + vpu.divdp = 0; + + reciprocals[0] = u16(~0); + for(u16 index : range(1, 512)) { + u64 a = index + 512; + u64 b = (u64(1) << 34) / a; + reciprocals[index] = u16(b + 1 >> 8); + } + + for(u16 index : range(0, 512)) { + u64 a = index + 512 >> (index % 2 == 1); + u64 b = 1 << 17; + //find the largest b where b < 1.0 / sqrt(a) + while(a * (b + 1) * (b + 1) < (u64(1) << 44)) b++; + inverseSquareRoots[index] = u16(b >> 1); + } + + if constexpr(Accuracy::RSP::Recompiler) { + auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(64_MiB); + recompiler.allocator.resize(64_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer); + recompiler.reset(); + } +} + +} diff --git a/waterbox/ares64/ares/ares/n64/rsp/rsp.hpp b/waterbox/ares64/ares/ares/n64/rsp/rsp.hpp new file mode 100644 index 0000000000..55b5169bf2 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/rsp.hpp @@ -0,0 +1,415 @@ +//Reality Signal Processor + +struct RSP : Thread, Memory::IO { + Node::Object node; + Memory::Writable dmem; + Memory::Writable imem; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + + auto instruction() -> void; + auto ioSCC(bool mode, u32 address, u32 data) -> void; + auto ioStatus(bool mode, u32 address, u32 data) -> void; + + struct Memory { + Node::Debugger::Memory dmem; + Node::Debugger::Memory imem; + } memory; + + 
struct Tracer { + Node::Debugger::Tracer::Instruction instruction; + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //rsp.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + + auto main() -> void; + auto step(u32 clocks) -> void; + + auto instruction() -> void; + auto instructionEpilogue() -> s32; + + auto power(bool reset) -> void; + + struct Pipeline { + u32 address; + u32 instruction; + } pipeline; + + //dma.cpp + auto dmaTransfer() -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct DMA { + n1 pbusRegion; + n12 pbusAddress; + n24 dramAddress; + + struct Transfer { + n12 length; + n12 skip; + n8 count; + } read, write; + + struct Request { + //serialization.cpp + auto serialize(serializer&) -> void; + + enum class Type : u32 { Read, Write } type; + n1 pbusRegion; + n12 pbusAddress; + n24 dramAddress; + n16 length; + n16 skip; + n16 count; + }; + nall::queue requests; + } dma; + + struct Status : Memory::IO { + RSP& self; + Status(RSP& self) : self(self) {} + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + n1 semaphore; + n1 halted = 1; + n1 broken; + n1 full; + n1 singleStep; + n1 interruptOnBreak; + n1 signal[8]; + } status{*this}; + + //ipu.cpp + union r32 { + struct { int32_t s32; }; + struct { uint32_t u32; }; + }; + using cr32 = const r32; + + struct IPU { + enum Register : u32 { + R0, AT, V0, V1, A0, A1, A2, A3, + T0, T1, T2, T3, T4, T5, T6, T7, + S0, S1, S2, S3, S4, S5, S6, S7, + T8, T9, K0, K1, GP, SP, S8, RA, + }; + + r32 r[32]; + u32 pc; + } ipu; + + struct Branch { + enum : u32 { Step, Take, DelaySlot }; + + auto inDelaySlot() const -> bool { return state == DelaySlot; } + auto reset() -> void { state = Step; } + auto take(u32 address) -> void { state = Take; pc = address; } + auto delaySlot() -> void { state = DelaySlot; } + + u64 pc 
= 0; + u32 state = Step; + } branch; + + //cpu-instructions.cpp + auto ADDIU(r32& rt, cr32& rs, s16 imm) -> void; + auto ADDU(r32& rd, cr32& rs, cr32& rt) -> void; + auto AND(r32& rd, cr32& rs, cr32& rt) -> void; + auto ANDI(r32& rt, cr32& rs, u16 imm) -> void; + auto BEQ(cr32& rs, cr32& rt, s16 imm) -> void; + auto BGEZ(cr32& rs, s16 imm) -> void; + auto BGEZAL(cr32& rs, s16 imm) -> void; + auto BGTZ(cr32& rs, s16 imm) -> void; + auto BLEZ(cr32& rs, s16 imm) -> void; + auto BLTZ(cr32& rs, s16 imm) -> void; + auto BLTZAL(cr32& rs, s16 imm) -> void; + auto BNE(cr32& rs, cr32& rt, s16 imm) -> void; + auto BREAK() -> void; + auto J(u32 imm) -> void; + auto JAL(u32 imm) -> void; + auto JALR(r32& rd, cr32& rs) -> void; + auto JR(cr32& rs) -> void; + auto LB(r32& rt, cr32& rs, s16 imm) -> void; + auto LBU(r32& rt, cr32& rs, s16 imm) -> void; + auto LH(r32& rt, cr32& rs, s16 imm) -> void; + auto LHU(r32& rt, cr32& rs, s16 imm) -> void; + auto LUI(r32& rt, u16 imm) -> void; + auto LW(r32& rt, cr32& rs, s16 imm) -> void; + auto NOR(r32& rd, cr32& rs, cr32& rt) -> void; + auto OR(r32& rd, cr32& rs, cr32& rt) -> void; + auto ORI(r32& rt, cr32& rs, u16 imm) -> void; + auto SB(cr32& rt, cr32& rs, s16 imm) -> void; + auto SH(cr32& rt, cr32& rs, s16 imm) -> void; + auto SLL(r32& rd, cr32& rt, u8 sa) -> void; + auto SLLV(r32& rd, cr32& rt, cr32& rs) -> void; + auto SLT(r32& rd, cr32& rs, cr32& rt) -> void; + auto SLTI(r32& rt, cr32& rs, s16 imm) -> void; + auto SLTIU(r32& rt, cr32& rs, s16 imm) -> void; + auto SLTU(r32& rd, cr32& rs, cr32& rt) -> void; + auto SRA(r32& rd, cr32& rt, u8 sa) -> void; + auto SRAV(r32& rd, cr32& rt, cr32& rs) -> void; + auto SRL(r32& rd, cr32& rt, u8 sa) -> void; + auto SRLV(r32& rd, cr32& rt, cr32& rs) -> void; + auto SUBU(r32& rd, cr32& rs, cr32& rt) -> void; + auto SW(cr32& rt, cr32& rs, s16 imm) -> void; + auto XOR(r32& rd, cr32& rs, cr32& rt) -> void; + auto XORI(r32& rt, cr32& rs, u16 imm) -> void; + + //scc.cpp: System Control Coprocessor + auto 
MFC0(r32& rt, u8 rd) -> void; + auto MTC0(cr32& rt, u8 rd) -> void; + + //vpu.cpp: Vector Processing Unit + union r128 { + struct { uint128_t u128; }; +#if defined(ARCHITECTURE_AMD64) + struct { __m128i v128; }; + + operator __m128i() const { return v128; } + auto operator=(__m128i value) { v128 = value; } +#endif + + auto byte(u32 index) -> uint8_t& { return ((uint8_t*)&u128)[15 - index]; } + auto byte(u32 index) const -> uint8_t { return ((uint8_t*)&u128)[15 - index]; } + + auto element(u32 index) -> uint16_t& { return ((uint16_t*)&u128)[7 - index]; } + auto element(u32 index) const -> uint16_t { return ((uint16_t*)&u128)[7 - index]; } + + auto u8(u32 index) -> uint8_t& { return ((uint8_t*)&u128)[15 - index]; } + auto u8(u32 index) const -> uint8_t { return ((uint8_t*)&u128)[15 - index]; } + + auto s16(u32 index) -> int16_t& { return ((int16_t*)&u128)[7 - index]; } + auto s16(u32 index) const -> int16_t { return ((int16_t*)&u128)[7 - index]; } + + auto u16(u32 index) -> uint16_t& { return ((uint16_t*)&u128)[7 - index]; } + auto u16(u32 index) const -> uint16_t { return ((uint16_t*)&u128)[7 - index]; } + + //VCx registers + auto get(u32 index) const -> bool { return u16(index) != 0; } + auto set(u32 index, bool value) -> bool { return u16(index) = 0 - value, value; } + + //vu-registers.cpp + auto operator()(u32 index) const -> r128; + }; + using cr128 = const r128; + + struct VU { + r128 r[32]; + r128 acch, accm, accl; + r128 vcoh, vcol; //16-bit little endian + r128 vcch, vccl; //16-bit little endian + r128 vce; // 8-bit little endian + s16 divin; + s16 divout; + bool divdp; + } vpu; + + static constexpr r128 zero{0}; + static constexpr r128 invert{u128(0) - 1}; + + auto accumulatorGet(u32 index) const -> u64; + auto accumulatorSet(u32 index, u64 value) -> void; + auto accumulatorSaturate(u32 index, bool slice, u16 negative, u16 positive) const -> u16; + + auto CFC2(r32& rt, u8 rd) -> void; + auto CTC2(cr32& rt, u8 rd) -> void; + template auto LBV(r128& vt, cr32& 
rs, s8 imm) -> void; + template auto LDV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LFV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LHV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LLV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LPV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LQV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LRV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LSV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LTV(u8 vt, cr32& rs, s8 imm) -> void; + template auto LUV(r128& vt, cr32& rs, s8 imm) -> void; + template auto LWV(r128& vt, cr32& rs, s8 imm) -> void; + template auto MFC2(r32& rt, cr128& vs) -> void; + template auto MTC2(cr32& rt, r128& vs) -> void; + template auto SBV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SDV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SFV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SHV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SLV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SPV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SQV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SRV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SSV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto STV(u8 vt, cr32& rs, s8 imm) -> void; + template auto SUV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto SWV(cr128& vt, cr32& rs, s8 imm) -> void; + template auto VABS(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VADD(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VADDC(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VAND(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VCH(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VCL(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VCR(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VEQ(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VGE(r128& vd, cr128& vs, cr128& vt) -> void; + 
template auto VLT(r128& vd, cr128& vs, cr128& vt) -> void; + template + auto VMACF(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMACF(r128& vd, cr128& vs, cr128& vt) -> void { VMACF<0, e>(vd, vs, vt); } + template auto VMACU(r128& vd, cr128& vs, cr128& vt) -> void { VMACF<1, e>(vd, vs, vt); } + auto VMACQ(r128& vd) -> void; + template auto VMADH(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMADL(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMADM(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMADN(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMOV(r128& vd, u8 de, cr128& vt) -> void; + template auto VMRG(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMUDH(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMUDL(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMUDM(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VMUDN(r128& vd, cr128& vs, cr128& vt) -> void; + template + auto VMULF(r128& rd, cr128& vs, cr128& vt) -> void; + template auto VMULF(r128& rd, cr128& vs, cr128& vt) -> void { VMULF<0, e>(rd, vs, vt); } + template auto VMULU(r128& rd, cr128& vs, cr128& vt) -> void { VMULF<1, e>(rd, vs, vt); } + template auto VMULQ(r128& rd, cr128& vs, cr128& vt) -> void; + template auto VNAND(r128& rd, cr128& vs, cr128& vt) -> void; + template auto VNE(r128& vd, cr128& vs, cr128& vt) -> void; + auto VNOP() -> void; + template auto VNOR(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VNXOR(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VOR(r128& vd, cr128& vs, cr128& vt) -> void; + template + auto VRCP(r128& vd, u8 de, cr128& vt) -> void; + template auto VRCP(r128& vd, u8 de, cr128& vt) -> void { VRCP<0, e>(vd, de, vt); } + template auto VRCPL(r128& vd, u8 de, cr128& vt) -> void { VRCP<1, e>(vd, de, vt); } + template auto VRCPH(r128& vd, u8 de, cr128& vt) -> void; + template + auto VRND(r128& vd, u8 vs, cr128& vt) -> void; + template auto VRNDN(r128& vd, u8 vs, 
cr128& vt) -> void { VRND<0, e>(vd, vs, vt); } + template auto VRNDP(r128& vd, u8 vs, cr128& vt) -> void { VRND<1, e>(vd, vs, vt); } + template + auto VRSQ(r128& vd, u8 de, cr128& vt) -> void; + template auto VRSQ(r128& vd, u8 de, cr128& vt) -> void { VRSQ<0, e>(vd, de, vt); } + template auto VRSQL(r128& vd, u8 de, cr128& vt) -> void { VRSQ<1, e>(vd, de, vt); } + template auto VRSQH(r128& vd, u8 de, cr128& vt) -> void; + template auto VSAR(r128& vd, cr128& vs) -> void; + template auto VSUB(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VSUBC(r128& vd, cr128& vs, cr128& vt) -> void; + template auto VXOR(r128& rd, cr128& vs, cr128& vt) -> void; + +//unserialized: + u16 reciprocals[512]; + u16 inverseSquareRoots[512]; + + //decoder.cpp + auto decoderEXECUTE() -> void; + auto decoderSPECIAL() -> void; + auto decoderREGIMM() -> void; + auto decoderSCC() -> void; + auto decoderVU() -> void; + auto decoderLWC2() -> void; + auto decoderSWC2() -> void; + + auto INVALID() -> void; + + //recompiler.cpp + struct Recompiler : recompiler::generic { + RSP& self; + Recompiler(RSP& self) : self(self), generic(allocator) {} + + struct Block { + auto execute(RSP& self) -> void { + ((void (*)(RSP*, IPU*, VU*))code)(&self, &self.ipu, &self.vpu); + } + + u8* code; + }; + + struct Pool { + Block* blocks[1024]; + }; + + struct PoolHashPair { + auto operator==(const PoolHashPair& source) const -> bool { return hashcode == source.hashcode; } + auto operator< (const PoolHashPair& source) const -> bool { return hashcode < source.hashcode; } + auto hash() const -> u32 { return hashcode; } + + Pool* pool; + u32 hashcode; + }; + + auto reset() -> void { + context = nullptr; + pools.reset(); + } + + auto invalidate() -> void { + context = nullptr; + } + + auto pool() -> Pool*; + auto block(u32 address) -> Block*; + + auto emit(u32 address) -> Block*; + auto emitEXECUTE(u32 instruction) -> bool; + auto emitSPECIAL(u32 instruction) -> bool; + auto emitREGIMM(u32 instruction) -> bool; + 
auto emitSCC(u32 instruction) -> bool; + auto emitVU(u32 instruction) -> bool; + auto emitLWC2(u32 instruction) -> bool; + auto emitSWC2(u32 instruction) -> bool; + + bump_allocator allocator; + Pool* context = nullptr; + set pools; + //hashset pools; + } recompiler{*this}; + + struct Disassembler { + RSP& self; + Disassembler(RSP& self) : self(self) {} + + //disassembler.cpp + auto disassemble(u32 address, u32 instruction) -> string; + template auto hint(P&&... p) const -> string; + + bool showColors = true; + bool showValues = true; + + private: + auto EXECUTE() -> vector; + auto SPECIAL() -> vector; + auto REGIMM() -> vector; + auto SCC() -> vector; + auto LWC2() -> vector; + auto SWC2() -> vector; + auto VU() -> vector; + auto immediate(s64 value, u32 bits = 0) const -> string; + auto ipuRegisterName(u32 index) const -> string; + auto ipuRegisterValue(u32 index) const -> string; + auto ipuRegisterIndex(u32 index, s16 offset) const -> string; + auto sccRegisterName(u32 index) const -> string; + auto sccRegisterValue(u32 index) const -> string; + auto vpuRegisterName(u32 index, u32 element = 0) const -> string; + auto vpuRegisterValue(u32 index, u32 element = 0) const -> string; + auto ccrRegisterName(u32 index) const -> string; + auto ccrRegisterValue(u32 index) const -> string; + + u32 address; + u32 instruction; + } disassembler{*this}; +}; + +extern RSP rsp; diff --git a/waterbox/ares64/ares/ares/n64/rsp/serialization.cpp b/waterbox/ares64/ares/ares/n64/rsp/serialization.cpp new file mode 100644 index 0000000000..9888c1e91e --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/rsp/serialization.cpp @@ -0,0 +1,60 @@ +auto RSP::serialize(serializer& s) -> void { + Thread::serialize(s); + s(dmem); + s(imem); + + s(pipeline.address); + s(pipeline.instruction); + + s(dma.pbusRegion); + s(dma.pbusAddress); + s(dma.dramAddress); + s(dma.read.length); + s(dma.read.skip); + s(dma.read.count); + s(dma.write.length); + s(dma.write.skip); + s(dma.write.count); + 
s(dma.requests); + + s(status.semaphore); + s(status.halted); + s(status.broken); + s(status.full); + s(status.singleStep); + s(status.interruptOnBreak); + s(status.signal); + + for(auto& r : ipu.r) s(r.u32); + s(ipu.pc); + + s(branch.pc); + s(branch.state); + + for(auto& r : vpu.r) s(r.u128); + s(vpu.acch.u128); + s(vpu.accm.u128); + s(vpu.accl.u128); + s(vpu.vcoh.u128); + s(vpu.vcol.u128); + s(vpu.vcch.u128); + s(vpu.vccl.u128); + s(vpu.vce.u128); + s(vpu.divin); + s(vpu.divout); + s(vpu.divdp); + + if constexpr(Accuracy::RSP::Recompiler) { + recompiler.reset(); + } +} + +auto RSP::DMA::Request::serialize(serializer& s) -> void { + s((u32&)type); + s(pbusRegion); + s(pbusAddress); + s(dramAddress); + s(length); + s(skip); + s(count); +} diff --git a/waterbox/ares64/ares/ares/n64/si/debugger.cpp b/waterbox/ares64/ares/ares/n64/si/debugger.cpp new file mode 100644 index 0000000000..5bab494257 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/si/debugger.cpp @@ -0,0 +1,27 @@ +auto SI::Debugger::load(Node::Object parent) -> void { + tracer.io = parent->append("I/O", "SI"); +} + +auto SI::Debugger::io(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "SI_DRAM_ADDRESS", + "SI_PIF_ADDRESS_READ64B", + "SI_INT_ADDRESS_WRITE64B", + "SI_RESERVED", + "SI_PIF_ADDRESS_WRITE64B", + "SI_INT_ADDRESS_READ64B", + "SI_STATUS", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "SI_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/si/dma.cpp b/waterbox/ares64/ares/ares/n64/si/dma.cpp new file mode 100644 index 0000000000..cd29871d59 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/si/dma.cpp @@ -0,0 +1,21 @@ +auto SI::dmaRead() -> void { + run(); + for(u32 offset = 0; offset < 64; 
offset += 2) { + u16 data = bus.read(io.readAddress + offset); + bus.write(io.dramAddress + offset, data); + } + io.dmaBusy = 0; + io.interrupt = 1; + mi.raise(MI::IRQ::SI); +} + +auto SI::dmaWrite() -> void { + for(u32 offset = 0; offset < 64; offset += 2) { + u16 data = bus.read(io.dramAddress + offset); + bus.write(io.writeAddress + offset, data); + } + io.dmaBusy = 0; + io.interrupt = 1; + mi.raise(MI::IRQ::SI); + run(); +} diff --git a/waterbox/ares64/ares/ares/n64/si/io.cpp b/waterbox/ares64/ares/ares/n64/si/io.cpp new file mode 100644 index 0000000000..30174e8971 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/si/io.cpp @@ -0,0 +1,89 @@ +auto SI::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data; + + if(address == 0) { + //SI_DRAM_ADDRESS + data.bit(0,23) = io.dramAddress; + } + + if(address == 1) { + //SI_PIF_ADDRESS_READ64B + data.bit(0,31) = io.readAddress; + } + + if(address == 2) { + //SI_INT_ADDRESS_WRITE64B + } + + if(address == 3) { + //SI_RESERVED + } + + if(address == 4) { + //SI_PIF_ADDRESS_WRITE64B + data.bit(0,31) = io.writeAddress; + } + + if(address == 5) { + //SI_INT_ADDRESS_READ64B + } + + if(address == 6) { + //SI_STATUS + data.bit( 0) = io.dmaBusy; + data.bit( 1) = io.ioBusy; + data.bit( 2) = io.readPending; + data.bit( 3) = io.dmaError; + data.bit( 4, 7) = io.pchState; + data.bit( 8,11) = io.dmaState; + data.bit(12) = io.interrupt; + } + + debugger.io(Read, address, data); + return data; +} + +auto SI::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + if(address == 0) { + //SI_DRAM_ADDRESS + io.dramAddress = data.bit(0,23) & ~7; + } + + if(address == 1) { + //SI_PIF_ADDRESS_READ64B + io.readAddress = data.bit(0,31) & ~1; + io.dmaBusy = 1; + queue.insert(Queue::SI_DMA_Read, 2304); + } + + if(address == 2) { + //SI_INT_ADDRESS_WRITE64B + } + + if(address == 3) { + //SI_RESERVED + } + + if(address == 4) { + //SI_PIF_ADDRESS_WRITE64B + io.writeAddress = 
data.bit(0,31) & ~1; + io.dmaBusy = 1; + queue.insert(Queue::SI_DMA_Write, 2304); + } + + if(address == 5) { + //SI_INT_ADDRESS_READ64B + } + + if(address == 6) { + //SI_STATUS + io.interrupt = 0; + mi.lower(MI::IRQ::SI); + } + + debugger.io(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/si/serialization.cpp b/waterbox/ares64/ares/ares/n64/si/serialization.cpp new file mode 100644 index 0000000000..e692fece3d --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/si/serialization.cpp @@ -0,0 +1,12 @@ +auto SI::serialize(serializer& s) -> void { + s(io.dramAddress); + s(io.readAddress); + s(io.writeAddress); + s(io.dmaBusy); + s(io.ioBusy); + s(io.readPending); + s(io.pchState); + s(io.dmaState); + s(io.dmaError); + s(io.interrupt); +} diff --git a/waterbox/ares64/ares/ares/n64/si/si.cpp b/waterbox/ares64/ares/ares/n64/si/si.cpp new file mode 100644 index 0000000000..23dfbaf6c4 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/si/si.cpp @@ -0,0 +1,364 @@ +#include + +namespace ares::Nintendo64 { + +SI si; +#include "dma.cpp" +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto SI::load(Node::Object parent) -> void { + node = parent->append("SI"); + debugger.load(node); + +/*if(auto fp = system.pak->read("pif.sm5.rom")) { + //load 1KB ROM and mirror it to 4KB + fp->read({SM5K::ROM, 1024}); + memory::copy(&SM5K::ROM[1024], &SM5K::ROM[0], 1024); + memory::copy(&SM5K::ROM[2048], &SM5K::ROM[0], 1024); + memory::copy(&SM5K::ROM[3072], &SM5K::ROM[0], 1024); + }*/ +} + +auto SI::unload() -> void { + debugger = {}; + node.reset(); +} + +auto SI::addressCRC(u16 address) const -> n5 { + n5 crc = 0; + for(u32 i : range(16)) { + n5 xor = crc & 0x10 ? 0x15 : 0x00; + crc <<= 1; + if(address & 0x8000) crc |= 1; + address <<= 1; + crc ^= xor; + } + return crc; +} + +auto SI::dataCRC(array_view data) const -> n8 { + n8 crc = 0; + for(u32 i : range(33)) { + for(u32 j : reverse(range(8))) { + n8 xor = crc & 0x80 ? 
0x85 : 0x00; + crc <<= 1; + if(i < 32) { + if(data[i] & 1 << j) crc |= 1; + } + crc ^= xor; + } + } + return crc; +} + +auto SI::run() -> void { + auto flags = pi.ram.read(0x3f); + + //controller polling + if(flags & 0x01) { + //todo: this flag is supposed to be cleared, but doing so breaks inputs + //flags &= ~0x01; + scan(); + } + + //CIC-NUS-6105 challenge/response + if(flags & 0x02) { + flags &= ~0x02; + challenge(); + } + + //unknown purpose + if(flags & 0x04) { + flags &= ~0x04; + debug(unimplemented, "[SI::main] flags & 0x04"); + } + + //must be sent within 5s of the console booting, or SM5 will lock the N64 + if(flags & 0x08) { + flags &= ~0x08; + } + + //PIF ROM lockout + if(flags & 0x10) { + flags &= ~0x10; + pi.io.romLockout = 1; + } + + //initialization + if(flags & 0x20) { + flags &= ~0x20; + flags |= 0x80; //set completion flag + } + + //clear PIF RAM + if(flags & 0x40) { + flags &= ~0x40; + pi.ram.fill(); + } + + pi.ram.write(0x3f, flags); +} + +auto SI::scan() -> void { + ControllerPort* controllers[4] = { + &controllerPort1, + &controllerPort2, + &controllerPort3, + &controllerPort4, + }; + + static constexpr bool Debug = 0; + + if constexpr(Debug) { + print("{\n"); + for(u32 y : range(8)) { + print(" "); + for(u32 x : range(8)) { + print(hex(pi.ram.read(y * 8 + x), 2L), " "); + } + print("\n"); + } + print("}\n"); + } + + n3 channel = 0; //0-5 + for(u32 offset = 0; offset < 64;) { + n8 send = pi.ram.read(offset++); + if(send == 0x00) { channel++; continue; } + if(send == 0xfd) continue; //channel reset + if(send == 0xfe) break; //end of packets + if(send == 0xff) continue; //alignment padding + n8 recvOffset = offset; + n8 recv = pi.ram.read(offset++); + if(recv == 0xfe) break; //end of packets + + //clear flags from lengths + send &= 0x3f; + recv &= 0x3f; + + n8 input[64]; + for(u32 index : range(send)) { + input[index] = pi.ram.read(offset++); + } + n8 output[64]; + b1 valid = 0; + + //status + if(input[0] == 0x00 || input[0] == 0xff) { + 
//controller + if(channel < 4 && controllers[channel]->device) { + output[0] = 0x05; //0x05 = gamepad; 0x02 = mouse + output[1] = 0x00; + output[2] = 0x02; //0x02 = nothing present in controller slot + if(auto& device = controllers[channel]->device) { + if(auto gamepad = dynamic_cast(device.data())) { + if(gamepad->ram || gamepad->motor) { + output[2] = 0x01; //0x01 = pak present + } + } + } + valid = 1; + } + + //cartridge EEPROM (4kbit) + if(channel >= 4 && cartridge.eeprom.size == 512) { + output[0] = 0x00; + output[1] = 0x80; + output[2] = 0x00; + valid = 1; + } + + //cartridge EEPROM (16kbit) + if(channel >= 4 && cartridge.eeprom.size == 2048) { + output[0] = 0x00; + output[1] = 0xc0; + output[2] = 0x00; + valid = 1; + } + } + + //read controller state + if(input[0] == 0x01) { + if(channel < 4 && controllers[channel]->device) { + u32 data = controllers[channel]->device->read(); + output[0] = data >> 24; + output[1] = data >> 16; + output[2] = data >> 8; + output[3] = data >> 0; + if(recv <= 4) { + pi.ram.write(recvOffset, 0x00 | recv & 0x3f); + } else { + pi.ram.write(recvOffset, 0x40 | recv & 0x3f); + } + valid = 1; + } + } + + //read pak + if(input[0] == 0x02 && send >= 3 && recv >= 1) { + if(auto& device = controllers[channel]->device) { + if(auto gamepad = dynamic_cast(device.data())) { + //controller pak + if(auto& ram = gamepad->ram) { + u32 address = (input[1] << 8 | input[2] << 0) & ~31; + if(addressCRC(address) == (n5)input[2]) { + for(u32 index : range(recv - 1)) { + output[index] = ram.read(address++); + } + output[recv - 1] = dataCRC({&output[0], recv - 1}); + valid = 1; + } + } + + //rumble pak + if(gamepad->motor) { + u32 address = (input[1] << 8 | input[2] << 0) & ~31; + if(addressCRC(address) == (n5)input[2]) { + for(u32 index : range(recv - 1)) { + output[index] = 0x80; + } + output[recv - 1] = dataCRC({&output[0], recv - 1}); + valid = 1; + } + } + } + } + } + + //write pak + if(input[0] == 0x03 && send >= 3 && recv >= 1) { + if(auto& device 
= controllers[channel]->device) { + if(auto gamepad = dynamic_cast(device.data())) { + //controller pak + if(auto& ram = gamepad->ram) { + u32 address = (input[1] << 8 | input[2] << 0) & ~31; + if(addressCRC(address) == (n5)input[2]) { + for(u32 index : range(send - 3)) { + ram.write(address++, input[3 + index]); + } + output[0] = dataCRC({&input[3], send - 3}); + valid = 1; + } + } + + //rumble pak + if(gamepad->motor) { + u32 address = (input[1] << 8 | input[2] << 0) & ~31; + if(addressCRC(address) == (n5)input[2]) { + output[0] = dataCRC({&input[3], send - 3}); + valid = 1; + gamepad->rumble(input[3] & 1); + } + } + } + } + } + + //read EEPROM + if(input[0] == 0x04 && send >= 2) { + u32 address = input[1] * 8; + for(u32 index : range(recv)) { + output[index] = cartridge.eeprom.read(address++); + } + valid = 1; + } + + //write EEPROM + if(input[0] == 0x05 && send >= 2 && recv >= 1) { + u32 address = input[1] * 8; + for(u32 index : range(send - 2)) { + cartridge.eeprom.write(address++, input[2 + index]); + } + output[0] = 0x00; + valid = 1; + } + + //RTC status + if(input[0] == 0x06) { + debug(unimplemented, "[SI::main] RTC status"); + } + + //RTC read + if(input[0] == 0x07) { + debug(unimplemented, "[SI::main] RTC read"); + } + + //RTC write + if(input[0] == 0x08) { + debug(unimplemented, "[SI::main] RTC write"); + } + + if(!valid) { + pi.ram.write(recvOffset, 0x80 | recv & 0x3f); + } + for(u32 index : range(recv)) { + pi.ram.write(offset++, output[index]); + } + channel++; + } + + if constexpr(Debug) { + print("[\n"); + for(u32 y : range(8)) { + print(" "); + for(u32 x : range(8)) { + print(hex(pi.ram.read(y * 8 + x), 2L), " "); + } + print("\n"); + } + print("]\n"); + } +} + +//CIC-NUS-6105 anti-piracy challenge/response +auto SI::challenge() -> void { + static n4 lut[32] = { + 0x4, 0x7, 0xa, 0x7, 0xe, 0x5, 0xe, 0x1, + 0xc, 0xf, 0x8, 0xf, 0x6, 0x3, 0x6, 0x9, + 0x4, 0x1, 0xa, 0x7, 0xe, 0x5, 0xe, 0x1, + 0xc, 0x9, 0x8, 0x5, 0x6, 0x3, 0xc, 0x9, + }; + + n4 
challenge[30]; + n4 response[30]; + + //15 bytes -> 30 nibbles + for(u32 address : range(15)) { + auto data = pi.ram.read(0x30 + address); + challenge[address << 1 | 0] = data >> 4; + challenge[address << 1 | 1] = data >> 0; + } + + n4 key = 0xb; + n1 sel = 0; + for(u32 address : range(30)) { + n4 data = key + 5 * challenge[address]; + response[address] = data; + key = lut[sel << 4 | data]; + n1 mod = data >> 3; + n3 mag = data >> 0; + if(mod) mag = ~mag; + if(mag % 3 != 1) mod = !mod; + if(sel) { + if(data == 0x1 || data == 0x9) mod = 1; + if(data == 0xb || data == 0xe) mod = 0; + } + sel = mod; + } + + //30 nibbles -> 15 bytes + for(u32 address : range(15)) { + n8 data = 0; + data |= response[address << 1 | 0] << 4; + data |= response[address << 1 | 1] << 0; + pi.ram.write(0x30 + address, data); + } +} + +auto SI::power(bool reset) -> void { + io = {}; +} + +} diff --git a/waterbox/ares64/ares/ares/n64/si/si.hpp b/waterbox/ares64/ares/ares/n64/si/si.hpp new file mode 100644 index 0000000000..714120a5fa --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/si/si.hpp @@ -0,0 +1,51 @@ +//Serial Interface + +struct SI : Memory::IO { + Node::Object node; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto io(bool mode, u32 address, u32 data) -> void; + + struct Tracer { + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //si.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + auto addressCRC(u16 address) const -> n5; + auto dataCRC(array_view data) const -> n8; + auto run() -> void; + auto scan() -> void; + auto challenge() -> void; + auto power(bool reset) -> void; + + //dma.cpp + auto dmaRead() -> void; + auto dmaWrite() -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct IO { + n24 dramAddress; + n32 readAddress; + n32 writeAddress; + n1 dmaBusy; + n1 ioBusy; + n1 readPending; 
+ n4 pchState; + n4 dmaState; + n1 dmaError; + n1 interrupt; + } io; +}; + +extern SI si; diff --git a/waterbox/ares64/ares/ares/n64/system/serialization.cpp b/waterbox/ares64/ares/ares/n64/system/serialization.cpp new file mode 100644 index 0000000000..4c9cc76090 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/system/serialization.cpp @@ -0,0 +1,55 @@ +auto System::serialize(bool synchronize) -> serializer { + serializer s; + + u32 signature = SerializerSignature; + char version[16] = {}; + char description[512] = {}; + memory::copy(&version, (const char*)SerializerVersion, SerializerVersion.size()); + + s(signature); + s(synchronize); + s(version); + s(description); + + serialize(s, synchronize); + return s; +} + +auto System::unserialize(serializer& s) -> bool { + u32 signature = 0; + bool synchronize = true; + char version[16] = {}; + char description[512] = {}; + + s(signature); + s(synchronize); + s(version); + s(description); + + if(signature != SerializerSignature) return false; + if(string{version} != SerializerVersion) return false; + + if(synchronize) power(/* reset = */ false); + serialize(s, synchronize); + return true; +} + +auto System::serialize(serializer& s, bool synchronize) -> void { + s(queue); + s(cartridge); + s(controllerPort1); + s(controllerPort2); + s(controllerPort3); + s(controllerPort4); + s(rdram); + s(mi); + s(vi); + s(ai); + s(pi); + s(ri); + s(si); + s(cpu); + s(rdp); + s(rsp); + s(dd); +} diff --git a/waterbox/ares64/ares/ares/n64/system/system.cpp b/waterbox/ares64/ares/ares/n64/system/system.cpp new file mode 100644 index 0000000000..2b1167511e --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/system/system.cpp @@ -0,0 +1,151 @@ +#include + +namespace ares::Nintendo64 { + +auto enumerate() -> vector { + return { + "[Nintendo] Nintendo 64 (NTSC)", + "[Nintendo] Nintendo 64 (PAL)", + }; +} + +auto load(Node::System& node, string name) -> bool { + if(!enumerate().find(name)) return false; + return system.load(node, name); +} + 
+auto option(string name, string value) -> bool { + #if defined(VULKAN) + if(name == "Enable Vulkan") vulkan.enable = value.boolean(); + if(name == "Quality" && value == "SD" ) vulkan.internalUpscale = 1; + if(name == "Quality" && value == "HD" ) vulkan.internalUpscale = 2; + if(name == "Quality" && value == "UHD") vulkan.internalUpscale = 4; + if(name == "Supersampling") vulkan.supersampleScanout = value.boolean(); + if(vulkan.internalUpscale == 1) vulkan.supersampleScanout = false; + vulkan.outputUpscale = vulkan.supersampleScanout ? 1 : vulkan.internalUpscale; + #endif + return true; +} + +System system; +Queue queue; +#include "serialization.cpp" + +auto System::game() -> string { + if(cartridge.node) { + return cartridge.title(); + } + + return "(no cartridge connected)"; +} + +auto System::run() -> void { + while(!vi.refreshed) cpu.main(); + vi.refreshed = false; + si.run(); +} + +auto System::load(Node::System& root, string name) -> bool { + if(node) unload(); + + information = {}; + if(name.find("Nintendo 64")) { + information.name = "Nintendo 64"; + } + if(name.find("NTSC")) { + information.region = Region::NTSC; + } + if(name.find("PAL")) { + information.region = Region::PAL; + } + + node = Node::System::create(information.name); + node->setGame({&System::game, this}); + node->setRun({&System::run, this}); + node->setPower({&System::power, this}); + node->setSave({&System::save, this}); + node->setUnload({&System::unload, this}); + node->setSerialize({&System::serialize, this}); + node->setUnserialize({&System::unserialize, this}); + root = node; + if(!node->setPak(pak = platform->pak(node))) return false; + + cartridgeSlot.load(node); + controllerPort1.load(node); + controllerPort2.load(node); + controllerPort3.load(node); + controllerPort4.load(node); + rdram.load(node); + mi.load(node); + vi.load(node); + ai.load(node); + pi.load(node); + ri.load(node); + si.load(node); + cpu.load(node); + rsp.load(node); + rdp.load(node); + dd.load(node); + #if 
defined(VULKAN) + vulkan.load(node); + #endif + return true; +} + +auto System::unload() -> void { + if(!node) return; + save(); + #if defined(VULKAN) + vulkan.unload(); + #endif + cartridgeSlot.unload(); + controllerPort1.unload(); + controllerPort2.unload(); + controllerPort3.unload(); + controllerPort4.unload(); + rdram.unload(); + mi.unload(); + vi.unload(); + ai.unload(); + pi.unload(); + ri.unload(); + si.unload(); + cpu.unload(); + rsp.unload(); + rdp.unload(); + dd.unload(); + pak.reset(); + node.reset(); +} + +auto System::save() -> void { + if(!node) return; + cartridge.save(); + controllerPort1.save(); + controllerPort2.save(); + controllerPort3.save(); + controllerPort4.save(); +} + +auto System::power(bool reset) -> void { + for(auto& setting : node->find()) setting->setLatch(); + + if constexpr(Accuracy::CPU::Recompiler || Accuracy::RSP::Recompiler) { + ares::Memory::FixedAllocator::get().release(); + } + queue.reset(); + cartridge.power(reset); + rdram.power(reset); + dd.power(reset); + mi.power(reset); + vi.power(reset); + ai.power(reset); + pi.power(reset); + ri.power(reset); + si.power(reset); + cpu.power(reset); + rsp.power(reset); + rdp.power(reset); +} + +} diff --git a/waterbox/ares64/ares/ares/n64/system/system.hpp b/waterbox/ares64/ares/ares/n64/system/system.hpp new file mode 100644 index 0000000000..dc65f11e86 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/system/system.hpp @@ -0,0 +1,37 @@ +struct System { + Node::System node; + VFS::Pak pak; + + enum class Region : u32 { NTSC, PAL }; + + auto name() const -> string { return information.name; } + auto region() const -> Region { return information.region; } + auto frequency() const -> u32 { return information.frequency; } + + //system.cpp + auto game() -> string; + auto run() -> void; + auto load(Node::System& node, string name) -> bool; + auto unload() -> void; + auto save() -> void; + auto power(bool reset) -> void; + + //serialization.cpp + auto serialize(bool synchronize = true) -> 
serializer; + auto unserialize(serializer&) -> bool; + +private: + struct Information { + string name = "Nintendo 64"; + Region region = Region::NTSC; + u32 frequency = 93'750'000 * 2; + } information; + + //serialization.cpp + auto serialize(serializer&, bool synchronize) -> void; +}; + +extern System system; + +auto Region::NTSC() -> bool { return system.region() == System::Region::NTSC; } +auto Region::PAL() -> bool { return system.region() == System::Region::PAL; } diff --git a/waterbox/ares64/ares/ares/n64/vi/debugger.cpp b/waterbox/ares64/ares/ares/n64/vi/debugger.cpp new file mode 100644 index 0000000000..21a45beeb7 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/vi/debugger.cpp @@ -0,0 +1,34 @@ +auto VI::Debugger::load(Node::Object parent) -> void { + tracer.io = parent->append("I/O", "VI"); +} + +auto VI::Debugger::io(bool mode, u32 address, u32 data) -> void { + static const vector registerNames = { + "VI_CONTROL", + "VI_DRAM_ADDRESS", + "VI_H_WIDTH", + "VI_V_INTR", + "VI_V_CURRENT_LINE", + "VI_TIMING", + "VI_V_SYNC", + "VI_H_SYNC", + "VI_H_SYNC_LEAP", + "VI_H_VIDEO", + "VI_V_VIDEO", + "VI_V_BURST", + "VI_X_SCALE", + "VI_Y_SCALE", + }; + + if(unlikely(tracer.io->enabled())) { + string message; + string name = registerNames(address, "VI_UNKNOWN"); + if(mode == Read) { + message = {name.split("|").first(), " => ", hex(data, 8L)}; + } + if(mode == Write) { + message = {name.split("|").last(), " <= ", hex(data, 8L)}; + } + tracer.io->notify(message); + } +} diff --git a/waterbox/ares64/ares/ares/n64/vi/io.cpp b/waterbox/ares64/ares/ares/n64/vi/io.cpp new file mode 100644 index 0000000000..e6cad99397 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/vi/io.cpp @@ -0,0 +1,195 @@ +auto VI::readWord(u32 address) -> u32 { + address = (address & 0xfffff) >> 2; + n32 data; + + if(address == 0) { + //VI_CONTROL + data.bit( 0, 1) = io.colorDepth; + data.bit( 2) = io.gammaDither; + data.bit( 3) = io.gamma; + data.bit( 4) = io.divot; + data.bit( 5) = 
io.reserved.bit(5); + data.bit( 6) = io.serrate; + data.bit( 7) = io.reserved.bit(7); + data.bit( 8, 9) = io.antialias; + data.bit(10,15) = io.reserved.bit(10,15); + } + + if(address == 1) { + //VI_DRAM_ADDRESS + data.bit(0,23) = io.dramAddress; + } + + if(address == 2) { + //VI_H_WIDTH + data.bit(0,11) = io.width; + } + + if(address == 3) { + //VI_V_INTR + data.bit(0,9) = io.coincidence; + } + + if(address == 4) { + //VI_V_CURRENT_LINE + data.bit(0) = io.field & io.serrate; + data.bit(1,9) = io.vcounter; + } + + if(address == 5) { + //VI_TIMING + data.bit( 0, 7) = io.hsyncWidth; + data.bit( 8,15) = io.colorBurstWidth; + data.bit(16,19) = io.vsyncWidth; + data.bit(20,29) = io.colorBurstHsync; + } + + if(address == 6) { + //VI_V_SYNC + data.bit(0,9) = io.halfLinesPerField; + } + + if(address == 7) { + //VI_H_SYNC + data.bit( 0,11) = io.quarterLineDuration; + data.bit(16,20) = io.palLeapPattern; + } + + if(address == 8) { + //VI_H_SYNC_LEAP + data.bit( 0,11) = io.hsyncLeap[0]; + data.bit(16,27) = io.hsyncLeap[1]; + } + + if(address == 9) { + //VI_H_VIDEO + data.bit( 0, 9) = io.hend; + data.bit(16,25) = io.hstart; + } + + if(address == 10) { + //VI_V_VIDEO + data.bit( 0, 9) = io.vend; + data.bit(16,25) = io.vstart; + } + + if(address == 11) { + //VI_V_BURST + data.bit( 0, 9) = io.colorBurstEnd; + data.bit(16,25) = io.colorBurstStart; + } + + if(address == 12) { + //VI_X_SCALE + data.bit( 0,11) = io.xscale; + data.bit(16,27) = io.xsubpixel; + } + + if(address == 13) { + //VI_Y_SCALE + data.bit( 0,11) = io.yscale; + data.bit(16,27) = io.ysubpixel; + } + + debugger.io(Read, address, data); + return data; +} + +auto VI::writeWord(u32 address, u32 data_) -> void { + address = (address & 0xfffff) >> 2; + n32 data = data_; + + #if defined(VULKAN) + vulkan.writeWord(address, data); + #endif + + if(address == 0) { + //VI_CONTROL + io.colorDepth = data.bit( 0, 1); + io.gammaDither = data.bit( 2); + io.gamma = data.bit( 3); + io.divot = data.bit( 4); + io.reserved.bit(5) = 
data.bit( 5); + io.serrate = data.bit( 6); + io.reserved.bit(7) = data.bit( 7); + io.antialias = data.bit( 8, 9); + io.reserved.bit(10,15) = data.bit(10,15); + } + + if(address == 1) { + //VI_DRAM_ADDRESS + io.dramAddress = data.bit(0,23); + } + + if(address == 2) { + //VI_H_WIDTH + io.width = data.bit(0,11); + } + + if(address == 3) { + //VI_V_INTR + io.coincidence = data.bit(0,9); + } + + if(address == 4) { + //VI_V_CURRENT_LINE + mi.lower(MI::IRQ::VI); + } + + if(address == 5) { + //VI_TIMING + io.hsyncWidth = data.bit( 0, 7); + io.colorBurstWidth = data.bit( 8,15); + io.vsyncWidth = data.bit(16,19); + io.colorBurstHsync = data.bit(20,29); + } + + if(address == 6) { + //VI_V_SYNC + io.halfLinesPerField = data.bit(0,9); + } + + if(address == 7) { + //VI_H_SYNC + io.quarterLineDuration = data.bit( 0,11); + io.palLeapPattern = data.bit(16,20); + } + + if(address == 8) { + //VI_H_SYNC_LEAP + io.hsyncLeap[0] = data.bit( 0,11); + io.hsyncLeap[1] = data.bit(16,27); + } + + if(address == 9) { + //VI_H_VIDEO + io.hend = data.bit( 0, 9); + io.hstart = data.bit(16,25); + } + + if(address == 10) { + //VI_V_VIDEO + io.vend = data.bit( 0, 9); + io.vstart = data.bit(16,25); + } + + if(address == 11) { + //VI_V_BURST + io.colorBurstEnd = data.bit( 0, 9); + io.colorBurstStart = data.bit(16,25); + } + + if(address == 12) { + //VI_X_SCALE + io.xscale = data.bit( 0,11); + io.xsubpixel = data.bit(16,27); + } + + if(address == 13) { + //VI_Y_SCALE + io.yscale = data.bit( 0,11); + io.ysubpixel = data.bit(16,27); + } + + debugger.io(Write, address, data); +} diff --git a/waterbox/ares64/ares/ares/n64/vi/serialization.cpp b/waterbox/ares64/ares/ares/n64/vi/serialization.cpp new file mode 100644 index 0000000000..fd0f7df697 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/vi/serialization.cpp @@ -0,0 +1,34 @@ +auto VI::serialize(serializer& s) -> void { + Thread::serialize(s); + + s(io.colorDepth); + s(io.gammaDither); + s(io.gamma); + s(io.divot); + s(io.serrate); + s(io.antialias); + 
s(io.reserved); + s(io.dramAddress); + s(io.width); + s(io.coincidence); + s(io.hsyncWidth); + s(io.colorBurstWidth); + s(io.vsyncWidth); + s(io.colorBurstHsync); + s(io.halfLinesPerField); + s(io.quarterLineDuration); + s(io.palLeapPattern); + s(io.hsyncLeap); + s(io.hend); + s(io.hstart); + s(io.vend); + s(io.vstart); + s(io.colorBurstEnd); + s(io.colorBurstStart); + s(io.xscale); + s(io.xsubpixel); + s(io.yscale); + s(io.ysubpixel); + s(io.vcounter); + s(io.field); +} diff --git a/waterbox/ares64/ares/ares/n64/vi/vi.cpp b/waterbox/ares64/ares/ares/n64/vi/vi.cpp new file mode 100644 index 0000000000..503637105a --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/vi/vi.cpp @@ -0,0 +1,154 @@ +#include + +namespace ares::Nintendo64 { + +VI vi; +#include "io.cpp" +#include "debugger.cpp" +#include "serialization.cpp" + +auto VI::load(Node::Object parent) -> void { + node = parent->append("VI"); + + #if defined(VULKAN) + screen = node->append("Screen", vulkan.outputUpscale * 640, vulkan.outputUpscale * 576); + #else + screen = node->append("Screen", 640, 576); + #endif + screen->setRefresh({&VI::refresh, this}); + screen->colors((1 << 24) + (1 << 15), [&](n32 color) -> n64 { + if(color < (1 << 24)) { + u64 a = 65535; + u64 r = image::normalize(color >> 16 & 255, 8, 16); + u64 g = image::normalize(color >> 8 & 255, 8, 16); + u64 b = image::normalize(color >> 0 & 255, 8, 16); + return a << 48 | r << 32 | g << 16 | b << 0; + } else { + u64 a = 65535; + u64 r = image::normalize(color >> 10 & 31, 5, 16); + u64 g = image::normalize(color >> 5 & 31, 5, 16); + u64 b = image::normalize(color >> 0 & 31, 5, 16); + return a << 48 | r << 32 | g << 16 | b << 0; + } + }); + #if defined(VULKAN) + screen->setSize(vulkan.outputUpscale * 640, vulkan.outputUpscale * 480); + if(!vulkan.supersampleScanout) { + screen->setScale(1.0 / vulkan.outputUpscale, 1.0 / vulkan.outputUpscale); + } + #else + screen->setSize(640, 480); + #endif + + debugger.load(node); +} + +auto VI::unload() -> void { + 
debugger = {}; + screen->quit(); + node->remove(screen); + screen.reset(); + node.reset(); +} + +auto VI::main() -> void { + //field is not compared + if(io.vcounter << 1 == io.coincidence) { + mi.raise(MI::IRQ::VI); + } + + if(++io.vcounter >= (Region::NTSC() ? 262 : 312) + io.field) { + io.vcounter = 0; + io.field = io.field + 1 & io.serrate; + if(!io.field) { + #if defined(VULKAN) + gpuOutputValid = vulkan.scanoutAsync(io.field); + vulkan.frame(); + #endif + + refreshed = true; + screen->frame(); + } + } + + if(Region::NTSC()) step(system.frequency() / 60 / 262); + if(Region::PAL ()) step(system.frequency() / 50 / 312); +} + +auto VI::step(u32 clocks) -> void { + Thread::clock += clocks; +} + +auto VI::refresh() -> void { + #if defined(VULKAN) + if(gpuOutputValid) { + const u8* rgba = nullptr; + u32 width = 0, height = 0; + vulkan.mapScanoutRead(rgba, width, height); + if(rgba) { + screen->setViewport(0, 0, width, height); + for(u32 y : range(height)) { + auto source = rgba + width * y * sizeof(u32); + auto target = screen->pixels(1).data() + y * vulkan.outputUpscale * 640; + for(u32 x : range(width)) { + target[x] = source[x * 4 + 0] << 16 | source[x * 4 + 1] << 8 | source[x * 4 + 2] << 0; + } + } + } else { + screen->setViewport(0, 0, 1, 1); + screen->pixels(1).data()[0] = 0; + } + vulkan.unmapScanoutRead(); + vulkan.endScanout(); + return; + } + #endif + + u32 pitch = vi.io.width; + u32 width = vi.io.width; //vi.io.xscale <= 0x300 ? 320 : 640; + u32 height = vi.io.yscale <= 0x400 ? 
239 : 478; + screen->setViewport(0, 0, width, height); + + if(vi.io.colorDepth == 0 || io.dramAddress == 0 || (signed)(vi.io.hend - vi.io.hstart) <= 0 || vi.io.hstart >= 640) { + //blank screen + memory::fill(screen->pixels(1).data(), 640 * 576); + return; + } + + if(vi.io.colorDepth == 2) { + //15bpp + for(u32 y : range(height)) { + u32 address = vi.io.dramAddress + y * pitch * 2; + auto line = screen->pixels(1).data() + y * 640; + for(u32 x : range(min(width, pitch))) { + u16 data = bus.read(address + x * 2); + *line++ = 1 << 24 | data >> 1; + } + } + } + + if(vi.io.colorDepth == 3) { + //24bpp + for(u32 y : range(height)) { + u32 address = vi.io.dramAddress + y * pitch * 4; + auto line = screen->pixels(1).data() + y * 640; + for(u32 x : range(min(width, pitch))) { + u32 data = bus.read(address + x * 4); + *line++ = data >> 8; + } + } + } +} + +auto VI::power(bool reset) -> void { + Thread::reset(); + screen->power(); + io = {}; + refreshed = false; + + #if defined(VULKAN) + gpuOutputValid = false; + #endif +} + +} diff --git a/waterbox/ares64/ares/ares/n64/vi/vi.hpp b/waterbox/ares64/ares/ares/n64/vi/vi.hpp new file mode 100644 index 0000000000..9db6468081 --- /dev/null +++ b/waterbox/ares64/ares/ares/n64/vi/vi.hpp @@ -0,0 +1,76 @@ +//Video Interface + +struct VI : Thread, Memory::IO { + Node::Object node; + Node::Video::Screen screen; + + struct Debugger { + //debugger.cpp + auto load(Node::Object) -> void; + auto io(bool mode, u32 address, u32 data) -> void; + + struct Tracer { + Node::Debugger::Tracer::Notification io; + } tracer; + } debugger; + + //vi.cpp + auto load(Node::Object) -> void; + auto unload() -> void; + + auto main() -> void; + auto step(u32 clocks) -> void; + auto refresh() -> void; + auto power(bool reset) -> void; + + //io.cpp + auto readWord(u32 address) -> u32; + auto writeWord(u32 address, u32 data) -> void; + + //serialization.cpp + auto serialize(serializer&) -> void; + + struct IO { + n2 colorDepth; + n1 gammaDither; + n1 gamma; + n1 
divot; + n1 serrate; //interlace + n2 antialias; + n32 reserved; + n24 dramAddress; + n12 width; + n10 coincidence = 256; + n8 hsyncWidth; + n8 colorBurstWidth; + n4 vsyncWidth; + n10 colorBurstHsync; + n10 halfLinesPerField; + n12 quarterLineDuration; + n5 palLeapPattern; + n12 hsyncLeap[2]; + n10 hend; + n10 hstart; + n10 vend; + n10 vstart; + n10 colorBurstEnd; + n10 colorBurstStart; + n12 xscale; + n12 xsubpixel; + n12 yscale; + n12 ysubpixel; + + //internal: + n9 vcounter; + n1 field; + } io; + +//unserialized: + bool refreshed; + + #if defined(VULKAN) + bool gpuOutputValid = false; + #endif +}; + +extern VI vi; diff --git a/waterbox/ares64/ares/nall/GNUmakefile b/waterbox/ares64/ares/nall/GNUmakefile new file mode 100644 index 0000000000..5f1e730b5c --- /dev/null +++ b/waterbox/ares64/ares/nall/GNUmakefile @@ -0,0 +1,289 @@ +# disable built-in rules and variables +MAKEFLAGS := Rr +.SUFFIXES: + +[0-9] = 0 1 2 3 4 5 6 7 8 9 +[A-Z] = A B C D E F G H I J K L M N O P Q R S T U V W X Y Z +[a-z] = a b c d e f g h i j k l m n o p q r s t u v w x y z +[markup] = ` ~ ! @ \# $$ % ^ & * ( ) - _ = + [ { ] } \ | ; : ' " , < . > / ? +[all] = $([0-9]) $([A-Z]) $([a-z]) $([markup]) +[empty] := +[space] := $([empty]) $([empty]) + +# platform detection +ifeq ($(platform),) + ifeq ($(OS),Windows_NT) + platform := windows + endif +endif + +ifeq ($(platform),) + uname := $(shell uname) + ifeq ($(uname),) + platform := windows + else ifneq ($(findstring Windows,$(uname)),) + platform := windows + else ifneq ($(findstring NT,$(uname)),) + platform := windows + else ifneq ($(findstring Darwin,$(uname)),) + platform := macos + else ifneq ($(findstring Linux,$(uname)),) + platform := linux + else ifneq ($(findstring BSD,$(uname)),) + platform := bsd + else + $(error unknown platform, please specify manually.) + endif +endif + +# common commands +ifeq ($(shell echo ^^),^) + # cmd + delete = $(info Deleting $1 ...) @del /q $(subst /,\,$1) + rdelete = $(info Deleting $1 ...) 
@del /s /q $(subst /,\,$1) && if exist $(subst /,\,$1) (rmdir /s /q $(subst /,\,$1)) +else + # sh + delete = $(info Deleting $1 ...) @rm -f $1 + rdelete = $(info Deleting $1 ...) @rm -rf $1 +endif + +compiler.c = $(compiler) -x c -std=c11 +compiler.cpp = $(compiler) -x c++ -std=c++17 -fno-operator-names +compiler.objc = $(compiler) -x objective-c -std=c11 +compiler.objcpp = $(compiler) -x objective-c++ -std=c++17 -fno-operator-names + +flags.c = -x c -std=c11 +flags.cpp = -x c++ -std=c++17 -fno-operator-names +flags.objc = -x objective-c -std=c11 +flags.objcpp = -x objective-c++ -std=c++17 -fno-operator-names +flags.deps = -MMD -MP -MF $(@:.o=.d) + +# compiler detection +ifeq ($(compiler),) + ifeq ($(platform),windows) + compiler := g++ + compiler.cpp = $(compiler) -x c++ -std=gnu++17 -fno-operator-names + flags.cpp = -x c++ -std=gnu++17 -fno-operator-names + else ifeq ($(platform),macos) + compiler := clang++ + else ifeq ($(platform),linux) + compiler := g++ + else ifeq ($(platform),bsd) + compiler := clang++ + else + compiler := g++ + endif +endif + +# architecture detection +ifeq ($(arch),) + machine := $(shell $(compiler) -dumpmachine) + ifneq ($(filter amd64-% x86_64-%,$(machine)),) + arch := amd64 + else ifneq ($(filter arm64-% aarch64-%,$(machine)),) + arch := arm64 + else + $(error unknown arch, please specify manually.) + endif +endif + +# build optimization levels +ifeq ($(build),debug) + symbols = true + flags += -Og -DBUILD_DEBUG +else ifeq ($(build),stable) + lto = true + flags += -O1 -DBUILD_STABLE +else ifeq ($(build),minified) + lto = true + flags += -Os -DBUILD_MINIFIED +else ifeq ($(build),release) + lto = true + flags += -O2 -DBUILD_RELEASE +else ifeq ($(build),optimized) + lto = true + flags += -O3 -fomit-frame-pointer -DBUILD_OPTIMIZED +else + $(error unrecognized build type.) 
+endif + +# debugging information +ifeq ($(symbols),true) + flags += -g + ifeq ($(platform),windows) + ifeq ($(findstring clang++,$(compiler)),clang++) + flags += -gcodeview + options += -Wl,-pdb= + endif + endif +endif + +# link-time optimization +ifeq ($(lto),true) + flags += -flto + options += -fwhole-program + ifneq ($(findstring clang++,$(compiler)),clang++) + flags += -fwhole-program -fno-fat-lto-objects + options += -flto=jobserver + else + options += -flto=thin + endif +endif + +# openmp support +ifeq ($(openmp),true) + # macOS Xcode does not ship with OpenMP support + ifneq ($(platform),macos) + flags += -fopenmp + options += -fopenmp + endif +endif + +# clang settings +ifeq ($(findstring clang++,$(compiler)),clang++) + flags += -fno-strict-aliasing -fwrapv + ifeq ($(arch),arm64) + # work around bad interaction with alignas(n) when n >= 4096 + flags += -mno-global-merge + endif +# gcc settings +else ifeq ($(findstring g++,$(compiler)),g++) + flags += -fno-strict-aliasing -fwrapv -Wno-trigraphs +endif + +# windows settings +ifeq ($(platform),windows) + options += -mthreads -lpthread -lws2_32 -lole32 + options += $(if $(findstring clang++,$(compiler)),-fuse-ld=lld) + options += $(if $(findstring g++,$(compiler)),-static -static-libgcc -static-libstdc++) + options += $(if $(findstring true,$(console)),-mconsole,-mwindows) + windres := windres +endif + +# macos settings +ifeq ($(platform),macos) + flags += -stdlib=libc++ -mmacosx-version-min=10.9 -Wno-auto-var-id -fobjc-arc + options += -lc++ -lobjc -mmacosx-version-min=10.9 + # allow mprotect() on dynamic recompiler code blocks + options += -Wl,-segprot,__DATA,rwx,rw +endif + +# linux settings +ifeq ($(platform),linux) + options += -ldl +endif + +# bsd settings +ifeq ($(platform),bsd) + flags += -I/usr/local/include + options += -Wl,-rpath=/usr/local/lib + options += -Wl,-rpath=/usr/local/lib/gcc8 + options += -lstdc++ -lm +endif + +# threading support +ifeq ($(threaded),true) + ifneq ($(filter 
$(platform),linux bsd),) + flags += -pthread + options += -pthread -lrt + endif +endif + +# paths +ifeq ($(object.path),) + object.path := obj +endif + +ifeq ($(output.path),) + output.path := out +endif + +# rules +default: all; + +nall.verbose: + $(info Compiler Flags:) + $(foreach n,$(sort $(call unique,$(flags))),$(if $(filter-out -I%,$n),$(info $([space]) $n))) + $(info Linker Options:) + $(foreach n,$(sort $(call unique,$(options))),$(if $(filter-out -l%,$n),$(info $([space]) $n))) + +%.o: $< + $(info Compiling $(subst ../,,$<) ...) + @$(call compile) + +# function compile([arguments]) +compile = \ + $(strip \ + $(if $(filter %.c,$<), \ + $(compiler.c) $(flags.deps) $(flags) $1 -c $< -o $@ \ + ,$(if $(filter %.cpp,$<), \ + $(compiler.cpp) $(flags.deps) $(flags) $1 -c $< -o $@ \ + )) \ + ) + +# function rwildcard(directory, pattern) +rwildcard = \ + $(strip \ + $(filter $(if $2,$2,%), \ + $(foreach f, \ + $(wildcard $1*), \ + $(eval t = $(call rwildcard,$f/)) \ + $(if $t,$t,$f) \ + ) \ + ) \ + ) + +# function unique(source) +unique = \ + $(eval __temp :=) \ + $(strip \ + $(foreach s,$1,$(if $(filter $s,$(__temp)),,$(eval __temp += $s))) \ + $(__temp) \ + ) + +# function strtr(source, from, to) +strtr = \ + $(eval __temp := $1) \ + $(strip \ + $(foreach c, \ + $(join $(addsuffix :,$2),$3), \ + $(eval __temp := \ + $(subst $(word 1,$(subst :, ,$c)),$(word 2,$(subst :, ,$c)),$(__temp)) \ + ) \ + ) \ + $(__temp) \ + ) + +# function strupper(source) +strupper = $(call strtr,$1,$([a-z]),$([A-Z])) + +# function strlower(source) +strlower = $(call strtr,$1,$([A-Z]),$([a-z])) + +# function strlen(source) +strlen = \ + $(eval __temp := $(subst $([space]),_,$1)) \ + $(words \ + $(strip \ + $(foreach c, \ + $([all]), \ + $(eval __temp := \ + $(subst $c,$c ,$(__temp)) \ + ) \ + ) \ + $(__temp) \ + ) \ + ) + +# function streq(source) +streq = $(if $(filter-out xx,x$(subst $1,,$2)$(subst $2,,$1)x),,1) + +# function strne(source) +strne = $(if $(filter-out xx,x$(subst 
$1,,$2)$(subst $2,,$1)x),1,) + +# prefix +ifeq ($(platform),windows) + prefix := $(subst $([space]),\$([space]),$(strip $(call strtr,$(LOCALAPPDATA),\,/))) +else + prefix := $(HOME)/.local +endif diff --git a/waterbox/ares64/ares/nall/adaptive-array.hpp b/waterbox/ares64/ares/nall/adaptive-array.hpp new file mode 100644 index 0000000000..b1de354568 --- /dev/null +++ b/waterbox/ares64/ares/nall/adaptive-array.hpp @@ -0,0 +1,64 @@ +//deprecated + +#pragma once + +#include +#include + +namespace nall { + +template +struct adaptive_array { + auto capacity() const -> u32 { return Capacity; } + auto size() const -> u32 { return _size; } + + auto reset() -> void { + for(u32 n : range(_size)) _pool.t[n].~T(); + _size = 0; + } + + auto operator[](u32 index) -> T& { + #ifdef DEBUG + struct out_of_bounds {}; + if(index >= Capacity) throw out_of_bounds{}; + #endif + return _pool.t[index]; + } + + auto operator[](u32 index) const -> const T& { + #ifdef DEBUG + struct out_of_bounds {}; + if(index >= Capacity) throw out_of_bounds{}; + #endif + return _pool.t[index]; + } + + auto append() -> T& { + new(_pool.t + _size) T; + return _pool.t[_size++]; + } + + auto append(const T& value) -> void { + new(_pool.t + _size++) T(value); + } + + auto append(T&& value) -> void { + new(_pool.t + _size++) T(move(value)); + } + + auto begin() { return &_pool.t[0]; } + auto end() { return &_pool.t[_size]; } + + auto begin() const { return &_pool.t[0]; } + auto end() const { return &_pool.t[_size]; } + +private: + union U { + U() {} + ~U() {} + T t[Capacity]; + } _pool; + u32 _size = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/algorithm.hpp b/waterbox/ares64/ares/nall/algorithm.hpp new file mode 100644 index 0000000000..34565f11e2 --- /dev/null +++ b/waterbox/ares64/ares/nall/algorithm.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include + +#undef min +#undef max + +namespace nall { + +template constexpr auto min(const T& t, const U& u) -> T { + return t < u ? 
t : (T)u; +} + +template constexpr auto min(const T& t, const U& u, P&&... p) -> T { + return t < u ? min(t, forward

(p)...) : min(u, forward

(p)...); +} + +template constexpr auto max(const T& t, const U& u) -> T { + return t > u ? t : (T)u; +} + +template constexpr auto max(const T& t, const U& u, P&&... p) -> T { + return t > u ? max(t, forward

(p)...) : max(u, forward

(p)...); +} + +} diff --git a/waterbox/ares64/ares/nall/any.hpp b/waterbox/ares64/ares/nall/any.hpp new file mode 100644 index 0000000000..d7e789a9fa --- /dev/null +++ b/waterbox/ares64/ares/nall/any.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include +#include + +namespace nall { + +struct any { + any() = default; + any(const any& source) { operator=(source); } + any(any&& source) { operator=(move(source)); } + template any(const T& value) { operator=(value); } + ~any() { reset(); } + + explicit operator bool() const { return container; } + auto reset() -> void { if(container) { delete container; container = nullptr; } } + + auto type() const -> const std::type_info& { + return container ? container->type() : typeid(void); + } + + template auto is() const -> bool { + return type() == typeid(typename remove_reference::type); + } + + template auto get() -> T& { + if(!is()) throw; + return static_cast::type>*>(container)->value; + } + + template auto get() const -> const T& { + if(!is()) throw; + return static_cast::type>*>(container)->value; + } + + template auto get(const T& fallback) const -> const T& { + if(!is()) return fallback; + return static_cast::type>*>(container)->value; + } + + template auto operator=(const T& value) -> any& { + using auto_t = typename conditional::value, typename remove_extent::type>::type*, T>::type; + + if(type() == typeid(auto_t)) { + static_cast*>(container)->value = (auto_t)value; + } else { + if(container) delete container; + container = new holder((auto_t)value); + } + + return *this; + } + + auto operator=(const any& source) -> any& { + if(container) { delete container; container = nullptr; } + if(source.container) container = source.container->copy(); + return *this; + } + + auto operator=(any&& source) -> any& { + if(container) delete container; + container = source.container; + source.container = nullptr; + return *this; + } + +private: + struct placeholder { + virtual ~placeholder() = default; + virtual auto type() const -> const 
std::type_info& = 0; + virtual auto copy() const -> placeholder* = 0; + }; + placeholder* container = nullptr; + + template struct holder : placeholder { + holder(const T& value) : value(value) {} + auto type() const -> const std::type_info& { return typeid(T); } + auto copy() const -> placeholder* { return new holder(value); } + T value; + }; +}; + +} diff --git a/waterbox/ares64/ares/nall/arguments.hpp b/waterbox/ares64/ares/nall/arguments.hpp new file mode 100644 index 0000000000..7ecb536bff --- /dev/null +++ b/waterbox/ares64/ares/nall/arguments.hpp @@ -0,0 +1,161 @@ +#pragma once + +#include +#include +#include +#include + +namespace nall { + +struct Arguments { + Arguments(int argc, char** argv); + Arguments(vector arguments); + + explicit operator bool() const { return (bool)arguments; } + auto size() const -> u32 { return arguments.size(); } + + auto operator[](u32 index) -> string& { return arguments[index]; } + auto operator[](u32 index) const -> const string& { return arguments[index]; } + + auto programPath() const -> string; + auto programName() const -> string; + auto programLocation() const -> string; + + auto find(string_view name) const -> bool; + auto find(string_view name, bool& argument) const -> bool; + auto find(string_view name, string& argument) const -> bool; + + auto begin() const { return arguments.begin(); } + auto end() const { return arguments.end(); } + + auto rbegin() const { return arguments.rbegin(); } + auto rend() const { return arguments.rend(); } + + auto take() -> string; + auto take(string_view name) -> bool; + auto take(string_view name, bool& argument) -> bool; + auto take(string_view name, string& argument) -> bool; + + auto begin() { return arguments.begin(); } + auto end() { return arguments.end(); } + + auto rbegin() { return arguments.rbegin(); } + auto rend() { return arguments.rend(); } + +private: + auto construct() -> void; + + string programArgument; + vector arguments; +}; + +inline auto Arguments::construct() -> 
void { + if(!arguments) return; + + //extract and pre-process program argument + programArgument = arguments.takeFirst(); + programArgument = {Path::real(programArgument), Location::file(programArgument)}; + + //normalize path and file arguments + for(auto& argument : arguments) { + if(directory::exists(argument)) argument.transform("\\", "/").trimRight("/").append("/"); + else if(file::exists(argument)) argument.transform("\\", "/").trimRight("/"); + } +} + +inline Arguments::Arguments(int argc, char** argv) { + #if defined(PLATFORM_WINDOWS) + utf8_arguments(argc, argv); + #endif + for(u32 index : range(argc)) arguments.append(argv[index]); + construct(); +} + +inline Arguments::Arguments(vector arguments) { + this->arguments = arguments; + construct(); +} + +inline auto Arguments::programPath() const -> string { + return Location::path(programArgument); +} + +inline auto Arguments::programName() const -> string { + return Location::file(programArgument); +} + +inline auto Arguments::programLocation() const -> string { + return programArgument; +} + +inline auto Arguments::find(string_view name) const -> bool { + for(u32 index : range(arguments.size())) { + if(arguments[index].match(name)) { + return true; + } + } + return false; +} + +inline auto Arguments::find(string_view name, bool& argument) const -> bool { + for(u32 index : range(arguments.size())) { + if(arguments[index].match(name) && arguments.size() >= index + && (arguments[index + 1] == "true" || arguments[index + 1] == "false")) { + argument = arguments[index + 1] == "true"; + return true; + } + } + return false; +} + +inline auto Arguments::find(string_view name, string& argument) const -> bool { + for(u32 index : range(arguments.size())) { + if(arguments[index].match(name) && arguments.size() >= index) { + argument = arguments[index + 1]; + return true; + } + } + return false; +} + +// + +inline auto Arguments::take() -> string { + if(!arguments) return {}; + return arguments.takeFirst(); +} + 
+inline auto Arguments::take(string_view name) -> bool { + for(u32 index : range(arguments.size())) { + if(arguments[index].match(name)) { + arguments.remove(index); + return true; + } + } + return false; +} + +inline auto Arguments::take(string_view name, bool& argument) -> bool { + for(u32 index : range(arguments.size())) { + if(arguments[index].match(name) && arguments.size() > index + 1 + && (arguments[index + 1] == "true" || arguments[index + 1] == "false")) { + arguments.remove(index); + argument = arguments.take(index) == "true"; + return true; + } + } + return false; +} + +inline auto Arguments::take(string_view name, string& argument) -> bool { + for(u32 index : range(arguments.size())) { + if(arguments[index].match(name) && arguments.size() > index + 1) { + arguments.remove(index); + argument = arguments.take(index); + return true; + } + } + return false; +} + +} diff --git a/waterbox/ares64/ares/nall/arithmetic.hpp b/waterbox/ares64/ares/nall/arithmetic.hpp new file mode 100644 index 0000000000..baf00a9fb2 --- /dev/null +++ b/waterbox/ares64/ares/nall/arithmetic.hpp @@ -0,0 +1,89 @@ +#pragma once + +//multi-precision arithmetic +//warning: each size is quadratically more expensive than the size before it! 
+ +#include +#include +#include +#include + +#include + +namespace nall { + template struct ArithmeticNatural; + template<> struct ArithmeticNatural< 8> { using type = u8; }; + template<> struct ArithmeticNatural< 16> { using type = u16; }; + template<> struct ArithmeticNatural< 32> { using type = u32; }; + template<> struct ArithmeticNatural< 64> { using type = u64; }; + #if defined(__SIZEOF_INT128__) + template<> struct ArithmeticNatural<128> { using type = u128; }; + #endif +} + +#if !defined(__SIZEOF_INT128__) +#define PairBits 128 +#define TypeBits 64 +#define HalfBits 32 +#include +#undef PairBits +#undef TypeBits +#undef HalfBits +#endif + +#define PairBits 256 +#define TypeBits 128 +#define HalfBits 64 +#include +#undef PairBits +#undef TypeBits +#undef HalfBits + +#define PairBits 512 +#define TypeBits 256 +#define HalfBits 128 +#include +#undef PairBits +#undef TypeBits +#undef HalfBits + +#define PairBits 1024 +#define TypeBits 512 +#define HalfBits 256 +#include +#undef PairBits +#undef TypeBits +#undef HalfBits + +#define PairBits 2048 +#define TypeBits 1024 +#define HalfBits 512 +#include +#undef PairBits +#undef TypeBits +#undef HalfBits + +#define PairBits 4096 +#define TypeBits 2048 +#define HalfBits 1024 +#include +#undef PairBits +#undef TypeBits +#undef HalfBits + +#define PairBits 8192 +#define TypeBits 4096 +#define HalfBits 2048 +#include +#undef PairBits +#undef TypeBits +#undef HalfBits + +namespace nall { + //TODO: these types are for expressing smaller bit ranges in class interfaces + //for instance, XChaCha20 taking a 192-bit nonce + //however, they still allow more bits than expressed ... 
+ //some sort of wrapper needs to be devised to ensure these sizes are masked and wrap appropriately + + using u192 = u256; +} diff --git a/waterbox/ares64/ares/nall/arithmetic/barrett.hpp b/waterbox/ares64/ares/nall/arithmetic/barrett.hpp new file mode 100644 index 0000000000..2e183370ff --- /dev/null +++ b/waterbox/ares64/ares/nall/arithmetic/barrett.hpp @@ -0,0 +1,28 @@ +#pragma once + +namespace nall { + +template struct BarrettReduction { + using type = typename ArithmeticNatural<1 * Bits>::type; + using pair = typename ArithmeticNatural<2 * Bits>::type; + + explicit BarrettReduction(type modulo) : modulo(modulo), factor(pair(1) + -pair(modulo) / modulo) {} + + //return => value % modulo + auto operator()(pair value) const -> type { + pair hi, lo; + mul(value, factor, hi, lo); + pair remainder = value - hi * modulo; + return remainder < modulo ? remainder : remainder - modulo; + } + +private: + const pair modulo; + const pair factor; +}; + +template auto operator%(T value, const BarrettReduction& modulo) { + return modulo(value); +} + +} diff --git a/waterbox/ares64/ares/nall/arithmetic/natural.hpp b/waterbox/ares64/ares/nall/arithmetic/natural.hpp new file mode 100644 index 0000000000..a6cc44ae27 --- /dev/null +++ b/waterbox/ares64/ares/nall/arithmetic/natural.hpp @@ -0,0 +1,342 @@ +#define ConcatenateType(Size) u##Size +#define DeclareType(Size) ConcatenateType(Size) + +#define Pair DeclareType(PairBits) +#define Type DeclareType(TypeBits) +#define Half DeclareType(HalfBits) + +//pick the larger of two types to prevent unnecessary data clamping +#define Cast (typename conditional= sizeof(T), Pair, T>::type) + +namespace nall { +//namespace Arithmetic { + +struct Pair { + Pair() = default; + explicit constexpr Pair(const Pair& source) : hi(source.hi), lo(source.lo) {} + template constexpr Pair(const Hi& hi, const Lo& lo) : hi(hi), lo(lo) {} + template Pair(const T& source) { _set(*this, source); } + + explicit operator bool() const { return hi | lo; } + 
template operator T() const { T value; _get(*this, value); return value; } + + auto operator+() const -> Pair { return *this; } + auto operator-() const -> Pair { return Pair(0) - *this; } + auto operator~() const -> Pair { return {~hi, ~lo}; } + auto operator!() const -> bool { return !(hi || lo); } + + auto operator++() -> Pair& { lo++; hi += lo == 0; return *this; } + auto operator--() -> Pair& { hi -= lo == 0; lo--; return *this; } + + auto operator++(s32) -> Pair { Pair r = *this; lo++; hi += lo == 0; return r; } + auto operator--(s32) -> Pair { Pair r = *this; hi -= lo == 0; lo--; return r; } + + auto operator* (const Pair& rhs) const -> Pair { return mul(*this, rhs); } + auto operator/ (const Pair& rhs) const -> Pair { Pair q, r; div(*this, rhs, q, r); return q; } + auto operator% (const Pair& rhs) const -> Pair { Pair q, r; div(*this, rhs, q, r); return r; } + auto operator+ (const Pair& rhs) const -> Pair { return {hi + rhs.hi + (lo + rhs.lo < lo), lo + rhs.lo}; } + auto operator- (const Pair& rhs) const -> Pair { return {hi - rhs.hi - (lo - rhs.lo > lo), lo - rhs.lo}; } + auto operator<<(const Pair& rhs) const -> Pair { return shl(*this, rhs); } + auto operator>>(const Pair& rhs) const -> Pair { return shr(*this, rhs); } + auto operator& (const Pair& rhs) const -> Pair { return {hi & rhs.hi, lo & rhs.lo}; } + auto operator| (const Pair& rhs) const -> Pair { return {hi | rhs.hi, lo | rhs.lo}; } + auto operator^ (const Pair& rhs) const -> Pair { return {hi ^ rhs.hi, lo ^ rhs.lo}; } + auto operator==(const Pair& rhs) const -> bool { return hi == rhs.hi && lo == rhs.lo; } + auto operator!=(const Pair& rhs) const -> bool { return hi != rhs.hi || lo != rhs.lo; } + auto operator>=(const Pair& rhs) const -> bool { return hi > rhs.hi || (hi == rhs.hi && lo >= rhs.lo); } + auto operator<=(const Pair& rhs) const -> bool { return hi < rhs.hi || (hi == rhs.hi && lo <= rhs.lo); } + auto operator> (const Pair& rhs) const -> bool { return hi > rhs.hi || (hi == rhs.hi && 
lo > rhs.lo); } + auto operator< (const Pair& rhs) const -> bool { return hi < rhs.hi || (hi == rhs.hi && lo < rhs.lo); } + + template auto& operator*= (const T& rhs) { return *this = *this * Pair(rhs); } + template auto& operator/= (const T& rhs) { return *this = *this / Pair(rhs); } + template auto& operator%= (const T& rhs) { return *this = *this % Pair(rhs); } + template auto& operator+= (const T& rhs) { return *this = *this + Pair(rhs); } + template auto& operator-= (const T& rhs) { return *this = *this - Pair(rhs); } + template auto& operator<<=(const T& rhs) { return *this = *this << Pair(rhs); } + template auto& operator>>=(const T& rhs) { return *this = *this >> Pair(rhs); } + template auto& operator&= (const T& rhs) { return *this = *this & Pair(rhs); } + template auto& operator|= (const T& rhs) { return *this = *this | Pair(rhs); } + template auto& operator^= (const T& rhs) { return *this = *this ^ Pair(rhs); } + + template auto operator* (const T& rhs) const { return Cast(*this) * Cast(rhs); } + template auto operator/ (const T& rhs) const { return Cast(*this) / Cast(rhs); } + template auto operator% (const T& rhs) const { return Cast(*this) % Cast(rhs); } + template auto operator+ (const T& rhs) const { return Cast(*this) + Cast(rhs); } + template auto operator- (const T& rhs) const { return Cast(*this) - Cast(rhs); } + template auto operator<<(const T& rhs) const { return Cast(*this) << Cast(rhs); } + template auto operator>>(const T& rhs) const { return Cast(*this) >> Cast(rhs); } + template auto operator& (const T& rhs) const { return Cast(*this) & Cast(rhs); } + template auto operator| (const T& rhs) const { return Cast(*this) | Cast(rhs); } + template auto operator^ (const T& rhs) const { return Cast(*this) ^ Cast(rhs); } + + template auto operator==(const T& rhs) const -> bool { return Cast(*this) == Cast(rhs); } + template auto operator!=(const T& rhs) const -> bool { return Cast(*this) != Cast(rhs); } + template auto operator>=(const T& rhs) 
const -> bool { return Cast(*this) >= Cast(rhs); } + template auto operator<=(const T& rhs) const -> bool { return Cast(*this) <= Cast(rhs); } + template auto operator> (const T& rhs) const -> bool { return Cast(*this) > Cast(rhs); } + template auto operator< (const T& rhs) const -> bool { return Cast(*this) < Cast(rhs); } + +private: + Type lo; + Type hi; + + friend auto upper(const Pair&) -> Type; + friend auto lower(const Pair&) -> Type; + friend auto bits(Pair) -> u32; + friend auto square(const Pair&) -> Pair; + friend auto square(const Pair&, Pair&, Pair&) -> void; + friend auto mul(const Pair&, const Pair&) -> Pair; + friend auto mul(const Pair&, const Pair&, Pair&, Pair&) -> void; + friend auto div(const Pair&, const Pair&, Pair&, Pair&) -> void; + template friend auto shl(const Pair&, const T&) -> Pair; + template friend auto shr(const Pair&, const T&) -> Pair; +}; + +template<> struct ArithmeticNatural { + using type = Pair; +}; + +#define ConcatenateUDL(Size) _u##Size +#define DeclareUDL(Size) ConcatenateUDL(Size) + +alwaysinline auto operator"" DeclareUDL(PairBits)(const char* s) -> Pair { + Pair p = 0; + if(s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + s += 2; + while(*s) { + auto c = *s++; + if(c == '\''); + else if(c >= '0' && c <= '9') p = (p << 4) + (c - '0'); + else if(c >= 'a' && c <= 'f') p = (p << 4) + (c - 'a' + 10); + else if(c >= 'A' && c <= 'F') p = (p << 4) + (c - 'A' + 10); + else break; + } + } else { + while(*s) { + auto c = *s++; + if(c == '\''); + else if(c >= '0' && c <= '9') p = (p << 3) + (p << 1) + (c - '0'); + else break; + } + } + return p; +} + +#undef ConcatenateUDL +#undef DeclareUDL + +template alwaysinline auto _set(Pair& lhs, const T& rhs) -> enable_if_t<(sizeof(Pair) == sizeof(T))> { + lhs = rhs; +} + +template alwaysinline auto _set(Pair& lhs, const T& rhs) -> enable_if_t<(sizeof(Pair) > sizeof(T))> { + lhs = {0, rhs}; +} + +template alwaysinline auto _set(Pair& lhs, const T& rhs) -> enable_if_t<(sizeof(Pair) < 
sizeof(T))> { + lhs = {lower(rhs) >> TypeBits, lower(rhs)}; +} + +template alwaysinline auto _get(const Pair& lhs, T& rhs) -> enable_if_t<(sizeof(T) == sizeof(Pair))> { + rhs = lhs; +} + +template alwaysinline auto _get(const Pair& lhs, T& rhs) -> enable_if_t<(sizeof(T) > sizeof(Pair))> { + rhs = {0, lhs}; +} + +template alwaysinline auto _get(const Pair& lhs, T& rhs) -> enable_if_t<(sizeof(T) < sizeof(Pair))> { + rhs = lower(lhs); +} + +alwaysinline auto upper(const Pair& value) -> Type { return value.hi; } +alwaysinline auto lower(const Pair& value) -> Type { return value.lo; } + +alwaysinline auto bits(Pair value) -> u32 { + if(value.hi) { + u32 bits = TypeBits; + while(value.hi) value.hi >>= 1, bits++; + return bits; + } else { + u32 bits = 0; + while(value.lo) value.lo >>= 1, bits++; + return bits; + } +} + +//Bits * Bits => Bits +inline auto square(const Pair& lhs) -> Pair { + static const Type Mask = (Type(0) - 1) >> HalfBits; + Type a = lhs.hi >> HalfBits, b = lhs.hi & Mask, c = lhs.lo >> HalfBits, d = lhs.lo & Mask; + Type dd = square(d), dc = d * c, db = d * b, da = d * a; + Type cc = square(c), cb = c * b; + + Pair r0 = Pair(dd); + Pair r1 = Pair(dc) + Pair(dc) + Pair(r0 >> HalfBits); + Pair r2 = Pair(db) + Pair(cc) + Pair(db) + Pair(r1 >> HalfBits); + Pair r3 = Pair(da) + Pair(cb) + Pair(cb) + Pair(da) + Pair(r2 >> HalfBits); + + return {(r3.lo & Mask) << HalfBits | (r2.lo & Mask), (r1.lo & Mask) << HalfBits | (r0.lo & Mask)}; +} + +//Bits * Bits => 2 * Bits +inline auto square(const Pair& lhs, Pair& hi, Pair& lo) -> void { + static const Type Mask = (Type(0) - 1) >> HalfBits; + Type a = lhs.hi >> HalfBits, b = lhs.hi & Mask, c = lhs.lo >> HalfBits, d = lhs.lo & Mask; + Type dd = square(d), dc = d * c, db = d * b, da = d * a; + Type cc = square(c), cb = c * b, ca = c * a; + Type bb = square(b), ba = b * a; + Type aa = square(a); + + Pair r0 = Pair(dd); + Pair r1 = Pair(dc) + Pair(dc) + Pair(r0 >> HalfBits); + Pair r2 = Pair(db) + Pair(cc) + Pair(db) + 
Pair(r1 >> HalfBits); + Pair r3 = Pair(da) + Pair(cb) + Pair(cb) + Pair(da) + Pair(r2 >> HalfBits); + Pair r4 = Pair(ca) + Pair(bb) + Pair(ca) + Pair(r3 >> HalfBits); + Pair r5 = Pair(ba) + Pair(ba) + Pair(r4 >> HalfBits); + Pair r6 = Pair(aa) + Pair(r5 >> HalfBits); + Pair r7 = Pair(r6 >> HalfBits); + + hi = {(r7.lo & Mask) << HalfBits | (r6.lo & Mask), (r5.lo & Mask) << HalfBits | (r4.lo & Mask)}; + lo = {(r3.lo & Mask) << HalfBits | (r2.lo & Mask), (r1.lo & Mask) << HalfBits | (r0.lo & Mask)}; +} + +//Bits * Bits => Bits +alwaysinline auto mul(const Pair& lhs, const Pair& rhs) -> Pair { + static const Type Mask = (Type(0) - 1) >> HalfBits; + Type a = lhs.hi >> HalfBits, b = lhs.hi & Mask, c = lhs.lo >> HalfBits, d = lhs.lo & Mask; + Type e = rhs.hi >> HalfBits, f = rhs.hi & Mask, g = rhs.lo >> HalfBits, h = rhs.lo & Mask; + + Pair r0 = Pair(d * h); + Pair r1 = Pair(c * h) + Pair(d * g) + Pair(r0 >> HalfBits); + Pair r2 = Pair(b * h) + Pair(c * g) + Pair(d * f) + Pair(r1 >> HalfBits); + Pair r3 = Pair(a * h) + Pair(b * g) + Pair(c * f) + Pair(d * e) + Pair(r2 >> HalfBits); + + return {(r3.lo & Mask) << HalfBits | (r2.lo & Mask), (r1.lo & Mask) << HalfBits | (r0.lo & Mask)}; +} + +//Bits * Bits => 2 * Bits +alwaysinline auto mul(const Pair& lhs, const Pair& rhs, Pair& hi, Pair& lo) -> void { + static const Type Mask = (Type(0) - 1) >> HalfBits; + Type a = lhs.hi >> HalfBits, b = lhs.hi & Mask, c = lhs.lo >> HalfBits, d = lhs.lo & Mask; + Type e = rhs.hi >> HalfBits, f = rhs.hi & Mask, g = rhs.lo >> HalfBits, h = rhs.lo & Mask; + + Pair r0 = Pair(d * h); + Pair r1 = Pair(c * h) + Pair(d * g) + Pair(r0 >> HalfBits); + Pair r2 = Pair(b * h) + Pair(c * g) + Pair(d * f) + Pair(r1 >> HalfBits); + Pair r3 = Pair(a * h) + Pair(b * g) + Pair(c * f) + Pair(d * e) + Pair(r2 >> HalfBits); + Pair r4 = Pair(a * g) + Pair(b * f) + Pair(c * e) + Pair(r3 >> HalfBits); + Pair r5 = Pair(a * f) + Pair(b * e) + Pair(r4 >> HalfBits); + Pair r6 = Pair(a * e) + Pair(r5 >> HalfBits); + 
Pair r7 = Pair(r6 >> HalfBits); + + hi = {(r7.lo & Mask) << HalfBits | (r6.lo & Mask), (r5.lo & Mask) << HalfBits | (r4.lo & Mask)}; + lo = {(r3.lo & Mask) << HalfBits | (r2.lo & Mask), (r1.lo & Mask) << HalfBits | (r0.lo & Mask)}; +} + +alwaysinline auto div(const Pair& lhs, const Pair& rhs, Pair& quotient, Pair& remainder) -> void { + if(!rhs) throw std::runtime_error("division by zero"); + quotient = 0, remainder = lhs; + if(!lhs || lhs < rhs) return; + + auto count = bits(lhs) - bits(rhs); + Pair x = rhs << count; + Pair y = Pair(1) << count; + if(x > remainder) x >>= 1, y >>= 1; + while(remainder >= rhs) { + if(remainder >= x) remainder -= x, quotient |= y; + x >>= 1, y >>= 1; + } +} + +template alwaysinline auto shl(const Pair& lhs, const T& rhs) -> Pair { + if(!rhs) return lhs; + auto shift = (u32)rhs; + if(shift < TypeBits) { + return {lhs.hi << shift | lhs.lo >> (TypeBits - shift), lhs.lo << shift}; + } else { + return {lhs.lo << (shift - TypeBits), 0}; + } +} + +template alwaysinline auto shr(const Pair& lhs, const T& rhs) -> Pair { + if(!rhs) return lhs; + auto shift = (u32)rhs; + if(shift < TypeBits) { + return {lhs.hi >> shift, lhs.hi << (TypeBits - shift) | lhs.lo >> shift}; + } else { + return {0, lhs.hi >> (shift - TypeBits)}; + } +} + +template alwaysinline auto rol(const Pair& lhs, const T& rhs) -> Pair { + return lhs << rhs | lhs >> (PairBits - rhs); +} + +template alwaysinline auto ror(const Pair& lhs, const T& rhs) -> Pair { + return lhs >> rhs | lhs << (PairBits - rhs); +} + +#define EI enable_if_t::value> + +template auto& operator*= (T& lhs, const Pair& rhs) { return lhs = lhs * T(rhs); } +template auto& operator/= (T& lhs, const Pair& rhs) { return lhs = lhs / T(rhs); } +template auto& operator%= (T& lhs, const Pair& rhs) { return lhs = lhs % T(rhs); } +template auto& operator+= (T& lhs, const Pair& rhs) { return lhs = lhs + T(rhs); } +template auto& operator-= (T& lhs, const Pair& rhs) { return lhs = lhs - T(rhs); } +template auto& 
operator<<=(T& lhs, const Pair& rhs) { return lhs = lhs << T(rhs); } +template auto& operator>>=(T& lhs, const Pair& rhs) { return lhs = lhs >> T(rhs); } +template auto& operator&= (T& lhs, const Pair& rhs) { return lhs = lhs & T(rhs); } +template auto& operator|= (T& lhs, const Pair& rhs) { return lhs = lhs | T(rhs); } +template auto& operator^= (T& lhs, const Pair& rhs) { return lhs = lhs ^ T(rhs); } + +template auto operator* (const T& lhs, const Pair& rhs) { return Cast(lhs) * Cast(rhs); } +template auto operator/ (const T& lhs, const Pair& rhs) { return Cast(lhs) / Cast(rhs); } +template auto operator% (const T& lhs, const Pair& rhs) { return Cast(lhs) % Cast(rhs); } +template auto operator+ (const T& lhs, const Pair& rhs) { return Cast(lhs) + Cast(rhs); } +template auto operator- (const T& lhs, const Pair& rhs) { return Cast(lhs) - Cast(rhs); } +template auto operator<<(const T& lhs, const Pair& rhs) { return Cast(lhs) << Cast(rhs); } +template auto operator>>(const T& lhs, const Pair& rhs) { return Cast(lhs) >> Cast(rhs); } +template auto operator& (const T& lhs, const Pair& rhs) { return Cast(lhs) & Cast(rhs); } +template auto operator| (const T& lhs, const Pair& rhs) { return Cast(lhs) | Cast(rhs); } +template auto operator^ (const T& lhs, const Pair& rhs) { return Cast(lhs) ^ Cast(rhs); } + +template auto operator==(const T& lhs, const Pair& rhs) { return Cast(lhs) == Cast(rhs); } +template auto operator!=(const T& lhs, const Pair& rhs) { return Cast(lhs) != Cast(rhs); } +template auto operator>=(const T& lhs, const Pair& rhs) { return Cast(lhs) >= Cast(rhs); } +template auto operator<=(const T& lhs, const Pair& rhs) { return Cast(lhs) <= Cast(rhs); } +template auto operator> (const T& lhs, const Pair& rhs) { return Cast(lhs) > Cast(rhs); } +template auto operator< (const T& lhs, const Pair& rhs) { return Cast(lhs) < Cast(rhs); } + +#undef EI + +template<> struct stringify { + stringify(Pair source) { + char _output[1 + sizeof(Pair) * 3]; + auto p = 
(char*)&_output; + do { + Pair quotient, remainder; + div(source, 10, quotient, remainder); + *p++ = remainder + '0'; + source = quotient; + } while(source); + _size = p - _output; + *p = 0; + for(s32 x = _size - 1, y = 0; x >= 0 && y < _size; x--, y++) _data[x] = _output[y]; + } + + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return _size; } + char _data[1 + sizeof(Pair) * 3]; + u32 _size; +}; + +} + +#undef ConcatenateType +#undef DeclareType +#undef Pair +#undef Type +#undef Half +#undef Cast diff --git a/waterbox/ares64/ares/nall/arithmetic/unsigned.hpp b/waterbox/ares64/ares/nall/arithmetic/unsigned.hpp new file mode 100644 index 0000000000..35e8a34b9e --- /dev/null +++ b/waterbox/ares64/ares/nall/arithmetic/unsigned.hpp @@ -0,0 +1,61 @@ +#pragma once + +namespace nall { + +template::value>> +inline auto upper(T value) -> T { + return value >> sizeof(T) * 4; +} + +template::value>> +inline auto lower(T value) -> T { + static const T Mask = ~T(0) >> sizeof(T) * 4; + return value & Mask; +} + +template::value>, enable_if_t::value>> +inline auto mul(T lhs, U rhs) -> uintmax { + return lhs * rhs; +} + +template::value>> +inline auto square(T value) -> uintmax { + return value * value; +} + +template +inline auto rol(T lhs, U rhs, enable_if_t::value>* = 0) -> T { + return lhs << rhs | lhs >> sizeof(T) * 8 - rhs; +} + +template +inline auto ror(T lhs, U rhs, enable_if_t::value>* = 0) -> T { + return lhs >> rhs | lhs << sizeof(T) * 8 - rhs; +} + +#if defined(__SIZEOF_INT128__) +inline auto operator"" _u128(const char* s) -> u128 { + u128 p = 0; + if(s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + s += 2; + while(*s) { + auto c = *s++; + if(c == '\''); + else if(c >= '0' && c <= '9') p = (p << 4) + (c - '0'); + else if(c >= 'a' && c <= 'f') p = (p << 4) + (c - 'a' + 10); + else if(c >= 'A' && c <= 'F') p = (p << 4) + (c - 'A' + 10); + else break; + } + } else { + while(*s) { + auto c = *s++; + if(c == '\''); + else if(c >= '0' && c 
<= '9') p = (p << 3) + (p << 1) + (c - '0'); + else break; + } + } + return p; +} +#endif + +} diff --git a/waterbox/ares64/ares/nall/array-span.hpp b/waterbox/ares64/ares/nall/array-span.hpp new file mode 100644 index 0000000000..fabd7c3ec6 --- /dev/null +++ b/waterbox/ares64/ares/nall/array-span.hpp @@ -0,0 +1,114 @@ +#pragma once + +#include + +namespace nall { + +template struct array_span : array_view { + using type = array_span; + using super = array_view; + + array_span() { + super::_data = nullptr; + super::_size = 0; + } + + array_span(nullptr_t) { + super::_data = nullptr; + super::_size = 0; + } + + array_span(void* data, u64 size) { + super::_data = (T*)data; + super::_size = (s32)size; + } + + template array_span(T (&data)[size]) { + super::_data = data; + super::_size = size; + } + + explicit operator bool() const { + return super::_data && super::_size > 0; + } + + explicit operator T*() { + return (T*)super::_data; + } + + T& operator*() const { + return (T&)*super::_data; + } + + auto operator++() -> type& { super::_data++; super::_size--; return *this; } + auto operator--() -> type& { super::_data--; super::_size++; return *this; } + + auto operator++(s32) -> type { auto copy = *this; ++(*this); return copy; } + auto operator--(s32) -> type { auto copy = *this; --(*this); return copy; } + + auto operator[](u32 index) -> T& { return (T&)super::operator[](index); } + + template auto data() -> U* { return (U*)super::_data; } + template auto data() const -> const U* { return (const U*)super::_data; } + + auto begin() -> iterator { return {(T*)super::_data, (u32)0}; } + auto end() -> iterator { return {(T*)super::_data, (u32)super::_size}; } + + auto rbegin() -> reverse_iterator { return {(T*)super::_data, (u32)super::_size - 1}; } + auto rend() -> reverse_iterator { return {(T*)super::_data, (u32)-1}; } + + auto write(T value) -> void { + operator[](0) = value; + super::_data++; + super::_size--; + } + + auto span(u32 offset, u32 length) -> type { + 
#ifdef DEBUG + struct out_of_bounds {}; + if(offset + length >= super::_size) throw out_of_bounds{}; + #endif + return {(T*)super::_data + offset, length}; + } + + //array_span specializations + template auto writel(U value, u32 size) -> void; + template auto writem(U value, u32 size) -> void; + template auto writevn(U value, u32 size) -> void; + template auto writevi(U value, u32 size) -> void; +}; + +//array_span + +template<> inline auto array_span::write(u8 value) -> void { + operator[](0) = value; + _data++; + _size--; +} + +template<> template inline auto array_span::writel(U value, u32 size) -> void { + for(u32 byte : range(size)) write(value >> byte * 8); +} + +template<> template inline auto array_span::writem(U value, u32 size) -> void { + for(u32 byte : reverse(range(size))) write(value >> byte * 8); +} + +template<> template inline auto array_span::writevn(U value, u32 size) -> void { + while(true) { + auto byte = value & 0x7f; + value >>= 7; + if(value == 0) return write(0x80 | byte); + write(byte); + value--; + } +} + +template<> template inline auto array_span::writevi(U value, u32 size) -> void { + bool negate = value < 0; + if(negate) value = ~value; + value = value << 1 | negate; + writevn(value); +} + +} diff --git a/waterbox/ares64/ares/nall/array-view.hpp b/waterbox/ares64/ares/nall/array-view.hpp new file mode 100644 index 0000000000..aeb07127b3 --- /dev/null +++ b/waterbox/ares64/ares/nall/array-view.hpp @@ -0,0 +1,145 @@ +#pragma once + +#include +#include +#include + +namespace nall { + +template struct array_view { + using type = array_view; + + array_view() { + _data = nullptr; + _size = 0; + } + + array_view(nullptr_t) { + _data = nullptr; + _size = 0; + } + + array_view(const void* data, u64 size) { + _data = (const T*)data; + _size = (s32)size; + } + + template array_view(const T (&data)[size]) { + _data = data; + _size = size; + } + + explicit operator bool() const { + return _data && _size > 0; + } + + explicit operator const T*() 
const { + return _data; + } + + const T& operator*() const { + return *_data; + } + + auto operator++() -> type& { _data++; _size--; return *this; } + auto operator--() -> type& { _data--; _size++; return *this; } + + auto operator++(s32) -> type { auto copy = *this; ++(*this); return copy; } + auto operator--(s32) -> type { auto copy = *this; --(*this); return copy; } + + auto operator-=(s32 distance) -> type& { _data -= distance; _size += distance; return *this; } + auto operator+=(s32 distance) -> type& { _data += distance; _size -= distance; return *this; } + + auto operator[](u32 index) const -> const T& { + #ifdef DEBUG + struct out_of_bounds {}; + if(index >= _size) throw out_of_bounds{}; + #endif + return _data[index]; + } + + auto operator()(u32 index, const T& fallback = {}) const -> T { + if(index >= _size) return fallback; + return _data[index]; + } + + template auto data() const -> const U* { return (const U*)_data; } + template auto size() const -> u64 { return _size * sizeof(T) / sizeof(U); } + + auto begin() const -> iterator_const { return {_data, (u32)0}; } + auto end() const -> iterator_const { return {_data, (u32)_size}; } + + auto rbegin() const -> reverse_iterator_const { return {_data, (u32)_size - 1}; } + auto rend() const -> reverse_iterator_const { return {_data, (u32)-1}; } + + auto read() -> T { + auto value = operator[](0); + _data++; + _size--; + return value; + } + + auto view(u32 offset, u32 length) const -> type { + #ifdef DEBUG + struct out_of_bounds {}; + if(offset + length >= _size) throw out_of_bounds{}; + #endif + return {_data + offset, length}; + } + + //array_view specializations + template auto readl(U& value, u32 size) -> U; + template auto readm(U& value, u32 size) -> U; + template auto readvn(U& value, u32 size) -> U; + template auto readvi(U& value, u32 size) -> U; + + template auto readl(U& value, u32 offset, u32 size) -> U { return view(offset, size).readl(value, size); } + + template auto readl(u32 size) -> U { U 
value; return readl(value, size); } + template auto readm(u32 size) -> U { U value; return readm(value, size); } + template auto readvn(u32 size) -> U { U value; return readvn(value, size); } + template auto readvi(u32 size) -> U { U value; return readvi(value, size); } + + template auto readl(u32 offset, u32 size) -> U { U value; return readl(value, offset, size); } + +protected: + const T* _data; + s32 _size; +}; + +//array_view + +template<> template inline auto array_view::readl(U& value, u32 size) -> U { + value = 0; + for(u32 byte : range(size)) value |= (U)read() << byte * 8; + return value; +} + +template<> template inline auto array_view::readm(U& value, u32 size) -> U { + value = 0; + for(u32 byte : reverse(range(size))) value |= (U)read() << byte * 8; + return value; +} + +template<> template inline auto array_view::readvn(U& value, u32 size) -> U { + value = 0; + u32 shift = 1; + while(true) { + auto byte = read(); + value += (byte & 0x7f) * shift; + if(byte & 0x80) break; + shift <<= 7; + value += shift; + } + return value; +} + +template<> template inline auto array_view::readvi(U& value, u32 size) -> U { + value = readvn(); + bool negate = value & 1; + value >>= 1; + if(negate) value = ~value; + return value; +} + +} diff --git a/waterbox/ares64/ares/nall/array.hpp b/waterbox/ares64/ares/nall/array.hpp new file mode 100644 index 0000000000..86f0d31cc4 --- /dev/null +++ b/waterbox/ares64/ares/nall/array.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include + +namespace nall { + +template struct array; + +//usage: s32 x[256] => array x +template struct array { + array() = default; + + array(const initializer_list& source) { + u32 index = 0; + for(auto& value : source) { + operator[](index++) = value; + } + } + + operator array_span() { + return {data(), size()}; + } + + operator array_view() const { + return {data(), size()}; + } + + alwaysinline auto operator[](u32 index) -> T& { + #ifdef DEBUG + struct out_of_bounds {}; + 
if(index >= Size) throw out_of_bounds{}; + #endif + return values[index]; + } + + alwaysinline auto operator[](u32 index) const -> const T& { + #ifdef DEBUG + struct out_of_bounds {}; + if(index >= Size) throw out_of_bounds{}; + #endif + return values[index]; + } + + alwaysinline auto operator()(u32 index, const T& fallback = {}) const -> const T& { + if(index >= Size) return fallback; + return values[index]; + } + + auto fill(const T& fill = {}) -> array& { + for(auto& value : values) value = fill; + return *this; + } + + auto data() -> T* { return values; } + auto data() const -> const T* { return values; } + auto size() const -> u32 { return Size; } + + auto begin() -> T* { return &values[0]; } + auto end() -> T* { return &values[Size]; } + + auto begin() const -> const T* { return &values[0]; } + auto end() const -> const T* { return &values[Size]; } + +private: + T values[Size]; +}; + +template inline auto from_array(u32 index) -> T { + static const array table{p...}; + struct out_of_bounds {}; + #if defined(DEBUG) + if(index >= sizeof...(p)) throw out_of_bounds{}; + #endif + return table[index]; +} + +template inline auto from_array(u32 index) -> s64 { + static const array table{p...}; + struct out_of_bounds {}; + #if defined(DEBUG) + if(index >= sizeof...(p)) throw out_of_bounds{}; + #endif + return table[index]; +} + +} diff --git a/waterbox/ares64/ares/nall/atoi.hpp b/waterbox/ares64/ares/nall/atoi.hpp new file mode 100644 index 0000000000..e617fe0f76 --- /dev/null +++ b/waterbox/ares64/ares/nall/atoi.hpp @@ -0,0 +1,87 @@ +#pragma once + +#include + +namespace nall { + +constexpr inline auto toBinary_(const char* s, u64 sum = 0) -> u64 { + return ( + *s == '0' || *s == '1' ? toBinary_(s + 1, (sum << 1) | *s - '0') : + *s == '\'' ? toBinary_(s + 1, sum) : + sum + ); +} + +constexpr inline auto toOctal_(const char* s, u64 sum = 0) -> u64 { + return ( + *s >= '0' && *s <= '7' ? toOctal_(s + 1, (sum << 3) | *s - '0') : + *s == '\'' ? 
toOctal_(s + 1, sum) : + sum + ); +} + +constexpr inline auto toDecimal_(const char* s, u64 sum = 0) -> u64 { + return ( + *s >= '0' && *s <= '9' ? toDecimal_(s + 1, (sum * 10) + *s - '0') : + *s == '\'' ? toDecimal_(s + 1, sum) : + sum + ); +} + +constexpr inline auto toHex_(const char* s, u64 sum = 0) -> u64 { + return ( + *s >= 'A' && *s <= 'F' ? toHex_(s + 1, (sum << 4) | *s - 'A' + 10) : + *s >= 'a' && *s <= 'f' ? toHex_(s + 1, (sum << 4) | *s - 'a' + 10) : + *s >= '0' && *s <= '9' ? toHex_(s + 1, (sum << 4) | *s - '0') : + *s == '\'' ? toHex_(s + 1, sum) : + sum + ); +} + +// + +constexpr inline auto toBinary(const char* s) -> u64 { + return ( + *s == '0' && (*(s + 1) == 'B' || *(s + 1) == 'b') ? toBinary_(s + 2) : + *s == '%' ? toBinary_(s + 1) : toBinary_(s) + ); +} + +constexpr inline auto toOctal(const char* s) -> u64 { + return ( + *s == '0' && (*(s + 1) == 'O' || *(s + 1) == 'o') ? toOctal_(s + 2) : + toOctal_(s) + ); +} + +constexpr inline auto toHex(const char* s) -> u64 { + return ( + *s == '0' && (*(s + 1) == 'X' || *(s + 1) == 'x') ? toHex_(s + 2) : + *s == '$' ? toHex_(s + 1) : toHex_(s) + ); +} + +// + +constexpr inline auto toNatural(const char* s) -> u64 { + return ( + *s == '0' && (*(s + 1) == 'B' || *(s + 1) == 'b') ? toBinary_(s + 2) : + *s == '0' && (*(s + 1) == 'O' || *(s + 1) == 'o') ? toOctal_(s + 2) : + *s == '0' && (*(s + 1) == 'X' || *(s + 1) == 'x') ? toHex_(s + 2) : + *s == '%' ? toBinary_(s + 1) : *s == '$' ? toHex_(s + 1) : toDecimal_(s) + ); +} + +constexpr inline auto toInteger(const char* s) -> s64 { + return ( + *s == '+' ? +toNatural(s + 1) : *s == '-' ? 
-toNatural(s + 1) : toNatural(s) + ); +} + +// + +inline auto toReal(const char* s) -> f64 { + return atof(s); +} + +} diff --git a/waterbox/ares64/ares/nall/beat/single/apply.hpp b/waterbox/ares64/ares/nall/beat/single/apply.hpp new file mode 100644 index 0000000000..81eb0ad02e --- /dev/null +++ b/waterbox/ares64/ares/nall/beat/single/apply.hpp @@ -0,0 +1,88 @@ +#pragma once + +namespace nall::Beat::Single { + +inline auto apply(array_view source, array_view beat, maybe manifest = {}, maybe result = {}) -> maybe> { + #define error(text) { if(result) *result = {"error: ", text}; return {}; } + #define warning(text) { if(result) *result = {"warning: ", text}; return target; } + #define success() { if(result) *result = ""; return target; } + if(beat.size() < 19) error("beat size mismatch"); + + vector target; + + u32 beatOffset = 0; + auto read = [&]() -> u8 { + return beat[beatOffset++]; + }; + + auto decode = [&]() -> u64 { + u64 data = 0, shift = 1; + while(true) { + u8 x = read(); + data += (x & 0x7f) * shift; + if(x & 0x80) break; + shift <<= 7; + data += shift; + } + return data; + }; + + auto write = [&](u8 data) { + target.append(data); + }; + + if(read() != 'B') error("beat header invalid"); + if(read() != 'P') error("beat header invalid"); + if(read() != 'S') error("beat header invalid"); + if(read() != '1') error("beat version mismatch"); + if(decode() != source.size()) error("source size mismatch"); + u32 targetSize = decode(); + target.reserve(targetSize); + u32 metadataSize = decode(); + for(u32 n : range(metadataSize)) { + auto data = read(); + if(manifest) manifest->append((char)data); + } + + enum : u32 { SourceRead, TargetRead, SourceCopy, TargetCopy }; + + u32 sourceRelativeOffset = 0, targetRelativeOffset = 0; + while(beatOffset < beat.size() - 12) { + u32 length = decode(); + u32 mode = length & 3; + length = (length >> 2) + 1; + + if(mode == SourceRead) { + while(length--) write(source[target.size()]); + } else if(mode == TargetRead) { + 
while(length--) write(read()); + } else { + s32 offset = decode(); + offset = offset & 1 ? -(offset >> 1) : (offset >> 1); + if(mode == SourceCopy) { + sourceRelativeOffset += offset; + while(length--) write(source[sourceRelativeOffset++]); + } else { + targetRelativeOffset += offset; + while(length--) write(target[targetRelativeOffset++]); + } + } + } + + u32 sourceHash = 0, targetHash = 0, beatHash = 0; + for(u32 shift : range(0, 32, 8)) sourceHash |= read() << shift; + for(u32 shift : range(0, 32, 8)) targetHash |= read() << shift; + for(u32 shift : range(0, 32, 8)) beatHash |= read() << shift; + + if(target.size() != targetSize) warning("target size mismatch"); + if(sourceHash != Hash::CRC32(source).value()) warning("source hash mismatch"); + if(targetHash != Hash::CRC32(target).value()) warning("target hash mismatch"); + if(beatHash != Hash::CRC32({beat.data(), beat.size() - 4}).value()) warning("beat hash mismatch"); + + success(); + #undef error + #undef warning + #undef success +} + +} diff --git a/waterbox/ares64/ares/nall/beat/single/create.hpp b/waterbox/ares64/ares/nall/beat/single/create.hpp new file mode 100644 index 0000000000..e0da04b283 --- /dev/null +++ b/waterbox/ares64/ares/nall/beat/single/create.hpp @@ -0,0 +1,99 @@ +#pragma once + +#include + +namespace nall::Beat::Single { + +inline auto create(array_view source, array_view target, string_view manifest = {}) -> vector { + vector beat; + + auto write = [&](u8 data) { + beat.append(data); + }; + + auto encode = [&](u64 data) { + while(true) { + u64 x = data & 0x7f; + data >>= 7; + if(data == 0) { write(0x80 | x); break; } + write(x); + data--; + } + }; + + write('B'), write('P'), write('S'), write('1'); + encode(source.size()), encode(target.size()), encode(manifest.size()); + for(auto& byte : manifest) write(byte); + + //generating lrcp() arrays for source requires O(4n) computations, and O(16m) memory, + //but it reduces find() complexity from O(n log m) to O(n + log m). 
and yet in practice, + //no matter how large n scales to, the O(n + log m) find() is paradoxically slower. + auto sourceArray = SuffixArray(source); + auto targetArray = SuffixArray(target).lpf(); + + enum : u32 { SourceRead, TargetRead, SourceCopy, TargetCopy }; + u32 outputOffset = 0, sourceRelativeOffset = 0, targetRelativeOffset = 0; + + u32 targetReadLength = 0; + auto flush = [&] { + if(!targetReadLength) return; + encode(TargetRead | ((targetReadLength - 1) << 2)); + u32 offset = outputOffset - targetReadLength; + while(targetReadLength) write(target[offset++]), targetReadLength--; + }; + + u32 overlap = min(source.size(), target.size()); + while(outputOffset < target.size()) { + u32 mode = TargetRead, longestLength = 3, longestOffset = 0; + s32 length = 0, offset = outputOffset; + + while(offset < overlap) { + if(source[offset] != target[offset]) break; + length++, offset++; + } + if(length > longestLength) { + mode = SourceRead, longestLength = length; + } + + sourceArray.find(length, offset, {target.data() + outputOffset, target.size() - outputOffset}); + if(length > longestLength) { + mode = SourceCopy, longestLength = length, longestOffset = offset; + } + + targetArray.previous(length, offset, outputOffset); + if(length > longestLength) { + mode = TargetCopy, longestLength = length, longestOffset = offset; + } + + if(mode == TargetRead) { + targetReadLength++; //queue writes to group sequential commands + outputOffset++; + } else { + flush(); + encode(mode | ((longestLength - 1) << 2)); + if(mode == SourceCopy) { + s32 relativeOffset = longestOffset - sourceRelativeOffset; + sourceRelativeOffset = longestOffset + longestLength; + encode(relativeOffset < 0 | abs(relativeOffset) << 1); + } + if(mode == TargetCopy) { + s32 relativeOffset = longestOffset - targetRelativeOffset; + targetRelativeOffset = longestOffset + longestLength; + encode(relativeOffset < 0 | abs(relativeOffset) << 1); + } + outputOffset += longestLength; + } + } + flush(); + + auto 
sourceHash = Hash::CRC32(source); + for(u32 shift : range(0, 32, 8)) write(sourceHash.value() >> shift); + auto targetHash = Hash::CRC32(target); + for(u32 shift : range(0, 32, 8)) write(targetHash.value() >> shift); + auto beatHash = Hash::CRC32(beat); + for(u32 shift : range(0, 32, 8)) write(beatHash.value() >> shift); + + return beat; +} + +} diff --git a/waterbox/ares64/ares/nall/bit.hpp b/waterbox/ares64/ares/nall/bit.hpp new file mode 100644 index 0000000000..b259c12201 --- /dev/null +++ b/waterbox/ares64/ares/nall/bit.hpp @@ -0,0 +1,141 @@ +#pragma once + +#include + +namespace nall { + +template inline auto uclamp(T x) -> u64 { + enum : u64 { b = 1ull << (bits - 1), y = b * 2 - 1 }; + if constexpr(is_unsigned_v) { + return y + ((x - y) & -(x < y)); //min(x, y); + } + if constexpr(is_signed_v) { + return x < 0 ? 0 : x > y ? y : x; + } +} + +template inline auto uclip(u64 x) -> u64 { + enum : u64 { b = 1ull << (bits - 1), m = b * 2 - 1 }; + return (x & m); +} + +template inline auto sclamp(s64 x) -> s64 { + enum : s64 { b = 1ull << (bits - 1), m = b - 1 }; + return (x > m) ? m : (x < -b) ? -b : x; +} + +template inline auto sclip(s64 x) -> s64 { + enum : u64 { b = 1ull << (bits - 1), m = b * 2 - 1 }; + return ((x & m) ^ b) - b; +} + +namespace bit { + constexpr inline auto mask(const char* s, u64 sum = 0) -> u64 { + return ( + *s == '0' || *s == '1' ? mask(s + 1, (sum << 1) | 1) : + *s == ' ' || *s == '_' ? mask(s + 1, sum) : + *s ? mask(s + 1, sum << 1) : + sum + ); + } + + constexpr inline auto test(const char* s, u64 sum = 0) -> u64 { + return ( + *s == '0' || *s == '1' ? test(s + 1, (sum << 1) | (*s - '0')) : + *s == ' ' || *s == '_' ? test(s + 1, sum) : + *s ? 
test(s + 1, sum << 1) : + sum + ); + } + + //lowest(0b1110) == 0b0010 + constexpr inline auto lowest(const u64 x) -> u64 { + return x & -x; + } + + //clear_lowest(0b1110) == 0b1100 + constexpr inline auto clearLowest(const u64 x) -> u64 { + return x & (x - 1); + } + + //set_lowest(0b0101) == 0b0111 + constexpr inline auto setLowest(const u64 x) -> u64 { + return x | (x + 1); + } + + //count number of bits set in a byte + constexpr inline auto count(u64 x) -> u32 { + u32 count = 0; + while(x) x &= x - 1, count++; //clear the least significant bit + return count; + } + + //return index of the first bit set (or zero of no bits are set) + //first(0b1000) == 3 + constexpr inline auto first(u64 x) -> u32 { + u32 first = 0; + while(x) { if(x & 1) break; x >>= 1; first++; } + return first; + } + + //round up to next highest single bit: + //round(15) == 16, round(16) == 16, round(17) == 32 + constexpr inline auto round(u64 x) -> u64 { + if((x & (x - 1)) == 0) return x; + while(x & (x - 1)) x &= x - 1; + return x << 1; + } + + template + constexpr inline auto reverse(T x) -> T { + static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); + if constexpr(sizeof(T) == 1) { + #if __has_builtin(__builtin_bitreverse8) + return __builtin_bitreverse8(x); + #else + x = (x & 0xaa) >> 1 | (x & 0x55) << 1; + x = (x & 0xcc) >> 2 | (x & 0x33) << 2; + x = (x & 0xf0) >> 4 | (x & 0x0f) << 4; + return x; + #endif + } + if constexpr(sizeof(T) == 2) { + #if __has_builtin(__builtin_bitreverse16) + return __builtin_bitreverse16(x); + #else + x = (x & 0xaaaa) >> 1 | (x & 0x5555) << 1; + x = (x & 0xcccc) >> 2 | (x & 0x3333) << 2; + x = (x & 0xf0f0) >> 4 | (x & 0x0f0f) << 4; + x = (x & 0xff00) >> 8 | (x & 0x00ff) << 8; + return x; + #endif + } + if constexpr(sizeof(T) == 4) { + #if __has_builtin(__builtin_bitreverse32) + return __builtin_bitreverse32(x); + #else + x = (x & 0xaaaaaaaa) >> 1 | (x & 0x55555555) << 1; + x = (x & 0xcccccccc) >> 2 | (x & 0x33333333) << 2; + x = 
(x & 0xf0f0f0f0) >> 4 | (x & 0x0f0f0f0f) << 4; + x = (x & 0xff00ff00) >> 8 | (x & 0x00ff00ff) << 8; + x = (x & 0xffff0000) >> 16 | (x & 0x0000ffff) << 16; + return x; + #endif + } + if constexpr(sizeof(T) == 8) { + #if __has_builtin(__builtin_bitreverse64) + return __builtin_bitreverse64(x); + #else + x = (x & 0xaaaaaaaaaaaaaaaaULL) >> 1 | (x & 0x5555555555555555ULL) << 1; + x = (x & 0xccccccccccccccccULL) >> 2 | (x & 0x3333333333333333ULL) << 2; + x = (x & 0xf0f0f0f0f0f0f0f0ULL) >> 4 | (x & 0x0f0f0f0f0f0f0f0fULL) << 4; + x = (x & 0xff00ff00ff00ff00ULL) >> 8 | (x & 0x00ff00ff00ff00ffULL) << 8; + x = (x & 0xffff0000ffff0000ULL) >> 16 | (x & 0x0000ffff0000ffffULL) << 16; + x = (x & 0xffffffff00000000ULL) >> 32 | (x & 0x00000000ffffffffULL) << 32; + return x; + #endif + } + } +} + +} diff --git a/waterbox/ares64/ares/nall/bump-allocator.hpp b/waterbox/ares64/ares/nall/bump-allocator.hpp new file mode 100644 index 0000000000..dbf1e52adc --- /dev/null +++ b/waterbox/ares64/ares/nall/bump-allocator.hpp @@ -0,0 +1,107 @@ +#pragma once + +#include + +namespace nall { + +struct bump_allocator { + static constexpr u32 executable = 1 << 0; + static constexpr u32 zero_fill = 1 << 1; + + ~bump_allocator() { + reset(); + } + + explicit operator bool() const { + return _memory; + } + + auto reset() -> void { + if(_owner) memory::unmap(_memory, _capacity); + _memory = nullptr; + _capacity = 0; + _offset = 0; + _owner = false; + } + + auto resize(u32 capacity, u32 flags = 0, u8* buffer = nullptr) -> bool { + reset(); + + if(buffer) { + if(flags & executable) { + memory::protect(buffer, capacity, true); + } + if(flags & zero_fill) { + memset(buffer, 0x00, capacity); + } + } else { + buffer = (u8*)memory::map(capacity, flags & executable); + if(!buffer) return false; + _owner = true; + } + _memory = buffer; + _capacity = capacity; + + return true; + } + + //release all acquired memory + auto release(u32 flags = 0) -> void { + _offset = 0; + if(flags & zero_fill) memset(_memory, 0x00, 
_capacity); + } + + auto capacity() const -> u32 { + return _capacity; + } + + auto available() const -> u32 { + return _capacity - _offset; + } + + //for allocating blocks of known size + auto acquire(u32 size) -> u8* { + #ifdef DEBUG + struct out_of_memory {}; + if((nextOffset(size)) > _capacity) throw out_of_memory{}; + #endif + auto memory = _memory + _offset; + _offset = nextOffset(size); //alignment + return memory; + } + + //for allocating blocks of unknown size (eg for a dynamic recompiler code block) + auto acquire() -> u8* { + #ifdef DEBUG + struct out_of_memory {}; + if(_offset > _capacity) throw out_of_memory{}; + #endif + return _memory + _offset; + } + + //size can be reserved once the block size is known + auto reserve(u32 size) -> void { + #ifdef DEBUG + struct out_of_memory {}; + if((nextOffset(size)) > _capacity) throw out_of_memory{}; + #endif + _offset = nextOffset(size); //alignment + } + + auto tryAcquire(u32 size) -> u8* { + if((nextOffset(size)) > _capacity) return nullptr; + return acquire(size); + } + +private: + auto nextOffset(u32 size) const -> u32 { + return _offset + size + 15 & ~15; + } + + u8* _memory = nullptr; + u32 _capacity = 0; + u32 _offset = 0; + bool _owner = false; +}; + +} diff --git a/waterbox/ares64/ares/nall/cd.hpp b/waterbox/ares64/ares/nall/cd.hpp new file mode 100644 index 0000000000..9fca9abab7 --- /dev/null +++ b/waterbox/ares64/ares/nall/cd.hpp @@ -0,0 +1,31 @@ +#pragma once + +/* CD-ROM sector functions. 
+ * + * Implemented: + * eight-to-fourteen modulation (encoding and decoding) + * sync header creation and verification + * error detection code creation and verification + * reed-solomon product-code creation and verification + * sector scrambling and descrambling (currently unverified) + * + * Unimplemented: + * reed-solomon product-code correction + * cross-interleave reed-solomon creation, verification, and correction + * CD-ROM XA mode 2 forms 1 & 2 support + * subcode insertion and removal + * subcode decoding from CUE files + * channel frame expansion and reduction + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include diff --git a/waterbox/ares64/ares/nall/cd/crc16.hpp b/waterbox/ares64/ares/nall/cd/crc16.hpp new file mode 100644 index 0000000000..edd3826618 --- /dev/null +++ b/waterbox/ares64/ares/nall/cd/crc16.hpp @@ -0,0 +1,18 @@ +#pragma once + +//CRC-16/KERMIT + +namespace nall::CD { + +inline auto CRC16(array_view data) -> u16 { + u16 crc = 0; + while(data) { + crc ^= *data++ << 8; + for(u32 bit : range(8)) { + crc = crc << 1 ^ (crc & 0x8000 ? 0x1021 : 0); + } + } + return ~crc; +} + +} diff --git a/waterbox/ares64/ares/nall/cd/edc.hpp b/waterbox/ares64/ares/nall/cd/edc.hpp new file mode 100644 index 0000000000..22c7a51378 --- /dev/null +++ b/waterbox/ares64/ares/nall/cd/edc.hpp @@ -0,0 +1,65 @@ +#pragma once + +//error detection code + +namespace nall::CD::EDC { + +//polynomial(x) = (x^16 + x^15 + x^2 + 1) * (x^16 + x^2 + x + 1) +inline auto polynomial(u8 x) -> u32 { + static u32 lookup[256]{}; + static bool once = false; + if(!once) { once = true; + for(u32 n : range(256)) { + u32 edc = n; + for(u32 b : range(8)) edc = edc >> 1 ^ (edc & 1 ? 
0xd8018001 : 0); + lookup[n] = edc; + } + } + return lookup[x]; +} + +// + +inline auto create(array_view input) -> u32 { + u32 sum = 0; + for(auto& byte : input) sum = sum >> 8 ^ polynomial(sum ^ byte); + return sum; +} + +inline auto create(array_view input, array_span output) -> bool { + if(output.size() != 4) return false; + auto sum = create(input); + output[0] = sum >> 0; + output[1] = sum >> 8; + output[2] = sum >> 16; + output[3] = sum >> 24; + return true; +} + +inline auto createMode1(array_span sector) -> bool { + if(sector.size() != 2352) return false; + return create({sector.data(), 2064}, {sector.data() + 2064, 4}); +} + +// + +inline auto verify(array_view input, u32 edc) -> bool { + return edc == create(input); +} + +inline auto verify(array_view input, array_view compare) -> bool { + if(compare.size() != 4) return false; + auto sum = create(input); + if(compare[0] != u8(sum >> 0)) return false; + if(compare[1] != u8(sum >> 8)) return false; + if(compare[2] != u8(sum >> 16)) return false; + if(compare[3] != u8(sum >> 24)) return false; + return true; +} + +inline auto verifyMode1(array_view sector) -> bool { + if(sector.size() != 2352) return false; + return verify({sector.data(), 2064}, {sector.data() + 2064, 4}); +} + +} diff --git a/waterbox/ares64/ares/nall/cd/efm.hpp b/waterbox/ares64/ares/nall/cd/efm.hpp new file mode 100644 index 0000000000..6163263dec --- /dev/null +++ b/waterbox/ares64/ares/nall/cd/efm.hpp @@ -0,0 +1,68 @@ +#pragma once + +//eight-to-fourteen modulation: +//separates each 1-bit by at least two 0-bits and at most ten 0-bits + +namespace nall::CD::EFM { + +//the algorithm to generate this table is unknown +inline auto lookup(u8 index) -> u16 { + static const u16 lookup[256] = { + 0x1220, 0x2100, 0x2420, 0x2220, 0x1100, 0x0110, 0x0420, 0x0900, + 0x1240, 0x2040, 0x2440, 0x2240, 0x1040, 0x0040, 0x0440, 0x0840, + 0x2020, 0x2080, 0x2480, 0x0820, 0x1080, 0x0080, 0x0480, 0x0880, + 0x1210, 0x2010, 0x2410, 0x2210, 0x1010, 0x0210, 
0x0410, 0x0810, + 0x0020, 0x2108, 0x0220, 0x0920, 0x1108, 0x0108, 0x1020, 0x0908, + 0x1248, 0x2048, 0x2448, 0x2248, 0x1048, 0x0048, 0x0448, 0x0848, + 0x0100, 0x2088, 0x2488, 0x2110, 0x1088, 0x0088, 0x0488, 0x0888, + 0x1208, 0x2008, 0x2408, 0x2208, 0x1008, 0x0208, 0x0408, 0x0808, + 0x1224, 0x2124, 0x2424, 0x2224, 0x1124, 0x0024, 0x0424, 0x0924, + 0x1244, 0x2044, 0x2444, 0x2244, 0x1044, 0x0044, 0x0444, 0x0844, + 0x2024, 0x2084, 0x2484, 0x0824, 0x1084, 0x0084, 0x0484, 0x0884, + 0x1204, 0x2004, 0x2404, 0x2204, 0x1004, 0x0204, 0x0404, 0x0804, + 0x1222, 0x2122, 0x2422, 0x2222, 0x1122, 0x0022, 0x1024, 0x0922, + 0x1242, 0x2042, 0x2442, 0x2242, 0x1042, 0x0042, 0x0442, 0x0842, + 0x2022, 0x2082, 0x2482, 0x0822, 0x1082, 0x0082, 0x0482, 0x0882, + 0x1202, 0x0248, 0x2402, 0x2202, 0x1002, 0x0202, 0x0402, 0x0802, + 0x1221, 0x2121, 0x2421, 0x2221, 0x1121, 0x0021, 0x0421, 0x0921, + 0x1241, 0x2041, 0x2441, 0x2241, 0x1041, 0x0041, 0x0441, 0x0841, + 0x2021, 0x2081, 0x2481, 0x0821, 0x1081, 0x0081, 0x0481, 0x0881, + 0x1201, 0x2090, 0x2401, 0x2201, 0x1090, 0x0201, 0x0401, 0x0890, + 0x0221, 0x2109, 0x1110, 0x0121, 0x1109, 0x0109, 0x1021, 0x0909, + 0x1249, 0x2049, 0x2449, 0x2249, 0x1049, 0x0049, 0x0449, 0x0849, + 0x0120, 0x2089, 0x2489, 0x0910, 0x1089, 0x0089, 0x0489, 0x0889, + 0x1209, 0x2009, 0x2409, 0x2209, 0x1009, 0x0209, 0x0409, 0x0809, + 0x1120, 0x2111, 0x2490, 0x0224, 0x1111, 0x0111, 0x0490, 0x0911, + 0x0241, 0x2101, 0x0244, 0x0240, 0x1101, 0x0101, 0x0090, 0x0901, + 0x0124, 0x2091, 0x2491, 0x2120, 0x1091, 0x0091, 0x0491, 0x0891, + 0x1211, 0x2011, 0x2411, 0x2211, 0x1011, 0x0211, 0x0411, 0x0811, + 0x1102, 0x0102, 0x2112, 0x0902, 0x1112, 0x0112, 0x1022, 0x0912, + 0x2102, 0x2104, 0x0249, 0x0242, 0x1104, 0x0104, 0x0422, 0x0904, + 0x0122, 0x2092, 0x2492, 0x0222, 0x1092, 0x0092, 0x0492, 0x0892, + 0x1212, 0x2012, 0x2412, 0x2212, 0x1012, 0x0212, 0x0412, 0x0812, + }; + return lookup[index]; +} + +// + +inline auto encode(u8 data) -> u16 { + return lookup(data); +} + +// + +inline auto decode(u16 
data) -> maybe { + static u16 table[1 << 14]; + static bool once = true; + if(once) { + once = false; + for(u32 n : range(1 << 14)) table[n] = 0xffff; + for(u32 n : range(1 << 8)) table[lookup(n)] = n; + } + u16 result = table[data & 0x3fff]; + if(result == 0xffff) return {}; + return (u8)result; +} + +} diff --git a/waterbox/ares64/ares/nall/cd/rspc.hpp b/waterbox/ares64/ares/nall/cd/rspc.hpp new file mode 100644 index 0000000000..11e4a795e0 --- /dev/null +++ b/waterbox/ares64/ares/nall/cd/rspc.hpp @@ -0,0 +1,128 @@ +#pragma once + +//reed-solomon product code + +namespace nall::CD::RSPC { + +inline auto encodeP(array_view input, array_span parity) -> bool { + ReedSolomon<26,24> s; + u32 lo = 0, hi = 43 * 2; + for(u32 x : range(43)) { + for(u32 w : range(2)) { //16-bit words + u32 z = 0; + for(u32 y : range(24)) { + s[z++] = input[(y * 43 + x) * 2 + w]; + } + s.generateParity(); + parity[lo++] = s[z++]; + parity[hi++] = s[z++]; + } + } + return true; +} + +inline auto encodeQ(array_view input, array_span parity) -> bool { + ReedSolomon<45,43> s; + u32 lo = 0, hi = 26 * 2; + for(u32 y : range(26)) { + for(u32 w : range(2)) { + u32 z = 0; + for(u32 x : range(43)) { + s[z++] = input[((x * 44 + y * 43) * 2 + w) % (26 * 43 * 2)]; + } + s.generateParity(); + parity[lo++] = s[z++]; + parity[hi++] = s[z++]; + } + } + return true; +} + +inline auto encodeMode1(array_span sector) -> bool { + if(sector.size() != 2352) return false; + if(!encodeP({sector.data() + 12, 2064}, {sector.data() + 2076, 172})) return false; + if(!encodeQ({sector.data() + 12, 2236}, {sector.data() + 2248, 104})) return false; + return true; +} + +// + +inline auto decodeP(array_span input, array_span parity) -> s32 { + bool success = false; + bool failure = false; + ReedSolomon<26,24> s; + u32 lo = 0, hi = 43 * 2; + for(u32 x : range(43)) { + for(u32 w : range(2)) { + u32 z = 0; + for(u32 y : range(24)) { + s[z++] = input[(y * 43 + x) * 2 + w]; + } + s[z++] = parity[lo++]; + s[z++] = parity[hi++]; + 
auto count = s.correctErrors(); + if(count < 0) { + failure = true; + } + if(count > 0) { + success = true; + z = 0; + for(u32 y : range(24)) { + input[(y * 43 + x) * 2 + w] = s[z++]; + } + parity[lo - 1] = s[z++]; + parity[hi - 1] = s[z++]; + } + } + } + if(!success && !failure) return 0; //no errors remaining + return success ? 1 : -1; //return success even if there are some failures +} + +inline auto decodeQ(array_span input, array_span parity) -> s32 { + bool success = false; + bool failure = false; + ReedSolomon<45,43> s; + u32 lo = 0, hi = 26 * 2; + for(u32 y : range(26)) { + for(u32 w : range(2)) { + u32 z = 0; + for(u32 x : range(43)) { + s[z++] = input[((x * 44 + y * 43) * 2 + w) % (26 * 43 * 2)]; + } + s[z++] = parity[lo++]; + s[z++] = parity[hi++]; + auto count = s.correctErrors(); + if(count < 0) { + failure = true; + } + if(count > 0) { + success = true; + z = 0; + for(u32 x : range(43)) { + input[((x * 44 + y * 43) * 2 + w) % (26 * 43 * 2)] = s[z++]; + } + parity[lo - 1] = s[z++]; + parity[hi - 1] = s[z++]; + } + } + } + if(!success && !failure) return 0; + return success ? 1 : -1; +} + +inline auto decodeMode1(array_span sector) -> bool { + if(sector.size() != 2352) return false; + //P corrections can allow Q corrections that previously failed to succeed, and vice versa. + //the more iterations, the more chances to correct errors, but the more computationally expensive it is. + //there must be a limit on the amount of retries, or this function may get stuck in an infinite loop. 
+ for(u32 attempt : range(4)) { + auto p = decodeP({sector.data() + 12, 2064}, {sector.data() + 2076, 172}); + auto q = decodeQ({sector.data() + 12, 2236}, {sector.data() + 2248, 104}); + if(p == 0 && q == 0) return true; //no errors remaining + if(p < 0 && q < 0) return false; //no more errors correctable + } + return false; //exhausted all retries with errors remaining +} + +} diff --git a/waterbox/ares64/ares/nall/cd/scrambler.hpp b/waterbox/ares64/ares/nall/cd/scrambler.hpp new file mode 100644 index 0000000000..43aa4fa3d5 --- /dev/null +++ b/waterbox/ares64/ares/nall/cd/scrambler.hpp @@ -0,0 +1,35 @@ +#pragma once + +namespace nall::CD::Scrambler { + +//polynomial(x) = x^15 + x + 1 +inline auto polynomial(u32 x) -> u8 { + static u8 lookup[2340]{}; + static bool once = false; + if(!once) { once = true; + u16 shift = 0x0001; + for(u32 n : range(2340)) { + lookup[n] = shift; + for(u32 b : range(8)) { + bool carry = shift & 1 ^ shift >> 1 & 1; + shift = (carry << 15 | shift) >> 1; + } + } + } + return lookup[x]; +} + +// + +inline auto transform(array_span sector) -> bool { + if(sector.size() == 2352) sector += 12; //header is not scrambled + if(sector.size() != 2340) return false; //F1 frames only + + for(u32 index : range(2340)) { + sector[index] ^= polynomial(index); + } + + return true; +} + +} diff --git a/waterbox/ares64/ares/nall/cd/session.hpp b/waterbox/ares64/ares/nall/cd/session.hpp new file mode 100644 index 0000000000..26d359591c --- /dev/null +++ b/waterbox/ares64/ares/nall/cd/session.hpp @@ -0,0 +1,494 @@ +#pragma once + +//subchannel processor +//note: this code is not tolerant to subchannel data that violates the Redbook standard + +namespace nall::CD { + +enum : s32 { InvalidLBA = 100 * 60 * 75 }; + +struct BCD { + static auto encode(u8 value) -> u8 { return value / 10 << 4 | value % 10; } + static auto decode(u8 value) -> u8 { return (value >> 4) * 10 + (value & 15); } +}; + +struct MSF { + u8 minute; //00-99 + u8 second; //00-59 + u8 frame = 
0xff; //00-74 + + MSF() = default; + MSF(u8 m, u8 s, u8 f) : minute(m), second(s), frame(f) {} + MSF(s32 lba) { *this = fromLBA(lba); } + + explicit operator bool() const { + return minute <= 99 && second <= 59 && frame <= 74; + } + + static auto fromBCD(u8 minute, u8 second, u8 frame) -> MSF { + return {BCD::decode(minute), BCD::decode(second), BCD::decode(frame)}; + } + + static auto fromLBA(s32 lba) -> MSF { + if(lba < 0) lba = 100 * 60 * 75 + lba; + if(lba >= 100 * 60 * 75) return {}; + u8 minute = lba / 75 / 60 % 100; + u8 second = lba / 75 % 60; + u8 frame = lba % 75; + return {minute, second, frame}; + } + + auto toLBA() const -> s32 { + s32 lba = minute * 60 * 75 + second * 75 + frame; + if(minute < 90) return lba; + return -(100 * 60 * 75 - lba); + } + + //for debugging purposes + auto toString() const -> string { + if(!operator bool()) return "??:??:??"; + return {pad(minute, 2, '0'), ":", pad(second, 2, '0'), ":", pad(frame, 2, '0')}; + } +}; + +struct Index { + s32 lba = InvalidLBA; + s32 end = InvalidLBA; //inclusive range + + explicit operator bool() const { + return lba != InvalidLBA; + } + + auto inRange(s32 sector) const -> bool { + if(lba == InvalidLBA || end == InvalidLBA) return false; + return sector >= lba && sector <= end; + } +}; + +struct Track { + u8 control = 0b1111; //4-bit + Index indices[100]; + u8 firstIndex = 0xff; + u8 lastIndex = 0xff; + + explicit operator bool() const { + return (bool)indices[1]; + } + + auto emphasis() const -> bool { + return control & 1; + } + + auto copyable() const -> bool { + return control & 2; + } + + auto channels() const -> u32 { + if((control & 0b1100) == 0b0000) return 2; + if((control & 0b1100) == 0b1000) return 4; + return 0; //data track or reserved + } + + auto pregap() const -> s32 { + if(!indices[0] || !indices[1]) return InvalidLBA; + return indices[1].lba - indices[0].lba; + } + + auto isAudio() const -> bool { + return channels() != 0; + } + + auto isData() const -> bool { + return (control & 
0b1100) == 0b0100; + } + + auto inIndex(s32 lba) const -> maybe { + for(u8 index : range(100)) { + if(indices[index].inRange(lba)) return index; + } + return {}; + } + + auto inRange(s32 lba) const -> bool { + if(firstIndex > 99 || lastIndex > 99) return false; + return lba >= indices[firstIndex].lba && lba <= indices[lastIndex].end; + } + + auto index(u8 indexID) -> maybe { + if(indexID < 100 && indices[indexID]) return indices[indexID]; + return {}; + } +}; + +struct Session { + Index leadIn; //00 + Track tracks[100]; //01-99 + Index leadOut; //aa + u8 firstTrack = 0xff; + u8 lastTrack = 0xff; + + auto inLeadIn(s32 lba) const -> bool { + return leadIn && lba <= leadIn.end; + } + + auto inTrack(s32 lba) const -> maybe { + for(u8 trackID : range(99)) { + auto& track = tracks[trackID+1]; + if(track && track.inRange(lba)) return trackID+1; + } + return {}; + } + + auto inLeadOut(s32 lba) const -> bool { + return leadOut && lba >= leadOut.lba; + } + + auto track(u8 trackID) -> maybe { + if(trackID >= 1 && trackID < 100 && tracks[trackID]) return tracks[trackID]; + return {}; + } + + auto encode(u32 sectors) const -> vector { + if(sectors < abs(leadIn.lba) + leadOut.lba) return {}; //not enough sectors + + vector data; + data.resize(sectors * 96 + 96); //add one sector for P shift + + auto toP = [&](s32 lba) -> array_span { + //P is encoded one sector later than Q + return {&data[(lba + abs(leadIn.lba) + 1) * 96], 12}; + }; + + auto toQ = [&](s32 lba) -> array_span { + return {&data[(lba + abs(leadIn.lba)) * 96 + 12], 12}; + }; + + //lead-in + s32 lba = leadIn.lba; + while(lba < 0) { + //tracks + for(u32 trackID : range(100)) { + for(u32 repeat : range(3)) { + auto& track = tracks[trackID]; + if(!track) continue; + auto q = toQ(lba); + q[0] = track.control << 4 | 1; + q[1] = 0x00; + q[2] = BCD::encode(trackID); + auto msf = MSF(lba); + q[3] = BCD::encode(msf.minute); + q[4] = BCD::encode(msf.second); + q[5] = BCD::encode(msf.frame); + q[6] = 0x00; + msf = 
MSF(track.indices[1].lba); + q[7] = BCD::encode(msf.minute); + q[8] = BCD::encode(msf.second); + q[9] = BCD::encode(msf.frame); + auto crc16 = CRC16({q.data(), 10}); + q[10] = crc16 >> 8; + q[11] = crc16 >> 0; + if(++lba >= 0) break; + } if( lba >= 0) break; + } if( lba >= 0) break; + + //first track + for(u32 repeat : range(3)) { + auto q = toQ(lba); + q[0] = 0x01; //control value unverified; address = 1 + q[1] = 0x00; //track# = 00 (TOC) + q[2] = 0xa0; //first track + auto msf = MSF(lba); + q[3] = BCD::encode(msf.minute); + q[4] = BCD::encode(msf.second); + q[5] = BCD::encode(msf.frame); + q[6] = 0x00; + q[7] = BCD::encode(firstTrack); + q[8] = 0x00; + q[9] = 0x00; + auto crc16 = CRC16({q.data(), 10}); + q[10] = crc16 >> 8; + q[11] = crc16 >> 0; + if(++lba >= 0) break; + } if( lba >= 0) break; + + //last track + for(u32 repeat : range(3)) { + auto q = toQ(lba); + q[0] = 0x01; + q[1] = 0x00; + q[2] = 0xa1; //last track + auto msf = MSF(lba); + q[3] = BCD::encode(msf.minute); + q[4] = BCD::encode(msf.second); + q[5] = BCD::encode(msf.frame); + q[6] = 0x00; + q[7] = BCD::encode(lastTrack); + q[8] = 0x00; + q[9] = 0x00; + auto crc16 = CRC16({q.data(), 10}); + q[10] = crc16 >> 8; + q[11] = crc16 >> 0; + if(++lba >= 0) break; + } if( lba >= 0) break; + + //lead-out point + for(u32 repeat : range(3)) { + auto q = toQ(lba); + q[0] = 0x01; + q[1] = 0x00; + q[2] = 0xa2; //lead-out point + auto msf = MSF(lba); + q[3] = BCD::encode(msf.minute); + q[4] = BCD::encode(msf.second); + q[5] = BCD::encode(msf.frame); + q[6] = 0x00; + msf = MSF(leadOut.lba); + q[7] = BCD::encode(msf.minute); + q[8] = BCD::encode(msf.second); + q[9] = BCD::encode(msf.frame); + auto crc16 = CRC16({q.data(), 10}); + q[10] = crc16 >> 8; + q[11] = crc16 >> 0; + if(++lba >= 0) break; + } if( lba >= 0) break; + } + + //tracks + s32 end = leadOut.lba; + for(u8 trackID : reverse(range(100))) { + auto& track = tracks[trackID]; + if(!track) continue; + + //indices + for(u8 indexID : reverse(range(100))) { + 
auto& index = track.indices[indexID]; + if(!index) continue; + + for(s32 lba = index.lba; lba < end; lba++) { + auto p = toP(lba); + u8 byte = indexID == 0 ? 0xff : 0x00; + for(u32 index : range(12)) p[index] = byte; + + auto q = toQ(lba); + q[0] = track.control << 4 | 1; + q[1] = BCD::encode(trackID); + q[2] = BCD::encode(indexID); + auto msf = MSF(lba - track.indices[1].lba); + q[3] = BCD::encode(msf.minute); + q[4] = BCD::encode(msf.second); + q[5] = BCD::encode(msf.frame); + q[6] = 0x00; + msf = MSF(lba); + q[7] = BCD::encode(msf.minute); + q[8] = BCD::encode(msf.second); + q[9] = BCD::encode(msf.frame); + auto crc16 = CRC16({q.data(), 10}); + q[10] = crc16 >> 8; + q[11] = crc16 >> 0; + } + + end = index.lba; + } + } + + //pre-lead-out (2-3s at the end of last track) + for(auto i : range(150)) { + auto p = toP(leadOut.lba - 150 + i); + for(auto sig : range(12)) { + p[sig]= 0xff; + }} + + //lead-out + for(s32 lba : range(sectors - abs(leadIn.lba) - leadOut.lba)) { + auto p = toP(leadOut.lba + lba); + u8 byte; + if(lba < 150) { + //2s start (standard specifies 2-3s start) + byte = 0x00; + } else { + //2hz duty cycle; rounded downward (standard specifies 2% tolerance) + byte = (lba - 150) / (75 >> 1) & 1 ? 
0x00 : 0xff; + } + for(u32 index : range(12)) p[index] = byte; + + auto q = toQ(leadOut.lba + lba); + q[0] = 0x01; + q[1] = 0xaa; //lead-out track# + q[2] = 0x01; //lead-out index# + auto msf = MSF(lba); + q[3] = BCD::encode(msf.minute); + q[4] = BCD::encode(msf.second); + q[5] = BCD::encode(msf.frame); + q[6] = 0x00; + msf = MSF(leadOut.lba + lba); + q[7] = BCD::encode(msf.minute); + q[8] = BCD::encode(msf.second); + q[9] = BCD::encode(msf.frame); + auto crc16 = CRC16({q.data(), 10}); + q[10] = crc16 >> 8; + q[11] = crc16 >> 0; + } + + data.resize(data.size() - 96); //remove padding for P shift + return data; + } + + auto decode(array_view data, u32 size, u32 leadOutSectors = 0) -> bool { + *this = {}; //reset session + //three data[] types supported: subcode Q only, subcode P-W only, data+subcode complete image + if(size != 12 && size != 96 && size != 2448) return false; + + //determine lead-in sector count + leadIn.lba = InvalidLBA; + for(s32 lba : range(7500)) { //7500 max sectors scanned + u32 offset = lba * size; + if(size == 96) offset += 12; + if(size == 2448) offset += 12 + 2352; + if(offset + 12 > data.size()) break; + auto q = array_view{&data[offset], 12}; + auto crc16 = CRC16({q.data(), 10}); + if(q[10] != u8(crc16 >> 8)) continue; + if(q[11] != u8(crc16 >> 0)) continue; + + u8 control = q[0] >> 4; + u8 address = q[0] & 15; + u8 trackID = q[1]; + if(address != 1) continue; + if(trackID != 0) continue; + + leadIn.lba = lba - 7500; + break; + } + if(leadIn.lba == InvalidLBA || leadIn.lba >= 0) return false; + + auto toQ = [&](s32 lba) -> array_view { + u32 offset = (lba + abs(leadIn.lba)) * size; + if(size == 96) offset += 12; + if(size == 2448) offset += 12 + 2352; + if(offset + 12 > data.size()) return {}; + return {&data[offset], 12}; + }; + + //lead-in + leadOut.lba = InvalidLBA; + for(s32 lba = leadIn.lba; lba < 0; lba++) { + auto q = toQ(lba); + if(!q) break; + auto crc16 = CRC16({q.data(), 10}); + if(q[10] != u8(crc16 >> 8)) continue; + if(q[11] 
!= u8(crc16 >> 0)) continue; + + u8 control = q[0] >> 4; + u8 address = q[0] & 15; + u8 trackID = q[1]; + if(address != 1) continue; + if(trackID != 0) continue; + + trackID = BCD::decode(q[2]); + + if(trackID <= 99) { //00-99 + auto& track = tracks[trackID]; + track.control = control; + track.indices[1].lba = MSF::fromBCD(q[7], q[8], q[9]).toLBA(); + } + + if(trackID == 100) { //a0 + firstTrack = BCD::decode(q[7]); + } + + if(trackID == 101) { //a1 + lastTrack = BCD::decode(q[7]); + } + + if(trackID == 102) { //a2 + leadOut.lba = MSF::fromBCD(q[7], q[8], q[9]).toLBA(); + } + } + if(leadOut.lba == InvalidLBA) return false; + + //tracks + for(s32 lba = 0; lba < leadOut.lba; lba++) { + auto q = toQ(lba); + if(!q) break; + auto crc16 = CRC16({q.data(), 10}); + if(q[10] != u8(crc16 >> 8)) continue; + if(q[11] != u8(crc16 >> 0)) continue; + + u8 control = q[0] >> 4; + u8 address = q[0] & 15; + u8 trackID = BCD::decode(q[1]); + u8 indexID = BCD::decode(q[2]); + if(address != 1) continue; + if(trackID == 0 || trackID > 99) continue; + if(indexID > 99) continue; + + auto& track = tracks[trackID]; + if(!track) continue; //track not found? + auto& index = track.indices[indexID]; + if(index) continue; //index already decoded? + + index.lba = MSF::fromBCD(q[7], q[8], q[9]).toLBA(); + } + + synchronize(leadOutSectors); + return true; + } + + //calculates Index::end variables: + //needed for Session::isTrack() and Track::isIndex() to function. 
+ auto synchronize(u32 leadOutSectors = 0) -> void { + leadIn.end = -1; + s32 end = leadOut.lba - 1; + for(u32 trackID : reverse(range(100))) { + auto& track = tracks[trackID]; + if(!track) continue; + + for(u32 indexID : reverse(range(100))) { + auto& index = track.indices[indexID]; + if(!index) continue; + + index.end = end; + end = index.lba - 1; + } + + for(u32 indexID : range(100)) { + auto& index = track.indices[indexID]; + if(index) { track.firstIndex = indexID; break; } + } + + for(u32 indexID : reverse(range(100))) { + auto& index = track.indices[indexID]; + if(index) { track.lastIndex = indexID; break; } + } + } + leadOut.end = leadOut.lba + leadOutSectors - 1; + } + + //for diagnostic use only + auto serialize() const -> string { + string s; + s.append("session\n"); + s.append(" leadIn: "); + s.append(MSF(leadIn.lba).toString(), " - ", MSF(leadIn.end).toString(), "\n"); + for(u32 trackID : range(100)) { + auto& track = tracks[trackID]; + if(!track) continue; + s.append(" track", pad(trackID, 2, '0')); + if(trackID == firstTrack) s.append(" first"); + if(trackID == lastTrack) s.append( " last"); + s.append("\n"); + s.append(" control: ", binary(track.control, 4, '0'), "\n"); + for(u32 indexID : range(100)) { + auto& index = track.indices[indexID]; + if(!index) continue; + s.append(" index", pad(indexID, 2, '0'), ": "); + s.append(MSF(index.lba).toString(), " - ", MSF(index.end).toString(), "\n"); + } + } + s.append(" leadOut: "); + s.append(MSF(leadOut.lba).toString(), " - ", MSF(leadOut.end).toString(), "\n"); + return s; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/cd/sync.hpp b/waterbox/ares64/ares/nall/cd/sync.hpp new file mode 100644 index 0000000000..6ae08fe32f --- /dev/null +++ b/waterbox/ares64/ares/nall/cd/sync.hpp @@ -0,0 +1,27 @@ +#pragma once + +namespace nall::CD::Sync { + +inline auto create(array_span sector) -> bool { + if(sector.size() != 12 && sector.size() != 2352) return false; + + for(u32 n : range(12)) { + sector[n] = ((n == 0 
|| n == 11) ? 0x00 : 0xff); + } + + return true; +} + +// + +inline auto verify(array_view sector) -> bool { + if(sector.size() != 12 && sector.size() != 2352) return false; + + for(u32 n : range(12)) { + if(sector[n] != ((n == 0 || n == 11) ? 0x00 : 0xff)) return false; + } + + return true; +} + +} diff --git a/waterbox/ares64/ares/nall/chrono.hpp b/waterbox/ares64/ares/nall/chrono.hpp new file mode 100644 index 0000000000..08b9e3e86a --- /dev/null +++ b/waterbox/ares64/ares/nall/chrono.hpp @@ -0,0 +1,180 @@ +#pragma once + +#include +#include + +namespace nall::chrono { + +//passage of time functions (from unknown epoch) + +inline auto nanosecond() -> u64 { + timespec tv; + clock_gettime(CLOCK_MONOTONIC, &tv); + return tv.tv_sec * 1'000'000'000 + tv.tv_nsec; +} + +inline auto microsecond() -> u64 { return nanosecond() / 1'000; } +inline auto millisecond() -> u64 { return nanosecond() / 1'000'000; } +inline auto second() -> u64 { return nanosecond() / 1'000'000'000; } + +inline auto benchmark(const function& f, u64 times = 1) -> void { + auto start = nanosecond(); + while(times--) f(); + auto end = nanosecond(); + print("[chrono::benchmark] ", (double)(end - start) / 1'000'000'000.0, "s\n"); +} + +//exact date/time functions (from system epoch) + +struct timeinfo { + timeinfo(u32 year = 0, u32 month = 0, u32 day = 0, u32 hour = 0, u32 minute = 0, u32 second = 0, u32 weekday = 0): + year(year), month(month), day(day), hour(hour), minute(minute), second(second), weekday(weekday) { + } + + explicit operator bool() const { return month; } + + u32 year; //... 
+ u32 month; //1 - 12 + u32 day; //1 - 31 + u32 hour; //0 - 23 + u32 minute; //0 - 59 + u32 second; //0 - 60 + u32 weekday; //0 - 6 +}; + +inline auto timestamp() -> u64 { + return ::time(nullptr); +} + +//0 = failure condition +inline auto timestamp(const string& datetime) -> u64 { + static constexpr u32 monthDays[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + u64 timestamp = 0; + if(datetime.match("??????????")) { + return datetime.natural(); + } + if(datetime.match("????*")) { + u32 year = datetime.slice(0, 4).natural(); + if(year < 1970 || year > 2199) return 0; + for(u32 y = 1970; y < year && y < 2999; y++) { + u32 daysInYear = 365; + if(y % 4 == 0 && (y % 100 != 0 || y % 400 == 0)) daysInYear++; + timestamp += daysInYear * 24 * 60 * 60; + } + } + if(datetime.match(R"(????-??*)")) { + u32 y = datetime.slice(0, 4).natural(); + u32 month = datetime.slice(5, 2).natural(); + if(month < 1 || month > 12) return 0; + for(u32 m = 1; m < month && m < 12; m++) { + u32 daysInMonth = monthDays[m - 1]; + if(m == 2 && y % 4 == 0 && (y % 100 != 0 || y % 400 == 0)) daysInMonth++; + timestamp += daysInMonth * 24 * 60 * 60; + } + } + if(datetime.match(R"(????-??-??*)")) { + u32 day = datetime.slice(8, 2).natural(); + if(day < 1 || day > 31) return 0; + timestamp += (day - 1) * 24 * 60 * 60; + } + if(datetime.match(R"(????-??-?? ??*)")) { + u32 hour = datetime.slice(11, 2).natural(); + if(hour > 23) return 0; + timestamp += hour * 60 * 60; + } + if(datetime.match(R"(????-??-?? ??:??*)")) { + u32 minute = datetime.slice(14, 2).natural(); + if(minute > 59) return 0; + timestamp += minute * 60; + } + if(datetime.match(R"(????-??-?? ??:??:??*)")) { + u32 second = datetime.slice(17, 2).natural(); + if(second > 59) return 0; + timestamp += second; + } + return timestamp; +} + +namespace utc { + inline auto timeinfo(u64 time = 0) -> chrono::timeinfo { + auto stamp = time ? 
(time_t)time : (time_t)timestamp(); + auto info = gmtime(&stamp); + return { + (u32)info->tm_year + 1900, + (u32)info->tm_mon + 1, + (u32)info->tm_mday, + (u32)info->tm_hour, + (u32)info->tm_min, + (u32)info->tm_sec, + (u32)info->tm_wday + }; + } + + inline auto year(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).year, 4, '0'); } + inline auto month(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).month, 2, '0'); } + inline auto day(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).day, 2, '0'); } + inline auto hour(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).hour, 2, '0'); } + inline auto minute(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).minute, 2, '0'); } + inline auto second(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).second, 2, '0'); } + + inline auto date(u64 timestamp = 0) -> string { + auto t = timeinfo(timestamp); + return {pad(t.year, 4, '0'), "-", pad(t.month, 2, '0'), "-", pad(t.day, 2, '0')}; + } + + inline auto time(u64 timestamp = 0) -> string { + auto t = timeinfo(timestamp); + return {pad(t.hour, 2, '0'), ":", pad(t.minute, 2, '0'), ":", pad(t.second, 2, '0')}; + } + + inline auto datetime(u64 timestamp = 0) -> string { + auto t = timeinfo(timestamp); + return { + pad(t.year, 4, '0'), "-", pad(t.month, 2, '0'), "-", pad(t.day, 2, '0'), " ", + pad(t.hour, 2, '0'), ":", pad(t.minute, 2, '0'), ":", pad(t.second, 2, '0') + }; + } +} + +namespace local { + inline auto timeinfo(u64 time = 0) -> chrono::timeinfo { + auto stamp = time ? 
(time_t)time : (time_t)timestamp(); + auto info = localtime(&stamp); + return { + (u32)info->tm_year + 1900, + (u32)info->tm_mon + 1, + (u32)info->tm_mday, + (u32)info->tm_hour, + (u32)info->tm_min, + (u32)info->tm_sec, + (u32)info->tm_wday + }; + } + + inline auto year(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).year, 4, '0'); } + inline auto month(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).month, 2, '0'); } + inline auto day(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).day, 2, '0'); } + inline auto hour(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).hour, 2, '0'); } + inline auto minute(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).minute, 2, '0'); } + inline auto second(u64 timestamp = 0) -> string { return pad(timeinfo(timestamp).second, 2, '0'); } + + inline auto date(u64 timestamp = 0) -> string { + auto t = timeinfo(timestamp); + return {pad(t.year, 4, '0'), "-", pad(t.month, 2, '0'), "-", pad(t.day, 2, '0')}; + } + + inline auto time(u64 timestamp = 0) -> string { + auto t = timeinfo(timestamp); + return {pad(t.hour, 2, '0'), ":", pad(t.minute, 2, '0'), ":", pad(t.second, 2, '0')}; + } + + inline auto datetime(u64 timestamp = 0) -> string { + auto t = timeinfo(timestamp); + return { + pad(t.year, 4, '0'), "-", pad(t.month, 2, '0'), "-", pad(t.day, 2, '0'), " ", + pad(t.hour, 2, '0'), ":", pad(t.minute, 2, '0'), ":", pad(t.second, 2, '0') + }; + } +} + +} diff --git a/waterbox/ares64/ares/nall/cipher/chacha20.hpp b/waterbox/ares64/ares/nall/cipher/chacha20.hpp new file mode 100644 index 0000000000..4d0088056a --- /dev/null +++ b/waterbox/ares64/ares/nall/cipher/chacha20.hpp @@ -0,0 +1,109 @@ +#pragma once + +#include +#include + +namespace nall::Cipher { + +//64-bit nonce; 64-bit x 64-byte (256GB) counter +struct ChaCha20 { + ChaCha20(u256 key, u64 nonce, u64 counter = 0) { + static const u128 sigma = 0x6b20657479622d323320646e61707865_u128; //"expand 32-byte k" + + input[ 
0] = sigma >> 0; + input[ 1] = sigma >> 32; + input[ 2] = sigma >> 64; + input[ 3] = sigma >> 96; + input[ 4] = key >> 0; + input[ 5] = key >> 32; + input[ 6] = key >> 64; + input[ 7] = key >> 96; + input[ 8] = key >> 128; + input[ 9] = key >> 160; + input[10] = key >> 192; + input[11] = key >> 224; + input[12] = counter >> 0; + input[13] = counter >> 32; + input[14] = nonce >> 0; + input[15] = nonce >> 32; + + offset = 0; + } + + auto encrypt(array_view input) -> vector { + vector output; + while(input) { + if(!offset) { + cipher(); + increment(); + } + auto byte = offset++; + output.append(*input++ ^ (block[byte >> 2] >> (byte & 3) * 8)); + offset &= 63; + } + return output; + } + + auto decrypt(array_view input) -> vector { + return encrypt(input); //reciprocal cipher + } + +//protected: + auto rol(u32 value, u32 bits) -> u32 { + return value << bits | value >> 32 - bits; + } + + auto quarterRound(u32 x[16], u32 a, u32 b, u32 c, u32 d) -> void { + x[a] += x[b]; x[d] = rol(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rol(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rol(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rol(x[b] ^ x[c], 7); + } + + auto cipher() -> void { + memory::copy(block, input, 64); + for(u32 n : range(10)) { + quarterRound(block, 0, 4, 8, 12); + quarterRound(block, 1, 5, 9, 13); + quarterRound(block, 2, 6, 10, 14); + quarterRound(block, 3, 7, 11, 15); + quarterRound(block, 0, 5, 10, 15); + quarterRound(block, 1, 6, 11, 12); + quarterRound(block, 2, 7, 8, 13); + quarterRound(block, 3, 4, 9, 14); + } + } + + auto increment() -> void { + for(u32 n : range(16)) { + block[n] += input[n]; + } + if(!++input[12]) ++input[13]; + } + + u32 input[16]; + u32 block[16]; + u64 offset; +}; + +struct HChaCha20 : protected ChaCha20 { + HChaCha20(u256 key, u128 nonce) : ChaCha20(key, nonce >> 64, nonce >> 0) { + cipher(); + } + + auto key() const -> u256 { + u256 key = 0; + for(u32 n : range(4)) key |= (u256)block[ 0 + n] << (n + 0) * 32; + for(u32 n : range(4)) key |= 
(u256)block[12 + n] << (n + 4) * 32; + return key; + } +}; + +//192-bit nonce; 64-bit x 64-byte (256GB) counter +struct XChaCha20 : ChaCha20 { + XChaCha20(u256 key, u192 nonce, u64 counter = 0): + ChaCha20(HChaCha20(key, nonce).key(), nonce >> 128, counter) { + } +}; + +} diff --git a/waterbox/ares64/ares/nall/counting-sort.hpp b/waterbox/ares64/ares/nall/counting-sort.hpp new file mode 100644 index 0000000000..0fad8d1429 --- /dev/null +++ b/waterbox/ares64/ares/nall/counting-sort.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include + +namespace nall { + +//counting sort by powers of two: used to implement radix sort +template +auto counting_sort(T* output, const T* input, u32 size) -> void { + static_assert(Bits >= 1 && Bits <= 20, "must be between 1 and 20 bits"); + enum : u32 { Base = 1 << Bits, Mask = Base - 1 }; + + u64 count[Base] = {}, last = 0; + for(u32 n : range(size)) ++count[(input[n] >> Shift) & Mask]; + for(u32 n : range(Base)) last += count[n], count[n] = last - count[n]; + for(u32 n : range(size)) output[count[(input[n] >> Shift) & Mask]++] = input[n]; +} + +} diff --git a/waterbox/ares64/ares/nall/database/odbc.hpp b/waterbox/ares64/ares/nall/database/odbc.hpp new file mode 100644 index 0000000000..ba19541fb3 --- /dev/null +++ b/waterbox/ares64/ares/nall/database/odbc.hpp @@ -0,0 +1,299 @@ +#pragma once + +//legacy code; no longer used + +#include + +#include +#include +#include + +namespace nall::Database { + +struct ODBC { + struct Statement { + Statement(const Statement& source) = delete; + auto operator=(const Statement& source) -> Statement& = delete; + + Statement(SQLHANDLE statement) : _statement(statement) {} + Statement(Statement&& source) { operator=(move(source)); } + + auto operator=(Statement&& source) -> Statement& { + _statement = source._statement; + _output = source._output; + _values = move(source._values); + source._statement = nullptr; + source._output = 0; + return *this; + } + + auto columns() -> u32 { + SQLSMALLINT columns = 0; + 
if(statement()) SQLNumResultCols(statement(), &columns); + return columns; + } + + auto integer(u32 column) -> s64 { + if(auto value = _values(column)) return value.get(0); + s64 value = 0; + SQLGetData(statement(), 1 + column, SQL_C_SBIGINT, &value, 0, nullptr); + _values(column) = (s64)value; + return value; + } + + auto natural(u32 column) -> u64 { + if(auto value = _values(column)) return value.get(0); + u64 value = 0; + SQLGetData(statement(), 1 + column, SQL_C_UBIGINT, &value, 0, nullptr); + _values(column) = (u64)value; + return value; + } + + auto real(u32 column) -> double { + if(auto value = _values(column)) return value.get(0.0); + f64 value = 0.0; + SQLGetData(statement(), 1 + column, SQL_C_DOUBLE, &value, 0, nullptr); + _values(column) = (f64)value; + return value; + } + + auto text(u32 column) -> string { + if(auto value = _values(column)) return value.get({}); + string value; + value.resize(65535); + SQLLEN size = 0; + SQLGetData(statement(), 1 + column, SQL_C_CHAR, value.get(), value.size(), &size); + value.resize(size); + _values(column) = (string)value; + return value; + } + + auto data(u32 column) -> vector { + if(auto value = _values(column)) return value.get>({}); + vector value; + value.resize(65535); + SQLLEN size = 0; + SQLGetData(statement(), 1 + column, SQL_C_CHAR, value.data(), value.size(), &size); + value.resize(size); + _values(column) = (vector)value; + return value; + } + + auto integer() -> s64 { return integer(_output++); } + auto natural() -> u64 { return natural(_output++); } + auto real() -> f64 { return real(_output++); } + auto text() -> string { return text(_output++); } + auto data() -> vector { return data(_output++); } + + protected: + virtual auto statement() -> SQLHANDLE { return _statement; } + + SQLHANDLE _statement = nullptr; + u32 _output = 0; + vector _values; //some ODBC drivers (eg MS-SQL) do not allow the same column to be read more than once + }; + + struct Query : Statement { + Query(const Query& source) = 
delete; + auto operator=(const Query& source) -> Query& = delete; + + Query(SQLHANDLE statement) : Statement(statement) {} + Query(Query&& source) : Statement(source._statement) { operator=(move(source)); } + + ~Query() { + if(statement()) { + SQLFreeHandle(SQL_HANDLE_STMT, _statement); + _statement = nullptr; + } + } + + auto operator=(Query&& source) -> Query& { + Statement::operator=(move(source)); + _bindings = move(source._bindings); + _result = source._result; + _input = source._input; + _stepped = source._stepped; + source._result = SQL_SUCCESS; + source._input = 0; + source._stepped = false; + return *this; + } + + explicit operator bool() { + //this is likely not the best way to test if the query has returned data ... + //but I wasn't able to find an ODBC API for this seemingly simple task + return statement() && success(); + } + + //ODBC SQLBindParameter only holds pointers to data values + //if the bound paramters go out of scope before the query is executed, binding would reference dangling pointers + //so to work around this, we cache all parameters inside Query until the query is executed + + auto& bind(u32 column, nullptr_t) { return _bindings.append({column, any{(nullptr_t)nullptr}}), *this; } + auto& bind(u32 column, s32 value) { return _bindings.append({column, any{(s32)value}}), *this; } + auto& bind(u32 column, u32 value) { return _bindings.append({column, any{(u32)value}}), *this; } + auto& bind(u32 column, s64 value) { return _bindings.append({column, any{(s64)value}}), *this; } + auto& bind(u32 column, u64 value) { return _bindings.append({column, any{(u64)value}}), *this; } + auto& bind(u32 column, f64 value) { return _bindings.append({column, any{(f64)value}}), *this; } + auto& bind(u32 column, const string& value) { return _bindings.append({column, any{(string)value}}), *this; } + auto& bind(u32 column, const vector& value) { return _bindings.append({column, any{(vector)value}}), *this; } + + auto& bind(nullptr_t) { return bind(_input++, 
nullptr); } + auto& bind(s32 value) { return bind(_input++, value); } + auto& bind(u32 value) { return bind(_input++, value); } + auto& bind(s64 value) { return bind(_input++, value); } + auto& bind(u64 value) { return bind(_input++, value); } + auto& bind(f64 value) { return bind(_input++, value); } + auto& bind(const string& value) { return bind(_input++, value); } + auto& bind(const vector& value) { return bind(_input++, value); } + + auto step() -> bool { + if(!_stepped) { + for(auto& binding : _bindings) { + if(binding.value.is()) { + SQLLEN length = SQL_NULL_DATA; + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_NUMERIC, SQL_NUMERIC, 0, 0, nullptr, 0, &length); + } else if(binding.value.is()) { + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_SLONG, SQL_INTEGER, 0, 0, &binding.value.get(), 0, nullptr); + } else if(binding.value.is()) { + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_ULONG, SQL_INTEGER, 0, 0, &binding.value.get(), 0, nullptr); + } else if(binding.value.is()) { + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_SBIGINT, SQL_INTEGER, 0, 0, &binding.value.get(), 0, nullptr); + } else if(binding.value.is()) { + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_UBIGINT, SQL_INTEGER, 0, 0, &binding.value.get(), 0, nullptr); + } else if(binding.value.is()) { + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_DOUBLE, SQL_DOUBLE, 0, 0, &binding.value.get(), 0, nullptr); + } else if(binding.value.is()) { + auto& value = binding.value.get(); + SQLLEN length = SQL_NTS; + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARCHAR, value.size(), 0, (SQLPOINTER)value.data(), 0, &length); + } else if(binding.value.is>()) { + auto& value = binding.value.get>(); + SQLLEN length = value.size(); + SQLBindParameter(_statement, 1 + binding.column, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_VARBINARY, 
value.size(), 0, (SQLPOINTER)value.data(), 0, &length); + } + } + + _stepped = true; + _result = SQLExecute(_statement); + if(!success()) return false; + } + + _values.reset(); //clear previous row's cached read results + _result = SQLFetch(_statement); + _output = 0; + return success(); + } + + struct Iterator { + Iterator(Query& query, bool finished) : query(query), finished(finished) {} + auto operator*() -> Statement { return query._statement; } + auto operator!=(const Iterator& source) const -> bool { return finished != source.finished; } + auto operator++() -> Iterator& { finished = !query.step(); return *this; } + + protected: + Query& query; + bool finished = false; + }; + + auto begin() -> Iterator { return Iterator(*this, !step()); } + auto end() -> Iterator { return Iterator(*this, true); } + + private: + auto success() const -> bool { + return _result == SQL_SUCCESS || _result == SQL_SUCCESS_WITH_INFO; + } + + auto statement() -> SQLHANDLE override { + if(!_stepped) step(); + return _statement; + } + + struct Binding { + u32 column; + any value; + }; + vector _bindings; + + SQLRETURN _result = SQL_SUCCESS; + u32 _input = 0; + bool _stepped = false; + }; + + ODBC() { + _result = SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &_environment); + if(!success()) return; + + SQLSetEnvAttr(_environment, SQL_ATTR_ODBC_VERSION, (void*)SQL_OV_ODBC3, 0); + } + + ODBC(const string& database, const string& username, const string& password) : ODBC() { + open(database, username, password); + } + + ~ODBC() { + if(_environment) { + close(); + SQLFreeHandle(SQL_HANDLE_ENV, _environment); + _environment = nullptr; + } + } + + explicit operator bool() const { return _connection; } + + auto open(const string& database, const string& username, const string& password) -> bool { + if(!_environment) return false; + close(); + + _result = SQLAllocHandle(SQL_HANDLE_DBC, _environment, &_connection); + if(!success()) return false; + + SQLSetConnectAttr(_connection, 
SQL_LOGIN_TIMEOUT, (SQLPOINTER)5, 0); + _result = SQLConnectA(_connection, + (SQLCHAR*)database.data(), SQL_NTS, + (SQLCHAR*)username.data(), SQL_NTS, + (SQLCHAR*)password.data(), SQL_NTS + ); + if(!success()) return close(), false; + + return true; + } + + auto close() -> void { + if(_connection) { + SQLDisconnect(_connection); + SQLFreeHandle(SQL_HANDLE_DBC, _connection); + _connection = nullptr; + } + } + + template auto execute(const string& statement, P&&... p) -> Query { + if(!_connection) return {nullptr}; + + SQLHANDLE _statement = nullptr; + _result = SQLAllocHandle(SQL_HANDLE_STMT, _connection, &_statement); + if(!success()) return {nullptr}; + + Query query{_statement}; + _result = SQLPrepareA(_statement, (SQLCHAR*)statement.data(), SQL_NTS); + if(!success()) return {nullptr}; + + bind(query, forward

(p)...); + return query; + } + +private: + auto success() const -> bool { return _result == SQL_SUCCESS || _result == SQL_SUCCESS_WITH_INFO; } + + auto bind(Query&) -> void {} + template auto bind(Query& query, const T& value, P&&... p) -> void { + query.bind(value); + bind(query, forward

(p)...); + } + + SQLHANDLE _environment = nullptr; + SQLHANDLE _connection = nullptr; + SQLRETURN _result = SQL_SUCCESS; +}; + +} diff --git a/waterbox/ares64/ares/nall/database/sqlite3.hpp b/waterbox/ares64/ares/nall/database/sqlite3.hpp new file mode 100644 index 0000000000..ac53aec0c9 --- /dev/null +++ b/waterbox/ares64/ares/nall/database/sqlite3.hpp @@ -0,0 +1,218 @@ +#pragma once + +//SQLite3 C++ RAII wrapper for nall +//note: it is safe (no-op) to call sqlite3_* functions on null sqlite3 objects + +#include + +#include +#include + +namespace nall::Database { + +struct SQLite3 { + struct Statement { + Statement(const Statement& source) = delete; + auto operator=(const Statement& source) -> Statement& = delete; + + Statement(sqlite3_stmt* statement) : _statement(statement) {} + Statement(Statement&& source) { operator=(move(source)); } + + auto operator=(Statement&& source) -> Statement& { + _statement = source._statement; + _response = source._response; + _output = source._output; + source._statement = nullptr; + source._response = SQLITE_OK; + source._output = 0; + return *this; + } + + explicit operator bool() { + return sqlite3_data_count(statement()); + } + + auto columns() -> u32 { + return sqlite3_column_count(statement()); + } + + auto boolean(u32 column) -> bool { + return sqlite3_column_int64(statement(), column) != 0; + } + + auto integer(u32 column) -> s64 { + return sqlite3_column_int64(statement(), column); + } + + auto natural(u32 column) -> u64 { + return sqlite3_column_int64(statement(), column); + } + + auto real(u32 column) -> f64 { + return sqlite3_column_double(statement(), column); + } + + auto string(u32 column) -> nall::string { + nall::string result; + if(auto text = sqlite3_column_text(statement(), column)) { + result.resize(sqlite3_column_bytes(statement(), column)); + memory::copy(result.get(), text, result.size()); + } + return result; + } + + auto data(u32 column) -> vector { + vector result; + if(auto data = 
sqlite3_column_blob(statement(), column)) { + result.resize(sqlite3_column_bytes(statement(), column)); + memory::copy(result.data(), data, result.size()); + } + return result; + } + + auto boolean() -> bool { return boolean(_output++); } + auto integer() -> s64 { return integer(_output++); } + auto natural() -> u64 { return natural(_output++); } + auto real() -> f64 { return real(_output++); } + auto string() -> nall::string { return string(_output++); } + auto data() -> vector { return data(_output++); } + + protected: + virtual auto statement() -> sqlite3_stmt* { return _statement; } + + sqlite3_stmt* _statement = nullptr; + s32 _response = SQLITE_OK; + u32 _output = 0; + }; + + struct Query : Statement { + Query(const Query& source) = delete; + auto operator=(const Query& source) -> Query& = delete; + + Query(sqlite3_stmt* statement) : Statement(statement) {} + Query(Query&& source) : Statement(source._statement) { operator=(move(source)); } + + ~Query() { + sqlite3_finalize(statement()); + _statement = nullptr; + } + + auto operator=(Query&& source) -> Query& { + _statement = source._statement; + _input = source._input; + source._statement = nullptr; + source._input = 0; + return *this; + } + + auto& bind(u32 column, nullptr_t) { sqlite3_bind_null(_statement, 1 + column); return *this; } + auto& bind(u32 column, bool value) { sqlite3_bind_int(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, s32 value) { sqlite3_bind_int(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, u32 value) { sqlite3_bind_int(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, s64 value) { sqlite3_bind_int64(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, u64 value) { sqlite3_bind_int64(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, intmax value) { sqlite3_bind_int64(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, uintmax value) { 
sqlite3_bind_int64(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, nall::boolean value) { sqlite3_bind_int64(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, nall::integer value) { sqlite3_bind_int64(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, nall::natural value) { sqlite3_bind_int64(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, f64 value) { sqlite3_bind_double(_statement, 1 + column, value); return *this; } + auto& bind(u32 column, const nall::string& value) { sqlite3_bind_text(_statement, 1 + column, value.data(), value.size(), SQLITE_TRANSIENT); return *this; } + auto& bind(u32 column, const vector& value) { sqlite3_bind_blob(_statement, 1 + column, value.data(), value.size(), SQLITE_TRANSIENT); return *this; } + + auto& bind(nullptr_t) { return bind(_input++, nullptr); } + auto& bind(bool value) { return bind(_input++, value); } + auto& bind(s32 value) { return bind(_input++, value); } + auto& bind(u32 value) { return bind(_input++, value); } + auto& bind(s64 value) { return bind(_input++, value); } + auto& bind(u64 value) { return bind(_input++, value); } + auto& bind(intmax value) { return bind(_input++, value); } + auto& bind(uintmax value) { return bind(_input++, value); } + auto& bind(nall::boolean value) { return bind(_input++, value); } + auto& bind(nall::integer value) { return bind(_input++, value); } + auto& bind(nall::natural value) { return bind(_input++, value); } + auto& bind(f64 value) { return bind(_input++, value); } + auto& bind(const nall::string& value) { return bind(_input++, value); } + auto& bind(const vector& value) { return bind(_input++, value); } + + auto step() -> bool { + _stepped = true; + return sqlite3_step(_statement) == SQLITE_ROW; + } + + struct Iterator { + Iterator(Query& query, bool finished) : query(query), finished(finished) {} + auto operator*() -> Statement { return query._statement; } + auto operator!=(const 
Iterator& source) const -> bool { return finished != source.finished; } + auto operator++() -> Iterator& { finished = !query.step(); return *this; } + + protected: + Query& query; + bool finished = false; + }; + + auto begin() -> Iterator { return Iterator(*this, !step()); } + auto end() -> Iterator { return Iterator(*this, true); } + + private: + auto statement() -> sqlite3_stmt* override { + if(!_stepped) step(); + return _statement; + } + + u32 _input = 0; + bool _stepped = false; + }; + + SQLite3() = default; + SQLite3(const string& filename) { open(filename); } + ~SQLite3() { close(); } + + explicit operator bool() const { return _database; } + + auto open(const string& filename) -> bool { + close(); + sqlite3_open(filename, &_database); + return _database; + } + + auto close() -> void { + sqlite3_close(_database); + _database = nullptr; + } + + template auto execute(const string& statement, P&&... p) -> Query { + if(!_database) return {nullptr}; + + sqlite3_stmt* _statement = nullptr; + sqlite3_prepare_v2(_database, statement.data(), statement.size(), &_statement, nullptr); + if(!_statement) { + if(_debug) print("[sqlite3_prepare_v2] ", sqlite3_errmsg(_database), "\n"); + return {nullptr}; + } + + Query query{_statement}; + bind(query, forward

(p)...); + return query; + } + + auto lastInsertID() const -> u64 { + return _database ? sqlite3_last_insert_rowid(_database) : 0; + } + + auto setDebug(bool debug = true) -> void { + _debug = debug; + } + +protected: + auto bind(Query&) -> void {} + template auto bind(Query& query, const T& value, P&&... p) -> void { + query.bind(value); + bind(query, forward

(p)...); + } + + bool _debug = false; + sqlite3* _database = nullptr; +}; + +} diff --git a/waterbox/ares64/ares/nall/decode/base.hpp b/waterbox/ares64/ares/nall/decode/base.hpp new file mode 100644 index 0000000000..f7932548c9 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/base.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include + +namespace nall::Decode { + +template inline auto Base(const string& value) -> T { + static const string format = + Bits == 2 ? "01" + : Bits == 8 ? "01234567" + : Bits == 10 ? "0123456789" + : Bits == 16 ? "0123456789abcdef" + : Bits == 32 ? "0123456789abcdefghijklmnopqrstuv" + : Bits == 34 ? "023456789abcdefghijkmnopqrstuvwxyz" //1l + : Bits == 36 ? "0123456789abcdefghijklmnopqrstuvwxyz" + : Bits == 57 ? "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" //01IOl + : Bits == 62 ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + : Bits == 64 ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{}" + : Bits == 85 ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%()+,-.:;=@[]^_`{|}~" //\ "&'*/<>? 
+ : ""; + static bool initialized = false; + static u8 lookup[256] = {}; + if(!initialized) { + initialized = true; + for(u32 n : range(format.size())) { + lookup[format[n]] = n; + } + } + + T result = 0; + for(auto byte : value) { + result = result * Bits + lookup[byte]; + } + return result; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/base64.hpp b/waterbox/ares64/ares/nall/decode/base64.hpp new file mode 100644 index 0000000000..a99b92583b --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/base64.hpp @@ -0,0 +1,48 @@ +#pragma once + +namespace nall::Decode { + +inline auto Base64(const string& text) -> vector { + static bool initialized = false; + static u8 lookup[256] = {}; + if(!initialized) { + initialized = true; + for(u32 n : range(26)) lookup['A' + n] = n; + for(u32 n : range(26)) lookup['a' + n] = n + 26; + for(u32 n : range(10)) lookup['0' + n] = n + 52; + lookup['+'] = lookup['-'] = 62; + lookup['/'] = lookup['_'] = 63; + } + + vector result; + u8 buffer = 0; + u8 output = 0; + for(u32 n : range(text.size())) { + u8 buffer = lookup[text[n]]; + + switch(n & 3) { + case 0: + output = buffer << 2; + break; + + case 1: + result.append(output | buffer >> 4); + output = (buffer & 15) << 4; + break; + + case 2: + result.append(output | buffer >> 2); + output = (buffer & 3) << 6; + break; + + case 3: + result.append(output | buffer); + break; + } + } + + if(text.size() & 3) result.append(output | buffer); + return result; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/bmp.hpp b/waterbox/ares64/ares/nall/decode/bmp.hpp new file mode 100644 index 0000000000..271cb1d246 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/bmp.hpp @@ -0,0 +1,76 @@ +#pragma once + +namespace nall::Decode { + +struct BMP { + BMP() = default; + BMP(const string& filename) { load(filename); } + BMP(const u8* data, u32 size) { load(data, size); } + + explicit operator bool() const { return _data; } + + auto reset() -> void { + if(_data) { delete[] _data; _data = 
nullptr; } + } + + auto data() -> u32* { return _data; } + auto data() const -> const u32* { return _data; } + auto width() const -> u32 { return _width; } + auto height() const -> u32 { return _height; } + + auto load(const string& filename) -> bool { + auto buffer = file::read(filename); + return load(buffer.data(), buffer.size()); + } + + auto load(const u8* data, u32 size) -> bool { + if(size < 0x36) return false; + const u8* p = data; + if(read(p, 2) != 0x4d42) return false; //signature + read(p, 8); + u32 offset = read(p, 4); + if(read(p, 4) != 40) return false; //DIB size + s32 width = (s32)read(p, 4); + if(width < 0) width = -width; + s32 height = (s32)read(p, 4); + bool flip = height >= 0; + if(height < 0) height = -height; + read(p, 2); + u32 bitsPerPixel = read(p, 2); + if(bitsPerPixel != 24 && bitsPerPixel != 32) return false; + if(read(p, 4) != 0) return false; //compression type + + _width = width; + _height = height; + _data = new u32[width * height]; + + u32 bytesPerPixel = bitsPerPixel / 8; + u32 alignedWidth = width * bytesPerPixel; + u32 paddingLength = 0; + while(alignedWidth % 4) alignedWidth++, paddingLength++; + + p = data + offset; + for(auto y : range(height)) { + u32* output = flip ? _data + (height - 1 - y) * width : _data + y * width; + for(auto x : range(width)) { + *output++ = read(p, bytesPerPixel) | (bitsPerPixel == 24 ? 
255u << 24 : 0); + } + if(paddingLength) read(p, paddingLength); + } + + return true; + } + +private: + u32* _data = nullptr; + u32 _width = 0; + u32 _height = 0; + + auto read(const u8*& buffer, u32 length) -> u64 { + u64 result = 0; + for(u32 n : range(length)) result |= (u64)*buffer++ << (n << 3); + return result; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/decode/bwt.hpp b/waterbox/ares64/ares/nall/decode/bwt.hpp new file mode 100644 index 0000000000..c3691473c5 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/bwt.hpp @@ -0,0 +1,47 @@ +#pragma once + +//burrows-wheeler transform + +#include + +namespace nall::Decode { + +inline auto BWT(array_view input) -> vector { + vector output; + + u32 size = 0; + for(u32 byte : range(8)) size |= *input++ << byte * 8; + output.resize(size); + + u32 I = 0; + for(u32 byte : range(8)) I |= *input++ << byte * 8; + + auto suffixes = SuffixArray(input); + + auto L = input; + auto F = new u8[size]; + for(u32 offset : range(size)) F[offset] = L[suffixes[offset + 1]]; + + u64 K[256] = {}; + auto C = new s32[size]; + for(u32 i : range(size)) { + C[i] = K[L[i]]; + K[L[i]]++; + } + + s32 M[256]; + memory::fill(M, 256, -1); + for(u32 i : range(size)) { + if(M[F[i]] == -1) M[F[i]] = i; + } + + u32 i = I; + for(u32 j : reverse(range(size))) { + output[j] = L[i]; + i = C[i] + M[L[i]]; + } + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/cue.hpp b/waterbox/ares64/ares/nall/decode/cue.hpp new file mode 100644 index 0000000000..66a09a460b --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/cue.hpp @@ -0,0 +1,238 @@ +#pragma once + +#include +#include +#include +#include + +namespace nall::Decode { + +struct CUE { + struct Index { + auto sectorCount() const -> u32; + + u8 number = 0xff; //00-99 + s32 lba = -1; + s32 end = -1; + }; + + struct Track { + auto sectorCount() const -> u32; + auto sectorSize() const -> u32; + + u8 number = 0xff; //01-99 + string type; + vector indices; + maybe pregap; + maybe 
postgap; + }; + + struct File { + auto sectorCount() const -> u32; + auto scan(const string& pathname) -> bool; + + string name; + string type; + vector tracks; + }; + + auto load(const string& location) -> bool; + auto sectorCount() const -> u32; + + vector files; + +private: + auto loadFile(vector& lines, u32& offset) -> File; + auto loadTrack(vector& lines, u32& offset) -> Track; + auto loadIndex(vector& lines, u32& offset) -> Index; + auto toLBA(const string& msf) -> u32; +}; + +inline auto CUE::load(const string& location) -> bool { + auto lines = string::read(location).replace("\r", "").split("\n"); + + u32 offset = 0; + while(offset < lines.size()) { + lines[offset].strip(); + if(lines[offset].ibeginsWith("FILE ")) { + auto file = loadFile(lines, offset); + if(!file.tracks) continue; + files.append(file); + continue; + } + offset++; + } + + if(!files) return false; + if(!files.first().tracks) return false; + if(!files.first().tracks.first().indices) return false; + + // calculate index ends for all but the last index + for(auto& file : files) { + maybe previous; + for(auto& track : file.tracks) { + for(auto& index : track.indices) { + if(index.lba < 0) continue; // ignore gaps (not in file) + if(previous) previous->end = index.lba - 1; + previous = index; + } + } + } + + for(auto& file : files) { + if(!file.scan(Location::path(location))) return false; + } + + return true; +} + +inline auto CUE::loadFile(vector& lines, u32& offset) -> File { + File file; + + lines[offset].itrimLeft("FILE ", 1L).strip(); + file.type = lines[offset].split(" ").last().strip().downcase(); + lines[offset].itrimRight(file.type, 1L).strip(); + file.name = lines[offset].trim("\"", "\"", 1L); + offset++; + + while(offset < lines.size()) { + lines[offset].strip(); + if(lines[offset].ibeginsWith("FILE ")) break; + if(lines[offset].ibeginsWith("TRACK ")) { + auto track = loadTrack(lines, offset); + if(!track.indices) continue; + file.tracks.append(track); + continue; + } + offset++; + } 
+ + return file; +} + +inline auto CUE::loadTrack(vector& lines, u32& offset) -> Track { + Track track; + + lines[offset].itrimLeft("TRACK ", 1L).strip(); + track.type = lines[offset].split(" ").last().strip().downcase(); + lines[offset].itrimRight(track.type, 1L).strip(); + track.number = lines[offset].natural(); + offset++; + + while(offset < lines.size()) { + lines[offset].strip(); + if(lines[offset].ibeginsWith("FILE ")) break; + if(lines[offset].ibeginsWith("TRACK ")) break; + if(lines[offset].ibeginsWith("INDEX ")) { + auto index = loadIndex(lines, offset); + if(index.number == 0 && track.number == 1) + index.lba = 0; // ignore track 1 index 0 (assume 1st pregap always starts at origin) + track.indices.append(index); + continue; + } + if(lines[offset].ibeginsWith("PREGAP ")) { + track.pregap = toLBA(lines[offset++].itrimLeft("PREGAP ", 1L)); + Index index; index.number = 0; index.lba = -1; + track.indices.append(index); // placeholder + continue; + } + if(lines[offset].ibeginsWith("POSTGAP ")) { + track.postgap = toLBA(lines[offset++].itrimLeft("POSTGAP ", 1L)); + Index index; index.number = track.indices.last().number + 1; index.lba = -1; + track.indices.append(index); // placeholder + continue; + } + offset++; + } + + if(track.number == 0 || track.number > 99) return {}; + return track; +} + +inline auto CUE::loadIndex(vector& lines, u32& offset) -> Index { + Index index; + + lines[offset].itrimLeft("INDEX ", 1L); + string sector = lines[offset].split(" ").last().strip(); + lines[offset].itrimRight(sector, 1L).strip(); + index.number = lines[offset].natural(); + index.lba = toLBA(sector); + offset++; + + if(index.number > 99) return {}; + return index; +} + +inline auto CUE::toLBA(const string& msf) -> u32 { + u32 m = msf.split(":")(0).natural(); + u32 s = msf.split(":")(1).natural(); + u32 f = msf.split(":")(2).natural(); + return m * 60 * 75 + s * 75 + f; +} + +inline auto CUE::sectorCount() const -> u32 { + u32 count = 0; + for(auto& file : files) count 
+= file.sectorCount(); + return count; +} + +inline auto CUE::File::scan(const string& pathname) -> bool { + string location = {Location::path(pathname), name}; + if(!file::exists(location)) return false; + + u64 size = 0; + + if(type == "binary") { + size = file::size(location); + } else if(type == "wave") { + Decode::WAV wav; + if(!wav.open(location)) return false; + if(wav.channels != 2) return false; + if(wav.frequency != 44100) return false; + if(wav.bitrate != 16) return false; + size = wav.size(); + } else { + return false; + } + + // calculate last index end for the file + for(auto& track : tracks) { + for(auto& index : track.indices) { + if(index.lba < 0) continue; // ignore gaps (not in file) + if(index.end >= 0) { + size -= track.sectorSize() * index.sectorCount(); + } else { + index.end = index.lba + size / track.sectorSize() - 1; + } + } + } + + return true; +} + +inline auto CUE::File::sectorCount() const -> u32 { + u32 count = 0; + for(auto& track : tracks) count += track.sectorCount(); + return count; +} + +inline auto CUE::Track::sectorCount() const -> u32 { + u32 count = 0; + for(auto& index : indices) count += index.sectorCount(); + return count; +} + +inline auto CUE::Track::sectorSize() const -> u32 { + if(type == "mode1/2048") return 2048; + if(type == "mode1/2352") return 2352; + if(type == "mode2/2352") return 2352; + if(type == "audio" ) return 2352; + return 0; +} + +inline auto CUE::Index::sectorCount() const -> u32 { + if(end < 0) return 0; + return end - lba + 1; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/gzip.hpp b/waterbox/ares64/ares/nall/decode/gzip.hpp new file mode 100644 index 0000000000..b4b5ce7677 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/gzip.hpp @@ -0,0 +1,78 @@ +#pragma once + +#include +#include + +namespace nall::Decode { + +struct GZIP { + inline ~GZIP(); + + inline auto decompress(const string& filename) -> bool; + inline auto decompress(const u8* data, u32 size) -> bool; + + string filename; + u8* 
data = nullptr; + u32 size = 0; +}; + +GZIP::~GZIP() { + if(data) delete[] data; +} + +auto GZIP::decompress(const string& filename) -> bool { + if(auto memory = file::read(filename)) { + return decompress(memory.data(), memory.size()); + } + return false; +} + +auto GZIP::decompress(const u8* data, u32 size) -> bool { + if(size < 18) return false; + if(data[0] != 0x1f) return false; + if(data[1] != 0x8b) return false; + u32 cm = data[2]; + u32 flg = data[3]; + u32 mtime = data[4]; + mtime |= data[5] << 8; + mtime |= data[6] << 16; + mtime |= data[7] << 24; + u32 xfl = data[8]; + u32 os = data[9]; + u32 p = 10; + u32 isize = data[size - 4]; + isize |= data[size - 3] << 8; + isize |= data[size - 2] << 16; + isize |= data[size - 1] << 24; + filename = ""; + + if(flg & 0x04) { //FEXTRA + u32 xlen = data[p + 0]; + xlen |= data[p + 1] << 8; + p += 2 + xlen; + } + + if(flg & 0x08) { //FNAME + char buffer[PATH_MAX]; + for(u32 n = 0; n < PATH_MAX; n++, p++) { + buffer[n] = data[p]; + if(data[p] == 0) break; + } + if(data[p++]) return false; + filename = buffer; + } + + if(flg & 0x10) { //FCOMMENT + while(data[p++]); + } + + if(flg & 0x02) { //FHCRC + p += 2; + } + + this->size = isize; + this->data = new u8[this->size]; + return inflate(this->data, this->size, data + p, size - p - 8); +} + +} diff --git a/waterbox/ares64/ares/nall/decode/html.hpp b/waterbox/ares64/ares/nall/decode/html.hpp new file mode 100644 index 0000000000..2cfd072db5 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/html.hpp @@ -0,0 +1,40 @@ +#pragma once + +namespace nall::Decode { + +inline auto HTML(const string& input) -> string { + string output; + for(u32 n = 0; n < input.size();) { + if(input[n] == '&') { + if(input(n + 1) == 'a' && input(n + 2) == 'm' && input(n + 3) == 'p' && input(n + 4) == ';') { + output.append('&'); + n += 5; + continue; + } + if(input(n + 1) == 'l' && input(n + 2) == 't' && input(n + 3) == ';') { + output.append('<'); + n += 4; + continue; + } + if(input(n + 1) == 'g' 
&& input(n + 2) == 't' && input(n + 3) == ';') { + output.append('>'); + n += 4; + continue; + } + if(input(n + 1) == 'q' && input(n + 2) == 'u' && input(n + 3) == 'o' && input(n + 4) == 't' && input(n + 5) == ';') { + output.append('"'); + n += 6; + continue; + } + if(input(n + 1) == 'a' && input(n + 2) == 'p' && input(n + 3) == 'o' && input(n + 4) == 's' && input(n + 5) == ';') { + output.append('\''); + n += 6; + continue; + } + } + output.append(input[n++]); + } + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/huffman.hpp b/waterbox/ares64/ares/nall/decode/huffman.hpp new file mode 100644 index 0000000000..5f0f39c6b5 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/huffman.hpp @@ -0,0 +1,36 @@ +#pragma once + +namespace nall::Decode { + +inline auto Huffman(array_view input) -> vector { + vector output; + + u32 size = 0; + for(u32 byte : range(8)) size |= *input++ << byte * 8; + output.reserve(size); + + u32 byte = 0, bits = 0; + auto read = [&]() -> bool { + if(bits == 0) bits = 8, byte = *input++; + return byte >> --bits & 1; + }; + + u32 nodes[256][2] = {}; + for(u32 offset : range(256)) { + for(u32 index : range(9)) nodes[offset][0] = nodes[offset][0] << 1 | read(); + for(u32 index : range(9)) nodes[offset][1] = nodes[offset][1] << 1 | read(); + } + + u32 node = 511; + while(output.size() < size) { + node = nodes[node - 256][read()]; + if(node < 256) { + output.append(node); + node = 511; + } + } + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/inflate.hpp b/waterbox/ares64/ares/nall/decode/inflate.hpp new file mode 100644 index 0000000000..5e1d7d2d19 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/inflate.hpp @@ -0,0 +1,340 @@ +#pragma once + +//a bad implementation of inflate from zlib/minizip +//todo: replace with Talarubi's version + +#include + +namespace nall::Decode { + +namespace puff { + inline auto puff(u8* dest, u32* destlen, u8* source, u32* sourcelen) -> s32; +} + +inline auto inflate(u8* 
target, u32 targetLength, const u8* source, u32 sourceLength) -> bool { + u32 tl = targetLength, sl = sourceLength; + s32 result = puff::puff((u8*)target, &tl, (u8*)source, &sl); + return result == 0; +} + +namespace puff { + +enum : u32 { + MAXBITS = 15, + MAXLCODES = 286, + MAXDCODES = 30, + FIXLCODES = 288, + MAXCODES = MAXLCODES + MAXDCODES, +}; + +struct state { + u8* out; + u32 outlen; + u32 outcnt; + + u8* in; + u32 inlen; + u32 incnt; + s32 bitbuf; + s32 bitcnt; + + jmp_buf env; +}; + +struct huffman { + s16* count; + s16* symbol; +}; + +inline auto bits(state* s, s32 need) -> s32 { + s32 val; + + val = s->bitbuf; + while(s->bitcnt < need) { + if(s->incnt == s->inlen) longjmp(s->env, 1); + val |= (s32)(s->in[s->incnt++]) << s->bitcnt; + s->bitcnt += 8; + } + + s->bitbuf = (s32)(val >> need); + s->bitcnt -= need; + + return (s32)(val & ((1L << need) - 1)); +} + +inline auto stored(state* s) -> s32 { + u32 len; + + s->bitbuf = 0; + s->bitcnt = 0; + + if(s->incnt + 4 > s->inlen) return 2; + len = s->in[s->incnt++]; + len |= s->in[s->incnt++] << 8; + if(s->in[s->incnt++] != (~len & 0xff) || + s->in[s->incnt++] != ((~len >> 8) & 0xff) + ) return 2; + + if(s->incnt + len > s->inlen) return 2; + if(s->out != nullptr) { + if(s->outcnt + len > s->outlen) return 1; + while(len--) s->out[s->outcnt++] = s->in[s->incnt++]; + } else { + s->outcnt += len; + s->incnt += len; + } + + return 0; +} + +inline auto decode(state* s, huffman* h) -> s32 { + s32 len, code, first, count, index, bitbuf, left; + s16* next; + + bitbuf = s->bitbuf; + left = s->bitcnt; + code = first = index = 0; + len = 1; + next = h->count + 1; + while(true) { + while(left--) { + code |= bitbuf & 1; + bitbuf >>= 1; + count = *next++; + if(code - count < first) { + s->bitbuf = bitbuf; + s->bitcnt = (s->bitcnt - len) & 7; + return h->symbol[index + (code - first)]; + } + index += count; + first += count; + first <<= 1; + code <<= 1; + len++; + } + left = (MAXBITS + 1) - len; + if(left == 0) break; + 
if(s->incnt == s->inlen) longjmp(s->env, 1); + bitbuf = s->in[s->incnt++]; + if(left > 8) left = 8; + } + + return -10; +} + +inline auto construct(huffman* h, s16* length, s32 n) -> s32 { + s32 symbol, len, left; + s16 offs[MAXBITS + 1]; + + for(len = 0; len <= MAXBITS; len++) h->count[len] = 0; + for(symbol = 0; symbol < n; symbol++) h->count[length[symbol]]++; + if(h->count[0] == n) return 0; + + left = 1; + for(len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= h->count[len]; + if(left < 0) return left; + } + + offs[1] = 0; + for(len = 1; len < MAXBITS; len++) offs[len + 1] = offs[len] + h->count[len]; + + for(symbol = 0; symbol < n; symbol++) { + if(length[symbol] != 0) h->symbol[offs[length[symbol]]++] = symbol; + } + + return left; +} + +inline auto codes(state* s, huffman* lencode, huffman* distcode) -> s32 { + s32 symbol, len; + u32 dist; + static const s16 lens[29] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258 + }; + static const s16 lext[29] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 + }; + static const s16 dists[30] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577 + }; + static const s16 dext[30] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13 + }; + + do { + symbol = decode(s, lencode); + if(symbol < 0) return symbol; + if(symbol < 256) { + if(s->out != nullptr) { + if(s->outcnt == s->outlen) return 1; + s->out[s->outcnt] = symbol; + } + s->outcnt++; + } else if(symbol > 256) { + symbol -= 257; + if(symbol >= 29) return -10; + len = lens[symbol] + bits(s, lext[symbol]); + + symbol = decode(s, distcode); + if(symbol < 0) return symbol; + dist = dists[symbol] + bits(s, dext[symbol]); + #ifndef INFLATE_ALLOW_INVALID_DISTANCE_TOO_FAR + if(dist > s->outcnt) return 
-11; + #endif + + if(s->out != nullptr) { + if(s->outcnt + len > s->outlen) return 1; + while(len--) { + s->out[s->outcnt] = + #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOO_FAR + dist > s->outcnt ? 0 : + #endif + s->out[s->outcnt - dist]; + s->outcnt++; + } + } else { + s->outcnt += len; + } + } + } while(symbol != 256); + + return 0; +} + +inline auto fixed(state* s) -> s32 { + static s32 virgin = 1; + static s16 lencnt[MAXBITS + 1], lensym[FIXLCODES]; + static s16 distcnt[MAXBITS + 1], distsym[MAXDCODES]; + static huffman lencode, distcode; + + if(virgin) { + s32 symbol = 0; + s16 lengths[FIXLCODES]; + + lencode.count = lencnt; + lencode.symbol = lensym; + distcode.count = distcnt; + distcode.symbol = distsym; + + for(; symbol < 144; symbol++) lengths[symbol] = 8; + for(; symbol < 256; symbol++) lengths[symbol] = 9; + for(; symbol < 280; symbol++) lengths[symbol] = 7; + for(; symbol < FIXLCODES; symbol++) lengths[symbol] = 8; + construct(&lencode, lengths, FIXLCODES); + + for(symbol = 0; symbol < MAXDCODES; symbol++) lengths[symbol] = 5; + construct(&distcode, lengths, MAXDCODES); + + virgin = 0; + } + + return codes(s, &lencode, &distcode); +} + +inline auto dynamic(state* s) -> s32 { + s32 nlen, ndist, ncode, index, err; + s16 lengths[MAXCODES]; + s16 lencnt[MAXBITS + 1], lensym[MAXLCODES]; + s16 distcnt[MAXBITS + 1], distsym[MAXDCODES]; + huffman lencode, distcode; + static const s16 order[19] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + + lencode.count = lencnt; + lencode.symbol = lensym; + distcode.count = distcnt; + distcode.symbol = distsym; + + nlen = bits(s, 5) + 257; + ndist = bits(s, 5) + 1; + ncode = bits(s, 4) + 4; + if(nlen > MAXLCODES || ndist > MAXDCODES) return -3; + + for(index = 0; index < ncode; index++) lengths[order[index]] = bits(s, 3); + for(; index < 19; index++) lengths[order[index]] = 0; + + err = construct(&lencode, lengths, 19); + if(err != 0) return -4; + + index = 0; + while(index < nlen + ndist) { + s32 
symbol, len; + + symbol = decode(s, &lencode); + if(symbol < 16) { + lengths[index++] = symbol; + } else { + len = 0; + if(symbol == 16) { + if(index == 0) return -5; + len = lengths[index - 1]; + symbol = 3 + bits(s, 2); + } else if(symbol == 17) { + symbol = 3 + bits(s, 3); + } else { + symbol = 11 + bits(s, 7); + } + if(index + symbol > nlen + ndist) return -6; + while(symbol--) lengths[index++] = len; + } + } + + if(lengths[256] == 0) return -9; + + err = construct(&lencode, lengths, nlen); + if(err < 0 || (err > 0 && nlen - lencode.count[0] != 1)) return -7; + + err = construct(&distcode, lengths + nlen, ndist); + if(err < 0 || (err > 0 && ndist - distcode.count[0] != 1)) return -8; + + return codes(s, &lencode, &distcode); +} + +inline auto puff(u8* dest, u32* destlen, u8* source, u32* sourcelen) -> s32 { + state s; + s32 last, type, err; + + s.out = dest; + s.outlen = *destlen; + s.outcnt = 0; + + s.in = source; + s.inlen = *sourcelen; + s.incnt = 0; + s.bitbuf = 0; + s.bitcnt = 0; + + if(setjmp(s.env) != 0) { + err = 2; + } else { + do { + last = bits(&s, 1); + type = bits(&s, 2); + err = type == 0 ? stored(&s) + : type == 1 ? fixed(&s) + : type == 2 ? 
dynamic(&s) + : -1; + if(err != 0) break; + } while(!last); + } + + if(err <= 0) { + *destlen = s.outcnt; + *sourcelen = s.incnt; + } + + return err; +} + +} + +} diff --git a/waterbox/ares64/ares/nall/decode/lzsa.hpp b/waterbox/ares64/ares/nall/decode/lzsa.hpp new file mode 100644 index 0000000000..df5790e9a0 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/lzsa.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include + +namespace nall::Decode { + +inline auto LZSA(array_view input) -> vector { + vector output; + u32 index = 0; + + u32 size = 0; + for(u32 byte : range(8)) size |= *input++ << byte * 8; + output.resize(size); + + auto load = [&]() -> vector { + u32 size = 0; + for(u32 byte : range(8)) size |= *input++ << byte * 8; + vector buffer; + buffer.reserve(size); + while(size--) buffer.append(*input++); + return buffer; + }; + + auto flags = Decode::Huffman(load()); + auto literals = Decode::Huffman(load()); + auto lengths = Decode::Huffman(load()); + auto offsets = Decode::Huffman(load()); + + auto flagData = flags.data(); + u32 byte = 0, bits = 0; + auto flagRead = [&]() -> bool { + if(bits == 0) bits = 8, byte = *flagData++; + return byte >> --bits & 1; + }; + + auto literalData = literals.data(); + auto literalRead = [&]() -> u8 { + return *literalData++; + }; + + auto lengthData = lengths.data(); + auto lengthRead = [&]() -> u64 { + u32 byte = *lengthData++, bytes = 1; + while(!(byte & 1)) byte >>= 1, bytes++; + u32 length = byte >> 1, shift = 8 - bytes; + while(--bytes) length |= *lengthData++ << shift, shift += 8; + return length; + }; + + auto offsetData = offsets.data(); + auto offsetRead = [&]() -> u32 { + u32 offset = 0; + offset |= *offsetData++ << 0; if(index < 1 << 8) return offset; + offset |= *offsetData++ << 8; if(index < 1 << 16) return offset; + offset |= *offsetData++ << 16; if(index < 1 << 24) return offset; + offset |= *offsetData++ << 24; return offset; + }; + + while(index < size) { + if(!flagRead()) { + output[index++] = literalRead(); + } 
else { + u32 length = lengthRead() + 6; + u32 offset = index - offsetRead(); + while(length--) output[index++] = output[offset++]; + } + } + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/mtf.hpp b/waterbox/ares64/ares/nall/decode/mtf.hpp new file mode 100644 index 0000000000..3e2a46cae1 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/mtf.hpp @@ -0,0 +1,25 @@ +#pragma once + +//move to front + +namespace nall::Decode { + +inline auto MTF(array_view input) -> vector { + vector output; + output.resize(input.size()); + + u8 order[256]; + for(u32 n : range(256)) order[n] = n; + + for(u32 offset : range(input.size())) { + u32 data = input[offset]; + u32 value = order[data]; + output[offset] = value; + memory::move(&order[1], &order[0], data); + order[0] = value; + } + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/png.hpp b/waterbox/ares64/ares/nall/decode/png.hpp new file mode 100644 index 0000000000..75c6e81651 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/png.hpp @@ -0,0 +1,332 @@ +#pragma once + +#include +#include + +namespace nall::Decode { + +struct PNG { + PNG(); + ~PNG(); + + auto load(const string& filename) -> bool; + auto load(const u8* sourceData, u32 sourceSize) -> bool; + auto readbits(const u8*& data) -> u32; + + struct Info { + u32 width; + u32 height; + u32 bitDepth; + //colorType: + //0 = L (luma) + //2 = R,G,B + //3 = P (palette) + //4 = L,A + //6 = R,G,B,A + u32 colorType; + u32 compressionMethod; + u32 filterType; + u32 interlaceMethod; + + u32 bytesPerPixel; + u32 pitch; + + u8 palette[256][3]; + } info; + + u8* data = nullptr; + u32 size = 0; + + u32 bitpos = 0; + +protected: + enum class FourCC : u32 { + IHDR = 0x49484452, + PLTE = 0x504c5445, + IDAT = 0x49444154, + IEND = 0x49454e44, + }; + + auto interlace(u32 pass, u32 index) -> u32; + auto inflateSize() -> u32; + auto deinterlace(const u8*& inputData, u32 pass) -> bool; + auto filter(u8* outputData, const u8* inputData, u32 width, 
u32 height) -> bool; + auto read(const u8* data, u32 length) -> u32; +}; + +inline PNG::PNG() { +} + +inline PNG::~PNG() { + if(data) delete[] data; +} + +inline auto PNG::load(const string& filename) -> bool { + if(auto memory = file::read(filename)) { + return load(memory.data(), memory.size()); + } + return false; +} + +inline auto PNG::load(const u8* sourceData, u32 sourceSize) -> bool { + if(sourceSize < 8) return false; + if(read(sourceData + 0, 4) != 0x89504e47) return false; + if(read(sourceData + 4, 4) != 0x0d0a1a0a) return false; + + u8* compressedData = nullptr; + u32 compressedSize = 0; + + u32 offset = 8; + while(offset < sourceSize) { + u32 length = read(sourceData + offset + 0, 4); + u32 fourCC = read(sourceData + offset + 4, 4); + u32 checksum = read(sourceData + offset + 8 + length, 4); + + if(fourCC == (u32)FourCC::IHDR) { + info.width = read(sourceData + offset + 8, 4); + info.height = read(sourceData + offset + 12, 4); + info.bitDepth = read(sourceData + offset + 16, 1); + info.colorType = read(sourceData + offset + 17, 1); + info.compressionMethod = read(sourceData + offset + 18, 1); + info.filterType = read(sourceData + offset + 19, 1); + info.interlaceMethod = read(sourceData + offset + 20, 1); + + if(info.bitDepth == 0 || info.bitDepth > 16) return false; + if(info.bitDepth & (info.bitDepth - 1)) return false; //not a power of two + if(info.compressionMethod != 0) return false; + if(info.filterType != 0) return false; + if(info.interlaceMethod != 0 && info.interlaceMethod != 1) return false; + + switch(info.colorType) { + case 0: info.bytesPerPixel = info.bitDepth * 1; break; //L + case 2: info.bytesPerPixel = info.bitDepth * 3; break; //R,G,B + case 3: info.bytesPerPixel = info.bitDepth * 1; break; //P + case 4: info.bytesPerPixel = info.bitDepth * 2; break; //L,A + case 6: info.bytesPerPixel = info.bitDepth * 4; break; //R,G,B,A + default: return false; + } + + if(info.colorType == 2 || info.colorType == 4 || info.colorType == 6) { + 
if(info.bitDepth != 8 && info.bitDepth != 16) return false; + } + if(info.colorType == 3 && info.bitDepth == 16) return false; + + info.bytesPerPixel = (info.bytesPerPixel + 7) / 8; + info.pitch = (s32)info.width * info.bytesPerPixel; + } + + if(fourCC == (u32)FourCC::PLTE) { + if(length % 3) return false; + for(u32 n = 0, p = offset + 8; n < length / 3; n++) { + info.palette[n][0] = sourceData[p++]; + info.palette[n][1] = sourceData[p++]; + info.palette[n][2] = sourceData[p++]; + } + } + + if(fourCC == (u32)FourCC::IDAT) { + compressedData = (u8*)realloc(compressedData, compressedSize + length); + memcpy(compressedData + compressedSize, sourceData + offset + 8, length); + compressedSize += length; + } + + if(fourCC == (u32)FourCC::IEND) { + break; + } + + offset += 4 + 4 + length + 4; + } + + u32 interlacedSize = inflateSize(); + auto interlacedData = new u8[interlacedSize]; + + bool result = inflate(interlacedData, interlacedSize, compressedData + 2, compressedSize - 6); + free(compressedData); + + if(result == false) { + delete[] interlacedData; + return false; + } + + size = info.width * info.height * info.bytesPerPixel; + data = new u8[size]; + + if(info.interlaceMethod == 0) { + if(filter(data, interlacedData, info.width, info.height) == false) { + delete[] interlacedData; + delete[] data; + data = nullptr; + return false; + } + } else { + const u8* passData = interlacedData; + for(u32 pass = 0; pass < 7; pass++) { + if(deinterlace(passData, pass) == false) { + delete[] interlacedData; + delete[] data; + data = nullptr; + return false; + } + } + } + + delete[] interlacedData; + return true; +} + +inline auto PNG::interlace(u32 pass, u32 index) -> u32 { + static const u32 data[7][4] = { + //x-distance, y-distance, x-origin, y-origin + {8, 8, 0, 0}, + {8, 8, 4, 0}, + {4, 8, 0, 4}, + {4, 4, 2, 0}, + {2, 4, 0, 2}, + {2, 2, 1, 0}, + {1, 2, 0, 1}, + }; + return data[pass][index]; +} + +inline auto PNG::inflateSize() -> u32 { + if(info.interlaceMethod == 0) { + 
return info.width * info.height * info.bytesPerPixel + info.height; + } + + u32 size = 0; + for(u32 pass = 0; pass < 7; pass++) { + u32 xd = interlace(pass, 0), yd = interlace(pass, 1); + u32 xo = interlace(pass, 2), yo = interlace(pass, 3); + u32 width = (info.width + (xd - xo - 1)) / xd; + u32 height = (info.height + (yd - yo - 1)) / yd; + if(width == 0 || height == 0) continue; + size += width * height * info.bytesPerPixel + height; + } + return size; +} + +inline auto PNG::deinterlace(const u8*& inputData, u32 pass) -> bool { + u32 xd = interlace(pass, 0), yd = interlace(pass, 1); + u32 xo = interlace(pass, 2), yo = interlace(pass, 3); + u32 width = (info.width + (xd - xo - 1)) / xd; + u32 height = (info.height + (yd - yo - 1)) / yd; + if(width == 0 || height == 0) return true; + + u32 outputSize = width * height * info.bytesPerPixel; + auto outputData = new u8[outputSize]; + bool result = filter(outputData, inputData, width, height); + + const u8* rd = outputData; + for(u32 y = yo; y < info.height; y += yd) { + u8* wr = data + y * info.pitch; + for(u32 x = xo; x < info.width; x += xd) { + for(u32 b = 0; b < info.bytesPerPixel; b++) { + wr[x * info.bytesPerPixel + b] = *rd++; + } + } + } + + inputData += outputSize + height; + delete[] outputData; + return result; +} + +inline auto PNG::filter(u8* outputData, const u8* inputData, u32 width, u32 height) -> bool { + u8* wr = outputData; + const u8* rd = inputData; + s32 bpp = info.bytesPerPixel, pitch = width * bpp; + for(s32 y = 0; y < height; y++) { + u8 filter = *rd++; + + switch(filter) { + case 0x00: //None + for(s32 x = 0; x < pitch; x++) { + wr[x] = rd[x]; + } + break; + + case 0x01: //Subtract + for(s32 x = 0; x < pitch; x++) { + wr[x] = rd[x] + (x - bpp < 0 ? 0 : wr[x - bpp]); + } + break; + + case 0x02: //Above + for(s32 x = 0; x < pitch; x++) { + wr[x] = rd[x] + (y - 1 < 0 ? 0 : wr[x - pitch]); + } + break; + + case 0x03: //Average + for(s32 x = 0; x < pitch; x++) { + s16 a = x - bpp < 0 ? 
0 : wr[x - bpp]; + s16 b = y - 1 < 0 ? 0 : wr[x - pitch]; + + wr[x] = rd[x] + (u8)((a + b) / 2); + } + break; + + case 0x04: //Paeth + for(s32 x = 0; x < pitch; x++) { + s16 a = x - bpp < 0 ? 0 : wr[x - bpp]; + s16 b = y - 1 < 0 ? 0 : wr[x - pitch]; + s16 c = x - bpp < 0 || y - 1 < 0 ? 0 : wr[x - pitch - bpp]; + + s16 p = a + b - c; + s16 pa = p > a ? p - a : a - p; + s16 pb = p > b ? p - b : b - p; + s16 pc = p > c ? p - c : c - p; + + auto paeth = (u8)((pa <= pb && pa <= pc) ? a : (pb <= pc) ? b : c); + + wr[x] = rd[x] + paeth; + } + break; + + default: //Invalid + return false; + } + + rd += pitch; + wr += pitch; + } + + return true; +} + +inline auto PNG::read(const u8* data, u32 length) -> u32 { + u32 result = 0; + while(length--) result = (result << 8) | (*data++); + return result; +} + +inline auto PNG::readbits(const u8*& data) -> u32 { + u32 result = 0; + switch(info.bitDepth) { + case 1: + result = (*data >> bitpos) & 1; + bitpos++; + if(bitpos == 8) { data++; bitpos = 0; } + break; + case 2: + result = (*data >> bitpos) & 3; + bitpos += 2; + if(bitpos == 8) { data++; bitpos = 0; } + break; + case 4: + result = (*data >> bitpos) & 15; + bitpos += 4; + if(bitpos == 8) { data++; bitpos = 0; } + break; + case 8: + result = *data++; + break; + case 16: + result = (data[0] << 8) | (data[1] << 0); + data += 2; + break; + } + return result; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/rle.hpp b/waterbox/ares64/ares/nall/decode/rle.hpp new file mode 100644 index 0000000000..059ca521e0 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/rle.hpp @@ -0,0 +1,44 @@ +#pragma once + +namespace nall::Decode { + +template //S = word size; M = match length +inline auto RLE(array_view input) -> vector { + vector output; + + auto load = [&]() -> u8 { + return input ? 
*input++ : 0; + }; + + u32 base = 0; + u64 size = 0; + for(u32 byte : range(8)) size |= load() << byte * 8; + output.resize(size); + + auto read = [&]() -> u64 { + u64 value = 0; + for(u32 byte : range(S)) value |= load() << byte * 8; + return value; + }; + + auto write = [&](u64 value) -> void { + if(base >= size) return; + for(u32 byte : range(S)) output[base++] = value >> byte * 8; + }; + + while(base < size) { + auto byte = load(); + if(byte < 128) { + byte++; + while(byte--) write(read()); + } else { + auto value = read(); + byte = (byte & 127) + M; + while(byte--) write(value); + } + } + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/url.hpp b/waterbox/ares64/ares/nall/decode/url.hpp new file mode 100644 index 0000000000..18382d1c96 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/url.hpp @@ -0,0 +1,39 @@ +#pragma once + +namespace nall::Decode { + +//returns empty string on malformed content +inline auto URL(string_view input) -> string { + string output; + for(u32 n = 0; n < input.size();) { + char c = input[n]; + + //unreserved characters + if(c >= 'A' && c <= 'Z') { output.append(c); n++; continue; } + if(c >= 'a' && c <= 'z') { output.append(c); n++; continue; } + if(c >= '0' && c <= '9') { output.append(c); n++; continue; } + if(c == '-' || c == '_' || c == '.' 
|| c == '~') { output.append(c); n++; continue; } + + //special characters + if(c == '+') { output.append(' '); n++; continue; } + + //reserved characters + if(c != '%' || n + 2 >= input.size()) return ""; + char hi = input[n + 1]; + char lo = input[n + 2]; + if(hi >= '0' && hi <= '9') hi -= '0'; + else if(hi >= 'A' && hi <= 'F') hi -= 'A' - 10; + else if(hi >= 'a' && hi <= 'f') hi -= 'a' - 10; + else return ""; + if(lo >= '0' && lo <= '9') lo -= '0'; + else if(lo >= 'A' && lo <= 'F') lo -= 'A' - 10; + else if(lo >= 'a' && lo <= 'f') lo -= 'a' - 10; + else return ""; + char byte = hi * 16 + lo; + output.append(byte); + n += 3; + } + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/wav.hpp b/waterbox/ares64/ares/nall/decode/wav.hpp new file mode 100644 index 0000000000..7ba6ee14d3 --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/wav.hpp @@ -0,0 +1,93 @@ +#pragma once + +#include +#include + +namespace nall::Decode { + +struct WAV { + auto open(const string& filename) -> bool; + auto close() -> void; + auto read() -> u64; + auto end() const -> bool; + auto size() const -> u64; + + file_buffer fp; + u32 channels = 0; + u32 frequency = 0; + u32 bitrate = 0; + u32 samples = 0; + u32 headerSize = 0; +}; + +inline auto WAV::open(const string& filename) -> bool { + close(); + + if(fp = file::open(filename, file::mode::read)) { + if(fp.read() != 'R') return false; + if(fp.read() != 'I') return false; + if(fp.read() != 'F') return false; + if(fp.read() != 'F') return false; + + u32 chunkSize = fp.readl(4); + + if(fp.read() != 'W') return false; + if(fp.read() != 'A') return false; + if(fp.read() != 'V') return false; + if(fp.read() != 'E') return false; + + if(fp.read() != 'f') return false; + if(fp.read() != 'm') return false; + if(fp.read() != 't') return false; + if(fp.read() != ' ') return false; + + u32 subchunkSize = fp.readl(4); + if(subchunkSize != 16) return false; + + u16 format = fp.readl(2); + if(format != 1) return false; //only PCM is 
supported + + channels = fp.readl(2); + frequency = fp.readl(4); + u32 byteRate = fp.readl(4); + u16 blockAlign = fp.readl(2); + bitrate = fp.readl(2); + + //todo: handle LIST chunk better than this + while(!fp.end() && fp.read() != 'd'); + while(!fp.end() && fp.read() != 'a'); + while(!fp.end() && fp.read() != 't'); + while(!fp.end() && fp.read() != 'a'); + if(fp.end()) return false; + + u32 dataSize = fp.readl(4); + u32 remaining = fp.size() - fp.offset(); + samples = remaining / (bitrate / 8) / channels; + headerSize = fp.offset(); + return true; + } + + return false; +} + +inline auto WAV::close() -> void { + fp.close(); + channels = 0; + frequency = 0; + bitrate = 0; + samples = 0; +} + +inline auto WAV::read() -> u64 { + return fp.readl((bitrate / 8) * channels); +} + +inline auto WAV::end() const -> bool { + return fp.end(); +} + +inline auto WAV::size() const -> u64 { + return samples * (bitrate / 8) * channels; +} + +} diff --git a/waterbox/ares64/ares/nall/decode/zip.hpp b/waterbox/ares64/ares/nall/decode/zip.hpp new file mode 100644 index 0000000000..f980df08ae --- /dev/null +++ b/waterbox/ares64/ares/nall/decode/zip.hpp @@ -0,0 +1,136 @@ +#pragma once + +#include +#include +#include +#include + +namespace nall::Decode { + +struct ZIP { + struct File { + string name; + const u8* data; + u32 size; + u32 csize; + u32 cmode; //0 = uncompressed, 8 = deflate + u32 crc32; + time_t timestamp; + }; + + ~ZIP() { + close(); + } + + auto open(const string& filename) -> bool { + close(); + if(fm.open(filename, file::mode::read) == false) return false; + if(open(fm.data(), fm.size()) == false) { + fm.close(); + return false; + } + return true; + } + + auto open(const u8* data, u32 size) -> bool { + if(size < 22) return false; + + filedata = data; + filesize = size; + + file.reset(); + + const u8* footer = data + size - 22; + while(true) { + if(footer <= data + 22) return false; + if(read(footer, 4) == 0x06054b50) { + u32 commentlength = read(footer + 20, 2); + 
if(footer + 22 + commentlength == data + size) break; + } + footer--; + } + const u8* directory = data + read(footer + 16, 4); + + while(true) { + u32 signature = read(directory + 0, 4); + if(signature != 0x02014b50) break; + + File file; + file.cmode = read(directory + 10, 2); + file.crc32 = read(directory + 16, 4); + file.csize = read(directory + 20, 4); + file.size = read(directory + 24, 4); + + u16 dosTime = read(directory + 12, 2); + u16 dosDate = read(directory + 14, 2); + tm info = {}; + info.tm_sec = (dosTime >> 0 & 31) << 1; + info.tm_min = (dosTime >> 5 & 63); + info.tm_hour = (dosTime >> 11 & 31); + info.tm_mday = (dosDate >> 0 & 31); + info.tm_mon = (dosDate >> 5 & 15) - 1; + info.tm_year = (dosDate >> 9 & 127) + 80; + info.tm_isdst = -1; + file.timestamp = mktime(&info); + + u32 namelength = read(directory + 28, 2); + u32 extralength = read(directory + 30, 2); + u32 commentlength = read(directory + 32, 2); + + char* filename = new char[namelength + 1]; + memcpy(filename, directory + 46, namelength); + filename[namelength] = 0; + file.name = filename; + delete[] filename; + + u32 offset = read(directory + 42, 4); + u32 offsetNL = read(data + offset + 26, 2); + u32 offsetEL = read(data + offset + 28, 2); + file.data = data + offset + 30 + offsetNL + offsetEL; + + directory += 46 + namelength + extralength + commentlength; + + this->file.append(file); + } + + return true; + } + + auto extract(File& file) -> vector { + vector buffer; + + if(file.cmode == 0) { + buffer.resize(file.size); + memcpy(buffer.data(), file.data, file.size); + } + + if(file.cmode == 8) { + buffer.resize(file.size); + if(inflate(buffer.data(), buffer.size(), file.data, file.csize) == false) { + buffer.reset(); + } + } + + return buffer; + } + + auto close() -> void { + if(fm) fm.close(); + } + +protected: + file_map fm; + const u8* filedata; + u32 filesize; + + auto read(const u8* data, u32 size) -> u32 { + u32 result = 0, shift = 0; + while(size--) { result |= *data++ << shift; 
shift += 8; } + return result; + } + +public: + vector file; +}; + +} diff --git a/waterbox/ares64/ares/nall/directory.hpp b/waterbox/ares64/ares/nall/directory.hpp new file mode 100644 index 0000000000..c3a2d3aa6f --- /dev/null +++ b/waterbox/ares64/ares/nall/directory.hpp @@ -0,0 +1,355 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#if defined(PLATFORM_WINDOWS) + #include +#else + #include + #include + #include +#endif + +namespace nall { + +struct directory : inode { + directory() = delete; + + static auto copy(const string& source, const string& target) -> bool; //recursive + static auto create(const string& pathname, u32 permissions = 0755) -> bool; //recursive + static auto remove(const string& pathname) -> bool; //recursive + static auto exists(const string& pathname) -> bool; + + static auto folders(const string& pathname, const string& pattern = "*") -> vector { + auto folders = directory::ufolders(pathname, pattern); + folders.sort(); + for(auto& folder : folders) folder.append("/"); //must append after sorting + return folders; + } + + static auto files(const string& pathname, const string& pattern = "*") -> vector { + auto files = directory::ufiles(pathname, pattern); + files.sort(); + return files; + } + + static auto contents(const string& pathname, const string& pattern = "*") -> vector { + auto folders = directory::ufolders(pathname); //pattern search of contents should only filter files + folders.sort(); + for(auto& folder : folders) folder.append("/"); //must append after sorting + auto files = directory::ufiles(pathname, pattern); + files.sort(); + for(auto& file : files) folders.append(file); + return folders; + } + + static auto ifolders(const string& pathname, const string& pattern = "*") -> vector { + auto folders = ufolders(pathname, pattern); + folders.isort(); + for(auto& folder : folders) folder.append("/"); //must append after sorting + return folders; + } + + static auto ifiles(const string& 
pathname, const string& pattern = "*") -> vector { + auto files = ufiles(pathname, pattern); + files.isort(); + return files; + } + + static auto icontents(const string& pathname, const string& pattern = "*") -> vector { + auto folders = directory::ufolders(pathname); //pattern search of contents should only filter files + folders.isort(); + for(auto& folder : folders) folder.append("/"); //must append after sorting + auto files = directory::ufiles(pathname, pattern); + files.isort(); + for(auto& file : files) folders.append(file); + return folders; + } + + static auto rcontents(const string& pathname, const string& pattern = "*") -> vector { + vector contents; + function + recurse = [&](const string& basename, const string& pathname, const string& pattern) { + for(auto& folder : directory::ufolders(pathname)) { + contents.append(string{pathname, folder, "/"}.trimLeft(basename, 1L)); + recurse(basename, {pathname, folder, "/"}, pattern); + } + for(auto& file : directory::ufiles(pathname, pattern)) { + contents.append(string{pathname, file}.trimLeft(basename, 1L)); + } + }; + for(auto& folder : directory::ufolders(pathname)) { + contents.append({folder, "/"}); + recurse(pathname, {pathname, folder, "/"}, pattern); + } + for(auto& file : directory::ufiles(pathname, pattern)) { + contents.append(file); + } + contents.sort(); + return contents; + } + + static auto ircontents(const string& pathname, const string& pattern = "*") -> vector { + vector contents; + function + recurse = [&](const string& basename, const string& pathname, const string& pattern) { + for(auto& folder : directory::ufolders(pathname)) { + contents.append(string{pathname, folder, "/"}.trimLeft(basename, 1L)); + recurse(basename, {pathname, folder, "/"}, pattern); + } + for(auto& file : directory::ufiles(pathname, pattern)) { + contents.append(string{pathname, file}.trimLeft(basename, 1L)); + } + }; + for(auto& folder : directory::ufolders(pathname)) { + contents.append({folder, "/"}); + 
recurse(pathname, {pathname, folder, "/"}, pattern); + } + for(auto& file : directory::ufiles(pathname, pattern)) { + contents.append(file); + } + contents.isort(); + return contents; + } + + static auto rfolders(const string& pathname, const string& pattern = "*") -> vector { + vector folders; + for(auto& folder : rcontents(pathname, pattern)) { + if(directory::exists({pathname, folder})) folders.append(folder); + } + return folders; + } + + static auto irfolders(const string& pathname, const string& pattern = "*") -> vector { + vector folders; + for(auto& folder : ircontents(pathname, pattern)) { + if(directory::exists({pathname, folder})) folders.append(folder); + } + return folders; + } + + static auto rfiles(const string& pathname, const string& pattern = "*") -> vector { + vector files; + for(auto& file : rcontents(pathname, pattern)) { + if(file::exists({pathname, file})) files.append(file); + } + return files; + } + + static auto irfiles(const string& pathname, const string& pattern = "*") -> vector { + vector files; + for(auto& file : ircontents(pathname, pattern)) { + if(file::exists({pathname, file})) files.append(file); + } + return files; + } + +private: + //internal functions; these return unsorted lists + static auto ufolders(const string& pathname, const string& pattern = "*") -> vector; + static auto ufiles(const string& pathname, const string& pattern = "*") -> vector; +}; + +inline auto directory::copy(const string& source, const string& target) -> bool { + bool result = true; + if(!directory::exists(source)) return result = false; + if(!directory::create(target)) return result = false; + for(auto& name : directory::folders(source)) { + if(!directory::copy({source, name}, {target, name})) result = false; + } + for(auto& name : directory::files(source)) { + if(!file::copy({source, name}, {target, name})) result = false; + } + return result; +} + +#if defined(PLATFORM_WINDOWS) + inline auto directory::create(const string& pathname, u32 permissions) 
-> bool { + string path; + auto list = string{pathname}.transform("\\", "/").trimRight("/").split("/"); + bool result = true; + for(auto& part : list) { + path.append(part, "/"); + if(directory::exists(path)) continue; + result &= (_wmkdir(utf16_t(path)) == 0); + } + return result; + } + + inline auto directory::remove(const string& pathname) -> bool { + if(!pathname || pathname == "/" || pathname.match("?:") || pathname.match("?:/")) return false; //safeguard + string separator = pathname.endsWith("/") || pathname.endsWith("\\") ? "" : "/"; + auto list = directory::contents(pathname); + for(auto& name : list) { + if(name.endsWith("/")) directory::remove({pathname, separator, name}); + else file::remove({pathname, separator, name}); + } + return _wrmdir(utf16_t(pathname)) == 0; + } + + inline auto directory::exists(const string& pathname) -> bool { + if(!pathname) return false; + string name = pathname; + name.trim("\"", "\""); + DWORD result = GetFileAttributes(utf16_t(name)); + if(result == INVALID_FILE_ATTRIBUTES) return false; + return (result & FILE_ATTRIBUTE_DIRECTORY); + } + + inline auto directory::ufolders(const string& pathname, const string& pattern) -> vector { + if(!pathname) { + //special root pseudo-folder (return list of drives) + wchar_t drives[PATH_MAX] = {0}; + GetLogicalDriveStrings(PATH_MAX, drives); + wchar_t* p = drives; + while(*p || *(p + 1)) { + if(!*p) *p = ';'; + p++; + } + return string{(const char*)utf8_t(drives)}.replace("\\", "/").split(";"); + } + + vector list; + string path = pathname; + path.transform("/", "\\"); + if(!path.endsWith("\\")) path.append("\\"); + path.append("*"); + HANDLE handle; + WIN32_FIND_DATA data; + handle = FindFirstFile(utf16_t(path), &data); + if(handle != INVALID_HANDLE_VALUE) { + if(wcscmp(data.cFileName, L".") && wcscmp(data.cFileName, L"..")) { + if(data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + string name = (const char*)utf8_t(data.cFileName); + if(name.match(pattern)) list.append(name); + } + 
} + while(FindNextFile(handle, &data) != false) { + if(wcscmp(data.cFileName, L".") && wcscmp(data.cFileName, L"..")) { + if(data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + string name = (const char*)utf8_t(data.cFileName); + if(name.match(pattern)) list.append(name); + } + } + } + FindClose(handle); + } + return list; + } + + inline auto directory::ufiles(const string& pathname, const string& pattern) -> vector { + if(!pathname) return {}; + + vector list; + string path = pathname; + path.transform("/", "\\"); + if(!path.endsWith("\\")) path.append("\\"); + path.append("*"); + HANDLE handle; + WIN32_FIND_DATA data; + handle = FindFirstFile(utf16_t(path), &data); + if(handle != INVALID_HANDLE_VALUE) { + if((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) { + string name = (const char*)utf8_t(data.cFileName); + if(name.match(pattern)) list.append(name); + } + while(FindNextFile(handle, &data) != false) { + if((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) { + string name = (const char*)utf8_t(data.cFileName); + if(name.match(pattern)) list.append(name); + } + } + FindClose(handle); + } + return list; + } +#else + inline auto directoryIsFolder(DIR* dp, struct dirent* ep) -> bool { + if(ep->d_type == DT_DIR) return true; + if(ep->d_type == DT_LNK || ep->d_type == DT_UNKNOWN) { + //symbolic links must be resolved to determine type + struct stat sp = {0}; + fstatat(dirfd(dp), ep->d_name, &sp, 0); + return S_ISDIR(sp.st_mode); + } + return false; + } + + inline auto directory::create(const string& pathname, u32 permissions) -> bool { + string path; + auto list = string{pathname}.trimRight("/").split("/"); + bool result = true; + for(auto& part : list) { + path.append(part, "/"); + if(directory::exists(path)) continue; + result &= (mkdir(path, permissions) == 0); + } + return result; + } + + inline auto directory::remove(const string& pathname) -> bool { + if(!pathname || pathname == "/") return false; //safeguard + string separator = 
pathname.endsWith("/") ? "" : "/"; + auto list = directory::contents(pathname); + for(auto& name : list) { + if(name.endsWith("/")) directory::remove({pathname, separator, name}); + else file::remove({pathname, separator, name}); + } + return rmdir(pathname) == 0; + } + + inline auto directory::exists(const string& pathname) -> bool { + if(!pathname) return false; + struct stat data; + if(stat(pathname, &data) != 0) return false; + return S_ISDIR(data.st_mode); + } + + inline auto directory::ufolders(const string& pathname, const string& pattern) -> vector { + if(!pathname) return vector{"/"}; + + vector list; + DIR* dp; + struct dirent* ep; + dp = opendir(pathname); + if(dp) { + while(ep = readdir(dp)) { + if(!strcmp(ep->d_name, ".")) continue; + if(!strcmp(ep->d_name, "..")) continue; + if(!directoryIsFolder(dp, ep)) continue; + string name{ep->d_name}; + if(name.match(pattern)) list.append(std::move(name)); + } + closedir(dp); + } + return list; + } + + inline auto directory::ufiles(const string& pathname, const string& pattern) -> vector { + if(!pathname) return {}; + + vector list; + DIR* dp; + struct dirent* ep; + dp = opendir(pathname); + if(dp) { + while(ep = readdir(dp)) { + if(!strcmp(ep->d_name, ".")) continue; + if(!strcmp(ep->d_name, "..")) continue; + if(directoryIsFolder(dp, ep)) continue; + string name{ep->d_name}; + if(name.match(pattern)) list.append(std::move(name)); + } + closedir(dp); + } + return list; + } +#endif + +} diff --git a/waterbox/ares64/ares/nall/dl.hpp b/waterbox/ares64/ares/nall/dl.hpp new file mode 100644 index 0000000000..8116d34921 --- /dev/null +++ b/waterbox/ares64/ares/nall/dl.hpp @@ -0,0 +1,126 @@ +#pragma once + +//dynamic linking support + +#include +#include +#include +#include +#include + +#if defined(PLATFORM_WINDOWS) + #include +#else + #include +#endif + +namespace nall { + +struct library { + library() = default; + ~library() { close(); } + + library& operator=(const library&) = delete; + library(const library&) = 
delete; + + explicit operator bool() const { return open(); } + auto open() const -> bool { return handle; } + auto open(const string&, const string& = "") -> bool; + auto openAbsolute(const string&) -> bool; + auto sym(const string&) -> void*; + auto close() -> void; + +private: + uintptr handle = 0; +}; + +#if defined(PLATFORM_LINUX) || defined(PLATFORM_BSD) +inline auto library::open(const string& name, const string& path) -> bool { + if(handle) close(); + if(path) handle = (uintptr)dlopen(string(path, "lib", name, ".so"), RTLD_LAZY); + if(!handle) handle = (uintptr)dlopen(string(Path::user(), ".local/lib/lib", name, ".so"), RTLD_LAZY); + if(!handle) handle = (uintptr)dlopen(string("/usr/local/lib/lib", name, ".so"), RTLD_LAZY); + if(!handle) handle = (uintptr)dlopen(string("lib", name, ".so"), RTLD_LAZY); + return handle; +} + +inline auto library::openAbsolute(const string& name) -> bool { + if(handle) close(); + handle = (uintptr)dlopen(name, RTLD_LAZY); + return handle; +} + +inline auto library::sym(const string& name) -> void* { + if(!handle) return nullptr; + return dlsym((void*)handle, name); +} + +inline auto library::close() -> void { + if(!handle) return; + dlclose((void*)handle); + handle = 0; +} +#elif defined(PLATFORM_MACOS) +inline auto library::open(const string& name, const string& path) -> bool { + if(handle) close(); + if(path) handle = (uintptr)dlopen(string(path, "lib", name, ".dylib"), RTLD_LAZY); + if(!handle) handle = (uintptr)dlopen(string(Path::user(), ".local/lib/lib", name, ".dylib"), RTLD_LAZY); + if(!handle) handle = (uintptr)dlopen(string("/usr/local/lib/lib", name, ".dylib"), RTLD_LAZY); + if(!handle) handle = (uintptr)dlopen(string("lib", name, ".dylib"), RTLD_LAZY); + return handle; +} + +inline auto library::openAbsolute(const string& name) -> bool { + if(handle) close(); + handle = (uintptr)dlopen(name, RTLD_LAZY); + return handle; +} + +inline auto library::sym(const string& name) -> void* { + if(!handle) return nullptr; + 
return dlsym((void*)handle, name); +} + +inline auto library::close() -> void { + if(!handle) return; + dlclose((void*)handle); + handle = 0; +} +#elif defined(PLATFORM_WINDOWS) +inline auto library::open(const string& name, const string& path) -> bool { + if(handle) close(); + if(path) { + string filepath = {path, name, ".dll"}; + handle = (uintptr)LoadLibraryW(utf16_t(filepath)); + } + if(!handle) { + string filepath = {name, ".dll"}; + handle = (uintptr)LoadLibraryW(utf16_t(filepath)); + } + return handle; +} + +inline auto library::openAbsolute(const string& name) -> bool { + if(handle) close(); + handle = (uintptr)LoadLibraryW(utf16_t(name)); + return handle; +} + +inline auto library::sym(const string& name) -> void* { + if(!handle) return nullptr; + return (void*)GetProcAddress((HMODULE)handle, name); +} + +inline auto library::close() -> void { + if(!handle) return; + FreeLibrary((HMODULE)handle); + handle = 0; +} +#else +inline auto library::open(const string&, const string&) -> bool { return false; } +inline auto library::openAbsolute(const string&) -> bool { return false; } +inline auto library::sym(const string&) -> void* { return nullptr; } +inline auto library::close() -> void {} +#endif + +} diff --git a/waterbox/ares64/ares/nall/dsp/iir/biquad.hpp b/waterbox/ares64/ares/nall/dsp/iir/biquad.hpp new file mode 100644 index 0000000000..5ab85bad5a --- /dev/null +++ b/waterbox/ares64/ares/nall/dsp/iir/biquad.hpp @@ -0,0 +1,162 @@ +#pragma once + +//transposed direct form II biquadratic second-order IIR filter + +namespace nall::DSP::IIR { + +struct Biquad { + enum class Type : u32 { + LowPass, + HighPass, + BandPass, + Notch, + Peak, + LowShelf, + HighShelf, + }; + + auto reset(Type type, f64 cutoffFrequency, f64 samplingFrequency, f64 quality, f64 gain = 0.0) -> void; + auto process(f64 in) -> f64; //normalized sample (-1.0 to +1.0) + + static auto shelf(f64 gain, f64 slope) -> f64; + static auto butterworth(u32 order, u32 phase) -> f64; + +private: + 
Type type; + f64 cutoffFrequency; + f64 samplingFrequency; + f64 quality; //frequency response quality + f64 gain; //peak gain + f64 a0, a1, a2, b1, b2; //coefficients + f64 z1, z2; //second-order IIR +}; + +inline auto Biquad::reset(Type type, f64 cutoffFrequency, f64 samplingFrequency, f64 quality, f64 gain) -> void { + this->type = type; + this->cutoffFrequency = cutoffFrequency; + this->samplingFrequency = samplingFrequency; + this->quality = quality; + this->gain = gain; + + z1 = 0.0; + z2 = 0.0; + + f64 v = pow(10, fabs(gain) / 20.0); + f64 k = tan(Math::Pi * cutoffFrequency / samplingFrequency); + f64 q = quality; + f64 n = 0.0; + + switch(type) { + + case Type::LowPass: + n = 1 / (1 + k / q + k * k); + a0 = k * k * n; + a1 = 2 * a0; + a2 = a0; + b1 = 2 * (k * k - 1) * n; + b2 = (1 - k / q + k * k) * n; + break; + + case Type::HighPass: + n = 1 / (1 + k / q + k * k); + a0 = 1 * n; + a1 = -2 * a0; + a2 = a0; + b1 = 2 * (k * k - 1) * n; + b2 = (1 - k / q + k * k) * n; + break; + + case Type::BandPass: + n = 1 / (1 + k / q + k * k); + a0 = k / q * n; + a1 = 0; + a2 = -a0; + b1 = 2 * (k * k - 1) * n; + b2 = (1 - k / q + k * k) * n; + break; + + case Type::Notch: + n = 1 / (1 + k / q + k * k); + a0 = (1 + k * k) * n; + a1 = 2 * (k * k - 1) * n; + a2 = a0; + b1 = a1; + b2 = (1 - k / q + k * k) * n; + break; + + case Type::Peak: + if(gain >= 0) { + n = 1 / (1 + 1 / q * k + k * k); + a0 = (1 + v / q * k + k * k) * n; + a1 = 2 * (k * k - 1) * n; + a2 = (1 - v / q * k + k * k) * n; + b1 = a1; + b2 = (1 - 1 / q * k + k * k) * n; + } else { + n = 1 / (1 + v / q * k + k * k); + a0 = (1 + 1 / q * k + k * k) * n; + a1 = 2 * (k * k - 1) * n; + a2 = (1 - 1 / q * k + k * k) * n; + b1 = a1; + b2 = (1 - v / q * k + k * k) * n; + } + break; + + case Type::LowShelf: + if(gain >= 0) { + n = 1 / (1 + k / q + k * k); + a0 = (1 + sqrt(v) / q * k + v * k * k) * n; + a1 = 2 * (v * k * k - 1) * n; + a2 = (1 - sqrt(v) / q * k + v * k * k) * n; + b1 = 2 * (k * k - 1) * n; + b2 = (1 - k / 
q + k * k) * n; + } else { + n = 1 / (1 + sqrt(v) / q * k + v * k * k); + a0 = (1 + k / q + k * k) * n; + a1 = 2 * (k * k - 1) * n; + a2 = (1 - k / q + k * k) * n; + b1 = 2 * (v * k * k - 1) * n; + b2 = (1 - sqrt(v) / q * k + v * k * k) * n; + } + break; + + case Type::HighShelf: + if(gain >= 0) { + n = 1 / (1 + k / q + k * k); + a0 = (v + sqrt(v) / q * k + k * k) * n; + a1 = 2 * (k * k - v) * n; + a2 = (v - sqrt(v) / q * k + k * k) * n; + b1 = 2 * (k * k - 1) * n; + b2 = (1 - k / q + k * k) * n; + } else { + n = 1 / (v + sqrt(v) / q * k + k * k); + a0 = (1 + k / q + k * k) * n; + a1 = 2 * (k * k - 1) * n; + a2 = (1 - k / q + k * k) * n; + b1 = 2 * (k * k - v) * n; + b2 = (v - sqrt(v) / q * k + k * k) * n; + } + break; + + } +} + +inline auto Biquad::process(f64 in) -> f64 { + f64 out = in * a0 + z1; + z1 = in * a1 + z2 - b1 * out; + z2 = in * a2 - b2 * out; + return out; +} + +//compute Q values for low-shelf and high-shelf filtering +inline auto Biquad::shelf(f64 gain, f64 slope) -> f64 { + f64 a = pow(10, gain / 40); + return 1 / sqrt((a + 1 / a) * (1 / slope - 1) + 2); +} + +//compute Q values for Nth-order butterworth filtering +inline auto Biquad::butterworth(u32 order, u32 phase) -> f64 { + return -0.5 / cos(Math::Pi * (phase + order + 0.5) / order); +} + +} diff --git a/waterbox/ares64/ares/nall/dsp/iir/dc-removal.hpp b/waterbox/ares64/ares/nall/dsp/iir/dc-removal.hpp new file mode 100644 index 0000000000..db77790c52 --- /dev/null +++ b/waterbox/ares64/ares/nall/dsp/iir/dc-removal.hpp @@ -0,0 +1,27 @@ +#pragma once + +//DC offset removal IIR filter + +namespace nall::DSP::IIR { + +struct DCRemoval { + auto reset() -> void; + auto process(f64 in) -> f64; //normalized sample (-1.0 to +1.0) + +private: + f64 x; + f64 y; +}; + +inline auto DCRemoval::reset() -> void { + x = 0.0; + y = 0.0; +} + +inline auto DCRemoval::process(f64 in) -> f64 { + x = 0.999 * x + in - y; + y = in; + return x; +} + +} diff --git a/waterbox/ares64/ares/nall/dsp/iir/one-pole.hpp 
b/waterbox/ares64/ares/nall/dsp/iir/one-pole.hpp new file mode 100644 index 0000000000..9feab2dc01 --- /dev/null +++ b/waterbox/ares64/ares/nall/dsp/iir/one-pole.hpp @@ -0,0 +1,44 @@ +#pragma once + +//one-pole first-order IIR filter + +namespace nall::DSP::IIR { + +struct OnePole { + enum class Type : u32 { + LowPass, + HighPass, + }; + + auto reset(Type type, f64 cutoffFrequency, f64 samplingFrequency) -> void; + auto process(f64 in) -> f64; //normalized sample (-1.0 to +1.0) + +private: + Type type; + f64 cutoffFrequency; + f64 samplingFrequency; + f64 a0, b1; //coefficients + f64 z1; //first-order IIR +}; + +inline auto OnePole::reset(Type type, f64 cutoffFrequency, f64 samplingFrequency) -> void { + this->type = type; + this->cutoffFrequency = cutoffFrequency; + this->samplingFrequency = samplingFrequency; + + z1 = 0.0; + f64 x = cos(2.0 * Math::Pi * cutoffFrequency / samplingFrequency); + if(type == Type::LowPass) { + b1 = +2.0 - x - sqrt((+2.0 - x) * (+2.0 - x) - 1); + a0 = 1.0 - b1; + } else { + b1 = -2.0 - x + sqrt((-2.0 - x) * (-2.0 - x) - 1); + a0 = 1.0 + b1; + } +} + +inline auto OnePole::process(f64 in) -> f64 { + return z1 = in * a0 + z1 * b1; +} + +} diff --git a/waterbox/ares64/ares/nall/dsp/resampler/cubic.hpp b/waterbox/ares64/ares/nall/dsp/resampler/cubic.hpp new file mode 100644 index 0000000000..f690e78d33 --- /dev/null +++ b/waterbox/ares64/ares/nall/dsp/resampler/cubic.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include +#include + +namespace nall::DSP::Resampler { + +struct Cubic { + auto inputFrequency() const -> f64 { return _inputFrequency; } + auto outputFrequency() const -> f64 { return _outputFrequency; } + + auto reset(f64 inputFrequency, f64 outputFrequency = 0, u32 queueSize = 0) -> void; + auto setInputFrequency(f64 inputFrequency) -> void; + auto pending() const -> bool; + auto read() -> f64; + auto write(f64 sample) -> void; + auto serialize(serializer&) -> void; + +private: + f64 _inputFrequency; + f64 _outputFrequency; + + f64 
_ratio; + f64 _fraction; + f64 _history[4]; + queue _samples; +}; + +inline auto Cubic::reset(f64 inputFrequency, f64 outputFrequency, u32 queueSize) -> void { + _inputFrequency = inputFrequency; + _outputFrequency = outputFrequency ? outputFrequency : _inputFrequency; + + _ratio = _inputFrequency / _outputFrequency; + _fraction = 0.0; + for(auto& sample : _history) sample = 0.0; + _samples.resize(queueSize ? queueSize : _outputFrequency * 0.02); //default to 20ms max queue size +} + +inline auto Cubic::setInputFrequency(f64 inputFrequency) -> void { + _inputFrequency = inputFrequency; + _ratio = _inputFrequency / _outputFrequency; +} + +inline auto Cubic::pending() const -> bool { + return _samples.pending(); +} + +inline auto Cubic::read() -> double { + return _samples.read(); +} + +inline auto Cubic::write(f64 sample) -> void { + auto& mu = _fraction; + auto& s = _history; + + s[0] = s[1]; + s[1] = s[2]; + s[2] = s[3]; + s[3] = sample; + + while(mu <= 1.0) { + f64 A = s[3] - s[2] - s[0] + s[1]; + f64 B = s[0] - s[1] - A; + f64 C = s[2] - s[0]; + f64 D = s[1]; + + _samples.write(A * mu * mu * mu + B * mu * mu + C * mu + D); + mu += _ratio; + } + + mu -= 1.0; +} + +inline auto Cubic::serialize(serializer& s) -> void { + s(_inputFrequency); + s(_outputFrequency); + s(_ratio); + s(_fraction); + s(_history); + s(_samples); +} + +} diff --git a/waterbox/ares64/ares/nall/elliptic-curve/curve25519.hpp b/waterbox/ares64/ares/nall/elliptic-curve/curve25519.hpp new file mode 100644 index 0000000000..e97dd080b1 --- /dev/null +++ b/waterbox/ares64/ares/nall/elliptic-curve/curve25519.hpp @@ -0,0 +1,57 @@ +#pragma once + +#if defined(EC_REFERENCE) + #include +#else + #include +#endif + +namespace nall::EllipticCurve { + +struct Curve25519 { + auto sharedKey(u256 secretKey, u256 basepoint = 9) const -> u256 { + secretKey &= (1_u256 << 254) - 8; + secretKey |= (1_u256 << 254); + basepoint &= ~0_u256 >> 1; + + point p = scalarMultiply(basepoint % P, secretKey); + field k = p.x * 
reciprocal(p.z); + return k(); + } + +private: + using field = Modulo25519; + struct point { field x, z; }; + const BarrettReduction<256> P = BarrettReduction<256>{EllipticCurve::P}; + + auto montgomeryDouble(point p) const -> point { + field a = square(p.x + p.z); + field b = square(p.x - p.z); + field c = a - b; + field d = a + c * 121665; + return {a * b, c * d}; + } + + auto montgomeryAdd(point p, point q, field b) const -> point { + return { + square(p.x * q.x - p.z * q.z), + square(p.x * q.z - p.z * q.x) * b + }; + } + + auto scalarMultiply(field b, u256 exponent) const -> point { + point p{1, 0}, q{b, 1}; + for(u32 bit : reverse(range(255))) { + bool condition = exponent >> bit & 1; + cswap(condition, p.x, q.x); + cswap(condition, p.z, q.z); + q = montgomeryAdd(p, q, b); + p = montgomeryDouble(p); + cswap(condition, p.x, q.x); + cswap(condition, p.z, q.z); + } + return p; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/elliptic-curve/ed25519.hpp b/waterbox/ares64/ares/nall/elliptic-curve/ed25519.hpp new file mode 100644 index 0000000000..76b0628402 --- /dev/null +++ b/waterbox/ares64/ares/nall/elliptic-curve/ed25519.hpp @@ -0,0 +1,144 @@ +#pragma once + +#include +#if defined(EC_REFERENCE) + #include +#else + #include +#endif + +namespace nall::EllipticCurve { + +static const u256 L = (1_u256 << 252) + 27742317777372353535851937790883648493_u256; + +struct Ed25519 { + auto publicKey(u256 privateKey) const -> u256 { + return compress(scalarMultiply(B, clamp(hash(privateKey)) % L)); + } + + auto sign(array_view message, u256 privateKey) const -> u512 { + u512 H = hash(privateKey); + u256 a = clamp(H) % L; + u256 A = compress(scalarMultiply(B, a)); + + u512 r = hash(upper(H), message) % L; + u256 R = compress(scalarMultiply(B, r)); + + u512 k = hash(R, A, message) % L; + u256 S = (k * a + r) % L; + + return u512(S) << 256 | R; + } + + auto verify(array_view message, u512 signature, u256 publicKey) const -> bool { + auto R = decompress(lower(signature)); + 
auto A = decompress(publicKey); + if(!R || !A) return false; + + u256 S = upper(signature) % L; + u512 r = hash(lower(signature), publicKey, message) % L; + + auto p = scalarMultiply(B, S); + auto q = edwardsAdd(R(), scalarMultiply(A(), r)); + if(!onCurve(p) || !onCurve(q)) return false; + if(p.x * q.z - q.x * p.z) return false; + if(p.y * q.z - q.y * p.z) return false; + return true; + } + +private: + using field = Modulo25519; + struct point { field x, y, z, t; }; + const field D = -field(121665) * reciprocal(field(121666)); + const point B = *decompress((field(4) * reciprocal(field(5)))()); + const BarrettReduction<256> L = BarrettReduction<256>{EllipticCurve::L}; + + auto input(Hash::SHA512&) const -> void {} + + template auto input(Hash::SHA512& hash, u256 value, P&&... p) const -> void { + for(u32 byte : range(32)) hash.input(u8(value >> byte * 8)); + input(hash, forward

(p)...); + } + + template auto input(Hash::SHA512& hash, array_view value, P&&... p) const -> void { + hash.input(value); + input(hash, forward

(p)...); + } + + template auto hash(P&&... p) const -> u512 { + Hash::SHA512 hash; + input(hash, forward

(p)...); + u512 result; + for(auto byte : reverse(hash.output())) result = result << 8 | byte; + return result; + } + + auto clamp(u256 p) const -> u256 { + p &= (1_u256 << 254) - 8; + p |= (1_u256 << 254); + return p; + } + + auto onCurve(point p) const -> bool { + if(!p.z) return false; + if(p.x * p.y - p.z * p.t) return false; + if(square(p.y) - square(p.x) - square(p.z) - square(p.t) * D) return false; + return true; + } + + auto decompress(u256 c) const -> maybe { + field y = c & ~0_u256 >> 1; + field x = squareRoot((square(y) - 1) * reciprocal(D * square(y) + 1)); + if(c >> 255) x = -x; + point p{x, y, 1, x * y}; + if(!onCurve(p)) return nothing; + return p; + } + + auto compress(point p) const -> u256 { + field r = reciprocal(p.z); + field x = p.x * r; + field y = p.y * r; + return (x & 1) << 255 | (y & ~0_u256 >> 1); + } + + auto edwardsDouble(point p) const -> point { + field a = square(p.x); + field b = square(p.y); + field c = square(p.z); + field d = -a; + field e = square(p.x + p.y) - a - b; + field g = d + b; + field f = g - (c + c); + field h = d - b; + return {e * f, g * h, f * g, e * h}; + } + + auto edwardsAdd(point p, point q) const -> point { + field a = (p.y - p.x) * (q.y - q.x); + field b = (p.y + p.x) * (q.y + q.x); + field c = (p.t + p.t) * q.t * D; + field d = (p.z + p.z) * q.z; + field e = b - a; + field f = d - c; + field g = d + c; + field h = b + a; + return {e * f, g * h, f * g, e * h}; + } + + auto scalarMultiply(point q, u256 exponent) const -> point { + point p{0, 1, 1, 0}, c; + for(u32 bit : reverse(range(253))) { + p = edwardsDouble(p); + c = edwardsAdd(p, q); + bool condition = exponent >> bit & 1; + cmove(condition, p.x, c.x); + cmove(condition, p.y, c.y); + cmove(condition, p.z, c.z); + cmove(condition, p.t, c.t); + } + return p; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-optimized.hpp b/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-optimized.hpp new file mode 100644 index 
0000000000..d0e0f4e4d7 --- /dev/null +++ b/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-optimized.hpp @@ -0,0 +1,218 @@ +#pragma once + +#include + +namespace nall::EllipticCurve { + +static const u256 P = (1_u256 << 255) - 19; + +#define Mask ((1ull << 51) - 1) + +struct Modulo25519 { + Modulo25519() = default; + Modulo25519(const Modulo25519&) = default; + Modulo25519(u64 a, u64 b = 0, u64 c = 0, u64 d = 0, u64 e = 0) : l{a, b, c, d, e} {} + Modulo25519(u256 n); + + explicit operator bool() const { return (bool)operator()(); } + auto operator[](u32 index) -> u64& { return l[index]; } + auto operator[](u32 index) const -> u64 { return l[index]; } + auto operator()() const -> u256; + +private: + u64 l[5]; //51-bits per limb; 255-bits total +}; + +inline Modulo25519::Modulo25519(u256 n) { + l[0] = n >> 0 & Mask; + l[1] = n >> 51 & Mask; + l[2] = n >> 102 & Mask; + l[3] = n >> 153 & Mask; + l[4] = n >> 204 & Mask; +} + +inline auto Modulo25519::operator()() const -> u256 { + Modulo25519 o = *this; + + o[1] += (o[0] >> 51); o[0] &= Mask; + o[2] += (o[1] >> 51); o[1] &= Mask; + o[3] += (o[2] >> 51); o[2] &= Mask; + o[4] += (o[3] >> 51); o[3] &= Mask; + o[0] += 19 * (o[4] >> 51); o[4] &= Mask; + + o[1] += (o[0] >> 51); o[0] &= Mask; + o[2] += (o[1] >> 51); o[1] &= Mask; + o[3] += (o[2] >> 51); o[2] &= Mask; + o[4] += (o[3] >> 51); o[3] &= Mask; + o[0] += 19 * (o[4] >> 51); o[4] &= Mask; + + o[0] += 19; + o[1] += (o[0] >> 51); o[0] &= Mask; + o[2] += (o[1] >> 51); o[1] &= Mask; + o[3] += (o[2] >> 51); o[2] &= Mask; + o[4] += (o[3] >> 51); o[3] &= Mask; + o[0] += 19 * (o[4] >> 51); o[4] &= Mask; + + o[0] += Mask - 18; + o[1] += Mask; + o[2] += Mask; + o[3] += Mask; + o[4] += Mask; + + o[1] += o[0] >> 51; o[0] &= Mask; + o[2] += o[1] >> 51; o[1] &= Mask; + o[3] += o[2] >> 51; o[2] &= Mask; + o[4] += o[3] >> 51; o[3] &= Mask; + o[4] &= Mask; + + return (u256)o[0] << 0 | (u256)o[1] << 51 | (u256)o[2] << 102 | (u256)o[3] << 153 | (u256)o[4] << 204; +} + +inline auto 
cmove(bool move, Modulo25519& l, const Modulo25519& r) -> void { + u64 mask = -move; + l[0] ^= mask & (l[0] ^ r[0]); + l[1] ^= mask & (l[1] ^ r[1]); + l[2] ^= mask & (l[2] ^ r[2]); + l[3] ^= mask & (l[3] ^ r[3]); + l[4] ^= mask & (l[4] ^ r[4]); +} + +inline auto cswap(bool swap, Modulo25519& l, Modulo25519& r) -> void { + u64 mask = -swap, x; + x = mask & (l[0] ^ r[0]); l[0] ^= x; r[0] ^= x; + x = mask & (l[1] ^ r[1]); l[1] ^= x; r[1] ^= x; + x = mask & (l[2] ^ r[2]); l[2] ^= x; r[2] ^= x; + x = mask & (l[3] ^ r[3]); l[3] ^= x; r[3] ^= x; + x = mask & (l[4] ^ r[4]); l[4] ^= x; r[4] ^= x; +} + +inline auto operator-(const Modulo25519& l) -> Modulo25519 { //P - l + Modulo25519 o; + u64 c; + o[0] = 0xfffffffffffda - l[0]; c = o[0] >> 51; o[0] &= Mask; + o[1] = 0xffffffffffffe - l[1] + c; c = o[1] >> 51; o[1] &= Mask; + o[2] = 0xffffffffffffe - l[2] + c; c = o[2] >> 51; o[2] &= Mask; + o[3] = 0xffffffffffffe - l[3] + c; c = o[3] >> 51; o[3] &= Mask; + o[4] = 0xffffffffffffe - l[4] + c; c = o[4] >> 51; o[4] &= Mask; + o[0] += c * 19; + return o; +} + +inline auto operator+(const Modulo25519& l, const Modulo25519& r) -> Modulo25519 { + Modulo25519 o; + u64 c; + o[0] = l[0] + r[0]; c = o[0] >> 51; o[0] &= Mask; + o[1] = l[1] + r[1] + c; c = o[1] >> 51; o[1] &= Mask; + o[2] = l[2] + r[2] + c; c = o[2] >> 51; o[2] &= Mask; + o[3] = l[3] + r[3] + c; c = o[3] >> 51; o[3] &= Mask; + o[4] = l[4] + r[4] + c; c = o[4] >> 51; o[4] &= Mask; + o[0] += c * 19; + return o; +} + +inline auto operator-(const Modulo25519& l, const Modulo25519& r) -> Modulo25519 { + Modulo25519 o; + u64 c; + o[0] = l[0] + 0x1fffffffffffb4 - r[0]; c = o[0] >> 51; o[0] &= Mask; + o[1] = l[1] + 0x1ffffffffffffc - r[1] + c; c = o[1] >> 51; o[1] &= Mask; + o[2] = l[2] + 0x1ffffffffffffc - r[2] + c; c = o[2] >> 51; o[2] &= Mask; + o[3] = l[3] + 0x1ffffffffffffc - r[3] + c; c = o[3] >> 51; o[3] &= Mask; + o[4] = l[4] + 0x1ffffffffffffc - r[4] + c; c = o[4] >> 51; o[4] &= Mask; + o[0] += c * 19; + return o; +} + 
+inline auto operator*(const Modulo25519& l, u64 scalar) -> Modulo25519 { + Modulo25519 o; + u128 a; + a = (u128)l[0] * scalar; o[0] = a & Mask; + a = (u128)l[1] * scalar + (a >> 51 & Mask); o[1] = a & Mask; + a = (u128)l[2] * scalar + (a >> 51 & Mask); o[2] = a & Mask; + a = (u128)l[3] * scalar + (a >> 51 & Mask); o[3] = a & Mask; + a = (u128)l[4] * scalar + (a >> 51 & Mask); o[4] = a & Mask; + o[0] += (a >> 51) * 19; + return o; +} + +inline auto operator*(const Modulo25519& l, Modulo25519 r) -> Modulo25519 { + u128 t[] = { + (u128)r[0] * l[0], + (u128)r[0] * l[1] + (u128)r[1] * l[0], + (u128)r[0] * l[2] + (u128)r[1] * l[1] + (u128)r[2] * l[0], + (u128)r[0] * l[3] + (u128)r[1] * l[2] + (u128)r[2] * l[1] + (u128)r[3] * l[0], + (u128)r[0] * l[4] + (u128)r[1] * l[3] + (u128)r[2] * l[2] + (u128)r[3] * l[1] + (u128)r[4] * l[0] + }; + + r[1] *= 19, r[2] *= 19, r[3] *= 19, r[4] *= 19; + + t[0] += (u128)r[4] * l[1] + (u128)r[3] * l[2] + (u128)r[2] * l[3] + (u128)r[1] * l[4]; + t[1] += (u128)r[4] * l[2] + (u128)r[3] * l[3] + (u128)r[2] * l[4]; + t[2] += (u128)r[4] * l[3] + (u128)r[3] * l[4]; + t[3] += (u128)r[4] * l[4]; + + u64 c; r[0] = t[0] & Mask; c = (u64)(t[0] >> 51); + t[1] += c; r[1] = t[1] & Mask; c = (u64)(t[1] >> 51); + t[2] += c; r[2] = t[2] & Mask; c = (u64)(t[2] >> 51); + t[3] += c; r[3] = t[3] & Mask; c = (u64)(t[3] >> 51); + t[4] += c; r[4] = t[4] & Mask; c = (u64)(t[4] >> 51); + + r[0] += c * 19; c = r[0] >> 51; r[0] &= Mask; + r[1] += c; c = r[1] >> 51; r[1] &= Mask; + r[2] += c; + return r; +} + +inline auto operator&(const Modulo25519& lhs, u256 rhs) -> u256 { + return lhs() & rhs; +} + +inline auto square(const Modulo25519& lhs) -> Modulo25519 { + Modulo25519 r{lhs}; + Modulo25519 d{r[0] * 2, r[1] * 2, r[2] * 2 * 19, r[4] * 19, r[4] * 19 * 2}; + + u128 t[5]; + t[0] = (u128)r[0] * r[0] + (u128)d[4] * r[1] + (u128)d[2] * r[3]; + t[1] = (u128)d[0] * r[1] + (u128)d[4] * r[2] + (u128)r[3] * r[3] * 19; + t[2] = (u128)d[0] * r[2] + (u128)r[1] * r[1] + 
(u128)d[4] * r[3]; + t[3] = (u128)d[0] * r[3] + (u128)d[1] * r[2] + (u128)r[4] * d[3]; + t[4] = (u128)d[0] * r[4] + (u128)d[1] * r[3] + (u128)r[2] * r[2]; + + u64 c; r[0] = t[0] & Mask; c = (u64)(t[0] >> 51); + t[1] += c; r[1] = t[1] & Mask; c = (u64)(t[1] >> 51); + t[2] += c; r[2] = t[2] & Mask; c = (u64)(t[2] >> 51); + t[3] += c; r[3] = t[3] & Mask; c = (u64)(t[3] >> 51); + t[4] += c; r[4] = t[4] & Mask; c = (u64)(t[4] >> 51); + + r[0] += c * 19; c = r[0] >> 51; r[0] &= Mask; + r[1] += c; c = r[1] >> 51; r[1] &= Mask; + r[2] += c; + return r; +} + +inline auto exponentiate(const Modulo25519& lhs, u256 exponent) -> Modulo25519 { + Modulo25519 x = 1, y; + for(u32 bit : reverse(range(256))) { + x = square(x); + y = x * lhs; + cmove(exponent >> bit & 1, x, y); + } + return x; +} + +inline auto reciprocal(const Modulo25519& lhs) -> Modulo25519 { + return exponentiate(lhs, P - 2); +} + +inline auto squareRoot(const Modulo25519& lhs) -> Modulo25519 { + static const Modulo25519 I = exponentiate(Modulo25519(2), P - 1 >> 2); //I == sqrt(-1) + Modulo25519 x = exponentiate(lhs, P + 3 >> 3); + Modulo25519 y = x * I; + cmove(bool(square(x) - lhs), x, y); + y = -x; + cmove(x & 1, x, y); + return x; +} + +#undef Mask + +} diff --git a/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-reference.hpp b/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-reference.hpp new file mode 100644 index 0000000000..5a8ee302db --- /dev/null +++ b/waterbox/ares64/ares/nall/elliptic-curve/modulo25519-reference.hpp @@ -0,0 +1,84 @@ +#pragma once + +//warning: this implementation leaks side-channel information +//use modulo25519-optimized.hpp in production + +#include + +namespace nall::EllipticCurve { + +static const u256 P = (1_u256 << 255) - 19; + +struct Modulo25519 { + Modulo25519() = default; + Modulo25519(const Modulo25519& source) : value(source.value) {} + template Modulo25519(const T& value) : value(value) {} + explicit operator bool() const { return (bool)value; } + auto 
operator()() const -> u256 { return value; } + +private: + u256 value; +}; + +inline auto operator-(const Modulo25519& lhs) -> Modulo25519 { + return P - lhs(); +} + +inline auto operator+(const Modulo25519& lhs, const Modulo25519& rhs) -> Modulo25519 { + u512 value = (u512)lhs() + rhs(); + if(value >= P) value -= P; + return value; +} + +inline auto operator-(const Modulo25519& lhs, const Modulo25519& rhs) -> Modulo25519 { + u512 value = (u512)lhs(); + if(value < rhs()) value += P; + return u256(value - rhs()); +} + +inline auto operator*(const Modulo25519& lhs, const Modulo25519& rhs) -> Modulo25519 { + static const BarrettReduction<256> P{EllipticCurve::P}; + u256 hi, lo; + mul(lhs(), rhs(), hi, lo); + return u512{hi, lo} % P; +} + +inline auto operator&(const Modulo25519& lhs, u256 rhs) -> u256 { + return lhs() & rhs; +} + +inline auto square(const Modulo25519& lhs) -> Modulo25519 { + static const BarrettReduction<256> P{EllipticCurve::P}; + u256 hi, lo; + square(lhs(), hi, lo); + return u512{hi, lo} % P; +} + +inline auto exponentiate(const Modulo25519& lhs, u256 exponent) -> Modulo25519 { + if(exponent == 0) return 1; + Modulo25519 value = square(exponentiate(lhs, exponent >> 1)); + if(exponent & 1) value = value * lhs; + return value; +} + +inline auto reciprocal(const Modulo25519& lhs) -> Modulo25519 { + return exponentiate(lhs, P - 2); +} + +inline auto squareRoot(const Modulo25519& lhs) -> Modulo25519 { + static const Modulo25519 I = exponentiate(Modulo25519(2), P - 1 >> 2); //I = sqrt(-1) + Modulo25519 value = exponentiate(lhs, P + 3 >> 3); + if(square(value) - lhs) value = value * I; + if(value & 1) value = -value; + return value; +} + +inline auto cmove(bool condition, Modulo25519& lhs, const Modulo25519& rhs) -> void { + if(condition) lhs = rhs; +} + +inline auto cswap(bool condition, Modulo25519& lhs, Modulo25519& rhs) -> void { + if(condition) swap(lhs, rhs); +} + +} diff --git a/waterbox/ares64/ares/nall/emulation/21fx.hpp 
b/waterbox/ares64/ares/nall/emulation/21fx.hpp new file mode 100644 index 0000000000..fd36ce26d4 --- /dev/null +++ b/waterbox/ares64/ares/nall/emulation/21fx.hpp @@ -0,0 +1,128 @@ +#pragma once + +#include +#include +using namespace nall; + +struct FX { + auto open(Arguments& arguments) -> bool; + auto close() -> void; + auto readable() -> bool; + auto read() -> u8; + auto writable() -> bool; + auto write(u8 data) -> void; + + auto read(u32 offset, u32 length) -> vector; + auto write(u32 offset, const void* buffer, u32 length) -> void; + auto write(u32 offset, const vector& buffer) -> void { write(offset, buffer.data(), buffer.size()); } + auto execute(u32 offset) -> void; + + auto read(u32 offset) -> u8; + auto write(u32 offset, u8 data) -> void; + + serial device; +}; + +inline auto FX::open(Arguments& arguments) -> bool { + //device name override support + string name; + arguments.take("--device", name); + if(!device.open(name)) { + print("[21fx] error: unable to open hardware device\n"); + return false; + } + + //flush the device (to clear floating inputs) + while(true) { + while(readable()) read(); + auto iplrom = read(0x2184, 122); + auto sha256 = Hash::SHA256(iplrom).digest(); + if(sha256 == "41b79712a4a2d16d39894ae1b38cde5c41dad22eadc560df631d39f13df1e4b9") break; + } + + return true; +} + +inline auto FX::close() -> void { + device.close(); +} + +inline auto FX::readable() -> bool { + return device.readable(); +} + +//1000ns delay avoids burning CPU core at 100%; does not slow down max transfer rate at all +inline auto FX::read() -> u8 { + while(!readable()) usleep(1000); + u8 buffer[1] = {0}; + device.read(buffer, 1); + return buffer[0]; +} + +inline auto FX::writable() -> bool { + return device.writable(); +} + +inline auto FX::write(u8 data) -> void { + while(!writable()) usleep(1000); + u8 buffer[1] = {data}; + device.write(buffer, 1); +} + +// + +inline auto FX::read(u32 offset, u32 length) -> vector { + write(0x21); + write(0x66); + write(0x78); + 
write(offset >> 16); + write(offset >> 8); + write(offset >> 0); + write(0x01); + write(length >> 8); + write(length >> 0); + write(0x00); + + vector buffer; + while(length--) buffer.append(read()); + return buffer; +} + +inline auto FX::write(u32 offset, const void* data, u32 length) -> void { + write(0x21); + write(0x66); + write(0x78); + write(offset >> 16); + write(offset >> 8); + write(offset >> 0); + write(0x01); + write(length >> 8); + write(length >> 0); + write(0x01); + + auto buffer = (u8*)data; + for(auto n : range(length)) write(buffer[n]); + write(0x00); +} + +inline auto FX::execute(u32 offset) -> void { + write(0x21); + write(0x66); + write(0x78); + write(offset >> 16); + write(offset >> 8); + write(offset >> 0); + write(0x00); +} + +// + +inline auto FX::read(u32 offset) -> u8 { + auto buffer = read(offset, 1); + return buffer[0]; +} + +inline auto FX::write(u32 offset, u8 data) -> void { + vector buffer = {data}; + write(offset, buffer); +} diff --git a/waterbox/ares64/ares/nall/encode/base.hpp b/waterbox/ares64/ares/nall/encode/base.hpp new file mode 100644 index 0000000000..295b0b2be5 --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/base.hpp @@ -0,0 +1,38 @@ +#pragma once + +//required bytes: ceil(bits / log2(base)) +//base57 => 128=22, 256=44, 512=88 +//base62 => 128=22, 256=43, 512=86 +//base64 => 128=22, 256=43, 512=86 + +#include + +namespace nall::Encode { + +template inline auto Base(T value) -> string { + static const string format = + Bits == 2 ? "01" + : Bits == 8 ? "01234567" + : Bits == 10 ? "0123456789" + : Bits == 16 ? "0123456789abcdef" + : Bits == 32 ? "0123456789abcdefghijklmnopqrstuv" + : Bits == 34 ? "023456789abcdefghijkmnopqrstuvwxyz" //1l + : Bits == 36 ? "0123456789abcdefghijklmnopqrstuvwxyz" + : Bits == 57 ? "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" //01IOl + : Bits == 62 ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + : Bits == 64 ? 
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{}" + : Bits == 85 ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%()+,-.:;=@[]^_`{|}~" //\ "&'*/<>? + : ""; + static const u32 size = ceil(sizeof(T) * 8 / log2(Bits)); + + string result; + result.resize(size); + char* data = result.get() + size; + for(auto byte : result) { + *--data = format[value % Bits]; + value /= Bits; + } + return result; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/base64.hpp b/waterbox/ares64/ares/nall/encode/base64.hpp new file mode 100644 index 0000000000..3595e5a91a --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/base64.hpp @@ -0,0 +1,68 @@ +#pragma once + +namespace nall::Encode { + +inline auto Base64(const void* vdata, u32 size, const string& format = "MIME") -> string { + static bool initialized = false; + static char lookup[65] = {}; + if(!initialized) { + initialized = true; + for(u32 n : range(26)) lookup[n + 0] = 'A' + n; + for(u32 n : range(26)) lookup[n + 26] = 'a' + n; + for(u32 n : range(10)) lookup[n + 52] = '0' + n; + } + + if(format == "MIME") { + lookup[62] = '+'; + lookup[63] = '/'; + lookup[64] = '='; + } else if(format == "URI") { + lookup[62] = '-'; + lookup[63] = '_'; + lookup[64] = 0; + } else return ""; + + auto data = (const u8*)vdata; + u32 overflow = (3 - (size % 3)) % 3; //bytes to round to nearest multiple of 3 + string result; + u8 buffer = 0; + for(u32 n : range(size)) { + switch(n % 3) { + case 0: + buffer = data[n] >> 2; + result.append(lookup[buffer]); + buffer = (data[n] & 3) << 4; + break; + + case 1: + buffer |= data[n] >> 4; + result.append(lookup[buffer]); + buffer = (data[n] & 15) << 2; + break; + + case 2: + buffer |= data[n] >> 6; + result.append(lookup[buffer]); + buffer = (data[n] & 63); + result.append(lookup[buffer]); + break; + } + } + + if(overflow) result.append(lookup[buffer]); + if(lookup[64]) { + while(result.size() % 4) result.append(lookup[64]); + } + + return result; +} + +inline auto 
Base64(const vector& buffer, const string& format = "MIME") -> string { + return Base64(buffer.data(), buffer.size(), format); +} + +inline auto Base64(const string& text, const string& format = "MIME") -> string { + return Base64(text.data(), text.size(), format); +} + +} diff --git a/waterbox/ares64/ares/nall/encode/bmp.hpp b/waterbox/ares64/ares/nall/encode/bmp.hpp new file mode 100644 index 0000000000..87679b9995 --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/bmp.hpp @@ -0,0 +1,47 @@ +#pragma once + +namespace nall::Encode { + +struct BMP { + static auto create(const string& filename, const void* data, u32 pitch, u32 width, u32 height, bool alpha) -> bool { + auto fp = file::open(filename, file::mode::write); + if(!fp) return false; + + u32 bitsPerPixel = alpha ? 32 : 24; + u32 bytesPerPixel = bitsPerPixel / 8; + u32 alignedWidth = width * bytesPerPixel; + u32 paddingLength = 0; + u32 imageSize = alignedWidth * height; + u32 fileSize = 0x36 + imageSize; + while(alignedWidth % 4) alignedWidth++, paddingLength++; + + fp.writel(0x4d42, 2); //signature + fp.writel(fileSize, 4); //file size + fp.writel(0, 2); //reserved + fp.writel(0, 2); //reserved + fp.writel(0x36, 4); //offset + + fp.writel(40, 4); //DIB size + fp.writel(width, 4); //width + fp.writel(-height, 4); //height + fp.writel(1, 2); //color planes + fp.writel(bitsPerPixel, 2); //bits per pixel + fp.writel(0, 4); //compression method (BI_RGB) + fp.writel(imageSize, 4); //image data size + fp.writel(3780, 4); //horizontal resolution + fp.writel(3780, 4); //vertical resolution + fp.writel(0, 4); //palette size + fp.writel(0, 4); //important color count + + pitch >>= 2; + for(auto y : range(height)) { + auto p = (const u32*)data + y * pitch; + for(auto x : range(width)) fp.writel(*p++, bytesPerPixel); + if(paddingLength) fp.writel(0, paddingLength); + } + + return true; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/encode/bwt.hpp b/waterbox/ares64/ares/nall/encode/bwt.hpp new file mode 100644 
index 0000000000..61cd69bda0 --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/bwt.hpp @@ -0,0 +1,86 @@ +#pragma once + +//burrows-wheeler transform + +#include + +namespace nall::Encode { + +/* + A standard suffix array cannot produce a proper burrows-wheeler transform, due to rotations. + + Take the input string, "nall", this gives us: + nall + alln + llna + lnal + + If we suffix sort this, we produce: + all => alln + l => lnal + ll => llna + nall => nall + + If we sort this, we produce: + alln + llna + lnal + nall + + Thus, suffix sorting gives us "nlal" as the last column instead of "nall". + This is because BWT rotates the input string, whereas suffix arrays sort the input string. + + Adding a 256th character terminator before sorting will not produce the desired result, either. + A more complicated string such as "mississippi" will sort as "ssmppissiii" with terminator=256, + and as "ipssmpissii" with terminator=0, alphabet=1..256, whereas we want "pssmipissii". + + Performing a merge sort to use a specialized comparison function that wraps suffixes is too slow at O(n log n). + + Producing a custom induced sort to handle rotations would be incredibly complicated, + owing to the recursive nature of induced sorting, among other things. + + So instead, a temporary array is produced that contains the input suffix twice. + This is then fed into the suffix array sort, and the doubled matches are filtered out. + After this point, suffixes are sorted in their mirrored form, and the correct result can be derived + + The result of this is an O(2n) algorithm, which vastly outperforms a naive O(n log n) algorithm, + but is still far from ideal. However, this will have to do until a better solution is devised. + + Although to be fair, BWT is inferior to the bijective BWT anyway, so it may not be worth the effort. 
+*/ + +inline auto BWT(array_view input) -> vector { + auto size = input.size(); + vector output; + output.reserve(8 + 8 + size); + for(u32 byte : range(8)) output.append(size >> byte * 8); + for(u32 byte : range(8)) output.append(0x00); + + vector buffer; + buffer.reserve(2 * size); + for(u32 offset : range(size)) buffer.append(input[offset]); + for(u32 offset : range(size)) buffer.append(input[offset]); + + auto suffixes = SuffixArray(buffer); + + vector prefixes; + prefixes.reserve(size); + + for(u32 offset : range(2 * size + 1)) { + u32 suffix = suffixes[offset]; + if(suffix >= size) continue; //beyond the bounds of the original input string + prefixes.append(suffix); + } + + u64 root = 0; + for(u32 offset : range(size)) { + u32 suffix = prefixes[offset]; + if(suffix == 0) root = offset, suffix = size; + output.append(input[--suffix]); + } + for(u32 byte : range(8)) output[8 + byte] = root >> byte * 8; + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/html.hpp b/waterbox/ares64/ares/nall/encode/html.hpp new file mode 100644 index 0000000000..6e4fd04b1a --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/html.hpp @@ -0,0 +1,18 @@ +#pragma once + +namespace nall::Encode { + +inline auto HTML(const string& input) -> string { + string output; + for(char c : input) { + if(c == '&' ) { output.append("&" ); continue; } + if(c == '<' ) { output.append("<" ); continue; } + if(c == '>' ) { output.append(">" ); continue; } + if(c == '"' ) { output.append("""); continue; } + if(c == '\'') { output.append("'"); continue; } + output.append(c); + } + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/huffman.hpp b/waterbox/ares64/ares/nall/encode/huffman.hpp new file mode 100644 index 0000000000..ca547d1e7b --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/huffman.hpp @@ -0,0 +1,84 @@ +#pragma once + +namespace nall::Encode { + +inline auto Huffman(array_view input) -> vector { + vector output; + for(u32 byte : range(8)) 
output.append(input.size() >> byte * 8); + + struct Node { + u32 frequency = 0; + u32 parent = 0; + u32 lhs = 0; + u32 rhs = 0; + }; + array nodes; + for(u32 offset : range(input.size())) nodes[input[offset]].frequency++; + + u32 count = 0; + for(u32 offset : range(511)) { + if(nodes[offset].frequency) count++; + else nodes[offset].parent = 511; + } + + auto minimum = [&] { + u32 frequency = ~0, minimum = 511; + for(u32 index : range(511)) { + if(!nodes[index].parent && nodes[index].frequency && nodes[index].frequency < frequency) { + frequency = nodes[index].frequency; + minimum = index; + } + } + return minimum; + }; + + //group the least two frequently used nodes until only one node remains + u32 index = 256; + for(u32 remaining = max(2, count); remaining >= 2; remaining--) { + u32 lhs = minimum(); + nodes[lhs].parent = index; + u32 rhs = minimum(); + nodes[rhs].parent = index; + if(remaining == 2) index = nodes[lhs].parent = nodes[rhs].parent = 511; + nodes[index].lhs = lhs; + nodes[index].rhs = rhs; + nodes[index].parent = 0; + nodes[index].frequency = nodes[lhs].frequency + nodes[rhs].frequency; + index++; + } + + u32 byte = 0, bits = 0; + auto write = [&](bool bit) { + byte = byte << 1 | bit; + if(++bits == 8) output.append(byte), bits = 0; + }; + + //only the upper half of the table is needed for decompression + //the first 256 nodes are always treated as leaf nodes + for(u32 offset : range(256)) { + for(u32 index : reverse(range(9))) write(nodes[256 + offset].lhs >> index & 1); + for(u32 index : reverse(range(9))) write(nodes[256 + offset].rhs >> index & 1); + } + + for(u32 byte : input) { + u32 node = byte, length = 0; + u256 sequence = 0; + //traversing the array produces the bitstream in reverse order + do { + u32 parent = nodes[node].parent; + bool bit = nodes[nodes[node].parent].rhs == node; + sequence = sequence << 1 | bit; + length++; + node = parent; + } while(node != 511); + //output the generated bits in the correct order + for(u32 index : 
range(length)) { + write(sequence >> index & 1); + } + } + while(bits) write(0); + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/lzsa.hpp b/waterbox/ares64/ares/nall/encode/lzsa.hpp new file mode 100644 index 0000000000..b1394972d8 --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/lzsa.hpp @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace nall::Encode { + +inline auto LZSA(array_view input) -> vector { + vector output; + for(u32 byte : range(8)) output.append(input.size() >> byte * 8); + + auto suffixArray = SuffixArray(input).lpf(); + u32 index = 0; + vector flags; + vector literals; + vector stringLengths; + vector stringOffsets; + + u32 byte = 0, bits = 0; + auto flagWrite = [&](bool bit) { + byte = byte << 1 | bit; + if(++bits == 8) flags.append(byte), bits = 0; + }; + + auto literalWrite = [&](u8 literal) { + literals.append(literal); + }; + + auto lengthWrite = [&](u64 length) { + if(length < 1 << 7) length = length << 1 | 0b1; + else if(length < 1 << 14) length = length << 2 | 0b10; + else if(length < 1 << 21) length = length << 3 | 0b100; + else if(length < 1 << 28) length = length << 4 | 0b1000; + else /*length < 1 << 35*/length = length << 5 | 0b10000; + while(length) stringLengths.append(length), length >>= 8; + }; + + auto offsetWrite = [&](u32 offset) { + stringOffsets.append(offset >> 0); if(index < 1 << 8) return; + stringOffsets.append(offset >> 8); if(index < 1 << 16) return; + stringOffsets.append(offset >> 16); if(index < 1 << 24) return; + stringOffsets.append(offset >> 24); + }; + + while(index < input.size()) { + s32 length, offset; + suffixArray.previous(length, offset, index); + +/* for(u32 ahead = 1; ahead <= 2; ahead++) { + s32 aheadLength, aheadOffset; + suffixArray.previous(aheadLength, aheadOffset, index + ahead); + if(aheadLength > length && aheadOffset >= 0) { + length = 0; + break; + } + } */ + + if(length < 6 || offset < 0) { + flagWrite(0); + 
literalWrite(input[index++]); + } else { + flagWrite(1); + lengthWrite(length - 6); + offsetWrite(index - offset); + index += length; + } + } + while(bits) flagWrite(0); + + auto save = [&](const vector& buffer) { + for(u32 byte : range(8)) output.append(buffer.size() >> byte * 8); + output.append(buffer); + }; + + save(Encode::Huffman(flags)); + save(Encode::Huffman(literals)); + save(Encode::Huffman(stringLengths)); + save(Encode::Huffman(stringOffsets)); + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/mtf.hpp b/waterbox/ares64/ares/nall/encode/mtf.hpp new file mode 100644 index 0000000000..0525031bc6 --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/mtf.hpp @@ -0,0 +1,30 @@ +#pragma once + +//move to front + +namespace nall::Encode { + +inline auto MTF(array_view input) -> vector { + vector output; + output.resize(input.size()); + + u8 order[256]; + for(u32 n : range(256)) order[n] = n; + + for(u32 offset : range(input.size())) { + u32 data = input[offset]; + for(u32 index : range(256)) { + u32 value = order[index]; + if(value == data) { + output[offset] = index; + memory::move(&order[1], &order[0], index); + order[0] = value; + break; + } + } + } + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/png.hpp b/waterbox/ares64/ares/nall/encode/png.hpp new file mode 100644 index 0000000000..6bf2d9c12c --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/png.hpp @@ -0,0 +1,189 @@ +#pragma once + +#include +#include +#include + +namespace nall::Encode { + +//this encodes an array of pixels into an uncompressed PNG image file. +//if optipng or pngcrush are installed, the resulting PNG file will be quickly compressed. +//if nall gains a deflate implementation one day, then this can be improved to offer integrated compression. 
+ +struct PNG { + static auto RGB8 (const string& filename, const void* data, u32 pitch, u32 width, u32 height) -> bool; + static auto RGBA8(const string& filename, const void* data, u32 pitch, u32 width, u32 height) -> bool; + +private: + auto compress(const string& filename) -> bool; + auto open(const string& filename) -> bool; + auto close() -> void; + auto header() -> void; + auto footer() -> void; + auto information(u32 width, u32 height, u32 depth, u32 type) -> void; + auto dataHeader(u32 width, u32 height, u32 bitsPerPixel) -> void; + auto dataLine(bool lastLine) -> void; + auto dataFooter() -> void; + auto write(u8 data) -> void; + auto adler(u8 data) -> void; + + file_buffer fp; + Hash::CRC32 crc32; + u16 adler1 = 1; + u16 adler2 = 0; + u16 bytesPerLine = 0; +}; + +inline auto PNG::RGB8(const string& filename, const void* data, u32 pitch, u32 width, u32 height) -> bool { + PNG png; + if(!png.open(filename)) return false; + + png.header(); + png.information(width, height, 8, 2); + png.dataHeader(width, height, 24); + for(u32 y : range(height)) { + const auto input = (const u32*)data + y * (pitch >> 2); + png.dataLine(y == height - 1); + for(u32 x : range(width)) { + auto pixel = input[x]; //RGB + png.adler(pixel >> 16); //R + png.adler(pixel >> 8); //G + png.adler(pixel >> 0); //B + } + } + png.dataFooter(); + png.footer(); + png.close(); + png.compress(filename); + return true; +} + +inline auto PNG::RGBA8(const string& filename, const void* data, u32 pitch, u32 width, u32 height) -> bool { + PNG png; + if(!png.open(filename)) return false; + + png.header(); + png.information(width, height, 8, 6); + png.dataHeader(width, height, 32); + for(u32 y : range(height)) { + const auto input = (const u32*)data + y * (pitch >> 2); + png.dataLine(y == height - 1); + for(u32 x : range(width)) { + auto pixel = input[x]; //ARGB + png.adler(pixel >> 16); //R + png.adler(pixel >> 8); //G + png.adler(pixel >> 0); //B + png.adler(pixel >> 24); //A + } + } + 
png.dataFooter(); + png.footer(); + png.close(); + png.compress(filename); + return true; +} + +inline auto PNG::compress(const string& filename) -> bool { + auto size = file::size(filename); + execute("optipng", "-o1", filename); + if(file::size(filename) < size) return true; + execute("pngcrush", "-ow", "-l", "1", filename); + if(file::size(filename) < size) return true; + return false; +} + +inline auto PNG::open(const string& filename) -> bool { + fp = file::open(filename, file::mode::write); + return (bool)fp; +} + +inline auto PNG::close() -> void { + fp.close(); +} + +inline auto PNG::header() -> void { + fp.write(0x89); + fp.write('P'); + fp.write('N'); + fp.write('G'); + fp.write(0x0d); + fp.write(0x0a); + fp.write(0x1a); + fp.write(0x0a); +} + +inline auto PNG::footer() -> void { + fp.writem(0, 4L); + crc32.reset(); + write('I'); + write('E'); + write('N'); + write('D'); + fp.writem(crc32.value(), 4L); +} + +inline auto PNG::information(u32 width, u32 height, u32 depth, u32 type) -> void { + fp.writem(13, 4L); + crc32.reset(); + write('I'); + write('H'); + write('D'); + write('R'); + write(width >> 24); + write(width >> 16); + write(width >> 8); + write(width >> 0); + write(height >> 24); + write(height >> 16); + write(height >> 8); + write(height >> 0); + write(depth); + write(type); + write(0x00); //no compression + write(0x00); //no filter + write(0x00); //no interlace + fp.writem(crc32.value(), 4L); +} + +inline auto PNG::dataHeader(u32 width, u32 height, u32 bitsPerPixel) -> void { + bytesPerLine = 1 + width * (bitsPerPixel / 8); + u32 idatSize = 2 + height * (5 + bytesPerLine) + 4; + fp.writem(idatSize, 4L); + crc32.reset(); + write('I'); + write('D'); + write('A'); + write('T'); + write(0x78); + write(0xda); +} + +inline auto PNG::dataLine(bool lastLine) -> void { + write(lastLine); + write( bytesPerLine >> 0); + write( bytesPerLine >> 8); + write(~bytesPerLine >> 0); + write(~bytesPerLine >> 8); + adler(0x00); //no filter +} + +inline auto 
PNG::dataFooter() -> void { + write(adler2 >> 8); + write(adler2 >> 0); + write(adler1 >> 8); + write(adler1 >> 0); + fp.writem(crc32.value(), 4L); +} + +inline auto PNG::write(u8 data) -> void { + fp.write(data); + crc32.input(data); +} + +inline auto PNG::adler(u8 data) -> void { + write(data); + adler1 = (adler1 + data ) % 65521; + adler2 = (adler2 + adler1) % 65521; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/rle.hpp b/waterbox/ares64/ares/nall/encode/rle.hpp new file mode 100644 index 0000000000..71ce5273a4 --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/rle.hpp @@ -0,0 +1,56 @@ +#pragma once + +namespace nall::Encode { + +template //S = word size; M = match length +inline auto RLE(array_view input) -> vector { + vector output; + for(u32 byte : range(8)) output.append(input.size() >> byte * 8); + + u32 base = 0; + u32 skip = 0; + + auto load = [&](u32 offset) -> u8 { + return input(offset); + }; + + auto read = [&](u32 offset) -> u64 { + u64 value = 0; + for(u32 byte : range(S)) value |= load(offset + byte) << byte * 8; + return value; + }; + + auto write = [&](u64 value) -> void { + for(u32 byte : range(S)) output.append(value >> byte * 8); + }; + + auto flush = [&] { + output.append(skip - 1); + do { + write(read(base)); + base += S; + } while(--skip); + }; + + while(base + S * skip < input.size()) { + u32 same = 1; + for(u32 offset = base + S * (skip + 1); offset < input.size(); offset += S) { + if(read(offset) != read(base + S * skip)) break; + if(++same == 127 + M) break; + } + + if(same < M) { + if(++skip == 128) flush(); + } else { + if(skip) flush(); + output.append(128 | same - M); + write(read(base)); + base += S * same; + } + } + if(skip) flush(); + + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/url.hpp b/waterbox/ares64/ares/nall/encode/url.hpp new file mode 100644 index 0000000000..55ee2bf92f --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/url.hpp @@ -0,0 +1,27 @@ +#pragma once + +namespace nall::Encode 
{ + +inline auto URL(string_view input) -> string { + string output; + for(auto c : input) { + //unreserved characters + if(c >= 'A' && c <= 'Z') { output.append(c); continue; } + if(c >= 'a' && c <= 'z') { output.append(c); continue; } + if(c >= '0' && c <= '9') { output.append(c); continue; } + if(c == '-' || c == '_' || c == '.' || c == '~') { output.append(c); continue; } + + //special characters + if(c == ' ') { output.append('+'); continue; } + + //reserved characters + u32 hi = (c >> 4) & 15; + u32 lo = (c >> 0) & 15; + output.append('%'); + output.append((char)(hi < 10 ? ('0' + hi) : ('a' + hi - 10))); + output.append((char)(lo < 10 ? ('0' + lo) : ('a' + lo - 10))); + } + return output; +} + +} diff --git a/waterbox/ares64/ares/nall/encode/wav.hpp b/waterbox/ares64/ares/nall/encode/wav.hpp new file mode 100644 index 0000000000..cbee9e6d6a --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/wav.hpp @@ -0,0 +1,52 @@ +#pragma once + +namespace nall::Encode { + +struct WAV { + static auto stereo_16bit(const string& filename, array_view left, array_view right, u32 frequency) -> bool { + if(left.size() != right.size()) return false; + static u32 channels = 2; + static u32 bits = 16; + static u32 samples = left.size(); + + file_buffer fp; + if(!fp.open(filename, file::mode::write)) return false; + + fp.write('R'); + fp.write('I'); + fp.write('F'); + fp.write('F'); + fp.writel(4 + (8 + 16) + (8 + samples * 4), 4); + + fp.write('W'); + fp.write('A'); + fp.write('V'); + fp.write('E'); + + fp.write('f'); + fp.write('m'); + fp.write('t'); + fp.write(' '); + fp.writel(16, 4); + fp.writel(1, 2); + fp.writel(channels, 2); + fp.writel(frequency, 4); + fp.writel(frequency * channels * bits, 4); + fp.writel(channels * bits, 2); + fp.writel(bits, 2); + + fp.write('d'); + fp.write('a'); + fp.write('t'); + fp.write('a'); + fp.writel(samples * 4, 4); + for(u32 sample : range(samples)) { + fp.writel(left[sample], 2); + fp.writel(right[sample], 2); + } + + return true; + } +}; + 
+} diff --git a/waterbox/ares64/ares/nall/encode/zip.hpp b/waterbox/ares64/ares/nall/encode/zip.hpp new file mode 100644 index 0000000000..f23cfa740e --- /dev/null +++ b/waterbox/ares64/ares/nall/encode/zip.hpp @@ -0,0 +1,101 @@ +#pragma once + +//creates uncompressed ZIP archives + +#include +#include + +namespace nall::Encode { + +struct ZIP { + ZIP(const string& filename) { + fp.open(filename, file::mode::write); + timestamp = time(nullptr); + } + + //append path: append("path/"); + //append file: append("path/file", data, size); + auto append(string filename, const u8* data = nullptr, u32 size = 0u, time_t timestamp = 0) -> void { + filename.transform("\\", "/"); + if(!timestamp) timestamp = this->timestamp; + u32 checksum = Hash::CRC32({data, size}).digest().hex(); + directory.append({filename, timestamp, checksum, size, (u32)fp.offset()}); + + fp.writel(0x04034b50, 4); //signature + fp.writel(0x0014, 2); //minimum version (2.0) + fp.writel(0x0000, 2); //general purpose bit flags + fp.writel(0x0000, 2); //compression method (0 = uncompressed) + fp.writel(makeTime(timestamp), 2); + fp.writel(makeDate(timestamp), 2); + fp.writel(checksum, 4); + fp.writel(size, 4); //compressed size + fp.writel(size, 4); //uncompressed size + fp.writel(filename.length(), 2); //file name length + fp.writel(0x0000, 2); //extra field length + fp.print(filename); //file name + + fp.write({data, size}); //file data + } + + ~ZIP() { + //central directory + u32 baseOffset = fp.offset(); + for(auto& entry : directory) { + fp.writel(0x02014b50, 4); //signature + fp.writel(0x0014, 2); //version made by (2.0) + fp.writel(0x0014, 2); //version needed to extract (2.0) + fp.writel(0x0000, 2); //general purpose bit flags + fp.writel(0x0000, 2); //compression method (0 = uncompressed) + fp.writel(makeTime(entry.timestamp), 2); + fp.writel(makeDate(entry.timestamp), 2); + fp.writel(entry.checksum, 4); + fp.writel(entry.size, 4); //compressed size + fp.writel(entry.size, 4); //uncompressed size + 
fp.writel(entry.filename.length(), 2); //file name length + fp.writel(0x0000, 2); //extra field length + fp.writel(0x0000, 2); //file comment length + fp.writel(0x0000, 2); //disk number start + fp.writel(0x0000, 2); //internal file attributes + fp.writel(0x00000000, 4); //external file attributes + fp.writel(entry.offset, 4); //relative offset of file header + fp.print(entry.filename); + } + u32 finishOffset = fp.offset(); + + //end of central directory + fp.writel(0x06054b50, 4); //signature + fp.writel(0x0000, 2); //number of this disk + fp.writel(0x0000, 2); //disk where central directory starts + fp.writel(directory.size(), 2); //number of central directory records on this disk + fp.writel(directory.size(), 2); //total number of central directory records + fp.writel(finishOffset - baseOffset, 4); //size of central directory + fp.writel(baseOffset, 4); //offset of central directory + fp.writel(0x0000, 2); //comment length + + fp.close(); + } + +protected: + auto makeTime(time_t timestamp) -> u16 { + tm* info = localtime(×tamp); + return (info->tm_hour << 11) | (info->tm_min << 5) | (info->tm_sec >> 1); + } + + auto makeDate(time_t timestamp) -> u16 { + tm* info = localtime(×tamp); + return ((info->tm_year - 80) << 9) | ((1 + info->tm_mon) << 5) + (info->tm_mday); + } + + file_buffer fp; + time_t timestamp; + struct entry_t { + string filename; + time_t timestamp; + u32 checksum; + u32 size; + u32 offset; + }; + vector directory; +}; + +} diff --git a/waterbox/ares64/ares/nall/endian.hpp b/waterbox/ares64/ares/nall/endian.hpp new file mode 100644 index 0000000000..97cd25ab94 --- /dev/null +++ b/waterbox/ares64/ares/nall/endian.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include + +#if defined(ENDIAN_LITTLE) + //little-endian: uint8_t[] { 0x01, 0x02, 0x03, 0x04 } == 0x04030201 + #define order_lsb1(a) a + #define order_lsb2(a,b) a,b + #define order_lsb3(a,b,c) a,b,c + #define order_lsb4(a,b,c,d) a,b,c,d + #define order_lsb5(a,b,c,d,e) a,b,c,d,e + #define 
order_lsb6(a,b,c,d,e,f) a,b,c,d,e,f + #define order_lsb7(a,b,c,d,e,f,g) a,b,c,d,e,f,g + #define order_lsb8(a,b,c,d,e,f,g,h) a,b,c,d,e,f,g,h + #define order_msb1(a) a + #define order_msb2(a,b) b,a + #define order_msb3(a,b,c) c,b,a + #define order_msb4(a,b,c,d) d,c,b,a + #define order_msb5(a,b,c,d,e) e,d,c,b,a + #define order_msb6(a,b,c,d,e,f) f,e,d,c,b,a + #define order_msb7(a,b,c,d,e,f,g) g,f,e,d,c,b,a + #define order_msb8(a,b,c,d,e,f,g,h) h,g,f,e,d,c,b,a +#elif defined(ENDIAN_BIG) + //big-endian: uint8_t[] { 0x01, 0x02, 0x03, 0x04 } == 0x01020304 + #define order_lsb1(a) a + #define order_lsb2(a,b) b,a + #define order_lsb3(a,b,c) c,b,a + #define order_lsb4(a,b,c,d) d,c,b,a + #define order_lsb5(a,b,c,d,e) e,d,c,b,a + #define order_lsb6(a,b,c,d,e,f) f,e,d,c,b,a + #define order_lsb7(a,b,c,d,e,f,g) g,f,e,d,c,b,a + #define order_lsb8(a,b,c,d,e,f,g,h) h,g,f,e,d,c,b,a + #define order_msb1(a) a + #define order_msb2(a,b) a,b + #define order_msb3(a,b,c) a,b,c + #define order_msb4(a,b,c,d) a,b,c,d + #define order_msb5(a,b,c,d,e) a,b,c,d,e + #define order_msb6(a,b,c,d,e,f) a,b,c,d,e,f + #define order_msb7(a,b,c,d,e,f,g) a,b,c,d,e,f,g + #define order_msb8(a,b,c,d,e,f,g,h) a,b,c,d,e,f,g,h +#else + #error "Unknown endian. 
Please specify in nall/intrinsics.hpp" +#endif diff --git a/waterbox/ares64/ares/nall/file-buffer.hpp b/waterbox/ares64/ares/nall/file-buffer.hpp new file mode 100644 index 0000000000..7aa155ab71 --- /dev/null +++ b/waterbox/ares64/ares/nall/file-buffer.hpp @@ -0,0 +1,248 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nall { + +//on Windows (at least for 7 and earlier), FILE* is not buffered +//thus, reading/writing one byte at a time will be dramatically slower +//on all other OSes, FILE* is buffered +//in order to ensure good performance, file_buffer implements its own buffer +//this speeds up Windows substantially, without harming performance elsewhere much + +struct file_buffer { + struct mode { enum : u32 { read, write, modify, append }; }; + struct index { enum : u32 { absolute, relative }; }; + + file_buffer(const file_buffer&) = delete; + auto operator=(const file_buffer&) -> file_buffer& = delete; + + file_buffer() = default; + file_buffer(const string& filename, u32 mode) { open(filename, mode); } + + file_buffer(file_buffer&& source) { operator=(move(source)); } + + ~file_buffer() { close(); } + + auto operator=(file_buffer&& source) -> file_buffer& { + buffer = source.buffer; + bufferOffset = source.bufferOffset; + bufferDirty = source.bufferDirty; + fileHandle = source.fileHandle; + fileOffset = source.fileOffset; + fileSize = source.fileSize; + fileMode = source.fileMode; + + source.bufferOffset = -1LL; + source.bufferDirty = false; + source.fileHandle = nullptr; + source.fileOffset = 0; + source.fileSize = 0; + source.fileMode = mode::read; + + return *this; + } + + explicit operator bool() const { + return (bool)fileHandle; + } + + auto read() -> u8 { + if(!fileHandle) return 0; //file not open + if(fileOffset >= fileSize) return 0; //cannot read past end of file + bufferSynchronize(); + return buffer[fileOffset++ & buffer.size() - 1]; + } + + template auto 
readl(u32 length = 1) -> T { + T data = 0; + for(u32 n : range(length)) { + data |= (T)read() << n * 8; + } + return data; + } + + template auto readm(u32 length = 1) -> T { + T data = 0; + while(length--) { + data <<= 8; + data |= read(); + } + return data; + } + + auto reads(u64 length) -> string { + string result; + result.resize(length); + for(auto& byte : result) byte = read(); + return result; + } + + auto read(array_span memory) -> void { + for(auto& byte : memory) byte = read(); + } + + auto write(u8 data) -> void { + if(!fileHandle) return; //file not open + if(fileMode == mode::read) return; //writes not permitted + bufferSynchronize(); + buffer[fileOffset++ & buffer.size() - 1] = data; + bufferDirty = true; + if(fileOffset > fileSize) fileSize = fileOffset; + } + + template auto writel(T data, u32 length = 1) -> void { + while(length--) { + write(u8(data)); + data >>= 8; + } + } + + template auto writem(T data, u32 length = 1) -> void { + for(u32 n : reverse(range(length))) { + write(u8(data >> n * 8)); + } + } + + auto writes(const string& s) -> void { + for(auto& byte : s) write(byte); + } + + auto write(array_view memory) -> void { + for(auto& byte : memory) write(byte); + } + + template auto print(P&&... p) -> void { + string s{forward

(p)...}; + for(auto& byte : s) write(byte); + } + + auto flush() -> void { + bufferFlush(); + fflush(fileHandle); + } + + auto seek(s64 offset, u32 index_ = index::absolute) -> void { + if(!fileHandle) return; + bufferFlush(); + + s64 seekOffset = fileOffset; + switch(index_) { + case index::absolute: seekOffset = offset; break; + case index::relative: seekOffset += offset; break; + } + + if(seekOffset < 0) seekOffset = 0; //cannot seek before start of file + if(seekOffset > fileSize) { + if(fileMode == mode::read) { //cannot seek past end of file + seekOffset = fileSize; + } else { //pad file to requested location + fileOffset = fileSize; + while(fileSize < seekOffset) write(0); + } + } + + fileOffset = seekOffset; + } + + auto offset() const -> u64 { + if(!fileHandle) return 0; + return fileOffset; + } + + auto size() const -> u64 { + if(!fileHandle) return 0; + return fileSize; + } + + auto truncate(u64 size) -> bool { + if(!fileHandle) return false; + #if defined(API_POSIX) + return ftruncate(fileno(fileHandle), size) == 0; + #elif defined(API_WINDOWS) + return _chsize(fileno(fileHandle), size) == 0; + #endif + } + + auto end() const -> bool { + if(!fileHandle) return true; + return fileOffset >= fileSize; + } + + auto open(const string& filename, u32 mode_) -> bool { + close(); + + switch(fileMode = mode_) { + #if defined(API_POSIX) + case mode::read: fileHandle = fopen(filename, "rb" ); break; + case mode::write: fileHandle = fopen(filename, "wb+"); break; //need read permission for buffering + case mode::modify: fileHandle = fopen(filename, "rb+"); break; + case mode::append: fileHandle = fopen(filename, "ab+"); break; + #elif defined(API_WINDOWS) + case mode::read: fileHandle = _wfopen(utf16_t(filename), L"rb" ); break; + case mode::write: fileHandle = _wfopen(utf16_t(filename), L"wb+"); break; + case mode::modify: fileHandle = _wfopen(utf16_t(filename), L"rb+"); break; + case mode::append: fileHandle = _wfopen(utf16_t(filename), L"ab+"); break; + #endif + 
} + if(!fileHandle) return false; + + bufferOffset = -1LL; + fileOffset = 0; + fseek(fileHandle, 0, SEEK_END); + fileSize = ftell(fileHandle); + fseek(fileHandle, 0, SEEK_SET); + return true; + } + + auto close() -> void { + if(!fileHandle) return; + bufferFlush(); + fclose(fileHandle); + fileHandle = nullptr; + } + +private: + array buffer; + s64 bufferOffset = -1LL; + bool bufferDirty = false; + FILE* fileHandle = nullptr; + u64 fileOffset = 0; + u64 fileSize = 0; + u32 fileMode = mode::read; + + auto bufferSynchronize() -> void { + if(!fileHandle) return; + if(bufferOffset == (fileOffset & ~(buffer.size() - 1))) return; + + bufferFlush(); + bufferOffset = fileOffset & ~(buffer.size() - 1); + fseek(fileHandle, bufferOffset, SEEK_SET); + u64 length = bufferOffset + buffer.size() <= fileSize ? buffer.size() : fileSize & buffer.size() - 1; + if(length) (void)fread(buffer.data(), 1, length, fileHandle); + } + + auto bufferFlush() -> void { + if(!fileHandle) return; //file not open + if(fileMode == mode::read) return; //buffer cannot be written to + if(bufferOffset < 0) return; //buffer unused + if(!bufferDirty) return; //buffer unmodified since read + + fseek(fileHandle, bufferOffset, SEEK_SET); + u64 length = bufferOffset + buffer.size() <= fileSize ? 
buffer.size() : fileSize & buffer.size() - 1; + if(length) (void)fwrite(buffer.data(), 1, length, fileHandle); + bufferOffset = -1LL; + bufferDirty = false; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/file-map.hpp b/waterbox/ares64/ares/nall/file-map.hpp new file mode 100644 index 0000000000..1c7d79221d --- /dev/null +++ b/waterbox/ares64/ares/nall/file-map.hpp @@ -0,0 +1,225 @@ +#pragma once + +#include +#include + +#include +#include +#if defined(PLATFORM_WINDOWS) + #include +#else + #include + #include + #include + #include + #include +#endif + +#if !defined(MAP_NORESERVE) + //not supported on FreeBSD; flag removed in 11.0 + #define MAP_NORESERVE 0 +#endif + +namespace nall { + +struct file_map { + struct mode { enum : u32 { read, write, modify, append }; }; + + file_map(const file_map&) = delete; + auto operator=(const file_map&) = delete; + + file_map() = default; + file_map(file_map&& source) { operator=(move(source)); } + file_map(const string& filename, u32 mode) { open(filename, mode); } + + ~file_map() { close(); } + + explicit operator bool() const { return _open; } + auto size() const -> u64 { return _size; } + auto data() -> u8* { return _data; } + auto data() const -> const u8* { return _data; } + +//auto operator=(file_map&& source) -> file_map&; +//auto open(const string& filename, u32 mode) -> bool; +//auto close() -> void; + +private: + bool _open = false; //zero-byte files return _data = nullptr, _size = 0 + u8* _data = nullptr; + u64 _size = 0; + + #if defined(API_WINDOWS) + + HANDLE _file = INVALID_HANDLE_VALUE; + HANDLE _map = INVALID_HANDLE_VALUE; + +public: + auto operator=(file_map&& source) -> file_map& { + _open = source._open; + _data = source._data; + _size = source._size; + _file = source._file; + _map = source._map; + + source._open = false; + source._data = nullptr; + source._size = 0; + source._file = INVALID_HANDLE_VALUE; + source._map = INVALID_HANDLE_VALUE; + + return *this; + } + + auto open(const string& filename, u32 
mode_) -> bool { + close(); + if(file::exists(filename) && file::size(filename) == 0) return _open = true; + + s32 desiredAccess, creationDisposition, protection, mapAccess; + + switch(mode_) { + default: return false; + case mode::read: + desiredAccess = GENERIC_READ; + creationDisposition = OPEN_EXISTING; + protection = PAGE_READONLY; + mapAccess = FILE_MAP_READ; + break; + case mode::write: + //write access requires read access + desiredAccess = GENERIC_WRITE; + creationDisposition = CREATE_ALWAYS; + protection = PAGE_READWRITE; + mapAccess = FILE_MAP_ALL_ACCESS; + break; + case mode::modify: + desiredAccess = GENERIC_READ | GENERIC_WRITE; + creationDisposition = OPEN_EXISTING; + protection = PAGE_READWRITE; + mapAccess = FILE_MAP_ALL_ACCESS; + break; + case mode::append: + desiredAccess = GENERIC_READ | GENERIC_WRITE; + creationDisposition = CREATE_NEW; + protection = PAGE_READWRITE; + mapAccess = FILE_MAP_ALL_ACCESS; + break; + } + + _file = CreateFileW(utf16_t(filename), desiredAccess, FILE_SHARE_READ, nullptr, + creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); + if(_file == INVALID_HANDLE_VALUE) return false; + + _size = GetFileSize(_file, nullptr); + + _map = CreateFileMapping(_file, nullptr, protection, 0, _size, nullptr); + if(_map == INVALID_HANDLE_VALUE) { + CloseHandle(_file); + _file = INVALID_HANDLE_VALUE; + return false; + } + + _data = (u8*)MapViewOfFile(_map, mapAccess, 0, 0, _size); + return _open = true; + } + + auto close() -> void { + if(_data) { + UnmapViewOfFile(_data); + _data = nullptr; + } + + if(_map != INVALID_HANDLE_VALUE) { + CloseHandle(_map); + _map = INVALID_HANDLE_VALUE; + } + + if(_file != INVALID_HANDLE_VALUE) { + CloseHandle(_file); + _file = INVALID_HANDLE_VALUE; + } + + _open = false; + } + + #else + + s32 _fd = -1; + +public: + auto operator=(file_map&& source) -> file_map& { + _open = source._open; + _data = source._data; + _size = source._size; + _fd = source._fd; + + source._open = false; + source._data = nullptr; + 
source._size = 0; + source._fd = -1; + + return *this; + } + + auto open(const string& filename, u32 mode_) -> bool { + close(); + if(file::exists(filename) && file::size(filename) == 0) return _open = true; + + s32 openFlags = 0; + s32 mmapFlags = 0; + + switch(mode_) { + default: return false; + case mode::read: + openFlags = O_RDONLY; + mmapFlags = PROT_READ; + break; + case mode::write: + openFlags = O_RDWR | O_CREAT; //mmap() requires read access + mmapFlags = PROT_WRITE; + break; + case mode::modify: + openFlags = O_RDWR; + mmapFlags = PROT_READ | PROT_WRITE; + break; + case mode::append: + openFlags = O_RDWR | O_CREAT; + mmapFlags = PROT_READ | PROT_WRITE; + break; + } + + _fd = ::open(filename, openFlags, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); + if(_fd < 0) return false; + + struct stat _stat; + fstat(_fd, &_stat); + _size = _stat.st_size; + + _data = (u8*)mmap(nullptr, _size, mmapFlags, MAP_SHARED | MAP_NORESERVE, _fd, 0); + if(_data == MAP_FAILED) { + _data = nullptr; + ::close(_fd); + _fd = -1; + return false; + } + + return _open = true; + } + + auto close() -> void { + if(_data) { + munmap(_data, _size); + _data = nullptr; + } + + if(_fd >= 0) { + ::close(_fd); + _fd = -1; + } + + _open = false; + } + + #endif +}; + +} diff --git a/waterbox/ares64/ares/nall/file.hpp b/waterbox/ares64/ares/nall/file.hpp new file mode 100644 index 0000000000..a4bec188ba --- /dev/null +++ b/waterbox/ares64/ares/nall/file.hpp @@ -0,0 +1,104 @@ +#pragma once + +#include + +namespace nall { + +struct file : inode { + struct mode { enum : u32 { read, write, modify, append }; }; + struct index { enum : u32 { absolute, relative }; }; + + file() = delete; + + static auto open(const string& filename, u32 mode) -> file_buffer { + return file_buffer{filename, mode}; + } + + static auto copy(const string& sourcename, const string& targetname) -> bool { + if(sourcename == targetname) return true; + if(auto reader = file::open(sourcename, mode::read)) { + if(auto writer = 
file::open(targetname, mode::write)) { + for(u64 n : range(reader.size())) writer.write(reader.read()); + return true; + } + } + return false; + } + + //attempt to rename file first + //this will fail if paths point to different file systems; fall back to copy+remove in this case + static auto move(const string& sourcename, const string& targetname) -> bool { + if(sourcename == targetname) return true; + if(rename(sourcename, targetname)) return true; + if(!writable(sourcename)) return false; + if(copy(sourcename, targetname)) return remove(sourcename), true; + return false; + } + + static auto truncate(const string& filename, u64 size) -> bool { + #if defined(API_POSIX) + return ::truncate(filename, size) == 0; + #elif defined(API_WINDOWS) + if(auto fp = _wfopen(utf16_t(filename), L"rb+")) { + bool result = _chsize(fileno(fp), size) == 0; + fclose(fp); + return result; + } + return false; + #endif + } + + //returns false if specified filename is a directory + static auto exists(const string& filename) -> bool { + #if defined(API_POSIX) + struct stat data; + if(stat(filename, &data) != 0) return false; + #elif defined(API_WINDOWS) + struct __stat64 data; + if(_wstat64(utf16_t(filename), &data) != 0) return false; + #endif + return !(data.st_mode & S_IFDIR); + } + + static auto size(const string& filename) -> u64 { + #if defined(API_POSIX) + struct stat data; + stat(filename, &data); + #elif defined(API_WINDOWS) + struct __stat64 data; + _wstat64(utf16_t(filename), &data); + #endif + return S_ISREG(data.st_mode) ? 
data.st_size : 0u; + } + + static auto read(const string& filename) -> vector { + vector memory; + if(auto fp = file::open(filename, mode::read)) { + memory.resize(fp.size()); + fp.read(memory); + } + return memory; + } + + static auto read(const string& filename, array_span memory) -> bool { + if(auto fp = file::open(filename, mode::read)) return fp.read(memory), true; + return false; + } + + static auto write(const string& filename, array_view memory) -> bool { + if(auto fp = file::open(filename, mode::write)) return fp.write(memory), true; + return false; + } + + //create an empty file (will replace existing files) + static auto create(const string& filename) -> bool { + if(auto fp = file::open(filename, mode::write)) return true; + return false; + } + + static auto sha256(const string& filename) -> string { + return Hash::SHA256(read(filename)).digest(); + } +}; + +} diff --git a/waterbox/ares64/ares/nall/function.hpp b/waterbox/ares64/ares/nall/function.hpp new file mode 100644 index 0000000000..3b59e05c2f --- /dev/null +++ b/waterbox/ares64/ares/nall/function.hpp @@ -0,0 +1,78 @@ +#pragma once + +#include + +namespace nall { + +template struct function; + +template struct function R> { + using cast = auto (*)(P...) -> R; + + //value = true if auto L::operator()(P...) -> R exists + template struct is_compatible { + template static auto exists(T*) -> const typename is_same().operator()(declval

()...))>::type; + template static auto exists(...) -> const false_type; + static constexpr bool value = decltype(exists(0))::value; + }; + + function() {} + function(const function& source) { operator=(source); } + function(auto (*function)(P...) -> R) { callback = new global(function); } + template function(auto (C::*function)(P...) -> R, C* object) { callback = new member(function, object); } + template function(auto (C::*function)(P...) const -> R, C* object) { callback = new member((auto (C::*)(P...) -> R)function, object); } + template::value>> function(const L& object) { callback = new lambda(object); } + explicit function(void* function) { if(function) callback = new global((auto (*)(P...) -> R)function); } + ~function() { if(callback) delete callback; } + + explicit operator bool() const { return callback; } + auto operator()(P... p) const -> R { return (*callback)(forward

(p)...); } + auto reset() -> void { if(callback) { delete callback; callback = nullptr; } } + + auto operator=(const function& source) -> function& { + if(this != &source) { + if(callback) { delete callback; callback = nullptr; } + if(source.callback) callback = source.callback->copy(); + } + return *this; + } + + auto operator=(void* source) -> function& { + if(callback) { delete callback; callback = nullptr; } + callback = new global((auto (*)(P...) -> R)source); + return *this; + } + +private: + struct container { + virtual auto operator()(P... p) const -> R = 0; + virtual auto copy() const -> container* = 0; + virtual ~container() = default; + }; + + container* callback = nullptr; + + struct global : container { + auto (*function)(P...) -> R; + auto operator()(P... p) const -> R { return function(forward

(p)...); } + auto copy() const -> container* { return new global(function); } + global(auto (*function)(P...) -> R) : function(function) {} + }; + + template struct member : container { + auto (C::*function)(P...) -> R; + C* object; + auto operator()(P... p) const -> R { return (object->*function)(forward

(p)...); } + auto copy() const -> container* { return new member(function, object); } + member(auto (C::*function)(P...) -> R, C* object) : function(function), object(object) {} + }; + + template struct lambda : container { + mutable L object; + auto operator()(P... p) const -> R { return object(forward

(p)...); } + auto copy() const -> container* { return new lambda(object); } + lambda(const L& object) : object(object) {} + }; +}; + +} diff --git a/waterbox/ares64/ares/nall/galois-field.hpp b/waterbox/ares64/ares/nall/galois-field.hpp new file mode 100644 index 0000000000..38f3bc5c63 --- /dev/null +++ b/waterbox/ares64/ares/nall/galois-field.hpp @@ -0,0 +1,70 @@ +#pragma once + +//table-driven galois field modulo 2 +//do not use with GF(2^17) or larger + +namespace nall { + +template +struct GaloisField { + using type = GaloisField; + + GaloisField(u32 x = 0) : x(x) {} + operator field() const { return x; } + + auto operator^(field y) const -> type { return x ^ y; } + auto operator+(field y) const -> type { return x ^ y; } + auto operator-(field y) const -> type { return x ^ y; } + auto operator*(field y) const -> type { return x && y ? exp(log(x) + log(y)) : 0; } + auto operator/(field y) const -> type { return x && y ? exp(log(x) + Elements - log(y)) : 0; } + + auto& operator =(field y) { return x = y, *this; } + auto& operator^=(field y) { return x = operator^(y), *this; } + auto& operator+=(field y) { return x = operator^(y), *this; } + auto& operator-=(field y) { return x = operator^(y), *this; } + auto& operator*=(field y) { return x = operator*(y), *this; } + auto& operator/=(field y) { return x = operator/(y), *this; } + + auto pow(field y) const -> type { return exp(log(x) * y); } + auto inv() const -> type { return exp(Elements - log(x)); } // 1/x + + static auto log(u32 x) -> u32 { + enum : u32 { Size = bit::round(Elements), Mask = Size - 1 }; + static array log = [] { + u32 shift = 0, polynomial = Polynomial; + while(polynomial >>= 1) shift++; + shift--; + + array log; + field x = 1; + for(u32 n : range(Elements)) { + log[x] = n; + x = x << 1 ^ (x >> shift ? 
Polynomial : 0); + } + log[0] = 0; //-inf (undefined) + return log; + }(); + return log[x & Mask]; + } + + static auto exp(u32 x) -> u32 { + static array exp = [] { + u32 shift = 0, polynomial = Polynomial; + while(polynomial >>= 1) shift++; + shift--; + + array exp; + field x = 1; + for(u32 n : range(Elements)) { + exp[n] = x; + x = x << 1 ^ (x >> shift ? Polynomial : 0); + } + return exp; + }(); + return exp[x % Elements]; + } + + field x; +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/crc16.hpp b/waterbox/ares64/ares/nall/hash/crc16.hpp new file mode 100644 index 0000000000..ba041c4860 --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/crc16.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace nall::Hash { + +struct CRC16 : Hash { + using Hash::input; + + CRC16(array_view buffer = {}) { + reset(); + input(buffer); + } + + auto reset() -> void override { + checksum = ~0; + } + + auto input(u8 value) -> void override { + checksum = (checksum >> 8) ^ table(checksum ^ value); + } + + auto output() const -> vector override { + vector result; + for(auto n : reverse(range(2))) result.append(~checksum >> n * 8); + return result; + } + + auto value() const -> u16 { + return ~checksum; + } + +private: + static auto table(u8 index) -> u16 { + static u16 table[256] = {}; + static bool initialized = false; + + if(!initialized) { + initialized = true; + for(auto index : range(256)) { + u16 crc = index; + for(auto bit : range(8)) { + crc = (crc >> 1) ^ (crc & 1 ? 
0x8408 : 0); + } + table[index] = crc; + } + } + + return table[index]; + } + + u16 checksum = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/crc32.hpp b/waterbox/ares64/ares/nall/hash/crc32.hpp new file mode 100644 index 0000000000..1b100ecbf5 --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/crc32.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace nall::Hash { + +struct CRC32 : Hash { + using Hash::input; + + CRC32(array_view buffer = {}) { + reset(); + input(buffer); + } + + auto reset() -> void override { + checksum = ~0; + } + + auto input(u8 value) -> void override { + checksum = (checksum >> 8) ^ table(checksum ^ value); + } + + auto output() const -> vector override { + vector result; + for(auto n : reverse(range(4))) result.append(~checksum >> n * 8); + return result; + } + + auto value() const -> u32 { + return ~checksum; + } + +private: + static auto table(u8 index) -> u32 { + static u32 table[256] = {}; + static bool initialized = false; + + if(!initialized) { + initialized = true; + for(auto index : range(256)) { + u32 crc = index; + for(auto bit : range(8)) { + crc = (crc >> 1) ^ (crc & 1 ? 
0xedb8'8320 : 0); + } + table[index] = crc; + } + } + + return table[index]; + } + + u32 checksum = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/crc64.hpp b/waterbox/ares64/ares/nall/hash/crc64.hpp new file mode 100644 index 0000000000..f065b07a7d --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/crc64.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace nall::Hash { + +struct CRC64 : Hash { + using Hash::input; + + CRC64(array_view buffer = {}) { + reset(); + input(buffer); + } + + auto reset() -> void override { + checksum = ~0; + } + + auto input(u8 value) -> void override { + checksum = (checksum >> 8) ^ table(checksum ^ value); + } + + auto output() const -> vector override { + vector result; + for(auto n : reverse(range(8))) result.append(~checksum >> n * 8); + return result; + } + + auto value() const -> u64 { + return ~checksum; + } + +private: + static auto table(u8 index) -> u64 { + static u64 table[256] = {}; + static bool initialized = false; + + if(!initialized) { + initialized = true; + for(auto index : range(256)) { + u64 crc = index; + for(auto bit : range(8)) { + crc = (crc >> 1) ^ (crc & 1 ? 
0xc96c'5795'd787'0f42 : 0); + } + table[index] = crc; + } + } + + return table[index]; + } + + u64 checksum = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/hash.hpp b/waterbox/ares64/ares/nall/hash/hash.hpp new file mode 100644 index 0000000000..5c1b3f0d3e --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/hash.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include + +//cannot use constructor inheritance due to needing to call virtual reset(); +//instead, define a macro to reduce boilerplate code in every Hash subclass +#define nallHash(Name) \ + Name() { reset(); } \ + Name(const void* data, u64 size) : Name() { input(data, size); } \ + Name(const vector& data) : Name() { input(data); } \ + Name(const string& data) : Name() { input(data); } \ + using Hash::input; \ + +namespace nall::Hash { + +struct Hash { + virtual auto reset() -> void = 0; + virtual auto input(u8 data) -> void = 0; + virtual auto output() const -> vector = 0; + + auto input(array_view data) -> void { + for(auto byte : data) input(byte); + } + + auto input(const void* data, u64 size) -> void { + auto p = (const u8*)data; + while(size--) input(*p++); + } + + auto input(const vector& data) -> void { + for(auto byte : data) input(byte); + } + + auto input(const string& data) -> void { + for(auto byte : data) input(byte); + } + + auto digest() const -> string { + string result; + for(auto n : output()) result.append(hex(n, 2L)); + return result; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/sha224.hpp b/waterbox/ares64/ares/nall/hash/sha224.hpp new file mode 100644 index 0000000000..ed56e651d9 --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/sha224.hpp @@ -0,0 +1,106 @@ +#pragma once + +#include + +namespace nall::Hash { + +struct SHA224 : Hash { + using Hash::input; + + SHA224(array_view buffer = {}) { + reset(); + input(buffer); + } + + auto reset() -> void override { + for(auto& n : queue) n = 0; + for(auto& n : w) n = 0; + for(auto n : range(8)) h[n] = 
square(n); + queued = length = 0; + } + + auto input(u8 value) -> void override { + byte(value); + length++; + } + + auto output() const -> vector override { + SHA224 self(*this); + self.finish(); + vector result; + for(auto h : range(7)) { + for(auto n : reverse(range(4))) result.append(self.h[h] >> n * 8); + } + return result; + } + + auto value() const -> u256 { + u256 value = 0; + for(auto byte : output()) value = value << 8 | byte; + return value; + } + +private: + auto byte(u8 value) -> void { + u32 shift = (3 - (queued & 3)) * 8; + queue[queued >> 2] &= ~(0xff << shift); + queue[queued >> 2] |= (value << shift); + if(++queued == 64) block(), queued = 0; + } + + auto block() -> void { + for(auto n : range(16)) w[n] = queue[n]; + for(auto n : range(16, 64)) { + u32 a = ror(w[n - 15], 7) ^ ror(w[n - 15], 18) ^ (w[n - 15] >> 3); + u32 b = ror(w[n - 2], 17) ^ ror(w[n - 2], 19) ^ (w[n - 2] >> 10); + w[n] = w[n - 16] + w[n - 7] + a + b; + } + u32 t[8]; + for(auto n : range(8)) t[n] = h[n]; + for(auto n : range(64)) { + u32 a = ror(t[0], 2) ^ ror(t[0], 13) ^ ror(t[0], 22); + u32 b = ror(t[4], 6) ^ ror(t[4], 11) ^ ror(t[4], 25); + u32 c = (t[0] & t[1]) ^ (t[0] & t[2]) ^ (t[1] & t[2]); + u32 d = (t[4] & t[5]) ^ (~t[4] & t[6]); + u32 e = t[7] + w[n] + cube(n) + b + d; + t[7] = t[6]; t[6] = t[5]; t[5] = t[4]; t[4] = t[3] + e; + t[3] = t[2]; t[2] = t[1]; t[1] = t[0]; t[0] = a + c + e; + } + for(auto n : range(8)) h[n] += t[n]; + } + + auto finish() -> void { + byte(0x80); + while(queued != 56) byte(0x00); + for(auto n : range(8)) byte(length * 8 >> (7 - n) * 8); + } + + auto square(u32 n) -> u32 { + static const u32 value[8] = { + 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4, + }; + return value[n]; + } + + auto cube(u32 n) -> u32 { + static const u32 value[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 
0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + return value[n]; + } + + u32 queue[16] = {}; + u32 w[64] = {}; + u32 h[8] = {}; + u32 queued = 0; + u64 length = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/sha256.hpp b/waterbox/ares64/ares/nall/hash/sha256.hpp new file mode 100644 index 0000000000..4899ea54e7 --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/sha256.hpp @@ -0,0 +1,106 @@ +#pragma once + +#include + +namespace nall::Hash { + +struct SHA256 : Hash { + using Hash::input; + + SHA256(array_view buffer = {}) { + reset(); + input(buffer); + } + + auto reset() -> void override { + for(auto& n : queue) n = 0; + for(auto& n : w) n = 0; + for(auto n : range(8)) h[n] = square(n); + queued = length = 0; + } + + auto input(u8 value) -> void override { + byte(value); + length++; + } + + auto output() const -> vector override { + SHA256 self(*this); + self.finish(); + vector result; + for(auto h : self.h) { + for(auto n : reverse(range(4))) result.append(h >> n * 8); + } + return result; + } + + auto value() const -> u256 { + u256 value = 0; + for(auto byte : output()) value = value << 8 | byte; + return value; + } + +private: + auto byte(u8 value) -> void { + u32 shift = (3 - (queued & 3)) * 8; + queue[queued >> 2] &= ~(0xff << shift); + queue[queued >> 2] |= (value << shift); + if(++queued == 64) block(), queued = 0; + } + + auto block() -> void { + for(auto n : range(16)) w[n] = 
queue[n]; + for(auto n : range(16, 64)) { + u32 a = ror(w[n - 15], 7) ^ ror(w[n - 15], 18) ^ (w[n - 15] >> 3); + u32 b = ror(w[n - 2], 17) ^ ror(w[n - 2], 19) ^ (w[n - 2] >> 10); + w[n] = w[n - 16] + w[n - 7] + a + b; + } + u32 t[8]; + for(auto n : range(8)) t[n] = h[n]; + for(auto n : range(64)) { + u32 a = ror(t[0], 2) ^ ror(t[0], 13) ^ ror(t[0], 22); + u32 b = ror(t[4], 6) ^ ror(t[4], 11) ^ ror(t[4], 25); + u32 c = (t[0] & t[1]) ^ (t[0] & t[2]) ^ (t[1] & t[2]); + u32 d = (t[4] & t[5]) ^ (~t[4] & t[6]); + u32 e = t[7] + w[n] + cube(n) + b + d; + t[7] = t[6]; t[6] = t[5]; t[5] = t[4]; t[4] = t[3] + e; + t[3] = t[2]; t[2] = t[1]; t[1] = t[0]; t[0] = a + c + e; + } + for(auto n : range(8)) h[n] += t[n]; + } + + auto finish() -> void { + byte(0x80); + while(queued != 56) byte(0x00); + for(auto n : range(8)) byte(length * 8 >> (7 - n) * 8); + } + + auto square(u32 n) -> u32 { + static const u32 value[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, + }; + return value[n]; + } + + auto cube(u32 n) -> u32 { + static const u32 value[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + return value[n]; + } + + u32 queue[16] = {}; + u32 w[64] = {}; + u32 h[8] = {}; + u32 queued = 0; + 
u64 length = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/sha384.hpp b/waterbox/ares64/ares/nall/hash/sha384.hpp new file mode 100644 index 0000000000..5d32485d7c --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/sha384.hpp @@ -0,0 +1,119 @@ +#pragma once + +#include + +namespace nall::Hash { + +struct SHA384 : Hash { + using Hash::input; + + SHA384(array_view buffer = {}) { + reset(); + input(buffer); + } + + auto reset() -> void override { + for(auto& n : queue) n = 0; + for(auto& n : w) n = 0; + for(auto n : range(8)) h[n] = square(n); + queued = length = 0; + } + + auto input(u8 data) -> void override { + byte(data); + length++; + } + + auto output() const -> vector override { + SHA384 self(*this); + self.finish(); + vector result; + for(auto h : range(6)) { + for(auto n : reverse(range(8))) result.append(self.h[h] >> n * 8); + } + return result; + } + + auto value() const -> u512 { + u512 value = 0; + for(auto byte : output()) value = value << 8 | byte; + return value; + } + +private: + auto byte(u8 data) -> void { + u64 shift = (7 - (queued & 7)) * 8; + queue[queued >> 3] &=~((u64)0xff << shift); + queue[queued >> 3] |= ((u64)data << shift); + if(++queued == 128) block(), queued = 0; + } + + auto block() -> void { + for(auto n : range(16)) w[n] = queue[n]; + for(auto n : range(16, 80)) { + u64 a = ror(w[n - 15], 1) ^ ror(w[n - 15], 8) ^ (w[n - 15] >> 7); + u64 b = ror(w[n - 2], 19) ^ ror(w[n - 2], 61) ^ (w[n - 2] >> 6); + w[n] = w[n - 16] + w[n - 7] + a + b; + } + u64 t[8]; + for(auto n : range(8)) t[n] = h[n]; + for(auto n : range(80)) { + u64 a = ror(t[0], 28) ^ ror(t[0], 34) ^ ror(t[0], 39); + u64 b = ror(t[4], 14) ^ ror(t[4], 18) ^ ror(t[4], 41); + u64 c = (t[0] & t[1]) ^ (t[0] & t[2]) ^ (t[1] & t[2]); + u64 d = (t[4] & t[5]) ^ (~t[4] & t[6]); + u64 e = t[7] + w[n] + cube(n) + b + d; + t[7] = t[6]; t[6] = t[5]; t[5] = t[4]; t[4] = t[3] + e; + t[3] = t[2]; t[2] = t[1]; t[1] = t[0]; t[0] = a + c + e; + } + for(auto n : range(8)) h[n] += t[n]; + 
} + + auto finish() -> void { + byte(0x80); + while(queued != 112) byte(0x00); + for(auto n : range(16)) byte(length * 8 >> (15 - n) * 8); + } + + auto square(u32 n) -> u64 { + static const u64 data[8] = { + 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939, + 0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4, + }; + return data[n]; + } + + auto cube(u32 n) -> u64 { + static const u64 data[80] = { + 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, + 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, + 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, + 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, + 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, + 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, + 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, + 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, + 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, + 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, + 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, + 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, + 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, + 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, + 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, + 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, + 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, + 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, + 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 
0x431d67c49c100d4c, + 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, + }; + return data[n]; + } + + u64 queue[16] = {}; + u64 w[80] = {}; + u64 h[8] = {}; + u64 queued = 0; + u128 length = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/hash/sha512.hpp b/waterbox/ares64/ares/nall/hash/sha512.hpp new file mode 100644 index 0000000000..eeda189e37 --- /dev/null +++ b/waterbox/ares64/ares/nall/hash/sha512.hpp @@ -0,0 +1,119 @@ +#pragma once + +#include + +namespace nall::Hash { + +struct SHA512 : Hash { + using Hash::input; + + SHA512(array_view buffer = {}) { + reset(); + input(buffer); + } + + auto reset() -> void override { + for(auto& n : queue) n = 0; + for(auto& n : w) n = 0; + for(auto n : range(8)) h[n] = square(n); + queued = length = 0; + } + + auto input(u8 data) -> void override { + byte(data); + length++; + } + + auto output() const -> vector override { + SHA512 self(*this); + self.finish(); + vector result; + for(auto h : self.h) { + for(auto n : reverse(range(8))) result.append(h >> n * 8); + } + return result; + } + + auto value() const -> u512 { + u512 value = 0; + for(auto byte : output()) value = value << 8 | byte; + return value; + } + +private: + auto byte(u8 data) -> void { + u64 shift = (7 - (queued & 7)) * 8; + queue[queued >> 3] &=~((u64)0xff << shift); + queue[queued >> 3] |= ((u64)data << shift); + if(++queued == 128) block(), queued = 0; + } + + auto block() -> void { + for(auto n : range(16)) w[n] = queue[n]; + for(auto n : range(16, 80)) { + u64 a = ror(w[n - 15], 1) ^ ror(w[n - 15], 8) ^ (w[n - 15] >> 7); + u64 b = ror(w[n - 2], 19) ^ ror(w[n - 2], 61) ^ (w[n - 2] >> 6); + w[n] = w[n - 16] + w[n - 7] + a + b; + } + u64 t[8]; + for(auto n : range(8)) t[n] = h[n]; + for(auto n : range(80)) { + u64 a = ror(t[0], 28) ^ ror(t[0], 34) ^ ror(t[0], 39); + u64 b = ror(t[4], 14) ^ ror(t[4], 18) ^ ror(t[4], 41); + u64 c = (t[0] & t[1]) ^ (t[0] & t[2]) ^ (t[1] & t[2]); + u64 d = (t[4] & t[5]) ^ (~t[4] & t[6]); + 
u64 e = t[7] + w[n] + cube(n) + b + d; + t[7] = t[6]; t[6] = t[5]; t[5] = t[4]; t[4] = t[3] + e; + t[3] = t[2]; t[2] = t[1]; t[1] = t[0]; t[0] = a + c + e; + } + for(auto n : range(8)) h[n] += t[n]; + } + + auto finish() -> void { + byte(0x80); + while(queued != 112) byte(0x00); + for(auto n : range(16)) byte(length * 8 >> (15 - n) * 8); + } + + auto square(u32 n) -> u64 { + static const u64 data[8] = { + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, + }; + return data[n]; + } + + auto cube(u32 n) -> u64 { + static const u64 data[80] = { + 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, + 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, + 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, + 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, + 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, + 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, + 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, + 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, + 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, + 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, + 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, + 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, + 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, + 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, + 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, + 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, + 0xca273eceea26619c, 
0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, + 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, + 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, + 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, + }; + return data[n]; + } + + u64 queue[16] = {}; + u64 w[80] = {}; + u64 h[8] = {}; + u64 queued = 0; + u128 length = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/hashset.hpp b/waterbox/ares64/ares/nall/hashset.hpp new file mode 100644 index 0000000000..8da76fec8b --- /dev/null +++ b/waterbox/ares64/ares/nall/hashset.hpp @@ -0,0 +1,133 @@ +#pragma once + +//hashset +// +//search: O(1) average; O(n) worst +//insert: O(1) average; O(n) worst +//remove: O(1) average; O(n) worst +// +//requirements: +// auto T::hash() const -> u32; +// auto T::operator==(const T&) const -> bool; + +namespace nall { + +template +struct hashset { + hashset() = default; + hashset(u32 length) : length(bit::round(length)) {} + hashset(const hashset& source) { operator=(source); } + hashset(hashset&& source) { operator=(move(source)); } + ~hashset() { reset(); } + + auto operator=(const hashset& source) -> hashset& { + reset(); + if(source.pool) { + for(u32 n : range(source.count)) { + insert(*source.pool[n]); + } + } + return *this; + } + + auto operator=(hashset&& source) -> hashset& { + reset(); + pool = source.pool; + length = source.length; + count = source.count; + source.pool = nullptr; + source.length = 8; + source.count = 0; + return *this; + } + + explicit operator bool() const { return count; } + auto capacity() const -> u32 { return length; } + auto size() const -> u32 { return count; } + + auto reset() -> void { + if(pool) { + for(u32 n : range(length)) { + if(pool[n]) { + delete pool[n]; + pool[n] = nullptr; + } + } + delete pool; + pool = nullptr; + } + length = 8; + count = 0; + } + + auto reserve(u32 size) -> void { + //ensure all items will fit into pool (with <= 
50% load) and amortize growth + size = bit::round(max(size, count << 1)); + T** copy = new T*[size](); + + if(pool) { + for(u32 n : range(length)) { + if(pool[n]) { + u32 hash = (*pool[n]).hash() & (size - 1); + while(copy[hash]) if(++hash >= size) hash = 0; + copy[hash] = pool[n]; + pool[n] = nullptr; + } + } + } + + delete pool; + pool = copy; + length = size; + } + + auto find(const T& value) -> maybe { + if(!pool) return nothing; + + u32 hash = value.hash() & (length - 1); + while(pool[hash]) { + if(value == *pool[hash]) return *pool[hash]; + if(++hash >= length) hash = 0; + } + + return nothing; + } + + auto insert(const T& value) -> maybe { + if(!pool) pool = new T*[length](); + + //double pool size when load is >= 50% + if(count >= (length >> 1)) reserve(length << 1); + count++; + + u32 hash = value.hash() & (length - 1); + while(pool[hash]) if(++hash >= length) hash = 0; + pool[hash] = new T(value); + + return *pool[hash]; + } + + auto remove(const T& value) -> bool { + if(!pool) return false; + + u32 hash = value.hash() & (length - 1); + while(pool[hash]) { + if(value == *pool[hash]) { + delete pool[hash]; + pool[hash] = nullptr; + count--; + return true; + } + if(++hash >= length) hash = 0; + } + + return false; + } + +protected: + T** pool = nullptr; + u32 length = 8; //length of pool + u32 count = 0; //number of objects inside of the pool +}; + +} diff --git a/waterbox/ares64/ares/nall/hid.hpp b/waterbox/ares64/ares/nall/hid.hpp new file mode 100644 index 0000000000..cc8e7f120d --- /dev/null +++ b/waterbox/ares64/ares/nall/hid.hpp @@ -0,0 +1,122 @@ +#pragma once + +#include +#include +#include +#include + +namespace nall::HID { + +struct Input { + Input(const string& name) : _name(name) {} + + auto name() const -> string { return _name; } + auto value() const -> s16 { return _value; } + auto setValue(s16 value) -> void { _value = value; } + +private: + string _name; + s16 _value = 0; + friend class Group; +}; + +struct Group : vector { + Group(const 
string& name) : _name(name) {} + + auto name() const -> string { return _name; } + auto input(u32 id) -> Input& { return operator[](id); } + auto append(const string& name) -> void { vector::append(Input{name}); } + + auto find(const string& name) const -> maybe { + for(auto id : range(size())) { + if(operator[](id)._name == name) return id; + } + return nothing; + } + +private: + string _name; + friend class Device; +}; + +struct Device : vector { + Device(const string& name) : _name(name) {} + virtual ~Device() = default; + + //id => {pathID}-{vendorID}-{productID} + auto pathID() const -> u32 { return (u32)(_id >> 32); } //32-63 + auto vendorID() const -> u16 { return (u16)(_id >> 16); } //16-31 + auto productID() const -> u16 { return (u16)(_id >> 0); } // 0-15 + + auto setPathID (u32 pathID ) -> void { _id = (u64)pathID << 32 | vendorID() << 16 | productID() << 0; } + auto setVendorID (u16 vendorID ) -> void { _id = (u64)pathID() << 32 | vendorID << 16 | productID() << 0; } + auto setProductID(u16 productID) -> void { _id = (u64)pathID() << 32 | vendorID() << 16 | productID << 0; } + + virtual auto isNull() const -> bool { return false; } + virtual auto isKeyboard() const -> bool { return false; } + virtual auto isMouse() const -> bool { return false; } + virtual auto isJoypad() const -> bool { return false; } + + auto name() const -> string { return _name; } + auto id() const -> u64 { return _id; } + auto setID(u64 id) -> void { _id = id; } + auto group(u32 id) -> Group& { return operator[](id); } + auto append(const string& name) -> void { vector::append(Group{name}); } + + auto find(const string& name) const -> maybe { + for(auto id : range(size())) { + if(operator[](id)._name == name) return id; + } + return nothing; + } + +private: + string _name; + u64 _id = 0; +}; + +struct Null : Device { + enum : u16 { GenericVendorID = 0x0000, GenericProductID = 0x0000 }; + + Null() : Device("Null") {} + auto isNull() const -> bool { return true; } +}; + +struct 
Keyboard : Device { + enum : u16 { GenericVendorID = 0x0000, GenericProductID = 0x0001 }; + enum GroupID : u32 { Button }; + + Keyboard() : Device("Keyboard") { append("Button"); } + auto isKeyboard() const -> bool { return true; } + auto buttons() -> Group& { return group(GroupID::Button); } +}; + +struct Mouse : Device { + enum : u16 { GenericVendorID = 0x0000, GenericProductID = 0x0002 }; + enum GroupID : u32 { Axis, Button }; + + Mouse() : Device("Mouse") { append("Axis"), append("Button"); } + auto isMouse() const -> bool { return true; } + auto axes() -> Group& { return group(GroupID::Axis); } + auto buttons() -> Group& { return group(GroupID::Button); } +}; + +struct Joypad : Device { + enum : u16 { GenericVendorID = 0x0000, GenericProductID = 0x0003 }; + enum GroupID : u32 { Axis, Hat, Trigger, Button }; + + Joypad() : Device("Joypad") { append("Axis"), append("Hat"), append("Trigger"), append("Button"); } + auto isJoypad() const -> bool { return true; } + auto axes() -> Group& { return group(GroupID::Axis); } + auto hats() -> Group& { return group(GroupID::Hat); } + auto triggers() -> Group& { return group(GroupID::Trigger); } + auto buttons() -> Group& { return group(GroupID::Button); } + + auto rumble() const -> bool { return _rumble; } + auto setRumble(bool rumble) -> void { _rumble = rumble; } + +private: + bool _rumble = false; +}; + +} diff --git a/waterbox/ares64/ares/nall/http/client.hpp b/waterbox/ares64/ares/nall/http/client.hpp new file mode 100644 index 0000000000..1fbf54a04b --- /dev/null +++ b/waterbox/ares64/ares/nall/http/client.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include + +namespace nall::HTTP { + +struct Client : Role { + auto open(const string& hostname, u16 port = 80) -> bool; + auto upload(const Request& request) -> bool; + auto download(const Request& request) -> Response; + auto close() -> void; + ~Client() { close(); } + +private: + s32 fd = -1; + addrinfo* info = nullptr; +}; + +inline auto Client::open(const string& hostname, 
u16 port) -> bool { + addrinfo hint = {}; + hint.ai_family = AF_UNSPEC; + hint.ai_socktype = SOCK_STREAM; + hint.ai_flags = AI_ADDRCONFIG; + + if(getaddrinfo(hostname, string{port}, &hint, &info) != 0) return close(), false; + + fd = socket(info->ai_family, info->ai_socktype, info->ai_protocol); + if(fd < 0) return close(), false; + + if(connect(fd, info->ai_addr, info->ai_addrlen) < 0) return close(), false; + return true; +} + +inline auto Client::upload(const Request& request) -> bool { + return Role::upload(fd, request); +} + +inline auto Client::download(const Request& request) -> Response { + Response response(request); + Role::download(fd, response); + return response; +} + +inline auto Client::close() -> void { + if(fd) { + ::close(fd); + fd = -1; + } + + if(info) { + freeaddrinfo(info); + info = nullptr; + } +} + +} diff --git a/waterbox/ares64/ares/nall/http/message.hpp b/waterbox/ares64/ares/nall/http/message.hpp new file mode 100644 index 0000000000..373ccae7d4 --- /dev/null +++ b/waterbox/ares64/ares/nall/http/message.hpp @@ -0,0 +1,104 @@ +#pragma once + +//httpMessage: base class for httpRequest and httpResponse +//provides shared functionality + +namespace nall::HTTP { + +struct Variable { + string name; + string value; +}; + +struct SharedVariable { + SharedVariable(const nall::string& name = "", const nall::string& value = "") : shared(new Variable{name, value}) {} + + explicit operator bool() const { return (bool)shared->name; } + auto operator()() const { return shared->value; } + auto& operator=(const nall::string& value) { shared->value = value; return *this; } + + auto name() const { return shared->name; } + auto value() const { return shared->value; } + auto string() const { return nall::string{shared->value}.strip().replace("\r", ""); } + auto boolean() const { return string() == "true"; } + auto integer() const { return string().integer(); } + auto natural() const { return string().natural(); } + auto real() const { return string().real(); 
} + + auto& setName(const nall::string& name) { shared->name = name; return *this; } + auto& setValue(const nall::string& value = "") { shared->value = value; return *this; } + + shared_pointer shared; +}; + +struct Variables { + auto operator[](const string& name) const -> SharedVariable { + for(auto& variable : variables) { + if(variable.shared->name.iequals(name)) return variable; + } + return {}; + } + + auto operator()(const string& name) -> SharedVariable { + for(auto& variable : variables) { + if(variable.shared->name.iequals(name)) return variable; + } + return append(name); + } + + auto find(const string& name) const -> vector { + vector result; + for(auto& variable : variables) { + if(variable.shared->name.iequals(name)) result.append(variable); + } + return result; + } + + auto assign(const string& name, const string& value = "") -> SharedVariable { + for(auto& variable : variables) { + if(variable.shared->name.iequals(name)) { + variable.shared->value = value; + return variable; + } + } + return append(name, value); + } + + auto append(const string& name, const string& value = "") -> SharedVariable { + SharedVariable variable{name, value}; + variables.append(variable); + return variable; + } + + auto remove(const string& name) -> void { + for(auto n : reverse(range(variables.size()))) { + if(variables[n].shared->name.iequals(name)) variables.remove(n); + } + } + + auto size() const { return variables.size(); } + auto begin() const { return variables.begin(); } + auto end() const { return variables.end(); } + auto begin() { return variables.begin(); } + auto end() { return variables.end(); } + + vector variables; +}; + +struct Message { + using type = Message; + + virtual auto head(const function& callback) const -> bool = 0; + virtual auto setHead() -> bool = 0; + + virtual auto body(const function& callback) const -> bool = 0; + virtual auto setBody() -> bool = 0; + + Variables header; + +//private: + string _head; + string _body; +}; + +} diff --git 
a/waterbox/ares64/ares/nall/http/request.hpp b/waterbox/ares64/ares/nall/http/request.hpp new file mode 100644 index 0000000000..c30c282f9d --- /dev/null +++ b/waterbox/ares64/ares/nall/http/request.hpp @@ -0,0 +1,184 @@ +#pragma once + +#include +#include +#include + +namespace nall::HTTP { + +struct Request : Message { + using type = Request; + + enum class RequestType : u32 { None, Head, Get, Post }; + + explicit operator bool() const { return requestType() != RequestType::None; } + + auto head(const function& callback) const -> bool override; + auto setHead() -> bool override; + + auto body(const function& callback) const -> bool override; + auto setBody() -> bool override; + + auto ipv4() const -> bool { return _ipv6 == false; } + auto ipv6() const -> bool { return _ipv6 == true; } + auto ip() const -> string { return _ip; } + + auto requestType() const -> RequestType { return _requestType; } + auto setRequestType(RequestType value) -> void { _requestType = value; } + + auto path() const -> string { return _path; } + auto setPath(const string& value) -> void { _path = value; } + + Variables cookie; + Variables get; + Variables post; + +//private: + bool _ipv6 = false; + string _ip; + RequestType _requestType = RequestType::None; + string _path; +}; + +inline auto Request::head(const function& callback) const -> bool { + if(!callback) return false; + string output; + + string request = path(); + if(get.size()) { + request.append("?"); + for(auto& variable : get) { + request.append(Encode::URL(variable.name()), "=", Encode::URL(variable.value()), "&"); + } + request.trimRight("&", 1L); + } + + switch(requestType()) { + case RequestType::Head: output.append("HEAD ", request, " HTTP/1.1\r\n"); break; + case RequestType::Get : output.append("GET ", request, " HTTP/1.1\r\n"); break; + case RequestType::Post: output.append("POST ", request, " HTTP/1.1\r\n"); break; + default: return false; + } + + for(auto& variable : header) { + output.append(variable.name(), ": ", 
variable.value(), "\r\n"); + } + output.append("\r\n"); + + return callback(output.data(), output.size()); +} + +inline auto Request::setHead() -> bool { + auto headers = _head.split("\n"); + string request = headers.takeLeft().trimRight("\r", 1L); + string requestHost; + + if(request.iendsWith(" HTTP/1.0")) request.itrimRight(" HTTP/1.0", 1L); + else if(request.iendsWith(" HTTP/1.1")) request.itrimRight(" HTTP/1.1", 1L); + else return false; + + if(request.ibeginsWith("HEAD ")) request.itrimLeft("HEAD ", 1L), setRequestType(RequestType::Head); + else if(request.ibeginsWith("GET " )) request.itrimLeft("GET ", 1L), setRequestType(RequestType::Get ); + else if(request.ibeginsWith("POST ")) request.itrimLeft("POST ", 1L), setRequestType(RequestType::Post); + else return false; + + //decode absolute URIs + request.strip().itrimLeft("http://", 1L); + if(!request.beginsWith("/")) { + auto components = request.split("/", 1L); + requestHost = components(0); + request = {"/", components(1)}; + } + + auto components = request.split("?", 1L); + setPath(components(0)); + + if(auto queryString = components(1)) { + for(auto& block : queryString.split("&")) { + auto p = block.split("=", 1L); + auto name = Decode::URL(p(0)); + auto value = Decode::URL(p(1)); + if(name) get.append(name, value); + } + } + + for(auto& header : headers) { + if(header.beginsWith(" ") || header.beginsWith("\t")) continue; + auto part = header.split(":", 1L).strip(); + if(!part[0] || part.size() != 2) continue; + this->header.append(part[0], part[1]); + + if(part[0].iequals("Cookie")) { + for(auto& block : part[1].split(";")) { + auto p = block.split("=", 1L).strip(); + auto name = p(0); + auto value = p(1).trim("\"", "\"", 1L); + if(name) cookie.append(name, value); + } + } + } + + if(requestHost) header.assign("Host", requestHost); //request URI overrides host header + return true; +} + +inline auto Request::body(const function& callback) const -> bool { + if(!callback) return false; + + if(_body) { + 
return callback(_body.data(), _body.size()); + } + + return true; +} + +inline auto Request::setBody() -> bool { + if(requestType() == RequestType::Post) { + auto contentType = header["Content-Type"].value(); + if(contentType.iequals("application/x-www-form-urlencoded")) { + for(auto& block : _body.split("&")) { + auto p = block.trimRight("\r").split("=", 1L); + auto name = Decode::URL(p(0)); + auto value = Decode::URL(p(1)); + if(name) post.append(name, value); + } + } else if(contentType.imatch("multipart/form-data; boundary=?*")) { + auto boundary = contentType.itrimLeft("multipart/form-data; boundary=", 1L).trim("\"", "\"", 1L); + auto blocks = _body.split({"--", boundary}, 1024L); //limit blocks to prevent memory exhaustion + for(auto& block : blocks) block.trim("\r\n", "\r\n", 1L); + if(blocks.size() < 2 || (blocks.takeLeft(), !blocks.takeRight().beginsWith("--"))) return false; + for(auto& block : blocks) { + string name; + string filename; + string contentType; + + auto segments = block.split("\r\n\r\n", 1L); + for(auto& segment : segments(0).split("\r\n")) { + auto statement = segment.split(":", 1L); + if(statement(0).ibeginsWith("Content-Disposition")) { + for(auto& component : statement(1).split(";")) { + auto part = component.split("=", 1L).strip(); + if(part(0).iequals("name")) { + name = part(1).trim("\"", "\"", 1L); + } else if(part(0).iequals("filename")) { + filename = part(1).trim("\"", "\"", 1L); + } + } + } else if(statement(0).ibeginsWith("Content-Type")) { + contentType = statement(1).strip(); + } + } + + if(name) { + post.append(name, segments(1)); + post.append({name, ".filename"}, filename); + post.append({name, ".content-type"}, contentType); + } + } + } + } + + return true; +} + +} diff --git a/waterbox/ares64/ares/nall/http/response.hpp b/waterbox/ares64/ares/nall/http/response.hpp new file mode 100644 index 0000000000..93a3b7a7b5 --- /dev/null +++ b/waterbox/ares64/ares/nall/http/response.hpp @@ -0,0 +1,290 @@ +#pragma once + +#include 
+ +namespace nall::HTTP { + +struct Response : Message { + using type = Response; + + Response() = default; + Response(const Request& request) { setRequest(request); } + + explicit operator bool() const { return responseType() != 0; } + auto operator()(u32 responseType) -> type& { return setResponseType(responseType); } + + auto head(const function& callback) const -> bool override; + auto setHead() -> bool override; + + auto body(const function& callback) const -> bool override; + auto setBody() -> bool override; + + auto request() const -> const Request* { return _request; } + auto setRequest(const Request& value) -> type& { _request = &value; return *this; } + + auto responseType() const -> u32 { return _responseType; } + auto setResponseType(u32 value) -> type& { _responseType = value; return *this; } + + auto hasData() const -> bool { return (bool)_data; } + auto data() const -> const vector& { return _data; } + auto setData(const vector& value) -> type&; + + auto hasFile() const -> bool { return (bool)_file; } + auto file() const -> const string& { return _file; } + auto setFile(const string& value) -> type&; + + auto hasText() const -> bool { return (bool)_text; } + auto text() const -> const string& { return _text; } + auto setText(const string& value) -> type&; + + auto hasBody() const -> bool; + auto findContentLength() const -> u32; + auto findContentType() const -> string; + auto findContentType(const string& suffix) const -> string; + auto findResponseType() const -> string; + auto findResponseTypeVerbose() const -> string; + auto setFileETag() -> void; + + const Request* _request = nullptr; + u32 _responseType = 0; + vector _data; + string _file; + string _text; +}; + +inline auto Response::head(const function& callback) const -> bool { + if(!callback) return false; + string output; + + if(auto request = this->request()) { + if(auto eTag = header["ETag"]) { + if(eTag.value() == request->header["If-None-Match"].value()) { + output.append("HTTP/1.1 304 
Not Modified\r\n"); + output.append("Connection: close\r\n"); + output.append("\r\n"); + return callback(output.data(), output.size()); + } + } + } + + output.append("HTTP/1.1 ", findResponseType(), "\r\n"); + for(auto& variable : header) { + output.append(variable.name(), ": ", variable.value(), "\r\n"); + } + if(hasBody()) { + if(!header["Content-Length"] && !header["Transfer-Encoding"].value().iequals("chunked")) { + output.append("Content-Length: ", findContentLength(), "\r\n"); + } + if(!header["Content-Type"]) { + output.append("Content-Type: ", findContentType(), "\r\n"); + } + } + if(!header["Connection"]) { + output.append("Connection: close\r\n"); + } + output.append("\r\n"); + + return callback(output.data(), output.size()); +} + +inline auto Response::setHead() -> bool { + auto headers = _head.split("\n"); + string response = headers.takeLeft().trimRight("\r"); + + if(response.ibeginsWith("HTTP/1.0 ")) response.itrimLeft("HTTP/1.0 ", 1L); + else if(response.ibeginsWith("HTTP/1.1 ")) response.itrimLeft("HTTP/1.1 ", 1L); + else return false; + + setResponseType(response.natural()); + + for(auto& header : headers) { + if(header.beginsWith(" ") || header.beginsWith("\t")) continue; + auto variable = header.split(":", 1L).strip(); + if(variable.size() != 2) continue; + this->header.append(variable[0], variable[1]); + } + + return true; +} + +inline auto Response::body(const function& callback) const -> bool { + if(!callback) return false; + if(!hasBody()) return true; + bool chunked = header["Transfer-Encoding"].value() == "chunked"; + + if(chunked) { + string prefix = {hex(findContentLength()), "\r\n"}; + if(!callback(prefix.data(), prefix.size())) return false; + } + + if(_body) { + if(!callback(_body.data(), _body.size())) return false; + } else if(hasData()) { + if(!callback(data().data(), data().size())) return false; + } else if(hasFile()) { + file_map map(file(), file_map::mode::read); + if(!callback(map.data(), map.size())) return false; + } else 
if(hasText()) { + if(!callback(text().data(), text().size())) return false; + } else { + string response = findResponseType(); + if(!callback(response.data(), response.size())) return false; + } + + if(chunked) { + string suffix = {"\r\n0\r\n\r\n"}; + if(!callback(suffix.data(), suffix.size())) return false; + } + + return true; +} + +inline auto Response::setBody() -> bool { + return true; +} + +inline auto Response::hasBody() const -> bool { + if(auto request = this->request()) { + if(request->requestType() == Request::RequestType::Head) return false; + } + if(responseType() == 301) return false; + if(responseType() == 302) return false; + if(responseType() == 303) return false; + if(responseType() == 304) return false; + if(responseType() == 307) return false; + return true; +} + +inline auto Response::findContentLength() const -> u32 { + if(auto contentLength = header["Content-Length"]) return contentLength.value().natural(); + if(_body) return _body.size(); + if(hasData()) return data().size(); + if(hasFile()) return file::size(file()); + if(hasText()) return text().size(); + return findResponseType().size(); +} + +inline auto Response::findContentType() const -> string { + if(auto contentType = header["Content-Type"]) return contentType.value(); + if(hasData()) return "application/octet-stream"; + if(hasFile()) return findContentType(Location::suffix(file())); + return "text/html; charset=utf-8"; +} + +inline auto Response::findContentType(const string& s) const -> string { + if(s == ".7z" ) return "application/x-7z-compressed"; + if(s == ".avi" ) return "video/avi"; + if(s == ".bml" ) return "text/plain; charset=utf-8"; + if(s == ".bz2" ) return "application/x-bzip2"; + if(s == ".c" ) return "text/plain; charset=utf-8"; + if(s == ".cpp" ) return "text/plain; charset=utf-8"; + if(s == ".css" ) return "text/css; charset=utf-8"; + if(s == ".gif" ) return "image/gif"; + if(s == ".gz" ) return "application/gzip"; + if(s == ".h" ) return "text/plain; 
charset=utf-8"; + if(s == ".hpp" ) return "text/plain; charset=utf-8"; + if(s == ".htm" ) return "text/html; charset=utf-8"; + if(s == ".html") return "text/html; charset=utf-8"; + if(s == ".ico" ) return "image/x-icon"; + if(s == ".jpg" ) return "image/jpeg"; + if(s == ".jpeg") return "image/jpeg"; + if(s == ".js" ) return "application/javascript"; + if(s == ".mka" ) return "audio/x-matroska"; + if(s == ".mkv" ) return "video/x-matroska"; + if(s == ".mp3" ) return "audio/mpeg"; + if(s == ".mp4" ) return "video/mp4"; + if(s == ".mpeg") return "video/mpeg"; + if(s == ".mpg" ) return "video/mpeg"; + if(s == ".ogg" ) return "audio/ogg"; + if(s == ".pdf" ) return "application/pdf"; + if(s == ".png" ) return "image/png"; + if(s == ".rar" ) return "application/x-rar-compressed"; + if(s == ".svg" ) return "image/svg+xml"; + if(s == ".tar" ) return "application/x-tar"; + if(s == ".txt" ) return "text/plain; charset=utf-8"; + if(s == ".wav" ) return "audio/vnd.wave"; + if(s == ".webm") return "video/webm"; + if(s == ".xml" ) return "text/xml; charset=utf-8"; + if(s == ".xz" ) return "application/x-xz"; + if(s == ".zip" ) return "application/zip"; + return "application/octet-stream"; //binary +} + +inline auto Response::findResponseType() const -> string { + switch(responseType()) { + case 200: return "200 OK"; + case 301: return "301 Moved Permanently"; + case 302: return "302 Found"; + case 303: return "303 See Other"; + case 304: return "304 Not Modified"; + case 307: return "307 Temporary Redirect"; + case 400: return "400 Bad Request"; + case 403: return "403 Forbidden"; + case 404: return "404 Not Found"; + case 500: return "500 Internal Server Error"; + case 501: return "501 Not Implemented"; + case 503: return "503 Service Unavailable"; + } + return "501 Not Implemented"; +} + +inline auto Response::findResponseTypeVerbose() const -> string { + switch(responseType()) { + case 400: return "The server was unable to understand your request."; + case 403: return "You are 
not authorized to access this resource."; + case 404: return "The requested content could not be found."; + case 500: return "The server has encountered an error."; + case 501: return "The requested feature has not been implemented."; + case 503: return "The requested service is not currently available."; + } + return findResponseType(); //fallback for uncommon responses +} + +inline auto Response::setData(const vector& value) -> type& { + _data = value; + header.assign("Content-Length", value.size()); + return *this; +} + +inline auto Response::setFile(const string& value) -> type& { + //block path escalation exploits ("../" and "..\" in the file location) + bool valid = true; + for(u32 n : range(value.size())) { + if(value(n + 0, '\0') != '.') continue; + if(value(n + 1, '\0') != '.') continue; + if(value(n + 2, '\0') != '/' && value(n + 2, '\0') != '\\') continue; + valid = false; + break; + } + if(!valid) return *this; + + //cache images for seven days + auto suffix = Location::suffix(value); + u32 maxAge = 0; + if(suffix == ".svg" + || suffix == ".ico" + || suffix == ".png" + || suffix == ".gif" + || suffix == ".jpg" + || suffix == ".jpeg") { + maxAge = 7 * 24 * 60 * 60; + } + + _file = value; + header.assign("Content-Length", file::size(value)); + header.assign("ETag", {"\"", chrono::utc::datetime(file::timestamp(value, file::time::modify)), "\""}); + if(maxAge == 0) { + header.assign("Cache-Control", {"public"}); + } else { + header.assign("Cache-Control", {"public, max-age=", maxAge}); + } + return *this; +} + +inline auto Response::setText(const string& value) -> type& { + _text = value; + header.assign("Content-Length", value.size()); + return *this; +} + +} diff --git a/waterbox/ares64/ares/nall/http/role.hpp b/waterbox/ares64/ares/nall/http/role.hpp new file mode 100644 index 0000000000..4978f9c96a --- /dev/null +++ b/waterbox/ares64/ares/nall/http/role.hpp @@ -0,0 +1,158 @@ +#pragma once + +//Role: base class for Client and Server +//provides shared 
functionality + +#include +#include + +namespace nall::HTTP { + +struct Role { + struct Settings { + s32 connectionLimit = 1 * 1024; //server + s32 headSizeLimit = 16 * 1024; //client, server + s32 bodySizeLimit = 65536 * 1024; //client, server + s32 chunkSize = 32 * 1024; //client, server + s32 threadStackSize = 128 * 1024; //server + s32 timeoutReceive = 15 * 1000; //server + s32 timeoutSend = 15 * 1000; //server + } settings; + + auto configure(const string& parameters) -> bool; + auto download(s32 fd, Message& message) -> bool; + auto upload(s32 fd, const Message& message) -> bool; +}; + +inline auto Role::configure(const string& parameters) -> bool { + auto document = BML::unserialize(parameters); + for(auto parameter : document) { + auto name = parameter.name(); + auto value = parameter.integer(); + + if(0); + else if(name == "connectionLimit") settings.connectionLimit = value; + else if(name == "headSizeLimit") settings.headSizeLimit = value; + else if(name == "bodySizeLimit") settings.bodySizeLimit = value; + else if(name == "chunkSize") settings.chunkSize = value; + else if(name == "threadStackSize") settings.threadStackSize = value; + else if(name == "timeoutReceive") settings.timeoutReceive = value; + else if(name == "timeoutSend") settings.timeoutSend = value; + } + return true; +} + +inline auto Role::download(s32 fd, Message& message) -> bool { + auto& head = message._head; + auto& body = message._body; + string chunk; + u8 packet[settings.chunkSize], *p = nullptr; + + head.reset(), head.reserve(4095); + body.reset(), body.reserve(4095); + + bool headReceived = false; + bool chunked = false; + bool chunkReceived = false; + bool chunkFooterReceived = true; + s32 length = 0; + s32 chunkLength = 0; + s32 contentLength = 0; + + while(true) { + if(auto limit = settings.headSizeLimit) if(head.size() >= limit) return false; + if(auto limit = settings.bodySizeLimit) if(body.size() >= limit) return false; + + if(headReceived && !chunked && body.size() >= 
contentLength) { + body.resize(contentLength); + break; + } + + if(length == 0) { + length = recv(fd, packet, settings.chunkSize, MSG_NOSIGNAL); + if(length <= 0) return false; + p = packet; + } + + if(!headReceived) { + head.append((char)*p++); + --length; + + if(head.endsWith("\r\n\r\n") || head.endsWith("\n\n")) { + headReceived = true; + if(!message.setHead()) return false; + chunked = message.header["Transfer-Encoding"].value().iequals("chunked"); + contentLength = message.header["Content-Length"].value().natural(); + } + + continue; + } + + if(chunked && !chunkReceived) { + char n = *p++; + --length; + + if(!chunkFooterReceived) { + if(n == '\n') chunkFooterReceived = true; + continue; + } + + chunk.append(n); + + if(chunk.endsWith("\r\n") || chunk.endsWith("\n")) { + chunkReceived = true; + chunkLength = chunk.hex(); + if(chunkLength == 0) break; + chunk.reset(); + } + + continue; + } + + if(!chunked) { + body.resize(body.size() + length); + memory::copy(body.get() + body.size() - length, p, length); + + p += length; + length = 0; + } else { + s32 transferLength = min(length, chunkLength); + body.resize(body.size() + transferLength); + memory::copy(body.get() + body.size() - transferLength, p, transferLength); + + p += transferLength; + length -= transferLength; + chunkLength -= transferLength; + + if(chunkLength == 0) { + chunkReceived = false; + chunkFooterReceived = false; + } + } + } + + if(!message.setBody()) return false; + return true; +} + +inline auto Role::upload(s32 fd, const Message& message) -> bool { + auto transfer = [&](const u8* data, u32 size) -> bool { + while(size) { + s32 length = send(fd, data, min(size, settings.chunkSize), MSG_NOSIGNAL); + if(length < 0) return false; + data += length; + size -= length; + } + return true; + }; + + if(message.head([&](const u8* data, u32 size) -> bool { return transfer(data, size); })) { + if(message.body([&](const u8* data, u32 size) -> bool { return transfer(data, size); })) { + return true; + } + } 
+ + return false; +} + +} diff --git a/waterbox/ares64/ares/nall/http/server.hpp b/waterbox/ares64/ares/nall/http/server.hpp new file mode 100644 index 0000000000..5560fec27e --- /dev/null +++ b/waterbox/ares64/ares/nall/http/server.hpp @@ -0,0 +1,226 @@ +#pragma once + +#include +#include + +namespace nall::HTTP { + +struct Server : Role, service { + auto open(u16 port = 8080, const string& serviceName = "", const string& command = "") -> bool; + auto main(const function& function = {}) -> void; + auto scan() -> string; + auto close() -> void; + ~Server() { close(); } + +private: + function callback; + std::atomic connections{0}; + + s32 fd4 = -1; + s32 fd6 = -1; + struct sockaddr_in addrin4 = {0}; + struct sockaddr_in6 addrin6 = {0}; + + auto ipv4() const -> bool { return fd4 >= 0; } + auto ipv6() const -> bool { return fd6 >= 0; } + + auto ipv4_close() -> void { if(fd4 >= 0) ::close(fd4); fd4 = -1; } + auto ipv6_close() -> void { if(fd6 >= 0) ::close(fd6); fd6 = -1; } + + auto ipv4_scan() -> bool; + auto ipv6_scan() -> bool; +}; + +inline auto Server::open(u16 port, const string& serviceName, const string& command) -> bool { + if(serviceName) { + if(!service::command(serviceName, command)) return false; + } + + fd4 = socket(AF_INET, SOCK_STREAM, 0); + fd6 = socket(AF_INET6, SOCK_STREAM, 0); + if(!ipv4() && !ipv6()) return false; + + { + #if defined(SO_RCVTIMEO) + if(settings.timeoutReceive) { + struct timeval rcvtimeo; + rcvtimeo.tv_sec = settings.timeoutReceive / 1000; + rcvtimeo.tv_usec = settings.timeoutReceive % 1000 * 1000; + if(ipv4()) setsockopt(fd4, SOL_SOCKET, SO_RCVTIMEO, &rcvtimeo, sizeof(struct timeval)); + if(ipv6()) setsockopt(fd6, SOL_SOCKET, SO_RCVTIMEO, &rcvtimeo, sizeof(struct timeval)); + } + #endif + + #if defined(SO_SNDTIMEO) + if(settings.timeoutSend) { + struct timeval sndtimeo; + sndtimeo.tv_sec = settings.timeoutSend / 1000; + sndtimeo.tv_usec = settings.timeoutSend % 1000 * 1000; + if(ipv4()) setsockopt(fd4, SOL_SOCKET, SO_SNDTIMEO, 
&sndtimeo, sizeof(struct timeval)); + if(ipv6()) setsockopt(fd6, SOL_SOCKET, SO_SNDTIMEO, &sndtimeo, sizeof(struct timeval)); + } + #endif + + #if defined(SO_NOSIGPIPE) //BSD, OSX + s32 nosigpipe = 1; + if(ipv4()) setsockopt(fd4, SOL_SOCKET, SO_NOSIGPIPE, &nosigpipe, sizeof(s32)); + if(ipv6()) setsockopt(fd6, SOL_SOCKET, SO_NOSIGPIPE, &nosigpipe, sizeof(s32)); + #endif + + #if defined(SO_REUSEADDR) //BSD, Linux, OSX + s32 reuseaddr = 1; + if(ipv4()) setsockopt(fd4, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(s32)); + if(ipv6()) setsockopt(fd6, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(s32)); + #endif + + #if defined(SO_REUSEPORT) //BSD, OSX + s32 reuseport = 1; + if(ipv4()) setsockopt(fd4, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(s32)); + if(ipv6()) setsockopt(fd6, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(s32)); + #endif + } + + addrin4.sin_family = AF_INET; + addrin4.sin_addr.s_addr = htonl(INADDR_ANY); + addrin4.sin_port = htons(port); + + addrin6.sin6_family = AF_INET6; + addrin6.sin6_addr = in6addr_any; + addrin6.sin6_port = htons(port); + + if(bind(fd4, (struct sockaddr*)&addrin4, sizeof(addrin4)) < 0 || listen(fd4, SOMAXCONN) < 0) ipv4_close(); + if(bind(fd6, (struct sockaddr*)&addrin6, sizeof(addrin6)) < 0 || listen(fd6, SOMAXCONN) < 0) ipv6_close(); + return ipv4() || ipv6(); +} + +inline auto Server::main(const function& function) -> void { + callback = function; +} + +inline auto Server::scan() -> string { + if(auto command = service::receive()) return command; + if(connections >= settings.connectionLimit) return "busy"; + if(ipv4() && ipv4_scan()) return "ok"; + if(ipv6() && ipv6_scan()) return "ok"; + return "idle"; +} + +inline auto Server::ipv4_scan() -> bool { + struct pollfd query = {0}; + query.fd = fd4; + query.events = POLLIN; + poll(&query, 1, 0); + + if(query.fd == fd4 && query.revents & POLLIN) { + ++connections; + + thread::create([&](uintptr) { + thread::detach(); + + s32 clientfd = -1; + struct sockaddr_in settings = {0}; + 
socklen_t socklen = sizeof(sockaddr_in); + + clientfd = accept(fd4, (struct sockaddr*)&settings, &socklen); + if(clientfd < 0) return; + + u32 ip = ntohl(settings.sin_addr.s_addr); + + Request request; + request._ipv6 = false; + request._ip = { + (u8)(ip >> 24), ".", + (u8)(ip >> 16), ".", + (u8)(ip >> 8), ".", + (u8)(ip >> 0) + }; + + if(download(clientfd, request) && callback) { + auto response = callback(request); + upload(clientfd, response); + } else { + upload(clientfd, Response()); //"501 Not Implemented" + } + + ::close(clientfd); + --connections; + }, 0, settings.threadStackSize); + + return true; + } + + return false; +} + +inline auto Server::ipv6_scan() -> bool { + struct pollfd query = {0}; + query.fd = fd6; + query.events = POLLIN; + poll(&query, 1, 0); + + if(query.fd == fd6 && query.revents & POLLIN) { + ++connections; + + thread::create([&](uintptr) { + thread::detach(); + + s32 clientfd = -1; + struct sockaddr_in6 settings = {0}; + socklen_t socklen = sizeof(sockaddr_in6); + + clientfd = accept(fd6, (struct sockaddr*)&settings, &socklen); + if(clientfd < 0) return; + + u8* ip = settings.sin6_addr.s6_addr; + u16 ipSegment[8]; + for(auto n : range(8)) ipSegment[n] = ip[n * 2 + 0] * 256 + ip[n * 2 + 1]; + + Request request; + request._ipv6 = true; + //RFC5952 IPv6 encoding: the first longest 2+ consecutive zero-sequence is compressed to "::" + s32 zeroOffset = -1; + s32 zeroLength = 0; + s32 zeroCounter = 0; + for(auto n : range(8)) { + u16 value = ipSegment[n]; + if(value == 0) zeroCounter++; + if(zeroCounter > zeroLength) { + zeroLength = zeroCounter; + zeroOffset = 1 + n - zeroLength; + } + if(value != 0) zeroCounter = 0; + } + if(zeroLength == 1) zeroOffset = -1; + for(u32 n = 0; n < 8;) { + if(n == zeroOffset) { + request._ip.append(n == 0 ? "::" : ":"); + n += zeroLength; + } else { + u16 value = ipSegment[n]; + request._ip.append(hex(value), n++ != 7 ? 
":" : ""); + } + } + + if(download(clientfd, request) && callback) { + auto response = callback(request); + upload(clientfd, response); + } else { + upload(clientfd, Response()); //"501 Not Implemented" + } + + ::close(clientfd); + --connections; + }, 0, settings.threadStackSize); + + return true; + } + + return false; +} + +inline auto Server::close() -> void { + ipv4_close(); + ipv6_close(); +} + +} diff --git a/waterbox/ares64/ares/nall/image.hpp b/waterbox/ares64/ares/nall/image.hpp new file mode 100644 index 0000000000..3d7c0fa014 --- /dev/null +++ b/waterbox/ares64/ares/nall/image.hpp @@ -0,0 +1,194 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace nall { + +struct image { + enum class blend : u32 { + add, + sourceAlpha, //color = sourceColor * sourceAlpha + targetColor * (1 - sourceAlpha) + sourceColor, //color = sourceColor + targetAlpha, //color = targetColor * targetAlpha + sourceColor * (1 - targetAlpha) + targetColor, //color = targetColor + }; + + struct channel { + channel(u64 mask, u32 depth, u32 shift) : _mask(mask), _depth(depth), _shift(shift) { + } + + auto operator==(const channel& source) const -> bool { + return _mask == source._mask && _depth == source._depth && _shift == source._shift; + } + + auto operator!=(const channel& source) const -> bool { + return !operator==(source); + } + + auto mask() const { return _mask; } + auto depth() const { return _depth; } + auto shift() const { return _shift; } + + private: + u64 _mask; + u32 _depth; + u32 _shift; + }; + + //core.hpp + image(const image& source); + image(image&& source); + image(bool endian, u32 depth, u64 alphaMask, u64 redMask, u64 greenMask, u64 blueMask); + image(const string& filename); + image(const void* data, u32 size); + image(const vector& buffer); + template image(const u8 (&Name)[Size]); + image(); + ~image(); + + auto operator=(const image& source) -> image&; + auto operator=(image&& source) -> image&; + + explicit operator bool() 
const; + auto operator==(const image& source) const -> bool; + auto operator!=(const image& source) const -> bool; + + auto read(const u8* data) const -> u64; + auto write(u8* data, u64 value) const -> void; + + auto free() -> void; + auto load(const string& filename) -> bool; + auto copy(const void* data, u32 pitch, u32 width, u32 height) -> void; + auto allocate(u32 width, u32 height) -> void; + + //fill.hpp + auto fill(u64 color = 0) -> void; + auto gradient(u64 a, u64 b, u64 c, u64 d) -> void; + auto gradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY, function callback) -> void; + auto crossGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void; + auto diamondGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void; + auto horizontalGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void; + auto radialGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void; + auto sphericalGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void; + auto squareGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void; + auto verticalGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void; + + //scale.hpp + auto scale(u32 width, u32 height, bool linear = true) -> void; + + //blend.hpp + auto impose(blend mode, u32 targetX, u32 targetY, image source, u32 x, u32 y, u32 width, u32 height) -> void; + + //utility.hpp + auto shrink(u64 transparentColor = 0) -> void; + auto crop(u32 x, u32 y, u32 width, u32 height) -> bool; + auto alphaBlend(u64 alphaColor) -> void; + auto alphaMultiply() -> void; + auto transform(const image& source = {}) -> void; + auto transform(bool endian, u32 depth, u64 alphaMask, u64 redMask, u64 greenMask, u64 blueMask) -> void; + + //static.hpp + static auto bitDepth(u64 color) -> u32; + static auto bitShift(u64 color) -> u32; + static auto normalize(u64 
color, u32 sourceDepth, u32 targetDepth) -> u64; + + //access + auto data() { return _data; } + auto data() const { return _data; } + auto width() const { return _width; } + auto height() const { return _height; } + + auto endian() const { return _endian; } + auto depth() const { return _depth; } + auto stride() const { return (_depth + 7) >> 3; } + + auto pitch() const { return _width * stride(); } + auto size() const { return _height * pitch(); } + + auto alpha() const { return _alpha; } + auto red() const { return _red; } + auto green() const { return _green; } + auto blue() const { return _blue; } + +private: + //core.hpp + auto allocate(u32 width, u32 height, u32 stride) -> u8*; + + //scale.hpp + auto scaleLinearWidth(u32 width) -> void; + auto scaleLinearHeight(u32 height) -> void; + auto scaleLinear(u32 width, u32 height) -> void; + auto scaleNearest(u32 width, u32 height) -> void; + + //load.hpp + auto loadBMP(const string& filename) -> bool; + auto loadBMP(const u8* data, u32 size) -> bool; + auto loadPNG(const string& filename) -> bool; + auto loadPNG(const u8* data, u32 size) -> bool; + + //interpolation.hpp + auto isplit(u64* component, u64 color) -> void; + auto imerge(const u64* component) -> u64; + auto interpolate1f(u64 a, u64 b, f64 x) -> u64; + auto interpolate1f(u64 a, u64 b, u64 c, u64 d, f64 x, f64 y) -> u64; + auto interpolate1i(s64 a, s64 b, u32 x) -> u64; + auto interpolate1i(s64 a, s64 b, s64 c, s64 d, u32 x, u32 y) -> u64; + auto interpolate4f(u64 a, u64 b, f64 x) -> u64; + auto interpolate4f(u64 a, u64 b, u64 c, u64 d, f64 x, f64 y) -> u64; + auto interpolate4i(u64 a, u64 b, u32 x) -> u64; + auto interpolate4i(u64 a, u64 b, u64 c, u64 d, u32 x, u32 y) -> u64; + + u8* _data = nullptr; + u32 _width = 0; + u32 _height = 0; + + bool _endian = 0; //0 = lsb, 1 = msb + u32 _depth = 32; + + channel _alpha{255u << 24, 8, 24}; + channel _red {255u << 16, 8, 16}; + channel _green{255u << 8, 8, 8}; + channel _blue {255u << 0, 8, 0}; +}; + +struct 
multiFactorImage : public image { + using image::image; + + multiFactorImage(const multiFactorImage& source); + multiFactorImage(multiFactorImage&& source); + multiFactorImage(const image& lowDPI, const image& highDPI); + multiFactorImage(const image& source); + multiFactorImage(image&& source); + multiFactorImage(); + ~multiFactorImage(); + + auto operator=(const multiFactorImage& source) -> multiFactorImage&; + auto operator=(multiFactorImage&& source) -> multiFactorImage&; + + auto operator==(const multiFactorImage& source) const -> bool; + auto operator!=(const multiFactorImage& source) const -> bool; + + const image& lowDPI() const { return *this; } + const image& highDPI() const { return _highDPI; } + +private: + image _highDPI; +}; + +} + +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/waterbox/ares64/ares/nall/image/blend.hpp b/waterbox/ares64/ares/nall/image/blend.hpp new file mode 100644 index 0000000000..721952cedb --- /dev/null +++ b/waterbox/ares64/ares/nall/image/blend.hpp @@ -0,0 +1,71 @@ +#pragma once + +namespace nall { + +inline auto image::impose(blend mode, u32 targetX, u32 targetY, image source, u32 sourceX, u32 sourceY, u32 sourceWidth, u32 sourceHeight) -> void { + source.transform(_endian, _depth, _alpha.mask(), _red.mask(), _green.mask(), _blue.mask()); + + for(u32 y = 0; y < sourceHeight; y++) { + const u8* sp = source._data + source.pitch() * (sourceY + y) + source.stride() * sourceX; + u8* dp = _data + pitch() * (targetY + y) + stride() * targetX; + for(u32 x = 0; x < sourceWidth; x++) { + u64 sourceColor = source.read(sp); + u64 targetColor = read(dp); + + s64 sa = (sourceColor & _alpha.mask()) >> _alpha.shift(); + s64 sr = (sourceColor & _red.mask() ) >> _red.shift(); + s64 sg = (sourceColor & _green.mask()) >> _green.shift(); + s64 sb = (sourceColor & _blue.mask() ) >> _blue.shift(); + + s64 da = (targetColor & _alpha.mask()) >> _alpha.shift(); + s64 dr = (targetColor & 
_red.mask() ) >> _red.shift(); + s64 dg = (targetColor & _green.mask()) >> _green.shift(); + s64 db = (targetColor & _blue.mask() ) >> _blue.shift(); + + u64 a, r, g, b; + + switch(mode) { + case blend::add: + a = max(sa, da); + r = min(_red.mask() >> _red.shift(), ((sr * sa) >> _alpha.depth()) + ((dr * da) >> _alpha.depth())); + g = min(_green.mask() >> _green.shift(), ((sg * sa) >> _alpha.depth()) + ((dg * da) >> _alpha.depth())); + b = min(_blue.mask() >> _blue.shift(), ((sb * sa) >> _alpha.depth()) + ((db * da) >> _alpha.depth())); + break; + + case blend::sourceAlpha: + a = max(sa, da); + r = dr + (((sr - dr) * sa) >> _alpha.depth()); + g = dg + (((sg - dg) * sa) >> _alpha.depth()); + b = db + (((sb - db) * sa) >> _alpha.depth()); + break; + + case blend::sourceColor: + a = sa; + r = sr; + g = sg; + b = sb; + break; + + case blend::targetAlpha: + a = max(sa, da); + r = sr + (((dr - sr) * da) >> _alpha.depth()); + g = sg + (((dg - sg) * da) >> _alpha.depth()); + b = sb + (((db - sb) * da) >> _alpha.depth()); + break; + + case blend::targetColor: + a = da; + r = dr; + g = dg; + b = db; + break; + } + + write(dp, (a << _alpha.shift()) | (r << _red.shift()) | (g << _green.shift()) | (b << _blue.shift())); + sp += source.stride(); + dp += stride(); + } + } +} + +} diff --git a/waterbox/ares64/ares/nall/image/core.hpp b/waterbox/ares64/ares/nall/image/core.hpp new file mode 100644 index 0000000000..34e7b25bd8 --- /dev/null +++ b/waterbox/ares64/ares/nall/image/core.hpp @@ -0,0 +1,173 @@ +#pragma once + +namespace nall { + +inline image::image(const image& source) { + operator=(source); +} + +inline image::image(image&& source) { + operator=(forward(source)); +} + +inline image::image(bool endian, u32 depth, u64 alphaMask, u64 redMask, u64 greenMask, u64 blueMask) { + _endian = endian; + _depth = depth; + + _alpha = {alphaMask, bitDepth(alphaMask), bitShift(alphaMask)}; + _red = {redMask, bitDepth(redMask), bitShift(redMask )}; + _green = {greenMask, 
bitDepth(greenMask), bitShift(greenMask)}; + _blue = {blueMask, bitDepth(blueMask), bitShift(blueMask )}; +} + +inline image::image(const string& filename) { + load(filename); +} + +inline image::image(const void* data_, u32 size) { + auto data = (const u8*)data_; + if(size < 4); + else if(data[0] == 'B' && data[1] == 'M') loadBMP(data, size); + else if(data[1] == 'P' && data[2] == 'N' && data[3] == 'G') loadPNG(data, size); +} + +inline image::image(const vector& buffer) : image(buffer.data(), buffer.size()) { +} + +template inline image::image(const u8 (&Name)[Size]) : image(Name, Size) { +} + +inline image::image() { +} + +inline image::~image() { + free(); +} + +inline auto image::operator=(const image& source) -> image& { + if(this == &source) return *this; + free(); + + _width = source._width; + _height = source._height; + + _endian = source._endian; + _depth = source._depth; + + _alpha = source._alpha; + _red = source._red; + _green = source._green; + _blue = source._blue; + + _data = allocate(_width, _height, stride()); + memory::copy(_data, source._data, source.size()); + return *this; +} + +inline auto image::operator=(image&& source) -> image& { + if(this == &source) return *this; + free(); + + _width = source._width; + _height = source._height; + + _endian = source._endian; + _depth = source._depth; + + _alpha = source._alpha; + _red = source._red; + _green = source._green; + _blue = source._blue; + + _data = source._data; + source._data = nullptr; + return *this; +} + +inline image::operator bool() const { + return _data && _width && _height; +} + +inline auto image::operator==(const image& source) const -> bool { + if(_width != source._width) return false; + if(_height != source._height) return false; + + if(_endian != source._endian) return false; + if(_depth != source._depth) return false; + + if(_alpha != source._alpha) return false; + if(_red != source._red) return false; + if(_green != source._green) return false; + if(_blue != source._blue) 
return false; + + return memory::compare(_data, source._data, size()) == 0; +} + +inline auto image::operator!=(const image& source) const -> bool { + return !operator==(source); +} + +inline auto image::read(const u8* data) const -> u64 { + u64 result = 0; + if(_endian == 0) { + for(s32 n = stride() - 1; n >= 0; n--) result = (result << 8) | data[n]; + } else { + for(s32 n = 0; n < stride(); n++) result = (result << 8) | data[n]; + } + return result; +} + +inline auto image::write(u8* data, u64 value) const -> void { + if(_endian == 0) { + for(s32 n = 0; n < stride(); n++) { + data[n] = value; + value >>= 8; + } + } else { + for(s32 n = stride() - 1; n >= 0; n--) { + data[n] = value; + value >>= 8; + } + } +} + +inline auto image::free() -> void { + if(_data) delete[] _data; + _data = nullptr; +} + +inline auto image::load(const string& filename) -> bool { + if(loadBMP(filename) == true) return true; + if(loadPNG(filename) == true) return true; + return false; +} + +//assumes image and data are in the same format; pitch is adapted to image +inline auto image::copy(const void* data, u32 pitch, u32 width, u32 height) -> void { + allocate(width, height); + for(u32 y : range(height)) { + auto input = (const u8*)data + y * pitch; + auto output = (u8*)_data + y * this->pitch(); + memory::copy(output, input, width * stride()); + } +} + +inline auto image::allocate(u32 width, u32 height) -> void { + if(_data && _width == width && _height == height) return; + free(); + _width = width; + _height = height; + _data = allocate(_width, _height, stride()); +} + +//private +inline auto image::allocate(u32 width, u32 height, u32 stride) -> u8* { + //allocate 1x1 larger than requested; so that linear interpolation does not require bounds-checking + u32 size = width * height * stride; + u32 padding = width * stride + stride; + auto data = new u8[size + padding]; + memory::fill(data + size, padding); + return data; +} + +} diff --git a/waterbox/ares64/ares/nall/image/fill.hpp 
b/waterbox/ares64/ares/nall/image/fill.hpp new file mode 100644 index 0000000000..ee3b6aeea6 --- /dev/null +++ b/waterbox/ares64/ares/nall/image/fill.hpp @@ -0,0 +1,84 @@ +#pragma once + +namespace nall { + +inline auto image::fill(u64 color) -> void { + for(u32 y = 0; y < _height; y++) { + u8* dp = _data + pitch() * y; + for(u32 x = 0; x < _width; x++) { + write(dp, color); + dp += stride(); + } + } +} + +inline auto image::gradient(u64 a, u64 b, u64 c, u64 d) -> void { + for(u32 y = 0; y < _height; y++) { + u8* dp = _data + pitch() * y; + f64 muY = (f64)y / (f64)_height; + for(u32 x = 0; x < _width; x++) { + f64 muX = (f64)x / (f64)_width; + write(dp, interpolate4f(a, b, c, d, muX, muY)); + dp += stride(); + } + } +} + +inline auto image::gradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY, function callback) -> void { + for(s32 y = 0; y < _height; y++) { + u8* dp = _data + pitch() * y; + f64 py = max(-radiusY, min(+radiusY, y - centerY)) * 1.0 / radiusY; + for(s32 x = 0; x < _width; x++) { + f64 px = max(-radiusX, min(+radiusX, x - centerX)) * 1.0 / radiusX; + f64 mu = max(0.0, min(1.0, callback(px, py))); + if(mu != mu) mu = 1.0; //NaN + write(dp, interpolate4f(a, b, mu)); + dp += stride(); + } + } +} + +inline auto image::crossGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void { + return gradient(a, b, radiusX, radiusY, centerX, centerY, [](f64 x, f64 y) -> f64 { + x = fabs(x), y = fabs(y); + return min(x, y) * min(x, y); + }); +} + +inline auto image::diamondGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void { + return gradient(a, b, radiusX, radiusY, centerX, centerY, [](f64 x, f64 y) -> f64 { + return fabs(x) + fabs(y); + }); +} + +inline auto image::horizontalGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void { + return gradient(a, b, radiusX, radiusY, centerX, centerY, [](f64 x, f64 y) -> f64 { + return fabs(x); + }); +} + +inline auto 
image::radialGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void { + return gradient(a, b, radiusX, radiusY, centerX, centerY, [](f64 x, f64 y) -> f64 { + return sqrt(x * x + y * y); + }); +} + +inline auto image::sphericalGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void { + return gradient(a, b, radiusX, radiusY, centerX, centerY, [](f64 x, f64 y) -> f64 { + return x * x + y * y; + }); +} + +inline auto image::squareGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void { + return gradient(a, b, radiusX, radiusY, centerX, centerY, [](f64 x, f64 y) -> f64 { + return max(fabs(x), fabs(y)); + }); +} + +inline auto image::verticalGradient(u64 a, u64 b, s32 radiusX, s32 radiusY, s32 centerX, s32 centerY) -> void { + return gradient(a, b, radiusX, radiusY, centerX, centerY, [](f64 x, f64 y) -> f64 { + return fabs(y); + }); +} + +} diff --git a/waterbox/ares64/ares/nall/image/interpolation.hpp b/waterbox/ares64/ares/nall/image/interpolation.hpp new file mode 100644 index 0000000000..1512d4aa6e --- /dev/null +++ b/waterbox/ares64/ares/nall/image/interpolation.hpp @@ -0,0 +1,62 @@ +#pragma once + +namespace nall { + +inline auto image::isplit(u64 c[4], u64 color) -> void { + c[0] = (color & _alpha.mask()) >> _alpha.shift(); + c[1] = (color & _red.mask() ) >> _red.shift(); + c[2] = (color & _green.mask()) >> _green.shift(); + c[3] = (color & _blue.mask() ) >> _blue.shift(); +} + +inline auto image::imerge(const u64 c[4]) -> u64 { + return c[0] << _alpha.shift() | c[1] << _red.shift() | c[2] << _green.shift() | c[3] << _blue.shift(); +} + +inline auto image::interpolate1f(u64 a, u64 b, f64 x) -> u64 { + return a * (1.0 - x) + b * x; +} + +inline auto image::interpolate1f(u64 a, u64 b, u64 c, u64 d, f64 x, f64 y) -> u64 { + return a * (1.0 - x) * (1.0 - y) + b * x * (1.0 - y) + c * (1.0 - x) * y + d * x * y; +} + +inline auto image::interpolate1i(s64 a, s64 b, u32 x) -> u64 { + return 
a + (((b - a) * x) >> 32); //a + (b - a) * x +} + +inline auto image::interpolate1i(s64 a, s64 b, s64 c, s64 d, u32 x, u32 y) -> u64 { + a = a + (((b - a) * x) >> 32); //a + (b - a) * x + c = c + (((d - c) * x) >> 32); //c + (d - c) * x + return a + (((c - a) * y) >> 32); //a + (c - a) * y +} + +inline auto image::interpolate4f(u64 a, u64 b, f64 x) -> u64 { + u64 o[4], pa[4], pb[4]; + isplit(pa, a), isplit(pb, b); + for(u32 n = 0; n < 4; n++) o[n] = interpolate1f(pa[n], pb[n], x); + return imerge(o); +} + +inline auto image::interpolate4f(u64 a, u64 b, u64 c, u64 d, f64 x, f64 y) -> u64 { + u64 o[4], pa[4], pb[4], pc[4], pd[4]; + isplit(pa, a), isplit(pb, b), isplit(pc, c), isplit(pd, d); + for(u32 n = 0; n < 4; n++) o[n] = interpolate1f(pa[n], pb[n], pc[n], pd[n], x, y); + return imerge(o); +} + +inline auto image::interpolate4i(u64 a, u64 b, u32 x) -> u64 { + u64 o[4], pa[4], pb[4]; + isplit(pa, a), isplit(pb, b); + for(u32 n = 0; n < 4; n++) o[n] = interpolate1i(pa[n], pb[n], x); + return imerge(o); +} + +inline auto image::interpolate4i(u64 a, u64 b, u64 c, u64 d, u32 x, u32 y) -> u64 { + u64 o[4], pa[4], pb[4], pc[4], pd[4]; + isplit(pa, a), isplit(pb, b), isplit(pc, c), isplit(pd, d); + for(u32 n = 0; n < 4; n++) o[n] = interpolate1i(pa[n], pb[n], pc[n], pd[n], x, y); + return imerge(o); +} + +} diff --git a/waterbox/ares64/ares/nall/image/load.hpp b/waterbox/ares64/ares/nall/image/load.hpp new file mode 100644 index 0000000000..5947103b28 --- /dev/null +++ b/waterbox/ares64/ares/nall/image/load.hpp @@ -0,0 +1,99 @@ +#pragma once + +namespace nall { + +inline auto image::loadBMP(const string& filename) -> bool { + if(!file::exists(filename)) return false; + auto buffer = file::read(filename); + return loadBMP(buffer.data(), buffer.size()); +} + +inline auto image::loadBMP(const u8* bmpData, u32 bmpSize) -> bool { + Decode::BMP source; + if(!source.load(bmpData, bmpSize)) return false; + + allocate(source.width(), source.height()); + const u32* sp = 
source.data(); + u8* dp = _data; + + for(u32 y = 0; y < _height; y++) { + for(u32 x = 0; x < _width; x++) { + u32 color = *sp++; + u64 a = normalize((u8)(color >> 24), 8, _alpha.depth()); + u64 r = normalize((u8)(color >> 16), 8, _red.depth()); + u64 g = normalize((u8)(color >> 8), 8, _green.depth()); + u64 b = normalize((u8)(color >> 0), 8, _blue.depth()); + write(dp, (a << _alpha.shift()) | (r << _red.shift()) | (g << _green.shift()) | (b << _blue.shift())); + dp += stride(); + } + } + + return true; +} + +inline auto image::loadPNG(const string& filename) -> bool { + if(!file::exists(filename)) return false; + auto buffer = file::read(filename); + return loadPNG(buffer.data(), buffer.size()); +} + +inline auto image::loadPNG(const u8* pngData, u32 pngSize) -> bool { + Decode::PNG source; + if(!source.load(pngData, pngSize)) return false; + + allocate(source.info.width, source.info.height); + const u8* sp = source.data; + u8* dp = _data; + + auto decode = [&]() -> u64 { + u64 p = 0, r = 0, g = 0, b = 0, a = 0; + + switch(source.info.colorType) { + case 0: //L + r = g = b = source.readbits(sp); + a = (1 << source.info.bitDepth) - 1; + break; + case 2: //R,G,B + r = source.readbits(sp); + g = source.readbits(sp); + b = source.readbits(sp); + a = (1 << source.info.bitDepth) - 1; + break; + case 3: //P + p = source.readbits(sp); + r = source.info.palette[p][0]; + g = source.info.palette[p][1]; + b = source.info.palette[p][2]; + a = (1 << source.info.bitDepth) - 1; + break; + case 4: //L,A + r = g = b = source.readbits(sp); + a = source.readbits(sp); + break; + case 6: //R,G,B,A + r = source.readbits(sp); + g = source.readbits(sp); + b = source.readbits(sp); + a = source.readbits(sp); + break; + } + + a = normalize(a, source.info.bitDepth, _alpha.depth()); + r = normalize(r, source.info.bitDepth, _red.depth()); + g = normalize(g, source.info.bitDepth, _green.depth()); + b = normalize(b, source.info.bitDepth, _blue.depth()); + + return (a << _alpha.shift()) | (r << 
_red.shift()) | (g << _green.shift()) | (b << _blue.shift()); + }; + + for(u32 y = 0; y < _height; y++) { + for(u32 x = 0; x < _width; x++) { + write(dp, decode()); + dp += stride(); + } + } + + return true; +} + +} diff --git a/waterbox/ares64/ares/nall/image/multifactor.hpp b/waterbox/ares64/ares/nall/image/multifactor.hpp new file mode 100644 index 0000000000..f9756bff0c --- /dev/null +++ b/waterbox/ares64/ares/nall/image/multifactor.hpp @@ -0,0 +1,59 @@ +#pragma once + +namespace nall { + +inline multiFactorImage::multiFactorImage(const multiFactorImage& source) { + (*this) = source; +} + +inline multiFactorImage::multiFactorImage(multiFactorImage&& source) { + operator=(forward(source)); +} + +inline multiFactorImage::multiFactorImage(const image& lowDPI, const image& highDPI) { + (*(image*)this) = lowDPI; + _highDPI = highDPI; +} + +inline multiFactorImage::multiFactorImage(const image& source) { + (*(image*)this) = source; +} + +inline multiFactorImage::multiFactorImage(image&& source) { + operator=(forward(source)); +} + +inline multiFactorImage::multiFactorImage() { +} + +inline multiFactorImage::~multiFactorImage() { +} + +inline auto multiFactorImage::operator=(const multiFactorImage& source) -> multiFactorImage& { + if(this == &source) return *this; + + (*(image*)this) = source; + _highDPI = source._highDPI; + + return *this; +} + +inline auto multiFactorImage::operator=(multiFactorImage&& source) -> multiFactorImage& { + if(this == &source) return *this; + + (*(image*)this) = source; + _highDPI = source._highDPI; + + return *this; +} + +inline auto multiFactorImage::operator==(const multiFactorImage& source) const -> bool { + if((const image&)*this != (const image&)source) return false; + return _highDPI != source._highDPI; +} + +inline auto multiFactorImage::operator!=(const multiFactorImage& source) const -> bool { + return !operator==(source); +} + +} diff --git a/waterbox/ares64/ares/nall/image/scale.hpp b/waterbox/ares64/ares/nall/image/scale.hpp 
new file mode 100644 index 0000000000..66bb4a7d4d --- /dev/null +++ b/waterbox/ares64/ares/nall/image/scale.hpp @@ -0,0 +1,177 @@ +#pragma once + +namespace nall { + +inline auto image::scale(u32 outputWidth, u32 outputHeight, bool linear) -> void { + if(!_data) return; + if(_width == outputWidth && _height == outputHeight) return; //no scaling necessary + if(linear == false) return scaleNearest(outputWidth, outputHeight); + + if(_width == outputWidth ) return scaleLinearHeight(outputHeight); + if(_height == outputHeight) return scaleLinearWidth(outputWidth); + + //find fastest scaling method, based on number of interpolation operations required + //magnification usually benefits from two-pass linear interpolation + //minification usually benefits from one-pass bilinear interpolation + u32 d1wh = ((_width * outputWidth ) + (outputWidth * outputHeight)) * 1; + u32 d1hw = ((_height * outputHeight) + (outputWidth * outputHeight)) * 1; + u32 d2wh = (outputWidth * outputHeight) * 3; + + if(d1wh <= d1hw && d1wh <= d2wh) return scaleLinearWidth(outputWidth), scaleLinearHeight(outputHeight); + if(d1hw <= d2wh) return scaleLinearHeight(outputHeight), scaleLinearWidth(outputWidth); + return scaleLinear(outputWidth, outputHeight); +} + +inline auto image::scaleLinearWidth(u32 outputWidth) -> void { + u8* outputData = allocate(outputWidth, _height, stride()); + u32 outputPitch = outputWidth * stride(); + u64 xstride = ((u64)(_width - 1) << 32) / max(1u, outputWidth - 1); + + for(u32 y = 0; y < _height; y++) { + u64 xfraction = 0; + + const u8* sp = _data + pitch() * y; + u8* dp = outputData + outputPitch * y; + + u64 a = read(sp); + u64 b = read(sp + stride()); + sp += stride(); + + u32 x = 0; + while(true) { + while(xfraction < 0x100000000 && x++ < outputWidth) { + write(dp, interpolate4i(a, b, xfraction)); + dp += stride(); + xfraction += xstride; + } + if(x >= outputWidth) break; + + sp += stride(); + a = b; + b = read(sp); + xfraction -= 0x100000000; + } + } + + free(); + 
_data = outputData; + _width = outputWidth; +} + +inline auto image::scaleLinearHeight(u32 outputHeight) -> void { + u8* outputData = allocate(_width, outputHeight, stride()); + u64 ystride = ((u64)(_height - 1) << 32) / max(1u, outputHeight - 1); + + for(u32 x = 0; x < _width; x++) { + u64 yfraction = 0; + + const u8* sp = _data + stride() * x; + u8* dp = outputData + stride() * x; + + u64 a = read(sp); + u64 b = read(sp + pitch()); + sp += pitch(); + + u32 y = 0; + while(true) { + while(yfraction < 0x100000000 && y++ < outputHeight) { + write(dp, interpolate4i(a, b, yfraction)); + dp += pitch(); + yfraction += ystride; + } + if(y >= outputHeight) break; + + sp += pitch(); + a = b; + b = read(sp); + yfraction -= 0x100000000; + } + } + + free(); + _data = outputData; + _height = outputHeight; +} + +inline auto image::scaleLinear(u32 outputWidth, u32 outputHeight) -> void { + u8* outputData = allocate(outputWidth, outputHeight, stride()); + u32 outputPitch = outputWidth * stride(); + + u64 xstride = ((u64)(_width - 1) << 32) / max(1u, outputWidth - 1); + u64 ystride = ((u64)(_height - 1) << 32) / max(1u, outputHeight - 1); + + for(u32 y = 0; y < outputHeight; y++) { + u64 yfraction = ystride * y; + u64 xfraction = 0; + + const u8* sp = _data + pitch() * (yfraction >> 32); + u8* dp = outputData + outputPitch * y; + + u64 a = read(sp); + u64 b = read(sp + stride()); + u64 c = read(sp + pitch()); + u64 d = read(sp + pitch() + stride()); + sp += stride(); + + u32 x = 0; + while(true) { + while(xfraction < 0x100000000 && x++ < outputWidth) { + write(dp, interpolate4i(a, b, c, d, xfraction, yfraction)); + dp += stride(); + xfraction += xstride; + } + if(x >= outputWidth) break; + + sp += stride(); + a = b; + c = d; + b = read(sp); + d = read(sp + pitch()); + xfraction -= 0x100000000; + } + } + + free(); + _data = outputData; + _width = outputWidth; + _height = outputHeight; +} + +inline auto image::scaleNearest(u32 outputWidth, u32 outputHeight) -> void { + u8* outputData 
= allocate(outputWidth, outputHeight, stride()); + u32 outputPitch = outputWidth * stride(); + + u64 xstride = ((u64)_width << 32) / outputWidth; + u64 ystride = ((u64)_height << 32) / outputHeight; + + for(u32 y = 0; y < outputHeight; y++) { + u64 yfraction = ystride * y; + u64 xfraction = 0; + + const u8* sp = _data + pitch() * (yfraction >> 32); + u8* dp = outputData + outputPitch * y; + + u64 a = read(sp); + + u32 x = 0; + while(true) { + while(xfraction < 0x100000000 && x++ < outputWidth) { + write(dp, a); + dp += stride(); + xfraction += xstride; + } + if(x >= outputWidth) break; + + sp += stride(); + a = read(sp); + xfraction -= 0x100000000; + } + } + + free(); + _data = outputData; + _width = outputWidth; + _height = outputHeight; +} + +} diff --git a/waterbox/ares64/ares/nall/image/static.hpp b/waterbox/ares64/ares/nall/image/static.hpp new file mode 100644 index 0000000000..7108aced5f --- /dev/null +++ b/waterbox/ares64/ares/nall/image/static.hpp @@ -0,0 +1,28 @@ +#pragma once + +namespace nall { + +inline auto image::bitDepth(u64 color) -> u32 { + u32 depth = 0; + if(color) while((color & 1) == 0) color >>= 1; + while((color & 1) == 1) { color >>= 1; depth++; } + return depth; +} + +inline auto image::bitShift(u64 color) -> u32 { + u32 shift = 0; + if(color) while((color & 1) == 0) { color >>= 1; shift++; } + return shift; +} + +inline auto image::normalize(u64 color, u32 sourceDepth, u32 targetDepth) -> u64 { + if(sourceDepth == 0 || targetDepth == 0) return 0; + while(sourceDepth < targetDepth) { + color = (color << sourceDepth) | color; + sourceDepth += sourceDepth; + } + if(targetDepth < sourceDepth) color >>= (sourceDepth - targetDepth); + return color; +} + +} diff --git a/waterbox/ares64/ares/nall/image/utility.hpp b/waterbox/ares64/ares/nall/image/utility.hpp new file mode 100644 index 0000000000..5bda69e5e1 --- /dev/null +++ b/waterbox/ares64/ares/nall/image/utility.hpp @@ -0,0 +1,179 @@ +#pragma once + +namespace nall { + +//scan all four sides 
of the image for fully transparent pixels, and then crop them +//imagine an icon centered on a transparent background: this function removes the bordering +//this certainly won't win any speed awards, but nall::image is meant to be correct and simple, not fast +inline auto image::shrink(u64 transparentColor) -> void { + //top + { u32 padding = 0; + for(u32 y : range(_height)) { + const u8* sp = _data + pitch() * y; + bool found = false; + for(u32 x : range(_width)) { + if(read(sp) != transparentColor) { found = true; break; } + sp += stride(); + } + if(found) break; + padding++; + } + crop(0, padding, _width, _height - padding); + } + + //bottom + { u32 padding = 0; + for(u32 y : reverse(range(_height))) { + const u8* sp = _data + pitch() * y; + bool found = false; + for(u32 x : range(_width)) { + if(read(sp) != transparentColor) { found = true; break; } + sp += stride(); + } + if(found) break; + padding++; + } + crop(0, 0, _width, _height - padding); + } + + //left + { u32 padding = 0; + for(u32 x : range(_width)) { + const u8* sp = _data + stride() * x; + bool found = false; + for(u32 y : range(_height)) { + if(read(sp) != transparentColor) { found = true; break; } + sp += pitch(); + } + if(found) break; + padding++; + } + crop(padding, 0, _width - padding, _height); + } + + //right + { u32 padding = 0; + for(u32 x : reverse(range(_width))) { + const u8* sp = _data + stride() * x; + bool found = false; + for(u32 y : range(_height)) { + if(read(sp) != transparentColor) { found = true; break; } + sp += pitch(); + } + if(found) break; + padding++; + } + crop(0, 0, _width - padding, _height); + } +} + +inline auto image::crop(u32 outputX, u32 outputY, u32 outputWidth, u32 outputHeight) -> bool { + if(outputX + outputWidth > _width) return false; + if(outputY + outputHeight > _height) return false; + + u8* outputData = allocate(outputWidth, outputHeight, stride()); + u32 outputPitch = outputWidth * stride(); + + for(u32 y = 0; y < outputHeight; y++) { + const u8* sp = 
_data + pitch() * (outputY + y) + stride() * outputX; + u8* dp = outputData + outputPitch * y; + for(u32 x = 0; x < outputWidth; x++) { + write(dp, read(sp)); + sp += stride(); + dp += stride(); + } + } + + delete[] _data; + _data = outputData; + _width = outputWidth; + _height = outputHeight; + return true; +} + +inline auto image::alphaBlend(u64 alphaColor) -> void { + u64 alphaR = (alphaColor & _red.mask() ) >> _red.shift(); + u64 alphaG = (alphaColor & _green.mask()) >> _green.shift(); + u64 alphaB = (alphaColor & _blue.mask() ) >> _blue.shift(); + + for(u32 y = 0; y < _height; y++) { + u8* dp = _data + pitch() * y; + for(u32 x = 0; x < _width; x++) { + u64 color = read(dp); + + u64 colorA = (color & _alpha.mask()) >> _alpha.shift(); + u64 colorR = (color & _red.mask() ) >> _red.shift(); + u64 colorG = (color & _green.mask()) >> _green.shift(); + u64 colorB = (color & _blue.mask() ) >> _blue.shift(); + f64 alphaScale = (f64)colorA / (f64)((1 << _alpha.depth()) - 1); + + colorA = (1 << _alpha.depth()) - 1; + colorR = (colorR * alphaScale) + (alphaR * (1.0 - alphaScale)); + colorG = (colorG * alphaScale) + (alphaG * (1.0 - alphaScale)); + colorB = (colorB * alphaScale) + (alphaB * (1.0 - alphaScale)); + + write(dp, (colorA << _alpha.shift()) | (colorR << _red.shift()) | (colorG << _green.shift()) | (colorB << _blue.shift())); + dp += stride(); + } + } +} + +inline auto image::alphaMultiply() -> void { + u32 divisor = (1 << _alpha.depth()) - 1; + + for(u32 y = 0; y < _height; y++) { + u8* dp = _data + pitch() * y; + for(u32 x = 0; x < _width; x++) { + u64 color = read(dp); + + u64 colorA = (color & _alpha.mask()) >> _alpha.shift(); + u64 colorR = (color & _red.mask() ) >> _red.shift(); + u64 colorG = (color & _green.mask()) >> _green.shift(); + u64 colorB = (color & _blue.mask() ) >> _blue.shift(); + + colorR = (colorR * colorA) / divisor; + colorG = (colorG * colorA) / divisor; + colorB = (colorB * colorA) / divisor; + + write(dp, (colorA << _alpha.shift()) | 
(colorR << _red.shift()) | (colorG << _green.shift()) | (colorB << _blue.shift())); + dp += stride(); + } + } +} + +inline auto image::transform(const image& source) -> void { + return transform(source._endian, source._depth, source._alpha.mask(), source._red.mask(), source._green.mask(), source._blue.mask()); +} + +inline auto image::transform(bool outputEndian, u32 outputDepth, u64 outputAlphaMask, u64 outputRedMask, u64 outputGreenMask, u64 outputBlueMask) -> void { + if(_endian == outputEndian && _depth == outputDepth && _alpha.mask() == outputAlphaMask && _red.mask() == outputRedMask && _green.mask() == outputGreenMask && _blue.mask() == outputBlueMask) return; + + image output(outputEndian, outputDepth, outputAlphaMask, outputRedMask, outputGreenMask, outputBlueMask); + output.allocate(_width, _height); + + for(u32 y = 0; y < _height; y++) { + const u8* sp = _data + pitch() * y; + u8* dp = output._data + output.pitch() * y; + for(u32 x = 0; x < _width; x++) { + u64 color = read(sp); + sp += stride(); + + u64 a = (color & _alpha.mask()) >> _alpha.shift(); + u64 r = (color & _red.mask() ) >> _red.shift(); + u64 g = (color & _green.mask()) >> _green.shift(); + u64 b = (color & _blue.mask() ) >> _blue.shift(); + + a = normalize(a, _alpha.depth(), output._alpha.depth()); + r = normalize(r, _red.depth(), output._red.depth()); + g = normalize(g, _green.depth(), output._green.depth()); + b = normalize(b, _blue.depth(), output._blue.depth()); + + output.write(dp, (a << output._alpha.shift()) | (r << output._red.shift()) | (g << output._green.shift()) | (b << output._blue.shift())); + dp += output.stride(); + } + } + + operator=(move(output)); +} + +} diff --git a/waterbox/ares64/ares/nall/induced-sort.hpp b/waterbox/ares64/ares/nall/induced-sort.hpp new file mode 100644 index 0000000000..0df42c3ef2 --- /dev/null +++ b/waterbox/ares64/ares/nall/induced-sort.hpp @@ -0,0 +1,176 @@ +#pragma once + +//suffix array construction via induced sorting +//many thanks to 
Screwtape for the thorough explanation of this algorithm +//this implementation would not be possible without his help + +namespace nall { + +//note that induced_sort will return an array of size+1 characters, +//where the first character is the empty suffix, equal to size + +template +inline auto induced_sort(array_view data, const u32 characters = 256) -> vector { + const u32 size = data.size(); + if(size == 0) return vector{0}; //required to avoid out-of-bounds accesses + if(size == 1) return vector{1, 0}; //not strictly necessary; but more performant + + vector types; //0 = S-suffix (sort before next suffix), 1 = L-suffix (sort after next suffix) + types.resize(size + 1); + + types[size - 0] = 0; //empty suffix is always S-suffix + types[size - 1] = 1; //last suffix is always L-suffix compared to empty suffix + for(u32 n : reverse(range(size - 1))) { + if(data[n] < data[n + 1]) { + types[n] = 0; //this suffix is smaller than the one after it + } else if(data[n] > data[n + 1]) { + types[n] = 1; //this suffix is larger than the one after it + } else { + types[n] = types[n + 1]; //this suffix will be the same as the one after it + } + } + + //left-most S-suffix + auto isLMS = [&](s32 n) -> bool { + if(n == 0) return 0; //no character to the left of the first suffix + return !types[n] && types[n - 1]; //true if this is the start of a new S-suffix + }; + + //test if two LMS-substrings are equal + auto isEqual = [&](s32 lhs, s32 rhs) -> bool { + if(lhs == size || rhs == size) return false; //no other suffix can be equal to the empty suffix + + for(u32 n = 0;; n++) { + bool lhsLMS = isLMS(lhs + n); + bool rhsLMS = isLMS(rhs + n); + if(n && lhsLMS && rhsLMS) return true; //substrings are identical + if(lhsLMS != rhsLMS) return false; //length mismatch: substrings cannot be identical + if(data[lhs + n] != data[rhs + n]) return false; //character mismatch: substrings are different + } + }; + + //determine the sizes of each bucket: one bucket per character + vector 
counts; + counts.resize(characters); + for(u32 n : range(size)) counts[data[n]]++; + + //bucket sorting start offsets + vector heads; + heads.resize(characters); + + u32 headOffset; + auto getHeads = [&] { + headOffset = 1; + for(u32 n : range(characters)) { + heads[n] = headOffset; + headOffset += counts[n]; + } + }; + + //bucket sorting end offsets + vector tails; + tails.resize(characters); + + u32 tailOffset; + auto getTails = [&] { + tailOffset = 1; + for(u32 n : range(characters)) { + tailOffset += counts[n]; + tails[n] = tailOffset - 1; + } + }; + + //inaccurate LMS bucket sort + vector suffixes; + suffixes.resize(size + 1, (s32)-1); + + getTails(); + for(u32 n : range(size)) { + if(!isLMS(n)) continue; //skip non-LMS-suffixes + suffixes[tails[data[n]]--] = n; //advance from the tail of the bucket + } + + suffixes[0] = size; //the empty suffix is always an LMS-suffix, and is the first suffix + + //sort all L-suffixes to the left of LMS-suffixes + auto sortL = [&] { + getHeads(); + for(u32 n : range(size + 1)) { + if(suffixes[n] == -1) continue; //offsets may not be known yet here ... 
+ auto l = suffixes[n] - 1; + if(l < 0 || !types[l]) continue; //skip S-suffixes + suffixes[heads[data[l]]++] = l; //advance from the head of the bucket + } + }; + + auto sortS = [&] { + getTails(); + for(u32 n : reverse(range(size + 1))) { + auto l = suffixes[n] - 1; + if(l < 0 || types[l]) continue; //skip L-suffixes + suffixes[tails[data[l]]--] = l; //advance from the tail of the bucket + } + }; + + sortL(); + sortS(); + + //analyze data for the summary suffix array + vector names; + names.resize(size + 1, (s32)-1); + + u32 currentName = 0; //keep a count to tag each unique LMS-substring with unique IDs + auto lastLMSOffset = suffixes[0]; //location in the original data of the last checked LMS suffix + names[lastLMSOffset] = currentName; //the first LMS-substring is always the empty suffix entry, at position 0 + + for(u32 n : range(1, size + 1)) { + auto offset = suffixes[n]; + if(!isLMS(offset)) continue; //only LMS suffixes are important + + //if this LMS suffix starts with a different LMS substring than the last suffix observed ... 
+ if(!isEqual(lastLMSOffset, offset)) currentName++; //then it gets a new name + lastLMSOffset = offset; //keep track of the new most-recent LMS suffix + names[lastLMSOffset] = currentName; //store the LMS suffix name where the suffix appears at in the original data + } + + vector summaryOffsets; + vector summaryData; + for(u32 n : range(size + 1)) { + if(names[n] == -1) continue; + summaryOffsets.append(n); + summaryData.append(names[n]); + } + u32 summaryCharacters = currentName + 1; //zero-indexed, so the total unique characters is currentName + 1 + + //make the summary suffix array + vector summaries; + if(summaryData.size() == summaryCharacters) { + //simple bucket sort when every character in summaryData appears only once + summaries.resize(summaryData.size() + 1, (s32)-1); + summaries[0] = summaryData.size(); //always include the empty suffix at the beginning + for(s32 x : range(summaryData.size())) { + s32 y = summaryData[x]; + summaries[y + 1] = x; + } + } else { + //recurse until every character in summaryData is unique ... 
+ summaries = induced_sort({summaryData.data(), summaryData.size()}, summaryCharacters); + } + + suffixes.fill(-1); //reuse existing buffer for accurate sort + + //accurate LMS sort + getTails(); + for(u32 n : reverse(range(2, summaries.size()))) { + auto index = summaryOffsets[summaries[n]]; + suffixes[tails[data[index]]--] = index; //advance from the tail of the bucket + } + suffixes[0] = size; //always include the empty suffix at the beginning + + sortL(); + sortS(); + + return suffixes; +} + +} diff --git a/waterbox/ares64/ares/nall/inline-if.hpp b/waterbox/ares64/ares/nall/inline-if.hpp new file mode 100644 index 0000000000..b7b7a86590 --- /dev/null +++ b/waterbox/ares64/ares/nall/inline-if.hpp @@ -0,0 +1,11 @@ +#pragma once +#warning "these defines break if statements with multiple parameters to templates" + +#define if1(statement) if(statement) +#define if2(condition, false) ([&](auto&& value) -> decltype(condition) { \ + return (bool)value ? value : (decltype(condition))false; \ +})(condition) +#define if3(condition, true, false) ((condition) ? (true) : (decltype(true))(false)) +#define if4(type, condition, true, false) ((condition) ? (type)(true) : (type)(false)) +#define if_(_1, _2, _3, _4, name, ...) name +#define if(...) 
if_(__VA_ARGS__, if4, if3, if2, if1)(__VA_ARGS__) diff --git a/waterbox/ares64/ares/nall/inode.hpp b/waterbox/ares64/ares/nall/inode.hpp new file mode 100644 index 0000000000..491fdd1612 --- /dev/null +++ b/waterbox/ares64/ares/nall/inode.hpp @@ -0,0 +1,163 @@ +#pragma once + +//generic abstraction layer for common storage operations against both files and directories +//these functions are not recursive; use directory::create() and directory::remove() for recursion + +#include +#include + +namespace nall { + +struct inode { + enum class time : u32 { create, modify, access }; + + inode() = delete; + inode(const inode&) = delete; + auto operator=(const inode&) -> inode& = delete; + + static auto exists(const string& name) -> bool { + return access(name, F_OK) == 0; + } + + static auto readable(const string& name) -> bool { + return access(name, R_OK) == 0; + } + + static auto writable(const string& name) -> bool { + return access(name, W_OK) == 0; + } + + static auto executable(const string& name) -> bool { + return access(name, X_OK) == 0; + } + + static auto hidden(const string& name) -> bool { + #if defined(PLATFORM_WINDOWS) + auto attributes = GetFileAttributes(utf16_t(name)); + return attributes & FILE_ATTRIBUTE_HIDDEN; + #else + //todo: is this really the best way to do this? stat doesn't have S_ISHIDDEN ... 
+ return name.split("/").last().beginsWith("."); + #endif + } + + static auto mode(const string& name) -> u32 { + struct stat data{}; + stat(name, &data); + return data.st_mode; + } + + static auto uid(const string& name) -> u32 { + struct stat data{}; + stat(name, &data); + return data.st_uid; + } + + static auto gid(const string& name) -> u32 { + struct stat data{}; + stat(name, &data); + return data.st_gid; + } + + static auto owner(const string& name) -> string { + #if !defined(PLATFORM_WINDOWS) + struct passwd* pw = getpwuid(uid(name)); + if(pw && pw->pw_name) return pw->pw_name; + #endif + return {}; + } + + static auto group(const string& name) -> string { + #if !defined(PLATFORM_WINDOWS) + struct group* gr = getgrgid(gid(name)); + if(gr && gr->gr_name) return gr->gr_name; + #endif + return {}; + } + + static auto timestamp(const string& name, time mode = time::modify) -> u64 { + struct stat data{}; + stat(name, &data); + switch(mode) { + #if defined(PLATFORM_WINDOWS) + //on Windows, the last status change time (ctime) holds the file creation time instead + case time::create: return data.st_ctime; + #elif defined(PLATFORM_BSD) || defined(PLATFORM_MACOS) + //st_birthtime may return -1 or st_atime if it is not supported by the file system + //the best that can be done in this case is to return st_mtime if it's older + case time::create: return min((u32)data.st_birthtime, (u32)data.st_mtime); + #else + //Linux simply doesn't support file creation time at all + //this is also our fallback case for unsupported operating systems + case time::create: return data.st_mtime; + #endif + case time::modify: return data.st_mtime; + //for performance reasons, last access time is usually not enabled on various filesystems + //ensure that the last access time is not older than the last modify time (eg for NTFS) + case time::access: return max((u32)data.st_atime, data.st_mtime); + } + return 0; + } + + static auto setMode(const string& name, u32 mode) -> bool { + #if 
!defined(PLATFORM_WINDOWS) + return chmod(name, mode) == 0; + #else + return _wchmod(utf16_t(name), (mode & 0400 ? _S_IREAD : 0) | (mode & 0200 ? _S_IWRITE : 0)) == 0; + #endif + } + + static auto setOwner(const string& name, const string& owner) -> bool { + #if !defined(PLATFORM_WINDOWS) + struct passwd* pwd = getpwnam(owner); + if(!pwd) return false; + return chown(name, pwd->pw_uid, inode::gid(name)) == 0; + #else + return true; + #endif + } + + static auto setGroup(const string& name, const string& group) -> bool { + #if !defined(PLATFORM_WINDOWS) + struct group* grp = getgrnam(group); + if(!grp) return false; + return chown(name, inode::uid(name), grp->gr_gid) == 0; + #else + return true; + #endif + } + + static auto setTimestamp(const string& name, u64 value, time mode = time::modify) -> bool { + struct utimbuf timeBuffer; + timeBuffer.modtime = mode == time::modify ? value : inode::timestamp(name, time::modify); + timeBuffer.actime = mode == time::access ? value : inode::timestamp(name, time::access); + return utime(name, &timeBuffer) == 0; + } + + //returns true if 'name' already exists + static auto create(const string& name, u32 permissions = 0755) -> bool { + if(exists(name)) return true; + if(name.endsWith("/")) return mkdir(name, permissions) == 0; + s32 fd = open(name, O_CREAT | O_EXCL, permissions); + if(fd < 0) return false; + return close(fd), true; + } + + //returns false if 'name' and 'targetname' are on different file systems (requires copy) + static auto rename(const string& name, const string& targetname) -> bool { + return ::rename(name, targetname) == 0; + } + + //returns false if 'name' is a directory that is not empty + static auto remove(const string& name) -> bool { + #if defined(PLATFORM_WINDOWS) + if(name.endsWith("/")) return _wrmdir(utf16_t(name)) == 0; + return _wunlink(utf16_t(name)) == 0; + #else + if(name.endsWith("/")) return rmdir(name) == 0; + return unlink(name) == 0; + #endif + } +}; + +} diff --git 
a/waterbox/ares64/ares/nall/instance.hpp b/waterbox/ares64/ares/nall/instance.hpp new file mode 100644 index 0000000000..a3cbcedc21 --- /dev/null +++ b/waterbox/ares64/ares/nall/instance.hpp @@ -0,0 +1,39 @@ +#pragma once + +namespace nall { + +template +struct Instance { + ~Instance() { + destruct(); + } + + auto operator()() -> T& { + return instance.object; + } + + template + auto construct(P&&... p) { + if(constructed) return; + constructed = true; + new((void*)(&instance.object)) T(forward

(p)...); + } + + auto destruct() -> void { + if(!constructed) return; + constructed = false; + instance.object.~T(); + } + +private: + bool constructed = false; + union Union { + Union() {} + ~Union() {} + + T object; + char storage[sizeof(T)]; + } instance; +}; + +} diff --git a/waterbox/ares64/ares/nall/interpolation.hpp b/waterbox/ares64/ares/nall/interpolation.hpp new file mode 100644 index 0000000000..dc753b2891 --- /dev/null +++ b/waterbox/ares64/ares/nall/interpolation.hpp @@ -0,0 +1,56 @@ +#pragma once + +namespace nall { + +struct Interpolation { + static inline auto Nearest(f64 mu, f64 a, f64 b, f64 c, f64 d) -> f64 { + return (mu <= 0.5 ? b : c); + } + + static inline auto Sublinear(f64 mu, f64 a, f64 b, f64 c, f64 d) -> f64 { + mu = ((mu - 0.5) * 2.0) + 0.5; + if(mu < 0) mu = 0; + if(mu > 1) mu = 1; + return b * (1.0 - mu) + c * mu; + } + + static inline auto Linear(f64 mu, f64 a, f64 b, f64 c, f64 d) -> f64 { + return b * (1.0 - mu) + c * mu; + } + + static inline auto Cosine(f64 mu, f64 a, f64 b, f64 c, f64 d) -> f64 { + mu = (1.0 - cos(mu * Math::Pi)) / 2.0; + return b * (1.0 - mu) + c * mu; + } + + static inline auto Cubic(f64 mu, f64 a, f64 b, f64 c, f64 d) -> f64 { + f64 A = d - c - a + b; + f64 B = a - b - A; + f64 C = c - a; + f64 D = b; + return A * (mu * mu * mu) + B * (mu * mu) + C * mu + D; + } + + static inline auto Hermite(f64 mu1, f64 a, f64 b, f64 c, f64 d) -> f64 { + const f64 tension = 0.0; //-1 = low, 0 = normal, +1 = high + const f64 bias = 0.0; //-1 = left, 0 = even, +1 = right + f64 mu2, mu3, m0, m1, a0, a1, a2, a3; + + mu2 = mu1 * mu1; + mu3 = mu2 * mu1; + + m0 = (b - a) * (1.0 + bias) * (1.0 - tension) / 2.0; + m0 += (c - b) * (1.0 - bias) * (1.0 - tension) / 2.0; + m1 = (c - b) * (1.0 + bias) * (1.0 - tension) / 2.0; + m1 += (d - c) * (1.0 - bias) * (1.0 - tension) / 2.0; + + a0 = +2 * mu3 - 3 * mu2 + 1; + a1 = mu3 - 2 * mu2 + mu1; + a2 = mu3 - mu2; + a3 = -2 * mu3 + 3 * mu2; + + return (a0 * b) + (a1 * m0) + (a2 * m1) + (a3 * 
c); + } +}; + +} diff --git a/waterbox/ares64/ares/nall/intrinsics.hpp b/waterbox/ares64/ares/nall/intrinsics.hpp new file mode 100644 index 0000000000..755a349560 --- /dev/null +++ b/waterbox/ares64/ares/nall/intrinsics.hpp @@ -0,0 +1,316 @@ +#pragma once + +#if defined(__APPLE__) + #include +#elif defined(linux) || defined(__linux__) + #include +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) + #include +#endif + +namespace nall { + +/* Compiler detection */ + +#if defined(__clang__) + #define COMPILER_CLANG + struct Compiler { + static constexpr bool Clang = 1; + static constexpr bool GCC = 0; + static constexpr bool Microsoft = 0; + }; + #pragma clang diagnostic warning "-Wreturn-type" + #pragma clang diagnostic ignored "-Wunused-result" + #pragma clang diagnostic ignored "-Wunknown-pragmas" + #pragma clang diagnostic ignored "-Wempty-body" + #pragma clang diagnostic ignored "-Wparentheses" + #pragma clang diagnostic ignored "-Wswitch" + #pragma clang diagnostic ignored "-Wswitch-bool" + #pragma clang diagnostic ignored "-Wabsolute-value" + #pragma clang diagnostic ignored "-Wtrigraphs" + #pragma clang diagnostic ignored "-Wnarrowing" + #pragma clang diagnostic ignored "-Wattributes" +#elif defined(__GNUC__) + #define COMPILER_GCC + struct Compiler { + static constexpr bool Clang = 0; + static constexpr bool GCC = 1; + static constexpr bool Microsoft = 0; + }; + #pragma GCC diagnostic warning "-Wreturn-type" + #pragma GCC diagnostic ignored "-Wunused-result" + #pragma GCC diagnostic ignored "-Wunknown-pragmas" + #pragma GCC diagnostic ignored "-Wpragmas" + #pragma GCC diagnostic ignored "-Wswitch-bool" + #pragma GCC diagnostic ignored "-Wtrigraphs" + #pragma GCC diagnostic ignored "-Wnarrowing" + #pragma GCC diagnostic ignored "-Wattributes" + #pragma GCC diagnostic ignored "-Wstringop-overflow" //GCC 10.2 warning heuristic is buggy +#elif defined(_MSC_VER) + #define COMPILER_MICROSOFT + struct Compiler { 
+ static constexpr bool Clang = 0; + static constexpr bool GCC = 0; + static constexpr bool Microsoft = 1; + }; + #pragma warning(disable:4996) //libc "deprecation" warnings +#else + #error "unable to detect compiler" +#endif + +/* Platform detection */ + +#if defined(_WIN32) + #define PLATFORM_WINDOWS + struct Platform { + static constexpr bool Windows = 1; + static constexpr bool MacOS = 0; + static constexpr bool Android = 0; + static constexpr bool Linux = 0; + static constexpr bool BSD = 0; + }; +#elif defined(__APPLE__) + #define PLATFORM_MACOS + struct Platform { + static constexpr bool Windows = 0; + static constexpr bool MacOS = 1; + static constexpr bool Android = 0; + static constexpr bool Linux = 0; + static constexpr bool BSD = 0; + }; +#elif defined(__ANDROID__) + #define PLATFORM_ANDROID + struct Platform { + static constexpr bool Windows = 0; + static constexpr bool MacOS = 0; + static constexpr bool Android = 1; + static constexpr bool Linux = 0; + static constexpr bool BSD = 0; + }; +#elif defined(linux) || defined(__linux__) + #define PLATFORM_LINUX + struct Platform { + static constexpr bool Windows = 0; + static constexpr bool MacOS = 0; + static constexpr bool Android = 0; + static constexpr bool Linux = 1; + static constexpr bool BSD = 0; + }; +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) + #define PLATFORM_BSD + struct Platform { + static constexpr bool Windows = 0; + static constexpr bool MacOS = 0; + static constexpr bool Android = 0; + static constexpr bool Linux = 0; + static constexpr bool BSD = 1; + }; +#else + #error "unable to detect platform" +#endif + +/* ABI detection */ + +#if defined(_WIN32) + #define ABI_WINDOWS + struct ABI { + static constexpr bool Windows = 1; + static constexpr bool SystemV = 0; + }; +#else + #define ABI_SYSTEMV + struct ABI { + static constexpr bool Windows = 0; + static constexpr bool SystemV = 1; + }; +#endif + +/* API detection */ + +#if 
defined(_WIN32) + #define API_WINDOWS + struct API { + static constexpr bool Windows = 1; + static constexpr bool Posix = 0; + }; +#else + #define API_POSIX + struct API { + static constexpr bool Windows = 0; + static constexpr bool Posix = 1; + }; +#endif + +/* Display server detection */ + +#if defined(_WIN32) + #define DISPLAY_WINDOWS + struct DisplayServer { + static constexpr bool Windows = 1; + static constexpr bool Quartz = 0; + static constexpr bool Xorg = 0; + }; +#elif defined(__APPLE__) + #define DISPLAY_QUARTZ + struct DisplayServer { + static constexpr bool Windows = 0; + static constexpr bool Quartz = 1; + static constexpr bool Xorg = 0; + }; +#else + #define DISPLAY_XORG + struct DisplayServer { + static constexpr bool Windows = 0; + static constexpr bool Quartz = 0; + static constexpr bool Xorg = 1; + }; +#endif + +/* Architecture detection */ + +#if defined(__i386__) || defined(_M_IX86) + #define ARCHITECTURE_X86 + struct Architecture { + static constexpr bool x86 = 1; + static constexpr bool amd64 = 0; + static constexpr bool arm64 = 0; + static constexpr bool arm32 = 0; + static constexpr bool ppc64 = 0; + static constexpr bool ppc32 = 0; + }; +#elif defined(__amd64__) || defined(_M_AMD64) + #define ARCHITECTURE_AMD64 + struct Architecture { + static constexpr bool x86 = 0; + static constexpr bool amd64 = 1; + static constexpr bool arm64 = 0; + static constexpr bool arm32 = 0; + static constexpr bool ppc64 = 0; + static constexpr bool ppc32 = 0; + }; +#elif defined(__aarch64__) + #define ARCHITECTURE_ARM64 + struct Architecture { + static constexpr bool x86 = 0; + static constexpr bool amd64 = 0; + static constexpr bool arm64 = 1; + static constexpr bool arm32 = 0; + static constexpr bool ppc64 = 0; + static constexpr bool ppc32 = 0; + }; +#elif defined(__arm__) + #define ARCHITECTURE_ARM32 + struct Architecture { + static constexpr bool x86 = 0; + static constexpr bool amd64 = 0; + static constexpr bool arm64 = 0; + static constexpr bool arm32 = 
1; + static constexpr bool ppc64 = 0; + static constexpr bool ppc32 = 0; + }; +#elif defined(__ppc64__) || defined(_ARCH_PPC64) + #define ARCHITECTURE_PPC64 + struct Architecture { + static constexpr bool x86 = 0; + static constexpr bool amd64 = 0; + static constexpr bool arm64 = 0; + static constexpr bool arm32 = 0; + static constexpr bool ppc64 = 1; + static constexpr bool ppc32 = 0; + }; +#elif defined(__ppc__) || defined(_ARCH_PPC) || defined(_M_PPC) + #define ARCHITECTURE_PPC32 + struct Architecture { + static constexpr bool x86 = 0; + static constexpr bool amd64 = 0; + static constexpr bool arm64 = 0; + static constexpr bool arm32 = 0; + static constexpr bool ppc64 = 0; + static constexpr bool ppc32 = 1; + }; +#else + #error "unable to detect architecture" +#endif + +/* Endian detection */ + +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) || defined(__i386__) || defined(__amd64__) || defined(_M_IX86) || defined(_M_AMD64) + #define ENDIAN_LITTLE + struct Endian { + static constexpr bool Little = 1; + static constexpr bool Big = 0; + }; +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) || defined(__BIG_ENDIAN__) || defined(__powerpc__) || defined(_M_PPC) + #define ENDIAN_BIG + struct Endian { + static constexpr bool Little = 0; + static constexpr bool Big = 1; + }; +#else + #error "unable to detect endian" +#endif + +/* Build optimization level detection */ + +#undef DEBUG +#undef NDEBUG + +#if defined(BUILD_DEBUG) + #define DEBUG + struct Build { + static constexpr bool Debug = 1; + static constexpr bool Stable = 0; + static constexpr bool Minified = 0; + static constexpr bool Release = 0; + static constexpr bool Optimized = 0; + }; +#elif defined(BUILD_STABLE) + #define DEBUG + struct Build { + static constexpr bool Debug = 0; + static constexpr bool Stable = 1; + static constexpr bool Minified = 0; + static constexpr bool Release = 0; + static 
constexpr bool Optimized = 0; + }; +#elif defined(BUILD_MINIFIED) + #define NDEBUG + struct Build { + static constexpr bool Debug = 0; + static constexpr bool Stable = 0; + static constexpr bool Minified = 1; + static constexpr bool Release = 0; + static constexpr bool Optimized = 0; + }; +#elif defined(BUILD_RELEASE) + #define NDEBUG + struct Build { + static constexpr bool Debug = 0; + static constexpr bool Stable = 0; + static constexpr bool Minified = 0; + static constexpr bool Release = 1; + static constexpr bool Optimized = 0; + }; +#elif defined(BUILD_OPTIMIZED) + #define NDEBUG + struct Build { + static constexpr bool Debug = 0; + static constexpr bool Stable = 0; + static constexpr bool Minified = 0; + static constexpr bool Release = 0; + static constexpr bool Optimized = 1; + }; +#else + //default to debug mode + #define BUILD_DEBUG + #define DEBUG + struct Build { + static constexpr bool Debug = 1; + static constexpr bool Stable = 0; + static constexpr bool Minified = 0; + static constexpr bool Release = 0; + static constexpr bool Optimized = 0; + }; +#endif + +} diff --git a/waterbox/ares64/ares/nall/iterator.hpp b/waterbox/ares64/ares/nall/iterator.hpp new file mode 100644 index 0000000000..3e45917694 --- /dev/null +++ b/waterbox/ares64/ares/nall/iterator.hpp @@ -0,0 +1,79 @@ +#pragma once + +namespace nall { + +template struct iterator { + iterator(T* self, u64 offset) : _self(self), _offset(offset) {} + auto operator*() -> T& { return _self[_offset]; } + auto operator!=(const iterator& source) const -> bool { return _offset != source._offset; } + auto operator++() -> iterator& { return _offset++, *this; } + auto offset() const -> u64 { return _offset; } + +private: + T* _self; + u64 _offset; +}; + +template struct iterator_const { + iterator_const(const T* self, u64 offset) : _self(self), _offset(offset) {} + auto operator*() -> const T& { return _self[_offset]; } + auto operator!=(const iterator_const& source) const -> bool { return _offset != 
source._offset; } + auto operator++() -> iterator_const& { return _offset++, *this; } + auto offset() const -> u64 { return _offset; } + +private: + const T* _self; + u64 _offset; +}; + +template struct reverse_iterator { + reverse_iterator(T* self, u64 offset) : _self(self), _offset(offset) {} + auto operator*() -> T& { return _self[_offset]; } + auto operator!=(const reverse_iterator& source) const -> bool { return _offset != source._offset; } + auto operator++() -> reverse_iterator& { return _offset--, *this; } + auto offset() const -> u64 { return _offset; } + +private: + T* _self; + u64 _offset; +}; + +template struct reverse_iterator_const { + reverse_iterator_const(const T* self, u64 offset) : _self(self), _offset(offset) {} + auto operator*() -> const T& { return _self[_offset]; } + auto operator!=(const reverse_iterator_const& source) const -> bool { return _offset != source._offset; } + auto operator++() -> reverse_iterator_const& { return _offset--, *this; } + auto offset() const -> u64 { return _offset; } + +private: + const T* _self; + u64 _offset; +}; + +//std::rbegin(), std::rend() is missing from GCC 4.9; which I still target + +template auto rbegin(T (&array)[Size]) { return reverse_iterator{array, Size - 1}; } +template auto rend(T (&array)[Size]) { return reverse_iterator{array, (u64)-1}; } + +template auto rbegin(T& self) { return self.rbegin(); } +template auto rend(T& self) { return self.rend(); } + +template struct reverse_wrapper { + auto begin() { return rbegin(_self); } + auto end() { return rend(_self); } + + auto begin() const { return rbegin(_self); } + auto end() const { return rend(_self); } + + T _self; +}; + +template auto reverse(T& object) -> reverse_wrapper { + return {object}; +} + +template auto reverse(T&& object) -> reverse_wrapper { + return {object}; +} + +} diff --git a/waterbox/ares64/ares/nall/literals.hpp b/waterbox/ares64/ares/nall/literals.hpp new file mode 100644 index 0000000000..304823be82 --- /dev/null +++ 
b/waterbox/ares64/ares/nall/literals.hpp @@ -0,0 +1,20 @@ +#pragma once + +namespace nall { + +inline constexpr auto operator"" _Kibit(unsigned long long value) { return value * 1024 / 8; } +inline constexpr auto operator"" _Mibit(unsigned long long value) { return value * 1024 * 1024 / 8; } +inline constexpr auto operator"" _Gibit(unsigned long long value) { return value * 1024 * 1024 * 1024 / 8; } +inline constexpr auto operator"" _Tibit(unsigned long long value) { return value * 1024 * 1024 * 1024 * 1024 / 8; } + +inline constexpr auto operator"" _KiB(unsigned long long value) { return value * 1024; } +inline constexpr auto operator"" _MiB(unsigned long long value) { return value * 1024 * 1024; } +inline constexpr auto operator"" _GiB(unsigned long long value) { return value * 1024 * 1024 * 1024; } +inline constexpr auto operator"" _TiB(unsigned long long value) { return value * 1024 * 1024 * 1024 * 1024; } + +inline constexpr auto operator"" _KHz(unsigned long long value) { return value * 1000; } +inline constexpr auto operator"" _MHz(unsigned long long value) { return value * 1000 * 1000; } +inline constexpr auto operator"" _GHz(unsigned long long value) { return value * 1000 * 1000 * 1000; } +inline constexpr auto operator"" _THz(unsigned long long value) { return value * 1000 * 1000 * 1000 * 1000; } + +} diff --git a/waterbox/ares64/ares/nall/locale.hpp b/waterbox/ares64/ares/nall/locale.hpp new file mode 100644 index 0000000000..7d8ca73ea2 --- /dev/null +++ b/waterbox/ares64/ares/nall/locale.hpp @@ -0,0 +1,87 @@ +#pragma once + +namespace nall { + +struct Locale { + struct Dictionary { + string location; + string language; + Markup::Node document; + }; + + auto scan(string pathname) -> void { + dictionaries.reset(); + selected.reset(); + for(auto filename : directory::icontents(pathname, "*.bml")) { + Dictionary dictionary; + dictionary.location = {pathname, filename}; + dictionary.document = BML::unserialize(string::read(dictionary.location)); + 
dictionary.language = dictionary.document["locale/language"].text(); + dictionaries.append(dictionary); + } + } + + auto available() const -> vector { + vector result; + for(auto& dictionary : dictionaries) { + result.append(dictionary.language); + } + return result; + } + + auto select(string option) -> bool { + selected.reset(); + for(auto& dictionary : dictionaries) { + if(option == Location::prefix(dictionary.location) || option == dictionary.language) { + selected = dictionary; + return true; + } + } + return false; + } + + template + auto operator()(string ns, string input, P&&... p) const -> string { + vector arguments{forward

(p)...}; + if(selected) { + for(auto node : selected().document) { + if(node.name() == "namespace" && node.text() == ns) { + for(auto map : node) { + if(map.name() == "map" && map["input"].text() == input) { + input = map["value"].text(); + break; + } + } + } + } + } + for(u32 index : range(arguments.size())) { + input.replace({"{", index, "}"}, arguments[index]); + } + return input; + } + + struct Namespace { + Namespace(Locale& _locale, string _namespace) : _locale(_locale), _namespace(_namespace) {} + + template + auto operator()(string input, P&&... p) const -> string { + return _locale(_namespace, input, forward

(p)...); + } + + template + auto tr(string input, P&&... p) const -> string { + return _locale(_namespace, input, forward

(p)...); + } + + private: + Locale& _locale; + string _namespace; + }; + +private: + vector dictionaries; + maybe selected; +}; + +} diff --git a/waterbox/ares64/ares/nall/location.hpp b/waterbox/ares64/ares/nall/location.hpp new file mode 100644 index 0000000000..b525620955 --- /dev/null +++ b/waterbox/ares64/ares/nall/location.hpp @@ -0,0 +1,78 @@ +#pragma once + +#include + +namespace nall::Location { + +// (/parent/child.type/) +// (/parent/child.type/)name.type +inline auto path(string_view self) -> string { + const char* p = self.data() + self.size() - 1; + for(s32 offset = self.size() - 1; offset >= 0; offset--, p--) { + if(*p == '/') return slice(self, 0, offset + 1); + } + return ""; //no path found +} + +// /parent/child.type/() +// /parent/child.type/(name.type) +inline auto file(string_view self) -> string { + const char* p = self.data() + self.size() - 1; + for(s32 offset = self.size() - 1; offset >= 0; offset--, p--) { + if(*p == '/') return slice(self, offset + 1); + } + return self; //no path found +} + +// (/parent/)child.type/ +// (/parent/child.type/)name.type +inline auto dir(string_view self) -> string { + const char* p = self.data() + self.size() - 1, *last = p; + for(s32 offset = self.size() - 1; offset >= 0; offset--, p--) { + if(*p == '/' && p == last) continue; + if(*p == '/') return slice(self, 0, offset + 1); + } + return ""; //no path found +} + +// /parent/(child.type/) +// /parent/child.type/(name.type) +inline auto base(string_view self) -> string { + const char* p = self.data() + self.size() - 1, *last = p; + for(s32 offset = self.size() - 1; offset >= 0; offset--, p--) { + if(*p == '/' && p == last) continue; + if(*p == '/') return slice(self, offset + 1); + } + return self; //no path found +} + +// /parent/(child).type/ +// /parent/child.type/(name).type +inline auto prefix(string_view self) -> string { + const char* p = self.data() + self.size() - 1, *last = p; + for(s32 offset = self.size() - 1, suffix = -1; offset >= 0; 
offset--, p--) { + if(*p == '/' && p == last) continue; + if(*p == '/') return slice(self, offset + 1, (suffix >= 0 ? suffix : self.size()) - offset - 1).trimRight("/"); + if(*p == '.' && suffix == -1) { suffix = offset; continue; } + if(offset == 0) return slice(self, offset, suffix).trimRight("/"); + } + return ""; //no prefix found +} + +// /parent/child(.type)/ +// /parent/child.type/name(.type) +inline auto suffix(string_view self) -> string { + const char* p = self.data() + self.size() - 1, *last = p; + for(s32 offset = self.size() - 1; offset >= 0; offset--, p--) { + if(*p == '/' && p == last) continue; + if(*p == '/') break; + if(*p == '.') return slice(self, offset).trimRight("/"); + } + return ""; //no suffix found +} + +inline auto notsuffix(string_view self) -> string { + return {path(self), prefix(self)}; +} + +} diff --git a/waterbox/ares64/ares/nall/mac/poly1305.hpp b/waterbox/ares64/ares/nall/mac/poly1305.hpp new file mode 100644 index 0000000000..fca21c2591 --- /dev/null +++ b/waterbox/ares64/ares/nall/mac/poly1305.hpp @@ -0,0 +1,122 @@ +#pragma once + +#include + +namespace nall::MAC { + +struct Poly1305 { + auto authenticate(array_view memory, u256 nonce) -> u128 { + initialize(nonce); + process(memory.data(), memory.size()); + return finish(); + } + + auto initialize(u256 key) -> void { + u64 t0 = key >> 0; + u64 t1 = key >> 64; + pad[0] = key >> 128; + pad[1] = key >> 192; + + r[0] = (t0 ) & 0xffc0fffffff; + r[1] = (t0 >> 44 | t1 << 20) & 0xfffffc0ffff; + r[2] = ( t1 >> 24) & 0x00ffffffc0f; + + h[0] = 0, h[1] = 0, h[2] = 0; + offset = 0; + } + + auto process(const u8* data, u64 size) -> void { + while(size--) { + buffer[offset++] = *data++; + if(offset >= 16) { + block(); + offset = 0; + } + } + } + + auto finish() -> u128 { + if(offset) { + buffer[offset++] = 1; + while(offset < 16) buffer[offset++] = 0; + block(true); + } + + u64 h0 = h[0], h1 = h[1], h2 = h[2]; + + u64 c = h1 >> 44; h1 &= 0xfffffffffff; + h2 += c; c = h2 >> 42; h2 &= 
0x3ffffffffff; + h0 += c * 5; c = h0 >> 44; h0 &= 0xfffffffffff; + h1 += c; c = h1 >> 44; h1 &= 0xfffffffffff; + h2 += c; c = h2 >> 42; h2 &= 0x3ffffffffff; + h0 += c * 5; c = h0 >> 44; h0 &= 0xfffffffffff; + h1 += c; + + u64 g0 = h0 + 5; c = g0 >> 44; g0 &= 0xfffffffffff; + u64 g1 = h1 + c; c = g1 >> 44; g1 &= 0xfffffffffff; + u64 g2 = h2 + c - (1ull << 42); + + c = (g2 >> 63) - 1; + g0 &= c, g1 &= c, g2 &= c; + c = ~c; + h0 = (h0 & c) | g0; + h1 = (h1 & c) | g1; + h2 = (h2 & c) | g2; + + u64 t0 = pad[0], t1 = pad[1]; + + h0 += ((t0 ) & 0xfffffffffff) ; c = h0 >> 44; h0 &= 0xfffffffffff; + h1 += ((t0 >> 44 | t1 << 20) & 0xfffffffffff) + c; c = h1 >> 44; h1 &= 0xfffffffffff; + h2 += (( t1 >> 24) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff; + + h0 = (h0 >> 0 | h1 << 44); + h1 = (h1 >> 20 | h2 << 24); + + r[0] = 0, r[1] = 0, r[2] = 0; + h[0] = 0, h[1] = 0, h[2] = 0; + pad[0] = 0, pad[1] = 0; + memory::fill(buffer, sizeof(buffer)); + offset = 0; + + return u128(h1) << 64 | h0; + } + +private: + auto block(bool last = false) -> void { + u64 r0 = r[0], r1 = r[1], r2 = r[2]; + u64 h0 = h[0], h1 = h[1], h2 = h[2]; + + u64 s1 = r1 * 20; + u64 s2 = r2 * 20; + + u64 t0 = memory::readl<8>(buffer + 0); + u64 t1 = memory::readl<8>(buffer + 8); + + h0 += ((t0 ) & 0xfffffffffff); + h1 += ((t0 >> 44 | t1 << 20) & 0xfffffffffff); + h2 += (( t1 >> 24) & 0x3ffffffffff) | (last ? 
0 : 1ull << 40); + + u128 d, d0, d1, d2; + d0 = (u128)h0 * r0; d = (u128)h1 * s2; d0 += d; d = (u128)h2 * s1; d0 += d; + d1 = (u128)h0 * r1; d = (u128)h1 * r0; d1 += d; d = (u128)h2 * s2; d1 += d; + d2 = (u128)h0 * r2; d = (u128)h1 * r1; d2 += d; d = (u128)h2 * r0; d2 += d; + + u64 c = (u64)(d0 >> 44); h0 = (u64)d0 & 0xfffffffffff; + d1 += c; c = (u64)(d1 >> 44); h1 = (u64)d1 & 0xfffffffffff; + d2 += c; c = (u64)(d2 >> 42); h2 = (u64)d2 & 0x3ffffffffff; + + h0 += c * 5; c = h0 >> 44; h0 &= 0xfffffffffff; + h1 += c; + + h[0] = h0, h[1] = h1, h[2] = h2; + } + + u64 r[3]; + u64 h[3]; + u64 pad[2]; + + u8 buffer[16]; + u32 offset; +}; + +} diff --git a/waterbox/ares64/ares/nall/macos/guard.hpp b/waterbox/ares64/ares/nall/macos/guard.hpp new file mode 100644 index 0000000000..96cd4d6904 --- /dev/null +++ b/waterbox/ares64/ares/nall/macos/guard.hpp @@ -0,0 +1,15 @@ +#ifndef NALL_MACOS_GUARD_HPP +#define NALL_MACOS_GUARD_HPP + +#define Boolean CocoaBoolean +#define decimal CocoaDecimal +#define DEBUG CocoaDebug + +#else +#undef NALL_MACOS_GUARD_HPP + +#undef Boolean +#undef decimal +#undef DEBUG + +#endif diff --git a/waterbox/ares64/ares/nall/main.hpp b/waterbox/ares64/ares/nall/main.hpp new file mode 100644 index 0000000000..273e43f605 --- /dev/null +++ b/waterbox/ares64/ares/nall/main.hpp @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + +namespace nall { + auto main(Arguments arguments) -> void; + + auto main(int argc, char** argv) -> int { + #if defined(PLATFORM_WINDOWS) + CoInitialize(0); + WSAData wsaData{0}; + WSAStartup(MAKEWORD(2, 2), &wsaData); + _setmode(_fileno(stdin ), O_BINARY); + _setmode(_fileno(stdout), O_BINARY); + _setmode(_fileno(stderr), O_BINARY); + #endif + + main(Arguments{argc, argv}); + + #if !defined(PLATFORM_WINDOWS) + //when a program is running, input on the terminal queues in stdin + //when terminating the program, the shell proceeds to try and execute all stdin data + //this is annoying behavior: this code tries to minimize 
the impact as much as it can + //we can flush all of stdin up to the last line feed, preventing spurious commands from executing + //however, even with setvbuf(_IONBF), we can't stop the last line from echoing to the terminal + auto flags = fcntl(fileno(stdin), F_GETFL, 0); + fcntl(fileno(stdin), F_SETFL, flags | O_NONBLOCK); //don't allow read() to block when empty + char buffer[4096], data = false; + while(read(fileno(stdin), buffer, sizeof(buffer)) > 0) data = true; + fcntl(fileno(stdin), F_SETFL, flags); //restore original flags for the terminal + if(data) putchar('\r'); //ensures PS1 is printed at the start of the line + #endif + + return EXIT_SUCCESS; + } +} + +auto main(int argc, char** argv) -> int { + return nall::main(argc, argv); +} diff --git a/waterbox/ares64/ares/nall/map.hpp b/waterbox/ares64/ares/nall/map.hpp new file mode 100644 index 0000000000..58b1e11b86 --- /dev/null +++ b/waterbox/ares64/ares/nall/map.hpp @@ -0,0 +1,58 @@ +#pragma once + +#include + +namespace nall { + +template struct map { + struct node_t { + T key; + U value; + node_t() = default; + node_t(const T& key) : key(key) {} + node_t(const T& key, const U& value) : key(key), value(value) {} + auto operator< (const node_t& source) const -> bool { return key < source.key; } + auto operator==(const node_t& source) const -> bool { return key == source.key; } + }; + + auto find(const T& key) const -> maybe { + if(auto node = root.find({key})) return node().value; + return nothing; + } + + auto insert(const T& key, const U& value) -> void { root.insert({key, value}); } + auto remove(const T& key) -> void { root.remove({key}); } + auto size() const -> unsigned { return root.size(); } + auto reset() -> void { root.reset(); } + + auto begin() -> typename set::iterator { return root.begin(); } + auto end() -> typename set::iterator { return root.end(); } + + auto begin() const -> const typename set::iterator { return root.begin(); } + auto end() const -> const typename set::iterator { return 
root.end(); } + +protected: + set root; +}; + +template struct bimap { + auto find(const T& key) const -> maybe { return tmap.find(key); } + auto find(const U& key) const -> maybe { return umap.find(key); } + auto insert(const T& key, const U& value) -> void { tmap.insert(key, value); umap.insert(value, key); } + auto remove(const T& key) -> void { if(auto p = tmap.find(key)) { umap.remove(p().value); tmap.remove(key); } } + auto remove(const U& key) -> void { if(auto p = umap.find(key)) { tmap.remove(p().value); umap.remove(key); } } + auto size() const -> unsigned { return tmap.size(); } + auto reset() -> void { tmap.reset(); umap.reset(); } + + auto begin() -> typename set::node_t>::iterator { return tmap.begin(); } + auto end() -> typename set::node_t>::iterator { return tmap.end(); } + + auto begin() const -> const typename set::node_t>::iterator { return tmap.begin(); } + auto end() const -> const typename set::node_t>::iterator { return tmap.end(); } + +protected: + map tmap; + map umap; +}; + +} diff --git a/waterbox/ares64/ares/nall/matrix-multiply.hpp b/waterbox/ares64/ares/nall/matrix-multiply.hpp new file mode 100644 index 0000000000..9568bb9355 --- /dev/null +++ b/waterbox/ares64/ares/nall/matrix-multiply.hpp @@ -0,0 +1,36 @@ +#pragma once + +//matrix multiplication primitives +//used in: ruby/opengl/quark + +namespace nall { + +template inline auto MatrixMultiply( +T* output, +const T* xdata, u32 xrows, u32 xcols, +const T* ydata, u32 yrows, u32 ycols +) -> void { + if(xcols != yrows) return; + + for(u32 y : range(xrows)) { + for(u32 x : range(ycols)) { + T sum = 0; + for(u32 z : range(xcols)) { + sum += xdata[y * xcols + z] * ydata[z * ycols + x]; + } + *output++ = sum; + } + } +} + +template inline auto MatrixMultiply( +const T* xdata, u32 xrows, u32 xcols, +const T* ydata, u32 yrows, u32 ycols +) -> vector { + vector output; + output.resize(xrows * ycols); + MatrixMultiply(output.data(), xdata, xrows, xcols, ydata, yrows, ycols); + return output; 
+} + +} diff --git a/waterbox/ares64/ares/nall/matrix.hpp b/waterbox/ares64/ares/nall/matrix.hpp new file mode 100644 index 0000000000..3923f2d835 --- /dev/null +++ b/waterbox/ares64/ares/nall/matrix.hpp @@ -0,0 +1,213 @@ +#pragma once + +namespace nall { + +template +struct Matrix { + static_assert(Rows > 0 && Cols > 0); + + Matrix() = default; + Matrix(const Matrix&) = default; + Matrix(const initializer_list& source) { + u32 index = 0; + for(auto& value : source) { + if(index >= Rows * Cols) break; + values[index / Cols][index % Cols] = value; + } + } + + operator array_span() { return {values, Rows * Cols}; } + operator array_view() const { return {values, Rows * Cols}; } + + //1D matrices (for polynomials, etc) + auto operator[](u32 row) -> T& { return values[row][0]; } + auto operator[](u32 row) const -> T { return values[row][0]; } + + //2D matrices + auto operator()(u32 row, u32 col) -> T& { return values[row][col]; } + auto operator()(u32 row, u32 col) const -> T { return values[row][col]; } + + //operators + auto operator+() const -> Matrix { + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(row, col) = +target(row, col); + } + } + return result; + } + + auto operator-() const -> Matrix { + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(row, col) = -target(row, col); + } + } + return result; + } + + auto operator+(const Matrix& source) const -> Matrix { + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(row, col) = target(row, col) + source(row, col); + } + } + return result; + } + + auto operator-(const Matrix& source) const -> Matrix { + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(row, col) = target(row, col) - source(row, col); + } + } + return result; + } + + auto operator*(T source) const -> Matrix { + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(row, col) 
= target(row, col) * source; + } + } + return result; + } + + auto operator/(T source) const -> Matrix { + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(row, col) = target(row, col) / source; + } + } + return result; + } + + //warning: matrix multiplication is not commutative! + template + auto operator*(const Matrix& source) const -> Matrix { + static_assert(Cols == SourceRows); + Matrix result; + for(u32 y : range(Rows)) { + for(u32 x : range(SourceCols)) { + T sum{}; + for(u32 z : range(Cols)) { + sum += target(y, z) * source(z, x); + } + result(y, x) = sum; + } + } + return result; + } + + template + auto operator/(const Matrix& source) const -> maybe> { + static_assert(Cols == SourceRows && SourceRows == SourceCols); + if(auto inverted = source.invert()) return operator*(inverted()); + return {}; + } + + auto& operator+=(const Matrix& source) { return *this = operator+(source); } + auto& operator-=(const Matrix& source) { return *this = operator-(source); } + auto& operator*=(T source) { return *this = operator*(source); } + auto& operator/=(T source) { return *this = operator/(source); } + template + auto& operator*=(const Matrix& source) { return *this = operator*(source); } + //matrix division is not always possible (when matrix cannot be inverted), so operator/= is not provided + + //algorithm: Gauss-Jordan + auto invert() const -> maybe { + static_assert(Rows == Cols); + Matrix source = *this; + Matrix result = identity(); + + const auto add = [&](u32 targetRow, u32 sourceRow, T factor = 1) { + for(u32 col : range(Cols)) { + result(targetRow, col) += result(sourceRow, col) * factor; + source(targetRow, col) += source(sourceRow, col) * factor; + } + }; + + const auto sub = [&](u32 targetRow, u32 sourceRow, T factor = 1) { + for(u32 col : range(Cols)) { + result(targetRow, col) -= result(sourceRow, col) * factor; + source(targetRow, col) -= source(sourceRow, col) * factor; + } + }; + + const auto mul = [&](u32 row, T 
factor) { + for(u32 col : range(Cols)) { + result(row, col) *= factor; + source(row, col) *= factor; + } + }; + + for(u32 i : range(Cols)) { + if(source(i, i) == 0) { + for(u32 row : range(Rows)) { + if(source(row, i) != 0) { + add(i, row); + break; + } + } + //matrix is not invertible: + if(source(i, i) == 0) return {}; + } + + mul(i, T{1} / source(i, i)); + for(u32 row : range(Rows)) { + if(row == i) continue; + sub(row, i, source(row, i)); + } + } + + return result; + } + + auto transpose() const -> Matrix { + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(col, row) = target(row, col); + } + } + return result; + } + + static auto identity() -> Matrix { + static_assert(Rows == Cols); + Matrix result; + for(u32 row : range(Rows)) { + for(u32 col : range(Cols)) { + result(row, col) = row == col; + } + } + return result; + } + + //debugging function: do not use in production code + template + auto _print() const -> void { + for(u32 row : range(Rows)) { + nall::print("[ "); + for(u32 col : range(Cols)) { + nall::print(pad(target(row, col), Pad, ' '), " "); + } + nall::print("]\n"); + } + } + +protected: + //same as operator(), but with easier to read syntax inside Matrix class + auto target(u32 row, u32 col) -> T& { return values[row][col]; } + auto target(u32 row, u32 col) const -> T { return values[row][col]; } + + T values[Rows][Cols]{}; +}; + +} diff --git a/waterbox/ares64/ares/nall/maybe.hpp b/waterbox/ares64/ares/nall/maybe.hpp new file mode 100644 index 0000000000..2717e7f5f0 --- /dev/null +++ b/waterbox/ares64/ares/nall/maybe.hpp @@ -0,0 +1,93 @@ +#pragma once + +#include + +namespace nall { + +struct nothing_t {}; +static nothing_t nothing; +struct else_t {}; + +template +struct maybe { + maybe() {} + maybe(nothing_t) {} + maybe(const T& source) { operator=(source); } + maybe(T&& source) { operator=(move(source)); } + maybe(const maybe& source) { operator=(source); } + maybe(maybe&& source) { operator=(move(source)); } 
+ ~maybe() { reset(); } + + auto operator=(nothing_t) -> maybe& { reset(); return *this; } + auto operator=(const T& source) -> maybe& { reset(); _valid = true; new(&_value.t) T(source); return *this; } + auto operator=(T&& source) -> maybe& { reset(); _valid = true; new(&_value.t) T(move(source)); return *this; } + + auto operator=(const maybe& source) -> maybe& { + if(this == &source) return *this; + reset(); + if(_valid = source._valid) new(&_value.t) T(source.get()); + return *this; + } + + auto operator=(maybe&& source) -> maybe& { + if(this == &source) return *this; + reset(); + if(_valid = source._valid) new(&_value.t) T(move(source.get())); + return *this; + } + + explicit operator bool() const { return _valid; } + auto reset() -> void { if(_valid) { _value.t.~T(); _valid = false; } } + auto data() -> T* { return _valid ? &_value.t : nullptr; } + auto get() -> T& { assert(_valid); return _value.t; } + + auto data() const -> const T* { return ((maybe*)this)->data(); } + auto get() const -> const T& { return ((maybe*)this)->get(); } + auto operator->() -> T* { return data(); } + auto operator->() const -> const T* { return data(); } + auto operator*() -> T& { return get(); } + auto operator*() const -> const T& { return get(); } + auto operator()() -> T& { return get(); } + auto operator()() const -> const T& { return get(); } + auto operator()(const T& invalid) const -> const T& { return _valid ? 
get() : invalid; } + +private: + union U { + T t; + U() {} + ~U() {} + } _value; + bool _valid = false; +}; + +template +struct maybe { + maybe() : _value(nullptr) {} + maybe(nothing_t) : _value(nullptr) {} + maybe(const T& source) : _value((T*)&source) {} + maybe(const maybe& source) : _value(source._value) {} + + auto operator=(nothing_t) -> maybe& { _value = nullptr; return *this; } + auto operator=(const T& source) -> maybe& { _value = (T*)&source; return *this; } + auto operator=(const maybe& source) -> maybe& { _value = source._value; return *this; } + + explicit operator bool() const { return _value; } + auto reset() -> void { _value = nullptr; } + auto data() -> T* { return _value; } + auto get() -> T& { assert(_value); return *_value; } + + auto data() const -> const T* { return ((maybe*)this)->data(); } + auto get() const -> const T& { return ((maybe*)this)->get(); } + auto operator->() -> T* { return data(); } + auto operator->() const -> const T* { return data(); } + auto operator*() -> T& { return get(); } + auto operator*() const -> const T& { return get(); } + auto operator()() -> T& { return get(); } + auto operator()() const -> const T& { return get(); } + auto operator()(const T& invalid) const -> const T& { return _value ? 
get() : invalid; } + +private: + T* _value; +}; + +} diff --git a/waterbox/ares64/ares/nall/memory.hpp b/waterbox/ares64/ares/nall/memory.hpp new file mode 100644 index 0000000000..3e82c06938 --- /dev/null +++ b/waterbox/ares64/ares/nall/memory.hpp @@ -0,0 +1,248 @@ +#pragma once + +#include +#include + +namespace nall::memory { + template auto allocate(u32 size) -> T*; + template auto allocate(u32 size, const T& value) -> T*; + template auto resize(void* target, u32 size) -> T*; + template auto free(void* target) -> void; + + template auto compare(const void* target, u32 capacity, const void* source, u32 size) -> s32; + template auto compare(const void* target, const void* source, u32 size) -> s32; + + template auto icompare(const void* target, u32 capacity, const void* source, u32 size) -> s32; + template auto icompare(const void* target, const void* source, u32 size) -> s32; + + template auto copy(void* target, u32 capacity, const void* source, u32 size) -> T*; + template auto copy(void* target, const void* source, u32 size) -> T*; + + template auto move(void* target, u32 capacity, const void* source, u32 size) -> T*; + template auto move(void* target, const void* source, u32 size) -> T*; + + template auto fill(void* target, u32 capacity, const T& value = {}) -> T*; + + template auto assign(T* target) -> void {} + template auto assign(T* target, const U& value, P&&... 
p) -> void; + + template auto readl(const void* source) -> T; + template auto readm(const void* source) -> T; + + template auto writel(void* target, T data) -> void; + template auto writem(void* target, T data) -> void; + + auto map(u32 size, bool executable) -> void*; + auto unmap(void* target, u32 size) -> void; + auto protect(void* target, u32 size, bool executable) -> void; + auto jitprotect(bool executable) -> void; +} + +namespace nall::memory { + +//implementation notes: +//memcmp, memcpy, memmove have terrible performance on small block sizes (FreeBSD 10.0-amd64) +//as this library is used extensively by nall/string, and most strings tend to be small, +//this library hand-codes these functions instead. surprisingly, it's a substantial speedup + +template auto allocate(u32 size) -> T* { + if constexpr(Align == 0) { + return (T*)malloc(size * sizeof(T)); + } + #if defined(API_WINDOWS) + return (T*)_aligned_malloc(size * sizeof(T), Align); + #elif defined(API_POSIX) + T* result = nullptr; + posix_memalign((void**)&result, Align, size * sizeof(T)); + return result; + #else + return (T*)malloc(size * sizeof(T)); + #endif +} + +template auto allocate(u32 size, const T& value) -> T* { + auto result = allocate(size); + if(result) fill(result, size, value); + return result; +} + +template auto resize(void* target, u32 size) -> T* { + if constexpr(Align == 0) { + return (T*)realloc(target, size * sizeof(T)); + } + #if defined(API_WINDOWS) + return (T*)_aligned_realloc(target, size * sizeof(T), Align); + #elif defined(API_POSIX) + //realloc() cannot be used safely with posix_memalign(); a copy is always required + T* result = allocate(size); + copy(result, target, size); + free(target); + return result; + #else + return (T*)realloc(target, size * sizeof(T)); + #endif +} + +template auto free(void* target) -> void { + if constexpr(Align == 0) { + ::free(target); + return; + } + #if defined(API_WINDOWS) + _aligned_free(target); + #else + ::free(target); + #endif +} + 
+template auto compare(const void* target, u32 capacity, const void* source, u32 size) -> s32 { + auto t = (u8*)target; + auto s = (u8*)source; + auto l = min(capacity, size) * sizeof(T); + while(l--) { + auto x = *t++; + auto y = *s++; + if(x != y) return x - y; + } + if(capacity == size) return 0; + return -(capacity < size); +} + +template auto compare(const void* target, const void* source, u32 size) -> s32 { + return compare(target, size, source, size); +} + +template auto icompare(const void* target, u32 capacity, const void* source, u32 size) -> s32 { + auto t = (u8*)target; + auto s = (u8*)source; + auto l = min(capacity, size) * sizeof(T); + while(l--) { + auto x = *t++; + auto y = *s++; + if(x - 'A' < 26) x += 32; + if(y - 'A' < 26) y += 32; + if(x != y) return x - y; + } + return -(capacity < size); +} + +template auto icompare(const void* target, const void* source, u32 size) -> s32 { + return icompare(target, size, source, size); +} + +template auto copy(void* target, u32 capacity, const void* source, u32 size) -> T* { + auto t = (u8*)target; + auto s = (u8*)source; + auto l = min(capacity, size) * sizeof(T); + while(l--) *t++ = *s++; + return (T*)target; +} + +template auto copy(void* target, const void* source, u32 size) -> T* { + return copy(target, size, source, size); +} + +template auto move(void* target, u32 capacity, const void* source, u32 size) -> T* { + auto t = (u8*)target; + auto s = (u8*)source; + auto l = min(capacity, size) * sizeof(T); + if(t < s) { + while(l--) *t++ = *s++; + } else { + t += l; + s += l; + while(l--) *--t = *--s; + } + return (T*)target; +} + +template auto move(void* target, const void* source, u32 size) -> T* { + return move(target, size, source, size); +} + +template auto fill(void* target, u32 capacity, const T& value) -> T* { + auto t = (T*)target; + while(capacity--) *t++ = value; + return (T*)target; +} + +template auto assign(T* target, const U& value, P&&... 
p) -> void { + *target++ = value; + assign(target, forward

(p)...); +} + +template auto readl(const void* source) -> T { + auto p = (const u8*)source; + T data = 0; + for(u32 n = 0; n < size; n++) data |= T(*p++) << n * 8; + return data; +} + +template auto readm(const void* source) -> T { + auto p = (const u8*)source; + T data = 0; + for(s32 n = size - 1; n >= 0; n--) data |= T(*p++) << n * 8; + return data; +} + +template auto writel(void* target, T data) -> void { + auto p = (u8*)target; + for(u32 n = 0; n < size; n++) *p++ = data >> n * 8; +} + +template auto writem(void* target, T data) -> void { + auto p = (u8*)target; + for(s32 n = size - 1; n >= 0; n--) *p++ = data >> n * 8; +} + +inline auto map(u32 size, bool executable) -> void* { + #if defined(API_WINDOWS) + DWORD protect = executable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; + return VirtualAlloc(nullptr, size, MEM_RESERVE | MEM_COMMIT, protect); + #elif defined(API_POSIX) + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_ANON | MAP_PRIVATE; + if(executable) { + prot |= PROT_EXEC; + #if defined(PLATFORM_MACOS) && defined(ARCHITECTURE_ARM64) + flags |= MAP_JIT; + #endif + } + return mmap(nullptr, size, prot, flags, -1, 0); + #else + return nullptr; + #endif +} + +inline auto unmap(void* target, u32 size) -> void { + #if defined(API_WINDOWS) + VirtualFree(target, 0, MEM_RELEASE); + #elif defined(API_POSIX) + munmap(target, size); + #endif +} + +inline auto protect(void* target, u32 size, bool executable) -> void { + #if defined(API_WINDOWS) + DWORD protect = executable ? 
PAGE_EXECUTE_READWRITE : PAGE_READWRITE; + DWORD oldProtect; + VirtualProtect(target, size, protect, &oldProtect); + #elif defined(API_POSIX) + int prot = PROT_READ | PROT_WRITE; + if(executable) { + prot |= PROT_EXEC; + } + int ret = mprotect(target, size, prot); + assert(ret == 0); + #endif +} + +inline auto jitprotect(bool executable) -> void { + #if defined(PLATFORM_MACOS) && defined(ARCHITECTURE_ARM64) + if(__builtin_available(macOS 11.0, *)) { + pthread_jit_write_protect_np(executable); + } + #endif +} + +} diff --git a/waterbox/ares64/ares/nall/merge-sort.hpp b/waterbox/ares64/ares/nall/merge-sort.hpp new file mode 100644 index 0000000000..059fec6925 --- /dev/null +++ b/waterbox/ares64/ares/nall/merge-sort.hpp @@ -0,0 +1,82 @@ +#pragma once + +#include +#include + +//class: merge sort +//average: O(n log n) +//worst: O(n log n) +//memory: O(n) +//stack: O(log n) +//stable?: yes + +//note: merge sort was chosen over quick sort, because: +//* it is a stable sort +//* it lacks O(n^2) worst-case overhead +//* it usually runs faster than quick sort anyway + +//note: insertion sort is generally more performant than selection sort +#define NALL_MERGE_SORT_INSERTION +//#define NALL_MERGE_SORT_SELECTION + +namespace nall { + +template auto sort(T list[], u32 size, const Comparator& lessthan) -> void { + if(size <= 1) return; //nothing to sort + + //sort smaller blocks using an O(n^2) algorithm (which for small sizes, increases performance) + if(size < 64) { + //insertion sort requires a copy (via move construction) + #if defined(NALL_MERGE_SORT_INSERTION) + for(s32 i = 1, j; i < size; i++) { + T copy(move(list[i])); + for(j = i - 1; j >= 0; j--) { + if(!lessthan(copy, list[j])) break; + list[j + 1] = move(list[j]); + } + list[j + 1] = move(copy); + } + //selection sort requires a swap + #elif defined(NALL_MERGE_SORT_SELECTION) + for(u32 i = 0; i < size; i++) { + u32 min = i; + for(u32 j = i + 1; j < size; j++) { + if(lessthan(list[j], list[min])) min = j; + } + 
if(min != i) swap(list[i], list[min]); + } + #endif + return; + } + + //split list in half and recursively sort both + u32 middle = size / 2; + sort(list, middle, lessthan); + sort(list + middle, size - middle, lessthan); + + //left and right are sorted here; perform merge sort + //use placement new to avoid needing T to be default-constructable + auto buffer = memory::allocate(size); + u32 offset = 0, left = 0, right = middle; + while(left < middle && right < size) { + if(!lessthan(list[right], list[left])) { + new(buffer + offset++) T(move(list[left++])); + } else { + new(buffer + offset++) T(move(list[right++])); + } + } + while(left < middle) new(buffer + offset++) T(move(list[left++])); + while(right < size ) new(buffer + offset++) T(move(list[right++])); + + for(u32 i = 0; i < size; i++) { + list[i] = move(buffer[i]); + buffer[i].~T(); + } + memory::free(buffer); +} + +template auto sort(T list[], u32 size) -> void { + return sort(list, size, [](const T& l, const T& r) { return l < r; }); +} + +} diff --git a/waterbox/ares64/ares/nall/nall.hpp b/waterbox/ares64/ares/nall/nall.hpp new file mode 100644 index 0000000000..95b064988e --- /dev/null +++ b/waterbox/ares64/ares/nall/nall.hpp @@ -0,0 +1,93 @@ +#pragma once + +//include the most common nall headers with one statement +//does not include the most obscure components with high cost and low usage + +#include + +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include //todo: 
compilation errors when included earlier +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(PLATFORM_WINDOWS) + #include + #include +#endif + +#if defined(API_POSIX) + #include +#endif diff --git a/waterbox/ares64/ares/nall/path.hpp b/waterbox/ares64/ares/nall/path.hpp new file mode 100644 index 0000000000..75794ba23c --- /dev/null +++ b/waterbox/ares64/ares/nall/path.hpp @@ -0,0 +1,165 @@ +#pragma once + +#include + +namespace nall::Path { + +inline auto active() -> string { + char path[PATH_MAX] = ""; + (void)getcwd(path, PATH_MAX); + string result = path; + if(!result) result = "."; + result.transform("\\", "/"); + if(!result.endsWith("/")) result.append("/"); + return result; +} + +inline auto real(string_view name) -> string { + string result; + char path[PATH_MAX] = ""; + if(::realpath(name, path)) result = Location::path(string{path}.transform("\\", "/")); + if(!result) return active(); + result.transform("\\", "/"); + if(!result.endsWith("/")) result.append("/"); + return result; +} + +inline auto program() -> string { + #if defined(PLATFORM_WINDOWS) + wchar_t path[PATH_MAX] = L""; + GetModuleFileName(nullptr, path, PATH_MAX); + string result = (const char*)utf8_t(path); + result.transform("\\", "/"); + return Path::real(result); + #else + Dl_info info; + dladdr((void*)&program, &info); + return Path::real(info.dli_fname); + #endif +} + +// / +// c:/ +inline auto root() -> string { + #if defined(PLATFORM_WINDOWS) + wchar_t path[PATH_MAX] = L""; + SHGetFolderPathW(nullptr, CSIDL_WINDOWS | CSIDL_FLAG_CREATE, nullptr, 0, path); + string result = (const char*)utf8_t(path); + result.transform("\\", "/"); + return slice(result, 0, 3); + #else + return "/"; + #endif +} + +// /home/username/ +// c:/users/username/ +inline auto user() -> string { + #if defined(PLATFORM_WINDOWS) + wchar_t path[PATH_MAX] = L""; + 
SHGetFolderPathW(nullptr, CSIDL_PROFILE | CSIDL_FLAG_CREATE, nullptr, 0, path); + string result = (const char*)utf8_t(path); + result.transform("\\", "/"); + #else + struct passwd* userinfo = getpwuid(getuid()); + string result = userinfo->pw_dir; + #endif + if(!result) result = "."; + if(!result.endsWith("/")) result.append("/"); + return result; +} + +// /home/username/Desktop/ +// c:/users/username/Desktop/ +inline auto desktop(string_view name = {}) -> string { + return {user(), "Desktop/", name}; +} + +//todo: MacOS uses the same location for userData() and userSettings() +//... is there a better option here? + +// /home/username/.config/ +// ~/Library/Application Support/ +// c:/users/username/appdata/roaming/ +inline auto userSettings() -> string { + #if defined(PLATFORM_WINDOWS) + wchar_t path[PATH_MAX] = L""; + SHGetFolderPathW(nullptr, CSIDL_APPDATA | CSIDL_FLAG_CREATE, nullptr, 0, path); + string result = (const char*)utf8_t(path); + result.transform("\\", "/"); + #elif defined(PLATFORM_MACOS) + string result = {Path::user(), "Library/Application Support/"}; + #else + string result; + if(const char *env = getenv("XDG_CONFIG_HOME")) { + result = string(env); + } else { + result = {Path::user(), ".config/"}; + } + #endif + if(!result) result = "."; + if(!result.endsWith("/")) result.append("/"); + return result; +} + +// /home/username/.local/share/ +// ~/Library/Application Support/ +// c:/users/username/appdata/local/ +inline auto userData() -> string { + #if defined(PLATFORM_WINDOWS) + wchar_t path[PATH_MAX] = L""; + SHGetFolderPathW(nullptr, CSIDL_LOCAL_APPDATA | CSIDL_FLAG_CREATE, nullptr, 0, path); + string result = (const char*)utf8_t(path); + result.transform("\\", "/"); + #elif defined(PLATFORM_MACOS) + string result = {Path::user(), "Library/Application Support/"}; + #else + string result; + if(const char* env = getenv("XDG_DATA_HOME")) { + result = string(env); + } else { + result = {Path::user(), ".local/share/"}; + } + #endif + if(!result) 
result = "."; + if(!result.endsWith("/")) result.append("/"); + return result; +} + +// /usr/share +// /Library/Application Support/ +// c:/ProgramData/ +inline auto sharedData() -> string { + #if defined(PLATFORM_WINDOWS) + wchar_t path[PATH_MAX] = L""; + SHGetFolderPathW(nullptr, CSIDL_COMMON_APPDATA | CSIDL_FLAG_CREATE, nullptr, 0, path); + string result = (const char*)utf8_t(path); + result.transform("\\", "/"); + #elif defined(PLATFORM_MACOS) + string result = "/Library/Application Support/"; + #else + string result = "/usr/share/"; + #endif + if(!result) result = "."; + if(!result.endsWith("/")) result.append("/"); + return result; +} + +// /tmp +// c:/users/username/AppData/Local/Temp/ +inline auto temporary() -> string { + #if defined(PLATFORM_WINDOWS) + wchar_t path[PATH_MAX] = L""; + GetTempPathW(PATH_MAX, path); + string result = (const char*)utf8_t(path); + result.transform("\\", "/"); + #elif defined(P_tmpdir) + string result = P_tmpdir; + #else + string result = "/tmp/"; + #endif + if(!result.endsWith("/")) result.append("/"); + return result; +} + +} diff --git a/waterbox/ares64/ares/nall/platform.hpp b/waterbox/ares64/ares/nall/platform.hpp new file mode 100644 index 0000000000..b7fd690039 --- /dev/null +++ b/waterbox/ares64/ares/nall/platform.hpp @@ -0,0 +1,196 @@ +#pragma once + +#include +#include + +namespace Math { + static const long double e = 2.71828182845904523536; + static const long double Pi = 3.14159265358979323846; +} + +#if defined(PLATFORM_WINDOWS) + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if !defined(PLATFORM_WINDOWS) + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include 
+#endif + +#if defined(ARCHITECTURE_X86) || defined(ARCHITECTURE_AMD64) + #include + #undef _serialize +#endif + +#if !defined(__has_builtin) + #define __has_builtin(x) 0 +#endif + +#if defined(COMPILER_MICROSOFT) + #define va_copy(dest, src) ((dest) = (src)) +#endif + +#if defined(PLATFORM_WINDOWS) + #undef IN + #undef OUT + #undef interface + #define dllexport __declspec(dllexport) + #define MSG_NOSIGNAL 0 + + extern "C" { + using pollfd = WSAPOLLFD; + } + + inline auto access(const char* path, int amode) -> int { return _waccess(nall::utf16_t(path), amode); } + inline auto getcwd(char* buf, size_t size) -> char* { wchar_t wpath[PATH_MAX] = L""; if(!_wgetcwd(wpath, size)) return nullptr; strcpy(buf, nall::utf8_t(wpath)); return buf; } + inline auto mkdir(const char* path, int mode) -> int { return _wmkdir(nall::utf16_t(path)); } + inline auto poll(struct pollfd fds[], unsigned long nfds, int timeout) -> int { return WSAPoll(fds, nfds, timeout); } + inline auto putenv(const char* value) -> int { return _wputenv(nall::utf16_t(value)); } + inline auto realpath(const char* file_name, char* resolved_name) -> char* { wchar_t wfile_name[PATH_MAX] = L""; if(!_wfullpath(wfile_name, nall::utf16_t(file_name), PATH_MAX)) return nullptr; strcpy(resolved_name, nall::utf8_t(wfile_name)); return resolved_name; } + inline auto rename(const char* oldname, const char* newname) -> int { return _wrename(nall::utf16_t(oldname), nall::utf16_t(newname)); } + + namespace nall { + //network functions take void*, not char*. 
this allows them to be used without casting + + inline auto recv(int socket, void* buffer, size_t length, int flags) -> ssize_t { + return ::recv(socket, (char*)buffer, length, flags); + } + + inline auto send(int socket, const void* buffer, size_t length, int flags) -> ssize_t { + return ::send(socket, (const char*)buffer, length, flags); + } + + inline auto setsockopt(int socket, int level, int option_name, const void* option_value, socklen_t option_len) -> int { + return ::setsockopt(socket, level, option_name, (const char*)option_value, option_len); + } + } +#else + #define dllexport +#endif + +#undef bswap16 +#undef bswap32 +#undef bswap64 +#undef bswap128 +#undef likely +#undef unlikely +#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) + #define bswap16(value) __builtin_bswap16(value) + #define bswap32(value) __builtin_bswap32(value) + #define bswap64(value) __builtin_bswap64(value) + #if defined(__SIZEOF_INT128__) + inline auto bswap128(u128 value) -> u128 { + #if defined(__SSSE3__) + static const __m128i shuffle = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return reinterpret_cast(_mm_shuffle_epi8(reinterpret_cast<__m128i>(value), shuffle)); + #else + return (u128)__builtin_bswap64(value) << 64 | __builtin_bswap64(value >> 64); + #endif + } + #endif + #define likely(expression) __builtin_expect(bool(expression), true) + #define unlikely(expression) __builtin_expect(bool(expression), false) +#else + inline auto bswap16(u16 value) -> u16 { + return value << 8 | value >> 8; + } + inline auto bswap32(u32 value) -> u32 { + return (u32)bswap16(value) << 16 | bswap16(value >> 16); + } + inline auto bswap64(u64 value) -> u64 { + return (u64)bswap32(value) << 32 | bswap32(value >> 32); + } + #if defined(__SIZEOF_INT128__) + inline auto bswap128(u128 value) -> u128 { + return (u128)bswap64(value) << 64 | bswap64(value >> 64); + } + #endif + #define likely(expression) expression + #define unlikely(expression) expression +#endif + 
+//notify the processor/operating system that this thread is currently awaiting an event (eg a spinloop) +//calling this function aims to avoid consuming 100% CPU resources on the active thread during spinloops +inline auto spinloop() -> void { + #if defined(COMPILER_CLANG) || defined(COMPILER_GCC) + #if defined(ARCHITECTURE_X86) || defined(ARCHITECTURE_AMD64) + __builtin_ia32_pause(); + return; + #endif + #endif + usleep(1); +} + +#if defined(PLATFORM_MACOS) + #define MSG_NOSIGNAL 0 +#endif + +#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) + #define no_optimize __attribute__((optnone)) + #define noinline __attribute__((noinline)) + #define alwaysinline inline __attribute__((always_inline)) +#elif defined(COMPILER_MICROSOFT) + #define no_optimize + #define noinline __declspec(noinline) + #define alwaysinline inline __forceinline +#else + #define no_optimize + #define noinline + #define alwaysinline inline +#endif + +//P0627: [[unreachable]] -- impossible to simulate with identical syntax, must omit brackets ... 
+#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) + #define unreachable __builtin_unreachable() +#else + #define unreachable throw +#endif + +#if defined(COMPILER_GCC) + #undef _serialize +#endif + +#define export $export +#define register $register diff --git a/waterbox/ares64/ares/nall/pointer.hpp b/waterbox/ares64/ares/nall/pointer.hpp new file mode 100644 index 0000000000..0e96ddb09a --- /dev/null +++ b/waterbox/ares64/ares/nall/pointer.hpp @@ -0,0 +1,34 @@ +#pragma once + +namespace nall { + +template +struct pointer { + explicit operator bool() const { return value; } + + pointer() = default; + pointer(T* source) { value = source; } + pointer(const pointer& source) { value = source.value; } + + auto& operator=(T* source) { value = source; return *this; } + auto& operator=(const pointer& source) { value = source.value; return *this; } + + auto operator()() -> T* { return value; } + auto operator()() const -> const T* { return value; } + + auto operator->() -> T* { return value; } + auto operator->() const -> const T* { return value; } + + auto operator*() -> T& { return *value; } + auto operator*() const -> const T& { return *value; } + + auto reset() -> void { value = nullptr; } + + auto data() -> T* { return value; } + auto data() const -> const T* { return value; } + +private: + T* value = nullptr; +}; + +} diff --git a/waterbox/ares64/ares/nall/posix/service.hpp b/waterbox/ares64/ares/nall/posix/service.hpp new file mode 100644 index 0000000000..4d71c6f4a8 --- /dev/null +++ b/waterbox/ares64/ares/nall/posix/service.hpp @@ -0,0 +1,114 @@ +#pragma once + +#include + +namespace nall { + +struct service { + explicit operator bool() const; + auto command(const string& name, const string& command) -> bool; + auto receive() -> string; + auto name() const -> string; + auto stop() const -> bool; + +private: + shared_memory shared; + string _name; + bool _stop = false; +}; + +inline service::operator bool() const { + return (bool)shared; +} + +//returns true on 
new service process creation (false is not necessarily an error) +inline auto service::command(const string& name, const string& command) -> bool { + if(!name) return false; + if(!command) return print("[{0}] usage: {service} command\n" + "commands:\n" + " status : query whether service is running\n" + " start : start service if it is not running\n" + " stop : stop service if it is running\n" + " remove : remove semaphore lock if service crashed\n" + " {value} : send custom command to service\n" + "", string_format{name}), false; + + if(shared.open(name, 4096)) { + if(command == "start") { + print("[{0}] already started\n", string_format{name}); + } else if(command == "status") { + print("[{0}] running\n", string_format{name}); + } + if(auto data = shared.acquire()) { + if(command == "stop") print("[{0}] stopped\n", string_format{name}); + memory::copy(data, 4096, command.data(), command.size()); + shared.release(); + } + if(command == "remove") { + shared.remove(); + print("[{0}] removed\n", string_format{name}); + } + return false; + } + + if(command == "start") { + if(shared.create(name, 4096)) { + print("[{0}] started\n", string_format{name}); + auto pid = fork(); + if(pid == 0) { + signal(SIGHUP, SIG_IGN); + signal(SIGPIPE, SIG_IGN); + _name = name; + return true; + } + shared.close(); + } else { + print("[{0}] start failed ({1})\n", string_format{name, strerror(errno)}); + } + return false; + } + + if(command == "status") { + print("[{0}] stopped\n", string_format{name}); + return false; + } + + return false; +} + +inline auto service::receive() -> string { + string command; + if(shared) { + if(auto data = shared.acquire()) { + if(*data) { + command.resize(4095); + memory::copy(command.get(), data, 4095); + memory::fill(data, 4096); + } + shared.release(); + if(command == "remove") { + _stop = true; + return ""; + } else if(command == "start") { + return ""; + } else if(command == "status") { + return ""; + } else if(command == "stop") { + _stop = true; + 
shared.remove(); + return ""; + } + } + } + return command; +} + +inline auto service::name() const -> string { + return _name; +} + +inline auto service::stop() const -> bool { + return _stop; +} + +} diff --git a/waterbox/ares64/ares/nall/posix/shared-memory.hpp b/waterbox/ares64/ares/nall/posix/shared-memory.hpp new file mode 100644 index 0000000000..cbad592508 --- /dev/null +++ b/waterbox/ares64/ares/nall/posix/shared-memory.hpp @@ -0,0 +1,147 @@ +#pragma once + +#include +#include + +namespace nall { + +struct shared_memory { + shared_memory() = default; + shared_memory(const shared_memory&) = delete; + auto operator=(const shared_memory&) -> shared_memory& = delete; + + ~shared_memory() { + reset(); + } + + explicit operator bool() const { + return _mode != mode::inactive; + } + + auto size() const -> u32 { + return _size; + } + + auto acquired() const -> bool { + return _acquired; + } + + auto acquire() -> u8* { + if(!acquired()) { + sem_wait(_semaphore); + _acquired = true; + } + return _data; + } + + auto release() -> void { + if(acquired()) { + sem_post(_semaphore); + _acquired = false; + } + } + + auto reset() -> void { + release(); + if(_mode == mode::server) return remove(); + if(_mode == mode::client) return close(); + } + + auto create(const string& name, u32 size) -> bool { + reset(); + + _name = {"/nall-", string{name}.transform("/:", "--")}; + _size = size; + + //O_CREAT | O_EXCL seems to throw ENOENT even when semaphore does not exist ... 
+ _semaphore = sem_open(_name, O_CREAT, 0644, 1); + if(_semaphore == SEM_FAILED) return remove(), false; + + _descriptor = shm_open(_name, O_CREAT | O_TRUNC | O_RDWR, 0644); + if(_descriptor < 0) return remove(), false; + + if(ftruncate(_descriptor, _size) != 0) return remove(), false; + + _data = (u8*)mmap(nullptr, _size, PROT_READ | PROT_WRITE, MAP_SHARED, _descriptor, 0); + if(_data == MAP_FAILED) return remove(), false; + + memory::fill(_data, _size); + + _mode = mode::server; + return true; + } + + auto remove() -> void { + if(_data) { + munmap(_data, _size); + _data = nullptr; + } + + if(_descriptor) { + ::close(_descriptor); + shm_unlink(_name); + _descriptor = -1; + } + + if(_semaphore) { + sem_close(_semaphore); + sem_unlink(_name); + _semaphore = nullptr; + } + + _mode = mode::inactive; + _name = ""; + _size = 0; + } + + auto open(const string& name, u32 size) -> bool { + reset(); + + _name = {"/nall-", string{name}.transform("/:", "--")}; + _size = size; + + _semaphore = sem_open(_name, 0, 0644); + if(_semaphore == SEM_FAILED) return close(), false; + + _descriptor = shm_open(_name, O_RDWR, 0644); + if(_descriptor < 0) return close(), false; + + _data = (u8*)mmap(nullptr, _size, PROT_READ | PROT_WRITE, MAP_SHARED, _descriptor, 0); + if(_data == MAP_FAILED) return close(), false; + + _mode = mode::client; + return true; + } + + auto close() -> void { + if(_data) { + munmap(_data, _size); + _data = nullptr; + } + + if(_descriptor) { + ::close(_descriptor); + _descriptor = -1; + } + + if(_semaphore) { + sem_close(_semaphore); + _semaphore = nullptr; + } + + _mode = mode::inactive; + _name = ""; + _size = 0; + } + +private: + enum class mode : u32 { server, client, inactive } _mode = mode::inactive; + string _name; + sem_t* _semaphore = nullptr; + s32 _descriptor = -1; + u8* _data = nullptr; + u32 _size = 0; + bool _acquired = false; +}; + +} diff --git a/waterbox/ares64/ares/nall/primitives.hpp b/waterbox/ares64/ares/nall/primitives.hpp new file mode 100644 
index 0000000000..0c7065911d --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include + +namespace nall { + struct Boolean; + template struct Natural; + template struct Integer; + template struct Real; +} + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nall { + template auto Natural::integer() const -> Integer { return Integer(*this); } + template auto Integer::natural() const -> Natural { return Natural(*this); } +} diff --git a/waterbox/ares64/ares/nall/primitives/bit-field.hpp b/waterbox/ares64/ares/nall/primitives/bit-field.hpp new file mode 100644 index 0000000000..d2bd0d667d --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/bit-field.hpp @@ -0,0 +1,122 @@ +#pragma once + +namespace nall { + +template struct BitField; + +/* static BitField */ + +template struct BitField { + static_assert(Precision >= 1 && Precision <= 64); + using type = + conditional_t>>>; + enum : u32 { shift = Index < 0 ? 
Precision + Index : Index }; + enum : type { mask = 1ull << shift }; + + BitField(const BitField&) = delete; + + auto& operator=(const BitField& source) { + target = target & ~mask | (bool)source << shift; + return *this; + } + + template BitField(T* source) : target((type&)*source) { + static_assert(sizeof(T) == sizeof(type)); + } + + auto bit() const { + return shift; + } + + operator bool() const { + return target & mask; + } + + auto& operator=(bool source) { + target = target & ~mask | source << shift; + return *this; + } + + auto& operator&=(bool source) { + target = target & (~mask | source << shift); + return *this; + } + + auto& operator^=(bool source) { + target = target ^ source << shift; + return *this; + } + + auto& operator|=(bool source) { + target = target | source << shift; + return *this; + } + +private: + type& target; +}; + +/* dynamic BitField */ + +template struct BitField { + static_assert(Precision >= 1 && Precision <= 64); + using type = + conditional_t>>>; + + BitField(const BitField&) = delete; + + auto& operator=(const BitField& source) { + target = target & ~mask | (bool)source << shift; + return *this; + } + + template BitField(T* source, s32 index) : target((type&)*source) { + static_assert(sizeof(T) == sizeof(type)); + if(index < 0) index = Precision + index; + mask = 1ull << index; + shift = index; + } + + auto bit() const { + return shift; + } + + operator bool() const { + return target & mask; + } + + auto& operator=(bool source) { + target = target & ~mask | source << shift; + return *this; + } + + auto& operator&=(bool source) { + target = target & (~mask | source << shift); + return *this; + } + + auto& operator^=(bool source) { + target = target ^ source << shift; + return *this; + } + + auto& operator|=(bool source) { + target = target | source << shift; + return *this; + } + +private: + type& target; + type mask; + u32 shift; +}; + +} diff --git a/waterbox/ares64/ares/nall/primitives/bit-range.hpp 
b/waterbox/ares64/ares/nall/primitives/bit-range.hpp new file mode 100644 index 0000000000..ea9345c07c --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/bit-range.hpp @@ -0,0 +1,259 @@ +#pragma once + +namespace nall { + +/* static BitRange */ + +template struct BitRange { + static_assert(Precision >= 1 && Precision <= 64); + static_assert(Lo < Precision && Hi < Precision); + static_assert(Lo <= Hi); + using type = + conditional_t>>>; + enum : u32 { lo = Lo < 0 ? Precision + Lo : Lo }; + enum : u32 { hi = Hi < 0 ? Precision + Hi : Hi }; + enum : type { mask = ~0ull >> 64 - (hi - lo + 1) << lo }; + enum : u32 { shift = lo }; + + BitRange(const BitRange& source) = delete; + + auto& operator=(const BitRange& source) { + target = target & ~mask | ((source.target & source.mask) >> source.shift) << shift & mask; + return *this; + } + + template BitRange(T* source) : target((type&)*source) { + static_assert(sizeof(T) == sizeof(type)); + } + + operator type() const { + return (target & mask) >> shift; + } + + auto operator++(s32) { + auto value = (target & mask) >> shift; + target = target & ~mask | target + (1 << shift) & mask; + return value; + } + + auto operator--(s32) { + auto value = (target & mask) >> shift; + target = target & ~mask | target - (1 << shift) & mask; + return value; + } + + auto& operator++() { + target = target & ~mask | target + (1 << shift) & mask; + return *this; + } + + auto& operator--() { + target = target & ~mask | target - (1 << shift) & mask; + return *this; + } + + template auto& operator=(const T& source) { + type value = source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator*=(const T& source) { + auto value = ((target & mask) >> shift) * source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator/=(const T& source) { + auto value = ((target & mask) >> shift) / source; + target = target & ~mask | value << shift & mask; + return *this; 
+ } + + template auto& operator%=(const T& source) { + auto value = ((target & mask) >> shift) % source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator+=(const T& source) { + auto value = ((target & mask) >> shift) + source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator-=(const T& source) { + auto value = ((target & mask) >> shift) - source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator<<=(const T& source) { + auto value = ((target & mask) >> shift) << source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator>>=(const T& source) { + auto value = ((target & mask) >> shift) >> source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator&=(const T& source) { + type value = source; + target = target & (~mask | value << shift & mask); + return *this; + } + + template auto& operator^=(const T& source) { + type value = source; + target = target ^ value << shift & mask; + return *this; + } + + template auto& operator|=(const T& source) { + type value = source; + target = target | value << shift & mask; + return *this; + } + +private: + type& target; +}; + +/* dynamic BitRange */ + +template struct DynamicBitRange { + static_assert(Precision >= 1 && Precision <= 64); + using type = + conditional_t>>>; + + DynamicBitRange(const DynamicBitRange& source) = delete; + + auto& operator=(const DynamicBitRange& source) { + target = target & ~mask | ((source.target & source.mask) >> source.shift) << shift & mask; + return *this; + } + + DynamicBitRange(Type& source, s32 index) : target(source) { + if(index < 0) index = Precision + index; + mask = 1ull << index; + shift = index; + } + + DynamicBitRange(Type& source, s32 lo, s32 hi) : target(source) { + if(lo < 0) lo = Precision + lo; + if(hi < 0) hi = Precision + hi; + if(lo > hi) 
swap(lo, hi); + mask = ~0ull >> 64 - (hi - lo + 1) << lo; + shift = lo; + } + + operator type() const { + return (target & mask) >> shift; + } + + auto operator++(s32) { + auto value = (target & mask) >> shift; + target = target & ~mask | target + (1 << shift) & mask; + return value; + } + + auto operator--(s32) { + auto value = (target & mask) >> shift; + target = target & ~mask | target - (1 << shift) & mask; + return value; + } + + auto& operator++() { + target = target & ~mask | target + (1 << shift) & mask; + return *this; + } + + auto& operator--() { + target = target & ~mask | target - (1 << shift) & mask; + return *this; + } + + template auto& operator=(const T& source) { + type value = source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator*=(const T& source) { + auto value = ((target & mask) >> shift) * source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator/=(const T& source) { + auto value = ((target & mask) >> shift) / source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator%=(const T& source) { + auto value = ((target & mask) >> shift) % source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator+=(const T& source) { + auto value = ((target & mask) >> shift) + source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator-=(const T& source) { + auto value = ((target & mask) >> shift) - source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator<<=(const T& source) { + auto value = ((target & mask) >> shift) << source; + target = target & ~mask | value << shift & mask; + return *this; + } + + template auto& operator>>=(const T& source) { + auto value = ((target & mask) >> shift) >> source; + target = target & ~mask | value << shift & mask; + return *this; + } + + 
template auto& operator&=(const T& source) { + type value = source; + target = target & (~mask | value << shift & mask); + return *this; + } + + template auto& operator^=(const T& source) { + type value = source; + target = target ^ value << shift & mask; + return *this; + } + + template auto& operator|=(const T& source) { + type value = source; + target = target | value << shift & mask; + return *this; + } + +private: + Type& target; + type mask; + u32 shift; +}; + +} diff --git a/waterbox/ares64/ares/nall/primitives/boolean.hpp b/waterbox/ares64/ares/nall/primitives/boolean.hpp new file mode 100644 index 0000000000..b651177ae2 --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/boolean.hpp @@ -0,0 +1,30 @@ +#pragma once + +namespace nall { + +struct Boolean { + static constexpr auto bits() -> u32 { return 1; } + using btype = bool; + + Boolean() : data(false) {} + template Boolean(const T& value) : data(value) {} + explicit Boolean(const char* value) { data = !strcmp(value, "true"); } + + operator bool() const { return data; } + template auto& operator=(const T& value) { data = value; return *this; } + + auto flip() { return data ^= 1; } + auto raise() { return data == 0 ? data = 1, true : false; } + auto lower() { return data == 1 ? data = 0, true : false; } + + auto flip(bool value) { return data != value ? (data = value, true) : false; } + auto raise(bool value) { return !data && value ? (data = value, true) : (data = value, false); } + auto lower(bool value) { return data && !value ? 
(data = value, true) : (data = value, false); } + + auto serialize(serializer& s) { s(data); } + +private: + btype data; +}; + +} diff --git a/waterbox/ares64/ares/nall/primitives/integer.hpp b/waterbox/ares64/ares/nall/primitives/integer.hpp new file mode 100644 index 0000000000..b63bf11bc6 --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/integer.hpp @@ -0,0 +1,137 @@ +#pragma once + +namespace nall { + +template struct IntegerPrimitive { + static_assert(Precision >= 1 && Precision <= 64); + using stype = + conditional_t>>>; + using utype = typename Natural::utype; + + IntegerPrimitive() = default; + template IntegerPrimitive(IntegerPrimitive value) { data = cast(value); } + template IntegerPrimitive(const T& value) { data = cast(value); } + explicit IntegerPrimitive(const char* value) { data = cast(toInteger(value)); } + + operator stype() const { return data; } + + auto operator++(s32) { auto value = *this; data = cast(data + 1); return value; } + auto operator--(s32) { auto value = *this; data = cast(data - 1); return value; } + + auto& operator++() { data = cast(data + 1); return *this; } + auto& operator--() { data = cast(data - 1); return *this; } + + template auto& operator =(const T& value) { data = cast( value); return *this; } + template auto& operator *=(const T& value) { data = cast(data * value); return *this; } + template auto& operator /=(const T& value) { data = cast(data / value); return *this; } + template auto& operator %=(const T& value) { data = cast(data % value); return *this; } + template auto& operator +=(const T& value) { data = cast(data + value); return *this; } + template auto& operator -=(const T& value) { data = cast(data - value); return *this; } + template auto& operator<<=(const T& value) { data = cast(data << value); return *this; } + template auto& operator>>=(const T& value) { data = cast(data >> value); return *this; } + template auto& operator &=(const T& value) { data = cast(data & value); return *this; } + template 
auto& operator ^=(const T& value) { data = cast(data ^ value); return *this; } + template auto& operator |=(const T& value) { data = cast(data | value); return *this; } + +private: + static constexpr auto mask() -> utype { + return ~0ull >> 64 - Precision; + } + + static constexpr auto sign() -> utype { + return 1ull << Precision - 1; + } + + auto cast(stype value) const -> stype { + return (value & mask() ^ sign()) - sign(); + } + + stype data; +}; + +template struct Integer { + static_assert(Precision >= 1 && Precision <= 64); + static constexpr auto bits() -> u32 { return Precision; } + using stype = + conditional_t>>>; + using utype = typename Natural::utype; + static constexpr auto mask() -> utype { return ~0ull >> 64 - Precision; } + static constexpr auto sign() -> utype { return 1ull << Precision - 1; } + + Integer() : data(0) {} + template Integer(Integer value) { data = cast(value); } + template Integer(const T& value) { data = cast(value); } + explicit Integer(const char* value) { data = cast(toInteger(value)); } + + operator stype() const { return data; } + + auto operator++(s32) { auto value = *this; data = cast(data + 1); return value; } + auto operator--(s32) { auto value = *this; data = cast(data - 1); return value; } + + auto& operator++() { data = cast(data + 1); return *this; } + auto& operator--() { data = cast(data - 1); return *this; } + + template auto& operator =(const T& value) { data = cast( value); return *this; } + template auto& operator *=(const T& value) { data = cast(data * value); return *this; } + template auto& operator /=(const T& value) { data = cast(data / value); return *this; } + template auto& operator %=(const T& value) { data = cast(data % value); return *this; } + template auto& operator +=(const T& value) { data = cast(data + value); return *this; } + template auto& operator -=(const T& value) { data = cast(data - value); return *this; } + template auto& operator<<=(const T& value) { data = cast(data << value); return 
*this; } + template auto& operator>>=(const T& value) { data = cast(data >> value); return *this; } + template auto& operator &=(const T& value) { data = cast(data & value); return *this; } + template auto& operator ^=(const T& value) { data = cast(data ^ value); return *this; } + template auto& operator |=(const T& value) { data = cast(data | value); return *this; } + + auto bit(s32 index) -> DynamicBitRange { return {*this, index}; } + auto bit(s32 index) const -> const DynamicBitRange { return {(Integer&)*this, index}; } + + auto bit(s32 lo, s32 hi) -> DynamicBitRange { return {*this, lo, hi}; } + auto bit(s32 lo, s32 hi) const -> const DynamicBitRange { return {(Integer&)*this, lo, hi}; } + + auto byte(s32 index) -> DynamicBitRange { return {*this, index * 8 + 0, index * 8 + 7}; } + auto byte(s32 index) const -> const DynamicBitRange { return {(Integer&)*this, index * 8 + 0, index * 8 + 7}; } + + auto mask(s32 index) const -> utype { + return data & 1 << index; + } + + auto mask(s32 lo, s32 hi) const -> utype { + return data & (~0ull >> 64 - (hi - lo + 1) << lo); + } + + auto slice(s32 index) const { return Natural<>{bit(index)}; } + auto slice(s32 lo, s32 hi) const { return Natural<>{bit(lo, hi)}; } + + static auto clamp(s64 value) -> stype { + constexpr s64 b = 1ull << bits() - 1; + constexpr s64 m = b - 1; + return value > m ? m : value < -b ? 
-b : value; + } + + auto clip(u32 bits) -> stype { + const u64 b = 1ull << bits - 1; + const u64 m = b * 2 - 1; + return (data & m ^ b) - b; + } + + auto serialize(serializer& s) { s(data); } + auto natural() const -> Natural; + +private: + auto cast(stype value) const -> stype { + return (value & mask() ^ sign()) - sign(); + } + + stype data; +}; + +} diff --git a/waterbox/ares64/ares/nall/primitives/literals.hpp b/waterbox/ares64/ares/nall/primitives/literals.hpp new file mode 100644 index 0000000000..0e85233876 --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/literals.hpp @@ -0,0 +1,143 @@ +#pragma once + +namespace nall { + +inline auto operator"" _b(unsigned long long value) { return boolean{value}; } +inline auto operator"" _n(unsigned long long value) { return natural{value}; } +inline auto operator"" _i(unsigned long long value) { return integer{value}; } +inline auto operator"" _r(long double value) { return real{value}; } + +inline auto operator"" _n1(unsigned long long value) { return natural1{value}; } +inline auto operator"" _n2(unsigned long long value) { return natural2{value}; } +inline auto operator"" _n3(unsigned long long value) { return natural3{value}; } +inline auto operator"" _n4(unsigned long long value) { return natural4{value}; } +inline auto operator"" _n5(unsigned long long value) { return natural5{value}; } +inline auto operator"" _n6(unsigned long long value) { return natural6{value}; } +inline auto operator"" _n7(unsigned long long value) { return natural7{value}; } +inline auto operator"" _n8(unsigned long long value) { return natural8{value}; } +inline auto operator"" _n9(unsigned long long value) { return natural9{value}; } +inline auto operator"" _n10(unsigned long long value) { return natural10{value}; } +inline auto operator"" _n11(unsigned long long value) { return natural11{value}; } +inline auto operator"" _n12(unsigned long long value) { return natural12{value}; } +inline auto operator"" _n13(unsigned long long 
value) { return natural13{value}; } +inline auto operator"" _n14(unsigned long long value) { return natural14{value}; } +inline auto operator"" _n15(unsigned long long value) { return natural15{value}; } +inline auto operator"" _n16(unsigned long long value) { return natural16{value}; } +inline auto operator"" _n17(unsigned long long value) { return natural17{value}; } +inline auto operator"" _n18(unsigned long long value) { return natural18{value}; } +inline auto operator"" _n19(unsigned long long value) { return natural19{value}; } +inline auto operator"" _n20(unsigned long long value) { return natural20{value}; } +inline auto operator"" _n21(unsigned long long value) { return natural21{value}; } +inline auto operator"" _n22(unsigned long long value) { return natural22{value}; } +inline auto operator"" _n23(unsigned long long value) { return natural23{value}; } +inline auto operator"" _n24(unsigned long long value) { return natural24{value}; } +inline auto operator"" _n25(unsigned long long value) { return natural25{value}; } +inline auto operator"" _n26(unsigned long long value) { return natural26{value}; } +inline auto operator"" _n27(unsigned long long value) { return natural27{value}; } +inline auto operator"" _n28(unsigned long long value) { return natural28{value}; } +inline auto operator"" _n29(unsigned long long value) { return natural29{value}; } +inline auto operator"" _n30(unsigned long long value) { return natural30{value}; } +inline auto operator"" _n31(unsigned long long value) { return natural31{value}; } +inline auto operator"" _n32(unsigned long long value) { return natural32{value}; } +inline auto operator"" _n33(unsigned long long value) { return natural33{value}; } +inline auto operator"" _n34(unsigned long long value) { return natural34{value}; } +inline auto operator"" _n35(unsigned long long value) { return natural35{value}; } +inline auto operator"" _n36(unsigned long long value) { return natural36{value}; } +inline auto operator"" 
_n37(unsigned long long value) { return natural37{value}; } +inline auto operator"" _n38(unsigned long long value) { return natural38{value}; } +inline auto operator"" _n39(unsigned long long value) { return natural39{value}; } +inline auto operator"" _n40(unsigned long long value) { return natural40{value}; } +inline auto operator"" _n41(unsigned long long value) { return natural41{value}; } +inline auto operator"" _n42(unsigned long long value) { return natural42{value}; } +inline auto operator"" _n43(unsigned long long value) { return natural43{value}; } +inline auto operator"" _n44(unsigned long long value) { return natural44{value}; } +inline auto operator"" _n45(unsigned long long value) { return natural45{value}; } +inline auto operator"" _n46(unsigned long long value) { return natural46{value}; } +inline auto operator"" _n47(unsigned long long value) { return natural47{value}; } +inline auto operator"" _n48(unsigned long long value) { return natural48{value}; } +inline auto operator"" _n49(unsigned long long value) { return natural49{value}; } +inline auto operator"" _n50(unsigned long long value) { return natural50{value}; } +inline auto operator"" _n51(unsigned long long value) { return natural51{value}; } +inline auto operator"" _n52(unsigned long long value) { return natural52{value}; } +inline auto operator"" _n53(unsigned long long value) { return natural53{value}; } +inline auto operator"" _n54(unsigned long long value) { return natural54{value}; } +inline auto operator"" _n55(unsigned long long value) { return natural55{value}; } +inline auto operator"" _n56(unsigned long long value) { return natural56{value}; } +inline auto operator"" _n57(unsigned long long value) { return natural57{value}; } +inline auto operator"" _n58(unsigned long long value) { return natural58{value}; } +inline auto operator"" _n59(unsigned long long value) { return natural59{value}; } +inline auto operator"" _n60(unsigned long long value) { return natural60{value}; } +inline 
auto operator"" _n61(unsigned long long value) { return natural61{value}; } +inline auto operator"" _n62(unsigned long long value) { return natural62{value}; } +inline auto operator"" _n63(unsigned long long value) { return natural63{value}; } +inline auto operator"" _n64(unsigned long long value) { return natural64{value}; } + +inline auto operator"" _i1(unsigned long long value) { return integer1{value}; } +inline auto operator"" _i2(unsigned long long value) { return integer2{value}; } +inline auto operator"" _i3(unsigned long long value) { return integer3{value}; } +inline auto operator"" _i4(unsigned long long value) { return integer4{value}; } +inline auto operator"" _i5(unsigned long long value) { return integer5{value}; } +inline auto operator"" _i6(unsigned long long value) { return integer6{value}; } +inline auto operator"" _i7(unsigned long long value) { return integer7{value}; } +inline auto operator"" _i8(unsigned long long value) { return integer8{value}; } +inline auto operator"" _i9(unsigned long long value) { return integer9{value}; } +inline auto operator"" _i10(unsigned long long value) { return integer10{value}; } +inline auto operator"" _i11(unsigned long long value) { return integer11{value}; } +inline auto operator"" _i12(unsigned long long value) { return integer12{value}; } +inline auto operator"" _i13(unsigned long long value) { return integer13{value}; } +inline auto operator"" _i14(unsigned long long value) { return integer14{value}; } +inline auto operator"" _i15(unsigned long long value) { return integer15{value}; } +inline auto operator"" _i16(unsigned long long value) { return integer16{value}; } +inline auto operator"" _i17(unsigned long long value) { return integer17{value}; } +inline auto operator"" _i18(unsigned long long value) { return integer18{value}; } +inline auto operator"" _i19(unsigned long long value) { return integer19{value}; } +inline auto operator"" _i20(unsigned long long value) { return integer20{value}; } +inline 
auto operator"" _i21(unsigned long long value) { return integer21{value}; } +inline auto operator"" _i22(unsigned long long value) { return integer22{value}; } +inline auto operator"" _i23(unsigned long long value) { return integer23{value}; } +inline auto operator"" _i24(unsigned long long value) { return integer24{value}; } +inline auto operator"" _i25(unsigned long long value) { return integer25{value}; } +inline auto operator"" _i26(unsigned long long value) { return integer26{value}; } +inline auto operator"" _i27(unsigned long long value) { return integer27{value}; } +inline auto operator"" _i28(unsigned long long value) { return integer28{value}; } +inline auto operator"" _i29(unsigned long long value) { return integer29{value}; } +inline auto operator"" _i30(unsigned long long value) { return integer30{value}; } +inline auto operator"" _i31(unsigned long long value) { return integer31{value}; } +inline auto operator"" _i32(unsigned long long value) { return integer32{value}; } +inline auto operator"" _i33(unsigned long long value) { return integer33{value}; } +inline auto operator"" _i34(unsigned long long value) { return integer34{value}; } +inline auto operator"" _i35(unsigned long long value) { return integer35{value}; } +inline auto operator"" _i36(unsigned long long value) { return integer36{value}; } +inline auto operator"" _i37(unsigned long long value) { return integer37{value}; } +inline auto operator"" _i38(unsigned long long value) { return integer38{value}; } +inline auto operator"" _i39(unsigned long long value) { return integer39{value}; } +inline auto operator"" _i40(unsigned long long value) { return integer40{value}; } +inline auto operator"" _i41(unsigned long long value) { return integer41{value}; } +inline auto operator"" _i42(unsigned long long value) { return integer42{value}; } +inline auto operator"" _i43(unsigned long long value) { return integer43{value}; } +inline auto operator"" _i44(unsigned long long value) { return 
integer44{value}; } +inline auto operator"" _i45(unsigned long long value) { return integer45{value}; } +inline auto operator"" _i46(unsigned long long value) { return integer46{value}; } +inline auto operator"" _i47(unsigned long long value) { return integer47{value}; } +inline auto operator"" _i48(unsigned long long value) { return integer48{value}; } +inline auto operator"" _i49(unsigned long long value) { return integer49{value}; } +inline auto operator"" _i50(unsigned long long value) { return integer50{value}; } +inline auto operator"" _i51(unsigned long long value) { return integer51{value}; } +inline auto operator"" _i52(unsigned long long value) { return integer52{value}; } +inline auto operator"" _i53(unsigned long long value) { return integer53{value}; } +inline auto operator"" _i54(unsigned long long value) { return integer54{value}; } +inline auto operator"" _i55(unsigned long long value) { return integer55{value}; } +inline auto operator"" _i56(unsigned long long value) { return integer56{value}; } +inline auto operator"" _i57(unsigned long long value) { return integer57{value}; } +inline auto operator"" _i58(unsigned long long value) { return integer58{value}; } +inline auto operator"" _i59(unsigned long long value) { return integer59{value}; } +inline auto operator"" _i60(unsigned long long value) { return integer60{value}; } +inline auto operator"" _i61(unsigned long long value) { return integer61{value}; } +inline auto operator"" _i62(unsigned long long value) { return integer62{value}; } +inline auto operator"" _i63(unsigned long long value) { return integer63{value}; } +inline auto operator"" _i64(unsigned long long value) { return integer64{value}; } + +inline auto operator"" _r32(long double value) { return real32{value}; } +inline auto operator"" _r64(long double value) { return real32{value}; } + +} diff --git a/waterbox/ares64/ares/nall/primitives/natural.hpp b/waterbox/ares64/ares/nall/primitives/natural.hpp new file mode 100644 index 
0000000000..3cf74e01a8 --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/natural.hpp @@ -0,0 +1,132 @@ +#pragma once + +namespace nall { + +template struct NaturalPrimitive { + static_assert(Precision >= 1 && Precision <= 64); + using utype = + conditional_t>>>; + + NaturalPrimitive() = default; + template NaturalPrimitive(NaturalPrimitive value) { data = cast(value); } + template NaturalPrimitive(const T& value) { data = cast(value); } + explicit NaturalPrimitive(const char* value) { data = cast(toNatural(value)); } + + operator utype() const { return data; } + + auto operator++(s32) { auto value = *this; data = cast(data + 1); return value; } + auto operator--(s32) { auto value = *this; data = cast(data - 1); return value; } + + auto& operator++() { data = cast(data + 1); return *this; } + auto& operator--() { data = cast(data - 1); return *this; } + + template auto& operator =(const T& value) { data = cast( value); return *this; } + template auto& operator *=(const T& value) { data = cast(data * value); return *this; } + template auto& operator /=(const T& value) { data = cast(data / value); return *this; } + template auto& operator %=(const T& value) { data = cast(data % value); return *this; } + template auto& operator +=(const T& value) { data = cast(data + value); return *this; } + template auto& operator -=(const T& value) { data = cast(data - value); return *this; } + template auto& operator<<=(const T& value) { data = cast(data << value); return *this; } + template auto& operator>>=(const T& value) { data = cast(data >> value); return *this; } + template auto& operator &=(const T& value) { data = cast(data & value); return *this; } + template auto& operator ^=(const T& value) { data = cast(data ^ value); return *this; } + template auto& operator |=(const T& value) { data = cast(data | value); return *this; } + + auto serialize(serializer& s) { s(data); } + +private: + static constexpr auto mask() -> utype { + return ~0ull >> 64 - Precision; + } + + 
auto cast(utype value) const -> utype { + return value & mask(); + } + + utype data; +}; + +template struct Natural { + static_assert(Precision >= 1 && Precision <= 64); + static constexpr auto bits() -> u32 { return Precision; } + using utype = + conditional_t>>>; + static constexpr auto mask() -> utype { return ~0ull >> 64 - Precision; } + + Natural() : data(0) {} + template Natural(Natural value) { data = cast(value); } + template Natural(const T& value) { data = cast(value); } + explicit Natural(const char* value) { data = cast(toNatural(value)); } + + operator utype() const { return data; } + + auto operator++(s32) { auto value = *this; data = cast(data + 1); return value; } + auto operator--(s32) { auto value = *this; data = cast(data - 1); return value; } + + auto& operator++() { data = cast(data + 1); return *this; } + auto& operator--() { data = cast(data - 1); return *this; } + + template auto& operator =(const T& value) { data = cast( value); return *this; } + template auto& operator *=(const T& value) { data = cast(data * value); return *this; } + template auto& operator /=(const T& value) { data = cast(data / value); return *this; } + template auto& operator %=(const T& value) { data = cast(data % value); return *this; } + template auto& operator +=(const T& value) { data = cast(data + value); return *this; } + template auto& operator -=(const T& value) { data = cast(data - value); return *this; } + template auto& operator<<=(const T& value) { data = cast(data << value); return *this; } + template auto& operator>>=(const T& value) { data = cast(data >> value); return *this; } + template auto& operator &=(const T& value) { data = cast(data & value); return *this; } + template auto& operator ^=(const T& value) { data = cast(data ^ value); return *this; } + template auto& operator |=(const T& value) { data = cast(data | value); return *this; } + + auto bit(s32 index) -> DynamicBitRange { return {*this, index}; } + auto bit(s32 index) const -> const 
DynamicBitRange { return {(Natural&)*this, index}; } + + auto bit(s32 lo, s32 hi) -> DynamicBitRange { return {*this, lo, hi}; } + auto bit(s32 lo, s32 hi) const -> const DynamicBitRange { return {(Natural&)*this, lo, hi}; } + + auto byte(s32 index) -> DynamicBitRange { return {*this, index * 8 + 0, index * 8 + 7}; } + auto byte(s32 index) const -> const DynamicBitRange { return {(Natural&)*this, index * 8 + 0, index * 8 + 7}; } + + auto mask(s32 index) const -> utype { + return data & 1 << index; + } + + auto mask(s32 lo, s32 hi) const -> utype { + return data & (~0ull >> 64 - (hi - lo + 1) << lo); + } + + auto slice(s32 index) const { return Natural<>{bit(index)}; } + auto slice(s32 lo, s32 hi) const { return Natural<>{bit(lo, hi)}; } + + static auto clamp(u64 value) -> utype { + constexpr u64 b = 1ull << bits() - 1; + constexpr u64 m = b * 2 - 1; + return value < m ? value : m; + } + + auto clip(u32 bits) -> utype { + const u64 b = 1ull << bits - 1; + const u64 m = b * 2 - 1; + return data & m; + } + + auto serialize(serializer& s) { s(data); } + auto integer() const -> Integer; + +private: + auto cast(utype value) const -> utype { + return value & mask(); + } + + utype data; +}; + +} diff --git a/waterbox/ares64/ares/nall/primitives/real.hpp b/waterbox/ares64/ares/nall/primitives/real.hpp new file mode 100644 index 0000000000..d41f86238e --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/real.hpp @@ -0,0 +1,39 @@ +#pragma once + +namespace nall { + +template struct Real { + static_assert(Precision == 32 || Precision == 64); + static constexpr auto bits() -> u32 { return Precision; } + using ftype = + conditional_t>; + + Real() : data(0.0) {} + template Real(Real value) : data((ftype)value) {} + template Real(const T& value) : data((ftype)value) {} + explicit Real(const char* value) : data((ftype)toReal(value)) {} + + operator ftype() const { return data; } + + auto operator++(s32) { auto value = *this; ++data; return value; } + auto operator--(s32) { auto 
value = *this; --data; return value; } + + auto& operator++() { data++; return *this; } + auto& operator--() { data--; return *this; } + + template auto& operator =(const T& value) { data = value; return *this; } + template auto& operator*=(const T& value) { data = data * value; return *this; } + template auto& operator/=(const T& value) { data = data / value; return *this; } + template auto& operator%=(const T& value) { data = data % value; return *this; } + template auto& operator+=(const T& value) { data = data + value; return *this; } + template auto& operator-=(const T& value) { data = data - value; return *this; } + + auto serialize(serializer& s) { s(data); } + +private: + ftype data; +}; + +} diff --git a/waterbox/ares64/ares/nall/primitives/types.hpp b/waterbox/ares64/ares/nall/primitives/types.hpp new file mode 100644 index 0000000000..74ca6a6ab3 --- /dev/null +++ b/waterbox/ares64/ares/nall/primitives/types.hpp @@ -0,0 +1,45 @@ +#pragma once + +namespace nall { + using boolean = Boolean; + using natural = Natural<>; + using integer = Integer<>; + using real = Real<>; + + using natural1 = Natural< 1>; using natural2 = Natural< 2>; using natural3 = Natural< 3>; using natural4 = Natural< 4>; + using natural5 = Natural< 5>; using natural6 = Natural< 6>; using natural7 = Natural< 7>; using natural8 = Natural< 8>; + using natural9 = Natural< 9>; using natural10 = Natural<10>; using natural11 = Natural<11>; using natural12 = Natural<12>; + using natural13 = Natural<13>; using natural14 = Natural<14>; using natural15 = Natural<15>; using natural16 = Natural<16>; + using natural17 = Natural<17>; using natural18 = Natural<18>; using natural19 = Natural<19>; using natural20 = Natural<20>; + using natural21 = Natural<21>; using natural22 = Natural<22>; using natural23 = Natural<23>; using natural24 = Natural<24>; + using natural25 = Natural<25>; using natural26 = Natural<26>; using natural27 = Natural<27>; using natural28 = Natural<28>; + using natural29 = 
Natural<29>; using natural30 = Natural<30>; using natural31 = Natural<31>; using natural32 = Natural<32>; + using natural33 = Natural<33>; using natural34 = Natural<34>; using natural35 = Natural<35>; using natural36 = Natural<36>; + using natural37 = Natural<37>; using natural38 = Natural<38>; using natural39 = Natural<39>; using natural40 = Natural<40>; + using natural41 = Natural<41>; using natural42 = Natural<42>; using natural43 = Natural<43>; using natural44 = Natural<44>; + using natural45 = Natural<45>; using natural46 = Natural<46>; using natural47 = Natural<47>; using natural48 = Natural<48>; + using natural49 = Natural<49>; using natural50 = Natural<50>; using natural51 = Natural<51>; using natural52 = Natural<52>; + using natural53 = Natural<53>; using natural54 = Natural<54>; using natural55 = Natural<55>; using natural56 = Natural<56>; + using natural57 = Natural<57>; using natural58 = Natural<58>; using natural59 = Natural<59>; using natural60 = Natural<60>; + using natural61 = Natural<61>; using natural62 = Natural<62>; using natural63 = Natural<63>; using natural64 = Natural<64>; + + using integer1 = Integer< 1>; using integer2 = Integer< 2>; using integer3 = Integer< 3>; using integer4 = Integer< 4>; + using integer5 = Integer< 5>; using integer6 = Integer< 6>; using integer7 = Integer< 7>; using integer8 = Integer< 8>; + using integer9 = Integer< 9>; using integer10 = Integer<10>; using integer11 = Integer<11>; using integer12 = Integer<12>; + using integer13 = Integer<13>; using integer14 = Integer<14>; using integer15 = Integer<15>; using integer16 = Integer<16>; + using integer17 = Integer<17>; using integer18 = Integer<18>; using integer19 = Integer<19>; using integer20 = Integer<20>; + using integer21 = Integer<21>; using integer22 = Integer<22>; using integer23 = Integer<23>; using integer24 = Integer<24>; + using integer25 = Integer<25>; using integer26 = Integer<26>; using integer27 = Integer<27>; using integer28 = Integer<28>; + using 
integer29 = Integer<29>; using integer30 = Integer<30>; using integer31 = Integer<31>; using integer32 = Integer<32>; + using integer33 = Integer<33>; using integer34 = Integer<34>; using integer35 = Integer<35>; using integer36 = Integer<36>; + using integer37 = Integer<37>; using integer38 = Integer<38>; using integer39 = Integer<39>; using integer40 = Integer<40>; + using integer41 = Integer<41>; using integer42 = Integer<42>; using integer43 = Integer<43>; using integer44 = Integer<44>; + using integer45 = Integer<45>; using integer46 = Integer<46>; using integer47 = Integer<47>; using integer48 = Integer<48>; + using integer49 = Integer<49>; using integer50 = Integer<50>; using integer51 = Integer<51>; using integer52 = Integer<52>; + using integer53 = Integer<53>; using integer54 = Integer<54>; using integer55 = Integer<55>; using integer56 = Integer<56>; + using integer57 = Integer<57>; using integer58 = Integer<58>; using integer59 = Integer<59>; using integer60 = Integer<60>; + using integer61 = Integer<61>; using integer62 = Integer<62>; using integer63 = Integer<63>; using integer64 = Integer<64>; + + using real32 = Real<32>; + using real64 = Real<64>; +} diff --git a/waterbox/ares64/ares/nall/priority-queue.hpp b/waterbox/ares64/ares/nall/priority-queue.hpp new file mode 100644 index 0000000000..3dd6613148 --- /dev/null +++ b/waterbox/ares64/ares/nall/priority-queue.hpp @@ -0,0 +1,116 @@ +#pragma once + +//priority queue implementation using binary min-heap array: +//O(1) find +//O(log n) insert +//O(log n) remove(first) +//O(n) remove(event) + +#include +#include + +namespace nall { + +template struct priority_queue; + +template +struct priority_queue { + explicit operator bool() const { + return size != 0; + } + + auto reset() -> void { + clock = 0; + size = 0; + } + + template + auto step(u32 clocks, const F& callback) -> void { + clock += clocks; + while(size && ge(clock, heap[0].clock)) { + if(auto event = remove()) callback(*event); + } + } + + 
auto insert(const T& event, u32 clock) -> bool { + if(size >= Size) return false; + + u32 child = size++; + clock += this->clock; + + while(child) { + u32 parent = (child - 1) >> 1; + if(ge(clock, heap[parent].clock)) break; + + heap[child].clock = heap[parent].clock; + heap[child].event = heap[parent].event; + heap[child].valid = heap[parent].valid; + child = parent; + } + + heap[child].clock = clock; + heap[child].event = event; + heap[child].valid = true; + return true; + } + + auto remove() -> maybe { + T event = heap[0].event; + bool valid = heap[0].valid; + + u32 parent = 0; + u32 clock = heap[--size].clock; + + while(true) { + u32 child = (parent << 1) + 1; + if(child >= size) break; + + if(child + 1 < size && ge(heap[child].clock, heap[child + 1].clock)) child++; + if(ge(heap[child].clock, clock)) break; + + heap[parent].clock = heap[child].clock; + heap[parent].event = heap[child].event; + heap[parent].valid = heap[child].valid; + parent = child; + } + + heap[parent].clock = clock; + heap[parent].event = heap[size].event; + heap[parent].valid = heap[size].valid; + + if(valid) return event; + return nothing; + } + + auto remove(const T& event) -> void { + for(auto& entry : heap) { + if(entry.event == event) entry.valid = false; + } + } + + auto serialize(serializer& s) -> void { + s(clock); + s(size); + for(auto& entry : heap) { + s(entry.clock); + s(entry.event); + s(entry.valid); + } + } + +private: + //returns true if x is greater than or equal to y + auto ge(u32 x, u32 y) -> bool { + return x - y < 0x7fffffff; + } + + u32 clock = 0; + u32 size = 0; + struct Entry { + u32 clock; + T event; + bool valid; + } heap[Size]; +}; + +} diff --git a/waterbox/ares64/ares/nall/property.hpp b/waterbox/ares64/ares/nall/property.hpp new file mode 100644 index 0000000000..00dd7c7f17 --- /dev/null +++ b/waterbox/ares64/ares/nall/property.hpp @@ -0,0 +1,13 @@ +#if !defined(property) + #define property1(declaration) public: declaration + #define property2(declaration, 
getter) public: __declspec(property(get=getter)) declaration; protected: declaration##_ + #define property3(declaration, getter, setter) public: __declspec(property(get=getter, put=setter)) declaration; protected: declaration##_ + #define property_(_1, _2, _3, name, ...) name + #define property(...) property_(__VA_ARGS__, property3, property2, property1)(__VA_ARGS__) +#else + #undef property1 + #undef property2 + #undef property3 + #undef property_ + #undef property +#endif diff --git a/waterbox/ares64/ares/nall/queue.hpp b/waterbox/ares64/ares/nall/queue.hpp new file mode 100644 index 0000000000..6b5e774434 --- /dev/null +++ b/waterbox/ares64/ares/nall/queue.hpp @@ -0,0 +1,4 @@ +#include +#include +#include +#include diff --git a/waterbox/ares64/ares/nall/queue/spsc.hpp b/waterbox/ares64/ares/nall/queue/spsc.hpp new file mode 100644 index 0000000000..e7a237b47b --- /dev/null +++ b/waterbox/ares64/ares/nall/queue/spsc.hpp @@ -0,0 +1,66 @@ +#pragma once + +//single-producer, single-consumer lockless queue +//includes await functions for spin-loops + +namespace nall { + +template struct queue_spsc; + +template +struct queue_spsc { + auto flush() -> void { + _read = 0; + _write = 2 * Size; + } + + auto size() const -> u32 { + return (_write - _read) % (2 * Size); + } + + auto empty() const -> bool { + return size() == 0; + } + + auto full() const -> bool { + return size() == Size; + } + + auto read() -> maybe { + if(empty()) return nothing; + auto value = _data[_read % Size]; + _read = _read + 1 < 2 * Size ? _read + 1 : 0; + return value; + } + + auto write(const T& value) -> bool { + if(full()) return false; + _data[_write % Size] = value; + _write = _write + 1 < 4 * Size ? _write + 1 : 2 * Size; + return true; + } + + auto await_empty() -> void { + while(!empty()) spinloop(); + } + + auto await_read() -> T { + while(empty()) spinloop(); + auto value = _data[_read % Size]; + _read = _read + 1 < 2 * Size ? 
_read + 1 : 0; + return value; + } + + auto await_write(const T& value) -> void { + while(full()) spinloop(); + _data[_write % Size] = value; + _write = _write + 1 < 4 * Size ? _write + 1 : 2 * Size; + } + +private: + T _data[Size]; + std::atomic _read = 0; + std::atomic _write = 2 * Size; +}; + +} diff --git a/waterbox/ares64/ares/nall/queue/st.hpp b/waterbox/ares64/ares/nall/queue/st.hpp new file mode 100644 index 0000000000..7631be6cda --- /dev/null +++ b/waterbox/ares64/ares/nall/queue/st.hpp @@ -0,0 +1,187 @@ +#pragma once + +//simple circular ring buffer (single-threaded) + +namespace nall { + +template struct queue; + +template +struct queue { + auto flush() -> void { + _read = 0; + _write = 2 * Size; + } + + auto size() const -> u32 { + return (_write - _read) % (2 * Size); + } + + auto capacity() const -> u32 { + return Size; + } + + auto empty() const -> bool { + return size() == 0; + } + + auto full() const -> bool { + return size() == Size; + } + + auto peek(u32 index = 0) const -> T { + return _data[(_read + index) % Size]; + } + + auto read() -> maybe { + if(empty()) return nothing; + auto value = _data[_read % Size]; + _read = _read + 1 < 2 * Size ? _read + 1 : 0; + return value; + } + + auto read(const T& fallback) -> T { + if(empty()) return fallback; + auto value = _data[_read % Size]; + _read = _read + 1 < 2 * Size ? _read + 1 : 0; + return value; + } + + auto write(const T& value) -> bool { + if(full()) return false; + _data[_write % Size] = value; + _write = _write + 1 < 4 * Size ? 
_write + 1 : 2 * Size; + return true; + } + + struct iterator_const { + iterator_const(const queue& self, u64 offset) : self(self), offset(offset) {} + auto operator*() -> T { return self.peek(offset); } + auto operator!=(const iterator_const& source) const -> bool { return offset != source.offset; } + auto operator++() -> iterator_const& { return offset++, *this; } + + const queue& self; + u64 offset; + }; + + auto begin() const -> iterator_const { return {*this, 0}; } + auto end() const -> iterator_const { return {*this, size()}; } + + auto serialize(serializer& s) -> void { + s(_data); + s(_read); + s(_write); + } + +private: + T _data[Size]; + u32 _read = 0; + u32 _write = 2 * Size; +}; + +template +struct queue { + queue() = default; + queue(const queue& source) { operator=(source); } + queue(queue&& source) { operator=(move(source)); } + ~queue() { reset(); } + + auto operator=(const queue& source) -> queue& { + if(this == &source) return *this; + delete[] _data; + _data = new T[source._capacity]; + _capacity = source._capacity; + _size = source._size; + _read = source._read; + _write = source._write; + for(u32 n : range(_capacity)) _data[n] = source._data[n]; + return *this; + } + + auto operator=(queue&& source) -> queue& { + if(this == &source) return *this; + _data = source._data; + _capacity = source._capacity; + _size = source._size; + _read = source._read; + _write = source._write; + source._data = nullptr; + source.reset(); + return *this; + } + + template auto capacity() const -> u32 { return _capacity * sizeof(T) / sizeof(U); } + template auto size() const -> u32 { return _size * sizeof(T) / sizeof(U); } + auto empty() const -> bool { return _size == 0; } + auto pending() const -> bool { return _size > 0; } + auto full() const -> bool { return _size >= (s32)_capacity; } + auto underflow() const -> bool { return _size < 0; } + auto overflow() const -> bool { return _size > (s32)_capacity; } + + auto data() -> T* { return _data; } + auto data() const 
-> const T* { return _data; } + + auto reset() { + delete[] _data; + _data = nullptr; + _capacity = 0; + _size = 0; + _read = 0; + _write = 0; + } + + auto resize(u32 capacity, const T& value = {}) -> void { + delete[] _data; + _data = new T[capacity]; + _capacity = capacity; + _size = 0; + _read = 0; + _write = 0; + for(u32 n : range(_capacity)) _data[n] = value; + } + + auto flush() -> void { + _size = 0; + _read = 0; + _write = 0; + } + + auto fill(const T& value = {}) -> void { + _size = 0; + _read = 0; + _write = 0; + for(u32 n : range(_capacity)) _data[n] = value; + } + + auto peek(u32 index = 0) const -> T { + return _data[(_read + index) % _capacity]; + } + + auto read() -> T { + T value = _data[_read++]; + if(_read >= _capacity) _read = 0; + _size--; + return value; + } + + auto write(const T& value) -> void { + _data[_write++] = value; + if(_write >= _capacity) _write = 0; + _size++; + } + + auto serialize(serializer& s) -> void { + s(array_span{_data, _capacity}); + s(_read); + s(_write); + } + +private: + T* _data = nullptr; + u32 _capacity = 0; + s32 _size = 0; + u32 _read = 0; + u32 _write = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/random.hpp b/waterbox/ares64/ares/nall/random.hpp new file mode 100644 index 0000000000..d93532f6ee --- /dev/null +++ b/waterbox/ares64/ares/nall/random.hpp @@ -0,0 +1,172 @@ +#pragma once + +#include +#include +#include +#include +#include +#if !defined(PLATFORM_ANDROID) +#include +#endif + +#if defined(PLATFORM_LINUX) && __has_include() + #include +#elif defined(PLATFORM_ANDROID) && __has_include() + #include +#elif defined(PLATFORM_WINDOWS) && __has_include() + #include +#else + #include +#endif + +namespace nall { + +template struct RNG { + template auto random() -> T { + u64 value = 0; + for(u32 n : range((sizeof(T) + 3) / 4)) { + value = value << 32 | (u32)static_cast(this)->read(); + } + return value; + } + + template auto bound(T range) -> T { + T threshold = -range % range; + while(true) { + T value = 
random(); + if(value >= threshold) return value % range; + } + } + +protected: + auto randomSeed() -> u256 { + u256 seed = 0; + #if defined(PLATFORM_BSD) || defined(PLATFORM_MACOS) + for(u32 n : range(8)) seed = seed << 32 | (u32)arc4random(); + #elif defined(PLATFORM_LINUX) && __has_include() + getrandom(&seed, 32, GRND_NONBLOCK); + #elif defined(PLATFORM_ANDROID) && __has_include() + syscall(__NR_getrandom, &seed, 32, 0x0001); //GRND_NONBLOCK + #elif defined(PLATFORM_WINDOWS) && __has_include() + HCRYPTPROV provider; + if(CryptAcquireContext(&provider, nullptr, MS_STRONG_PROV, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { + CryptGenRandom(provider, 32, (BYTE*)&seed); + CryptReleaseContext(provider, 0); + } + #else + srand(time(nullptr)); + for(u32 n : range(32)) seed = seed << 8 | (u8)rand(); + if(auto fp = fopen("/dev/urandom", "rb")) { + fread(&seed, 32, 1, fp); + fclose(fp); + } + #endif + return seed; + } +}; + +namespace PRNG { + +//Galois linear feedback shift register using CRC64 polynomials +struct LFSR : RNG { + LFSR() { seed(); } + + auto seed(maybe seed = {}) -> void { + lfsr = seed ? 
seed() : (u64)randomSeed(); + for(u32 n : range(8)) read(); //hide the CRC64 polynomial from initial output + } + + auto serialize(serializer& s) -> void { + s(lfsr); + } + +private: + auto read() -> u64 { + return lfsr = (lfsr >> 1) ^ (-(lfsr & 1) & crc64); + } + + static const u64 crc64 = 0xc96c'5795'd787'0f42; + u64 lfsr = crc64; + + friend class RNG; +}; + +struct PCG : RNG { + PCG() { seed(); } + + auto seed(maybe seed = {}, maybe sequence = {}) -> void { + if(!seed) seed = (u32)randomSeed(); + if(!sequence) sequence = 0; + + state = 0; + increment = sequence() << 1 | 1; + read(); + state += seed(); + read(); + } + + auto serialize(serializer& s) -> void { + s(state); + s(increment); + } + +private: + auto read() -> u32 { + u64 state = this->state; + this->state = state * 6'364'136'223'846'793'005ull + increment; + u32 xorshift = (state >> 18 ^ state) >> 27; + u32 rotate = state >> 59; + return xorshift >> rotate | xorshift << (-rotate & 31); + } + + u64 state = 0; + u64 increment = 0; + + friend class RNG; +}; + +} + +#if !defined(PLATFORM_ANDROID) +namespace CSPRNG { + +//XChaCha20 cryptographically secure pseudo-random number generator +struct XChaCha20 : RNG { + XChaCha20() { seed(); } + + auto seed(maybe key = {}, maybe nonce = {}) -> void { + //the randomness comes from the key; the nonce just adds a bit of added entropy + if(!key) key = randomSeed(); + if(!nonce) nonce = (u192)clock() << 64 | chrono::nanosecond(); + context = {key(), nonce()}; + } + +private: + auto read() -> u32 { + if(!counter) { context.cipher(); context.increment(); } + u32 value = context.block[counter++]; + if(counter == 16) counter = 0; //64-bytes per block; 4 bytes per read + return value; + } + + Cipher::XChaCha20 context{0, 0}; + u32 counter = 0; + + friend class RNG; +}; + +} +#endif + +// + +inline auto pcgSingleton() -> PRNG::PCG& { + static PRNG::PCG pcg; + return pcg; +} + +template inline auto random() -> T { + return pcgSingleton().random(); +} + +} diff --git 
a/waterbox/ares64/ares/nall/range.hpp b/waterbox/ares64/ares/nall/range.hpp new file mode 100644 index 0000000000..eaba658410 --- /dev/null +++ b/waterbox/ares64/ares/nall/range.hpp @@ -0,0 +1,85 @@ +#pragma once + +#include + +namespace nall { + +template +struct range_t { + struct iterator { + iterator(T position, T step = 0) : position(position), step(step) {} + auto operator*() const -> T { return position; } + auto operator!=(const iterator& source) const -> bool { return step > 0 ? position < source.position : position > source.position; } + auto operator++() -> iterator& { position += step; return *this; } + + private: + T position; + const T step; + }; + + struct reverse_iterator { + reverse_iterator(T position, T step = 0) : position(position), step(step) {} + auto operator*() const -> T { return position; } + auto operator!=(const reverse_iterator& source) const -> bool { return step > 0 ? position > source.position : position < source.position; } + auto operator++() -> reverse_iterator& { position -= step; return *this; } + + private: + T position; + const T step; + }; + + auto begin() const -> iterator { return {origin, stride}; } + auto end() const -> iterator { return {target}; } + + auto rbegin() const -> reverse_iterator { return {target - stride, stride}; } + auto rend() const -> reverse_iterator { return {origin - stride}; } + + T origin; + T target; + T stride; +}; + +template +inline auto range(s64 size) { + return range_t{0, size, 1}; +} + +template +inline auto range(s64 offset, s64 size) { + return range_t{offset, size, 1}; +} + +template +inline auto range(s64 offset, s64 size, s64 step) { + return range_t{offset, size, step}; +} + +//returns true if {offset ... offset+length-1} is within {min ... max} in range {lo ... 
hi} +template +inline auto within(s64 offset, s64 length, s64 min, s64 max) -> bool { + static_assert(lo <= hi); + static constexpr s64 range = hi - lo + 1; + s64 lhs = (offset - lo) % range; + s64 rhs = (offset + length - 1) % range; + min = (min - lo) % range; + max = (max - lo) % range; + if(rhs < lhs) { + return lhs <= max || rhs >= min; + } else { + return max >= lhs && min <= rhs; + } +} + +//returns index of target within {offset ... offset+length-1} in range {lo ... hi} +template +inline auto within(s64 offset, s64 length, s64 target) -> maybe { + static_assert(lo <= hi); + static constexpr s64 range = hi - lo + 1; + s64 start = (offset - lo) % range; + s64 index = (target - lo) % range - start; + if(index < 0) index += range; + if(index < length) return index; + return {}; +} + +} diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/amd64.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/amd64.hpp new file mode 100644 index 0000000000..2679599f2a --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/amd64/amd64.hpp @@ -0,0 +1,14 @@ +#pragma once + +namespace nall::recompiler { + struct amd64 { + #include "emitter.hpp" + #include "constants.hpp" + #include "encoder-instructions.hpp" + #if defined(PLATFORM_WINDOWS) + #include "encoder-calls-windows.hpp" + #else + #include "encoder-calls-systemv.hpp" + #endif + }; +} diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/constants.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/constants.hpp new file mode 100644 index 0000000000..fb17e8b0ad --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/amd64/constants.hpp @@ -0,0 +1,173 @@ +#pragma once + +//{ + struct imm8 { + explicit imm8(u8 data) : data(data) {} + u8 data; + }; + + struct imm16 { + explicit imm16(u16 data) : data(data) {} + u16 data; + }; + + struct imm32 { + explicit imm32(u32 data) : data(data) {} + u32 data; + }; + + struct imm64 { + explicit imm64(u64 data) : data(data) {} + template explicit imm64(T* pointer) : data((u64)pointer) {} 
+ template explicit imm64(auto (C::*function)(P...) -> R) { + union force_cast_ub { + auto (C::*function)(P...) -> R; + u64 pointer; + } cast{function}; + data = cast.pointer; + } + template explicit imm64(auto (C::*function)(P...) const -> R) { + union force_cast_ub { + auto (C::*function)(P...) const -> R; + u64 pointer; + } cast{function}; + data = cast.pointer; + } + u64 data; + }; + + struct mem32 { + explicit mem32(u64 data) : data(data) {} + template explicit mem32(T* pointer) : data((u64)pointer) {} + template explicit mem32(T C::*variable, C* object) { + union force_cast_ub { + T C::*variable; + u64 pointer; + } cast{variable}; + data = cast.pointer + u64(object); + } + u64 data; + }; + + struct mem64 { + explicit mem64(u64 data) : data(data) {} + template explicit mem64(T* pointer) : data((u64)pointer) {} + template explicit mem64(T C::*variable, C* object) { + union force_cast_ub { + T C::*variable; + u64 pointer; + } cast{variable}; + data = cast.pointer + u64(object); + } + u64 data; + }; + + enum class reg8 : u32 { + al, cl, dl, bl, ah, ch, dh, bh, r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b, + }; + friend auto operator&(reg8 r, u32 m) -> u32 { + return (u32)r & m; + } + static constexpr reg8 al = reg8::al; + static constexpr reg8 cl = reg8::cl; + static constexpr reg8 dl = reg8::dl; + static constexpr reg8 bl = reg8::bl; + static constexpr reg8 ah = reg8::ah; + static constexpr reg8 ch = reg8::ch; + static constexpr reg8 dh = reg8::dh; + static constexpr reg8 bh = reg8::bh; + static constexpr reg8 r8b = reg8::r8b; + static constexpr reg8 r9b = reg8::r9b; + static constexpr reg8 r10b = reg8::r10b; + static constexpr reg8 r11b = reg8::r11b; + static constexpr reg8 r12b = reg8::r12b; + static constexpr reg8 r13b = reg8::r13b; + static constexpr reg8 r14b = reg8::r14b; + static constexpr reg8 r15b = reg8::r15b; + + enum class reg16 : u32 { + ax, cx, dx, bx, sp, bp, si, di, r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w, + }; + friend auto operator&(reg16 
r, u32 m) -> u32 { + return (u32)r & m; + } + static constexpr reg16 ax = reg16::ax; + static constexpr reg16 cx = reg16::cx; + static constexpr reg16 dx = reg16::dx; + static constexpr reg16 bx = reg16::bx; + static constexpr reg16 sp = reg16::sp; + static constexpr reg16 bp = reg16::bp; + static constexpr reg16 si = reg16::si; + static constexpr reg16 di = reg16::di; + static constexpr reg16 r8w = reg16::r8w; + static constexpr reg16 r9w = reg16::r9w; + static constexpr reg16 r10w = reg16::r10w; + static constexpr reg16 r11w = reg16::r11w; + static constexpr reg16 r12w = reg16::r12w; + static constexpr reg16 r13w = reg16::r13w; + static constexpr reg16 r14w = reg16::r14w; + static constexpr reg16 r15w = reg16::r15w; + + enum class reg32 : u32 { + eax, ecx, edx, ebx, esp, ebp, esi, edi, r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d, + }; + friend auto operator&(reg32 r, u32 m) -> u32 { + return (u32)r & m; + } + static constexpr reg32 eax = reg32::eax; + static constexpr reg32 ecx = reg32::ecx; + static constexpr reg32 edx = reg32::edx; + static constexpr reg32 ebx = reg32::ebx; + static constexpr reg32 esp = reg32::esp; + static constexpr reg32 ebp = reg32::ebp; + static constexpr reg32 esi = reg32::esi; + static constexpr reg32 edi = reg32::edi; + static constexpr reg32 r8d = reg32::r8d; + static constexpr reg32 r9d = reg32::r9d; + static constexpr reg32 r10d = reg32::r10d; + static constexpr reg32 r11d = reg32::r11d; + static constexpr reg32 r12d = reg32::r12d; + static constexpr reg32 r13d = reg32::r13d; + static constexpr reg32 r14d = reg32::r14d; + static constexpr reg32 r15d = reg32::r15d; + + enum class reg64 : u32 { + rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15, + }; + friend auto operator&(reg64 r, u32 m) -> u32 { + return (u32)r & m; + } + static constexpr reg64 rax = reg64::rax; + static constexpr reg64 rcx = reg64::rcx; + static constexpr reg64 rdx = reg64::rdx; + static constexpr reg64 rbx = reg64::rbx; + static 
constexpr reg64 rsp = reg64::rsp; + static constexpr reg64 rbp = reg64::rbp; + static constexpr reg64 rsi = reg64::rsi; + static constexpr reg64 rdi = reg64::rdi; + static constexpr reg64 r8 = reg64::r8; + static constexpr reg64 r9 = reg64::r9; + static constexpr reg64 r10 = reg64::r10; + static constexpr reg64 r11 = reg64::r11; + static constexpr reg64 r12 = reg64::r12; + static constexpr reg64 r13 = reg64::r13; + static constexpr reg64 r14 = reg64::r14; + static constexpr reg64 r15 = reg64::r15; + + struct dis { + explicit dis(reg64 reg) : reg(reg) {} + reg64 reg; + }; + + struct dis8 { + explicit dis8(reg64 reg, s8 imm) : reg(reg), imm(imm) {} + reg64 reg; + s8 imm; + }; + + struct dis32 { + explicit dis32(reg64 reg, s32 imm) : reg(reg), imm(imm) {} + reg64 reg; + s32 imm; + }; +//}; diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/emitter.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/emitter.hpp new file mode 100644 index 0000000000..631c9ce20f --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/amd64/emitter.hpp @@ -0,0 +1,119 @@ +#pragma once + +struct emitter { + auto byte() { + } + + template + alwaysinline auto byte(u8 data, P&&... p) { + span.write(data); + byte(forward

(p)...); + } + + alwaysinline auto word(u16 data) { + span.write(data >> 0); + span.write(data >> 8); + } + + alwaysinline auto dword(u32 data) { + span.write(data >> 0); + span.write(data >> 8); + span.write(data >> 16); + span.write(data >> 24); + } + + alwaysinline auto qword(u64 data) { + span.write(data >> 0); + span.write(data >> 8); + span.write(data >> 16); + span.write(data >> 24); + span.write(data >> 32); + span.write(data >> 40); + span.write(data >> 48); + span.write(data >> 56); + } + + alwaysinline auto rex(bool w, bool r, bool x, bool b) { + u8 data = 0x40 | w << 3 | r << 2 | x << 1 | b << 0; + if(data == 0x40) return; //rex prefix not needed + byte(data); + } + + //mod: {[r/m], [r/m+dis8], [r/m+dis32], r/m} + alwaysinline auto modrm(u8 mod, u8 reg, u8 rm) { + byte(mod << 6 | reg << 3 | rm << 0); + } + + //scale: {index*1, index*2, index*4, index*8} + //index: {eax, ecx, edx, ebx, invalid, ebp, esi, edi} + //base: {eax, ecx, edx, ebx, esp, displacement, esi, edi} + alwaysinline auto sib(u8 scale, u8 index, u8 base) { + byte(scale << 6 | index << 3 | base << 0); + } + + array_span span, origin; +} emit; + +struct label { + explicit label(u32 index) : index(index) {} + u32 index; +}; + +struct fixup { + u32 index; + u32 offset; + u32 size; +}; + +vector labelOffsets; +vector fixups; + +alwaysinline auto bind(array_span span) { + emit.span = span; + emit.origin = span; + labelOffsets.reset(); + assert(fixups.size() == 0); + fixups.reset(); +} + +alwaysinline auto declareLabel() -> label { + labelOffsets.append(~0); + return label{labelOffsets.size() - 1}; +} + +alwaysinline auto defineLabel(label label) -> amd64::label { + u32 labelOffset = size(); + labelOffsets[label.index] = labelOffset; + for(u32 n = 0; n < fixups.size(); ) { + auto fixup = fixups[n]; + if(fixup.index == label.index) { + u32 value = labelOffset - (fixup.offset + fixup.size); + emit.origin.span(fixup.offset, fixup.size).writel(value, fixup.size); + fixups.removeByIndex(n); + 
continue; + } + n++; + } + return label; +} + +alwaysinline auto defineLabel() -> label { + return defineLabel(declareLabel()); +} + +alwaysinline auto resolve(label label, u32 offset, u32 size) -> u32 { + u32 labelOffset = labelOffsets[label.index]; + if(labelOffset == ~0) { + fixups.append(fixup{label.index, this->size() + offset, size}); + return ~0; + } + return labelOffset - (this->size() + offset + size); +} + +alwaysinline auto distance(u64 target) const -> s64 { + return target - (u64)emit.span.data(); +} + +alwaysinline auto size() const -> u32 { + return emit.span.data() - emit.origin.data(); +} diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-systemv.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-systemv.hpp new file mode 100644 index 0000000000..7aedb1b542 --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-systemv.hpp @@ -0,0 +1,82 @@ +#pragma once + +//{ + //register aliases for function arguments + static constexpr reg32 ra0d = reg32::edi; + static constexpr reg32 ra1d = reg32::esi; + static constexpr reg32 ra2d = reg32::edx; + static constexpr reg32 ra3d = reg32::ecx; + static constexpr reg32 ra4d = reg32::r8d; + static constexpr reg32 ra5d = reg32::r9d; + + static constexpr reg64 ra0 = reg64::rdi; + static constexpr reg64 ra1 = reg64::rsi; + static constexpr reg64 ra2 = reg64::rdx; + static constexpr reg64 ra3 = reg64::rcx; + static constexpr reg64 ra4 = reg64::r8; + static constexpr reg64 ra5 = reg64::r9; + + //virtual instructions to call member functions + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object) { + sub(rsp, imm8{0x08}); + mov(rdi, imm64{object}); + call(imm64{function}, rax); + add(rsp, imm8{0x08}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) 
-> R, C* object, P0 p0) { + sub(rsp, imm8{0x08}); + mov(rdi, imm64{object}); + mov(rsi, imm64{p0}); + call(imm64{function}, rax); + add(rsp, imm8{0x08}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1) { + sub(rsp, imm8{0x08}); + mov(rdi, imm64{object}); + mov(rsi, imm64{p0}); + mov(rdx, imm64{p1}); + call(imm64{function}, rax); + add(rsp, imm8{0x08}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1, P2 p2) { + sub(rsp, imm8{0x08}); + mov(rdi, imm64{object}); + mov(rsi, imm64{p0}); + mov(rdx, imm64{p1}); + mov(rcx, imm64{p2}); + call(imm64{function}, rax); + add(rsp, imm8{0x08}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1, P2 p2, P3 p3) { + sub(rsp, imm8{0x08}); + mov(rdi, imm64{object}); + mov(rsi, imm64{p0}); + mov(rdx, imm64{p1}); + mov(rcx, imm64{p2}); + mov(r8, imm64{p3}); + call(imm64{function}, rax); + add(rsp, imm8{0x08}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) 
-> R, C* object, P0 p0, P1 p1, P2 p2, P3 p3, P4 p4) { + sub(rsp, imm8{0x08}); + mov(rdi, imm64{object}); + mov(rsi, imm64{p0}); + mov(rdx, imm64{p1}); + mov(rcx, imm64{p2}); + mov(r8, imm64{p3}); + mov(r9, imm64{p4}); + call(imm64{function}, rax); + add(rsp, imm8{0x08}); + } +//}; diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-windows.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-windows.hpp new file mode 100644 index 0000000000..a5232a3196 --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-calls-windows.hpp @@ -0,0 +1,85 @@ +#pragma once + +//{ + //register aliases for function arguments + static constexpr reg32 ra0d = reg32::ecx; + static constexpr reg32 ra1d = reg32::edx; + static constexpr reg32 ra2d = reg32::r8d; + static constexpr reg32 ra3d = reg32::r9d; + static constexpr reg32 ra4d = reg32::r10d; //actually passed on stack + static constexpr reg32 ra5d = reg32::r11d; //actually passed on stack + + static constexpr reg64 ra0 = reg64::rcx; + static constexpr reg64 ra1 = reg64::rdx; + static constexpr reg64 ra2 = reg64::r8; + static constexpr reg64 ra3 = reg64::r9; + static constexpr reg64 ra4 = reg64::r10; //actually passed on stack + static constexpr reg64 ra5 = reg64::r11; //actually passed on stack + + //virtual instructions to call member functions + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object) { + sub(rsp, imm8{0x28}); + mov(rcx, imm64{object}); + call(imm64{function}, rax); + add(rsp, imm8{0x28}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0) { + sub(rsp, imm8{0x28}); + mov(rcx, imm64{object}); + mov(rdx, imm64{p0}); + call(imm64{function}, rax); + add(rsp, imm8{0x28}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) 
-> R, C* object, P0 p0, P1 p1) { + sub(rsp, imm8{0x28}); + mov(rcx, imm64{object}); + mov(rdx, imm64{p0}); + mov(r8, imm64{p1}); + call(imm64{function}, rax); + add(rsp, imm8{0x28}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1, P2 p2) { + sub(rsp, imm8{0x28}); + mov(rcx, imm64{object}); + mov(rdx, imm64{p0}); + mov(r8, imm64{p1}); + mov(r9, imm64{p2}); + call(imm64{function}, rax); + add(rsp, imm8{0x28}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1, P2 p2, P3 p3) { + sub(rsp, imm8{0x38}); + mov(rcx, imm64{object}); + mov(rdx, imm64{p0}); + mov(r8, imm64{p1}); + mov(r9, imm64{p2}); + mov(rax, imm64{p3}); + mov(dis8{rsp, 0x20}, rax); + call(imm64{function}, rax); + add(rsp, imm8{0x38}); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1, P2 p2, P3 p3, P4 p4) { + sub(rsp, imm8{0x38}); + mov(rcx, imm64{object}); + mov(rdx, imm64{p0}); + mov(r8, imm64{p1}); + mov(r9, imm64{p2}); + mov(rax, imm64{p3}); + mov(dis8{rsp, 0x20}, rax); + mov(rax, imm64{p4}); + mov(dis8{rsp, 0x28}, rax); + call(imm64{function}, rax); + add(rsp, imm8{0x38}); + } +//}; diff --git a/waterbox/ares64/ares/nall/recompiler/amd64/encoder-instructions.hpp b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-instructions.hpp new file mode 100644 index 0000000000..9985c4112c --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/amd64/encoder-instructions.hpp @@ -0,0 +1,864 @@ +#pragma once + +//{ + alwaysinline auto clc() { emit.byte(0xf8); } + alwaysinline auto cmc() { emit.byte(0xf5); } + alwaysinline auto lahf() { emit.byte(0x9f); } + alwaysinline auto sahf() { emit.byte(0x9e); } + alwaysinline auto stc() { emit.byte(0xf9); } + alwaysinline auto ret() { emit.byte(0xc3); } + + //call imm32 + alwaysinline auto call(imm32 it) { + emit.byte(0xe8); + emit.dword(it.data); + } + + //jmp imm32 + alwaysinline auto jmp(imm32 it) { + emit.byte(0xe9); + 
emit.dword(it.data); + } + + //call reg64 + alwaysinline auto call(reg64 rt) { + emit.rex(0, 0, 0, rt & 8); + emit.byte(0xff); + emit.modrm(3, 2, rt & 7); + } + + //lea reg64,[reg64+imm8] + alwaysinline auto lea(reg64 rt, dis8 ds) { + emit.rex(1, rt & 8, 0, ds.reg & 8); + emit.byte(0x8d); + emit.modrm(1, rt & 7, ds.reg & 7); + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); + emit.byte(ds.imm); + } + + //lea reg64,[reg64+imm32] + alwaysinline auto lea(reg64 rt, dis32 ds) { + emit.rex(1, rt & 8, 0, ds.reg & 8); + emit.byte(0x8d); + emit.modrm(2, rt & 7, ds.reg & 7); + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); + emit.dword(ds.imm); + } + + //mov reg8,imm8 + alwaysinline auto mov(reg8 rt, imm8 is) { + emit.rex(0, 0, 0, rt & 8); + emit.byte(0xb0 | rt & 7); + emit.byte(is.data); + } + + //mov reg32,imm32 + alwaysinline auto mov(reg32 rt, imm32 is) { + emit.rex(0, 0, 0, rt & 8); + emit.byte(0xb8 | rt & 7); + emit.dword(is.data); + } + + //mov reg64,imm32 + alwaysinline auto mov(reg64 rt, imm32 is) { + emit.rex(1, 0, 0, rt & 8); + emit.byte(0xc7); + emit.modrm(3, 0, rt & 7); + emit.dword(is.data); + } + + //mov reg64,imm64 + alwaysinline auto mov(reg64 rt, imm64 is) { + emit.rex(1, 0, 0, rt & 8); + emit.byte(0xb8 | rt & 7); + emit.qword(is.data); + } + + //mov reg8,[mem64] + alwaysinline auto mov(reg8 rt, mem64 ps) { + if(unlikely(rt != al)) throw; + emit.byte(0xa0); + emit.qword(ps.data); + } + + //mov reg16,[mem64] + alwaysinline auto mov(reg16 rt, mem64 ps) { + if(unlikely(rt != ax)) throw; + emit.byte(0x66, 0xa1); + emit.qword(ps.data); + } + + //mov reg32,[mem64] + alwaysinline auto mov(reg32 rt, mem64 ps) { + if(unlikely(rt != eax)) throw; + emit.byte(0xa1); + emit.qword(ps.data); + } + + //mov reg64,[mem64] + alwaysinline auto mov(reg64 rt, mem64 ps) { + if(unlikely(rt != rax)) throw; + emit.rex(1, 0, 0, 0); + emit.byte(0xa1); + emit.qword(ps.data); + } + + //mov [mem64],reg8 + alwaysinline auto mov(mem64 pt, reg8 rs) { + if(unlikely(rs != al)) 
throw; + emit.byte(0xa2); + emit.qword(pt.data); + } + + //mov [mem64+imm8],imm8 + alwaysinline auto movb(dis8 dt, imm8 is) { + emit.rex(0, 0, 0, dt.reg & 8); + emit.byte(0xc6); + emit.modrm(1, 0, dt.reg & 7); + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); + emit.byte(dt.imm); + emit.byte(is.data); + } + + //mov [mem64],reg16 + alwaysinline auto mov(mem64 pt, reg16 rs) { + if(unlikely(rs != ax)) throw; + emit.byte(0x66, 0xa3); + emit.qword(pt.data); + } + + //mov [mem64],reg32 + alwaysinline auto mov(mem64 pt, reg32 rs) { + if(unlikely(rs != eax)) throw; + emit.byte(0xa3); + emit.qword(pt.data); + } + + //mov [mem64],reg64 + alwaysinline auto mov(mem64 pt, reg64 rs) { + if(unlikely(rs != rax)) throw; + emit.rex(1, 0, 0, 0); + emit.byte(0xa3); + emit.qword(pt.data); + } + + //op reg8,[reg64] + #define op(code) \ + emit.rex(0, rt & 8, 0, ds.reg & 8); \ + emit.byte(code); \ + emit.modrm(0, rt & 7, ds.reg & 7); \ + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); + alwaysinline auto adc(reg8 rt, dis ds) { op(0x12); } + alwaysinline auto add(reg8 rt, dis ds) { op(0x02); } + alwaysinline auto and(reg8 rt, dis ds) { op(0x22); } + alwaysinline auto cmp(reg8 rt, dis ds) { op(0x3a); } + alwaysinline auto mov(reg8 rt, dis ds) { op(0x8a); } + alwaysinline auto or (reg8 rt, dis ds) { op(0x0a); } + alwaysinline auto sbb(reg8 rt, dis ds) { op(0x1a); } + alwaysinline auto sub(reg8 rt, dis ds) { op(0x2a); } + alwaysinline auto xor(reg8 rt, dis ds) { op(0x32); } + #undef op + + //op reg8,[reg64+imm8] + #define op(code) \ + emit.rex(0, rt & 8, 0, ds.reg & 8); \ + emit.byte(code); \ + emit.modrm(1, rt & 7, ds.reg & 7); \ + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); \ + emit.byte(ds.imm); + alwaysinline auto adc(reg8 rt, dis8 ds) { op(0x12); } + alwaysinline auto add(reg8 rt, dis8 ds) { op(0x02); } + alwaysinline auto and(reg8 rt, dis8 ds) { op(0x22); } + alwaysinline auto cmp(reg8 rt, dis8 ds) { op(0x3a); } + alwaysinline auto mov(reg8 rt, dis8 ds) { op(0x8a); 
} + alwaysinline auto or (reg8 rt, dis8 ds) { op(0x0a); } + alwaysinline auto sbb(reg8 rt, dis8 ds) { op(0x1a); } + alwaysinline auto sub(reg8 rt, dis8 ds) { op(0x2a); } + alwaysinline auto xor(reg8 rt, dis8 ds) { op(0x32); } + #undef op + + //op reg32,[reg64] + #define op(code) \ + emit.rex(0, rt & 8, 0, ds.reg & 8); \ + emit.byte(code); \ + emit.modrm(0, rt & 7, ds.reg & 7); \ + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); + alwaysinline auto adc(reg32 rt, dis ds) { op(0x13); } + alwaysinline auto add(reg32 rt, dis ds) { op(0x03); } + alwaysinline auto and(reg32 rt, dis ds) { op(0x23); } + alwaysinline auto cmp(reg32 rt, dis ds) { op(0x3b); } + alwaysinline auto mov(reg32 rt, dis ds) { op(0x8b); } + alwaysinline auto or (reg32 rt, dis ds) { op(0x0b); } + alwaysinline auto sbb(reg32 rt, dis ds) { op(0x1b); } + alwaysinline auto sub(reg32 rt, dis ds) { op(0x2b); } + alwaysinline auto xor(reg32 rt, dis ds) { op(0x33); } + #undef op + + //op reg32,[reg64+imm8] + #define op(code) \ + emit.rex(0, rt & 8, 0, ds.reg & 8); \ + emit.byte(code); \ + emit.modrm(1, rt & 7, ds.reg & 7); \ + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); \ + emit.byte(ds.imm); + alwaysinline auto adc(reg32 rt, dis8 ds) { op(0x13); } + alwaysinline auto add(reg32 rt, dis8 ds) { op(0x03); } + alwaysinline auto and(reg32 rt, dis8 ds) { op(0x23); } + alwaysinline auto cmp(reg32 rt, dis8 ds) { op(0x3b); } + alwaysinline auto mov(reg32 rt, dis8 ds) { op(0x8b); } + alwaysinline auto or (reg32 rt, dis8 ds) { op(0x0b); } + alwaysinline auto sbb(reg32 rt, dis8 ds) { op(0x1b); } + alwaysinline auto sub(reg32 rt, dis8 ds) { op(0x2b); } + alwaysinline auto xor(reg32 rt, dis8 ds) { op(0x33); } + #undef op + + //op reg64,[reg64] + #define op(code) \ + emit.rex(1, rt & 8, 0, ds.reg & 8); \ + emit.byte(code); \ + emit.modrm(0, rt & 7, ds.reg & 7); \ + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); + alwaysinline auto adc(reg64 rt, dis ds) { op(0x13); } + alwaysinline auto add(reg64 rt, 
dis ds) { op(0x03); } + alwaysinline auto and(reg64 rt, dis ds) { op(0x23); } + alwaysinline auto cmp(reg64 rt, dis ds) { op(0x3b); } + alwaysinline auto mov(reg64 rt, dis ds) { op(0x8b); } + alwaysinline auto or (reg64 rt, dis ds) { op(0x0b); } + alwaysinline auto sbb(reg64 rt, dis ds) { op(0x1b); } + alwaysinline auto sub(reg64 rt, dis ds) { op(0x2b); } + alwaysinline auto xor(reg64 rt, dis ds) { op(0x33); } + #undef op + + //op reg64,[reg64+imm8] + #define op(code) \ + emit.rex(1, rt & 8, 0, ds.reg & 8); \ + emit.byte(code); \ + emit.modrm(1, rt & 7, ds.reg & 7); \ + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); \ + emit.byte(ds.imm); + alwaysinline auto adc(reg64 rt, dis8 ds) { op(0x13); } + alwaysinline auto add(reg64 rt, dis8 ds) { op(0x03); } + alwaysinline auto and(reg64 rt, dis8 ds) { op(0x23); } + alwaysinline auto cmp(reg64 rt, dis8 ds) { op(0x3b); } + alwaysinline auto mov(reg64 rt, dis8 ds) { op(0x8b); } + alwaysinline auto or (reg64 rt, dis8 ds) { op(0x0b); } + alwaysinline auto sbb(reg64 rt, dis8 ds) { op(0x1b); } + alwaysinline auto sub(reg64 rt, dis8 ds) { op(0x2b); } + alwaysinline auto xor(reg64 rt, dis8 ds) { op(0x33); } + #undef op + + //op reg64,[reg64+imm32] + #define op(code) \ + emit.rex(1, rt & 8, 0, ds.reg & 8); \ + emit.byte(code); \ + emit.modrm(2, rt & 7, ds.reg & 7); \ + if(ds.reg == rsp || ds.reg == r12) emit.sib(0, 4, 4); \ + emit.dword(ds.imm); + alwaysinline auto adc(reg64 rt, dis32 ds) { op(0x13); } + alwaysinline auto add(reg64 rt, dis32 ds) { op(0x03); } + alwaysinline auto and(reg64 rt, dis32 ds) { op(0x23); } + alwaysinline auto cmp(reg64 rt, dis32 ds) { op(0x3b); } + alwaysinline auto mov(reg64 rt, dis32 ds) { op(0x8b); } + alwaysinline auto or (reg64 rt, dis32 ds) { op(0x0b); } + alwaysinline auto sbb(reg64 rt, dis32 ds) { op(0x1b); } + alwaysinline auto sub(reg64 rt, dis32 ds) { op(0x2b); } + alwaysinline auto xor(reg64 rt, dis32 ds) { op(0x33); } + #undef op + + //op [reg64+imm8],reg8 + #define op(code) \ + 
emit.rex(0, rs & 8, 0, dt.reg & 8); \ + emit.byte(code); \ + emit.modrm(1, rs & 7, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); \ + emit.byte(dt.imm); + alwaysinline auto adc(dis8 dt, reg8 rs) { op(0x10); } + alwaysinline auto add(dis8 dt, reg8 rs) { op(0x00); } + alwaysinline auto and(dis8 dt, reg8 rs) { op(0x20); } + alwaysinline auto cmp(dis8 dt, reg8 rs) { op(0x38); } + alwaysinline auto mov(dis8 dt, reg8 rs) { op(0x88); } + alwaysinline auto or (dis8 dt, reg8 rs) { op(0x08); } + alwaysinline auto sbb(dis8 dt, reg8 rs) { op(0x18); } + alwaysinline auto sub(dis8 dt, reg8 rs) { op(0x28); } + alwaysinline auto xor(dis8 dt, reg8 rs) { op(0x30); } + #undef op + + //op reg64,imm32 + #define op(group) \ + emit.rex(1, 0, 0, rt & 8); \ + emit.byte(0x81); \ + emit.modrm(3, group, rt & 7); \ + emit.dword(is.data); + alwaysinline auto add(reg64 rt, imm32 is) { op(0); } + alwaysinline auto or (reg64 rt, imm32 is) { op(1); } + alwaysinline auto adc(reg64 rt, imm32 is) { op(2); } + alwaysinline auto sbb(reg64 rt, imm32 is) { op(3); } + alwaysinline auto and(reg64 rt, imm32 is) { op(4); } + alwaysinline auto sub(reg64 rt, imm32 is) { op(5); } + alwaysinline auto xor(reg64 rt, imm32 is) { op(6); } + alwaysinline auto cmp(reg64 rt, imm32 is) { op(7); } + #undef op + + //op.d [reg64+imm8],imm8 + #define op(group) \ + emit.rex(0, 0, 0, dt.reg & 8); \ + emit.byte(0x83); \ + emit.modrm(1, group, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); \ + emit.byte(dt.imm); \ + emit.byte(is.data); + alwaysinline auto addd(dis8 dt, imm8 is) { op(0); } + alwaysinline auto ord (dis8 dt, imm8 is) { op(1); } + alwaysinline auto adcd(dis8 dt, imm8 is) { op(2); } + alwaysinline auto sbbd(dis8 dt, imm8 is) { op(3); } + alwaysinline auto andd(dis8 dt, imm8 is) { op(4); } + alwaysinline auto subd(dis8 dt, imm8 is) { op(5); } + alwaysinline auto xord(dis8 dt, imm8 is) { op(6); } + alwaysinline auto cmpd(dis8 dt, imm8 is) { op(7); } + #undef op + + //op 
[reg64],reg32 + #define op(code) \ + emit.rex(0, rs & 8, 0, dt.reg & 8); \ + emit.byte(code); \ + emit.modrm(0, rs & 7, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); + alwaysinline auto adc(dis dt, reg32 rs) { op(0x11); } + alwaysinline auto add(dis dt, reg32 rs) { op(0x01); } + alwaysinline auto and(dis dt, reg32 rs) { op(0x21); } + alwaysinline auto cmp(dis dt, reg32 rs) { op(0x39); } + alwaysinline auto mov(dis dt, reg32 rs) { op(0x89); } + alwaysinline auto or (dis dt, reg32 rs) { op(0x09); } + alwaysinline auto sbb(dis dt, reg32 rs) { op(0x19); } + alwaysinline auto sub(dis dt, reg32 rs) { op(0x29); } + alwaysinline auto xor(dis dt, reg32 rs) { op(0x31); } + #undef op + + //op [reg64+imm8],reg32 + #define op(code) \ + emit.rex(0, rs & 8, 0, dt.reg & 8); \ + emit.byte(code); \ + emit.modrm(1, rs & 7, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); \ + emit.byte(dt.imm); + alwaysinline auto adc(dis8 dt, reg32 rs) { op(0x11); } + alwaysinline auto add(dis8 dt, reg32 rs) { op(0x01); } + alwaysinline auto and(dis8 dt, reg32 rs) { op(0x21); } + alwaysinline auto cmp(dis8 dt, reg32 rs) { op(0x39); } + alwaysinline auto mov(dis8 dt, reg32 rs) { op(0x89); } + alwaysinline auto or (dis8 dt, reg32 rs) { op(0x09); } + alwaysinline auto sbb(dis8 dt, reg32 rs) { op(0x19); } + alwaysinline auto sub(dis8 dt, reg32 rs) { op(0x29); } + alwaysinline auto xor(dis8 dt, reg32 rs) { op(0x31); } + #undef op + + //op [reg64],reg64 + #define op(code) \ + emit.rex(0, rs & 8, 0, dt.reg & 8); \ + emit.byte(code); \ + emit.modrm(0, rs & 7, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); + alwaysinline auto adc(dis dt, reg64 rs) { op(0x11); } + alwaysinline auto add(dis dt, reg64 rs) { op(0x01); } + alwaysinline auto and(dis dt, reg64 rs) { op(0x21); } + alwaysinline auto cmp(dis dt, reg64 rs) { op(0x39); } + alwaysinline auto mov(dis dt, reg64 rs) { op(0x89); } + alwaysinline auto or (dis dt, reg64 rs) { op(0x09); } + 
alwaysinline auto sbb(dis dt, reg64 rs) { op(0x19); } + alwaysinline auto sub(dis dt, reg64 rs) { op(0x29); } + alwaysinline auto xor(dis dt, reg64 rs) { op(0x31); } + #undef op + + //op [reg64+imm8],reg64 + #define op(code) \ + emit.rex(1, rs & 8, 0, dt.reg & 8); \ + emit.byte(code); \ + emit.modrm(1, rs & 7, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); \ + emit.byte(dt.imm); + alwaysinline auto adc(dis8 dt, reg64 rs) { op(0x11); } + alwaysinline auto add(dis8 dt, reg64 rs) { op(0x01); } + alwaysinline auto and(dis8 dt, reg64 rs) { op(0x21); } + alwaysinline auto cmp(dis8 dt, reg64 rs) { op(0x39); } + alwaysinline auto mov(dis8 dt, reg64 rs) { op(0x89); } + alwaysinline auto or (dis8 dt, reg64 rs) { op(0x09); } + alwaysinline auto sbb(dis8 dt, reg64 rs) { op(0x19); } + alwaysinline auto sub(dis8 dt, reg64 rs) { op(0x29); } + alwaysinline auto xor(dis8 dt, reg64 rs) { op(0x31); } + #undef op + + //op [reg64+imm32],reg64 + #define op(code) \ + emit.rex(1, rs & 8, 0, dt.reg & 8); \ + emit.byte(code); \ + emit.modrm(2, rs & 7, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); \ + emit.dword(dt.imm); + alwaysinline auto adc(dis32 dt, reg64 rs) { op(0x11); } + alwaysinline auto add(dis32 dt, reg64 rs) { op(0x01); } + alwaysinline auto and(dis32 dt, reg64 rs) { op(0x21); } + alwaysinline auto cmp(dis32 dt, reg64 rs) { op(0x39); } + alwaysinline auto mov(dis32 dt, reg64 rs) { op(0x89); } + alwaysinline auto or (dis32 dt, reg64 rs) { op(0x09); } + alwaysinline auto sbb(dis32 dt, reg64 rs) { op(0x19); } + alwaysinline auto sub(dis32 dt, reg64 rs) { op(0x29); } + alwaysinline auto xor(dis32 dt, reg64 rs) { op(0x31); } + #undef op + + //op reg32,reg8 + #define op(code) \ + emit.rex(0, rt & 8, 0, rs & 8); \ + emit.byte(0x0f, code); \ + emit.modrm(3, rt & 7, rs & 7); + alwaysinline auto movsx(reg32 rt, reg8 rs) { op(0xbe); } + alwaysinline auto movzx(reg32 rt, reg8 rs) { op(0xb6); } + #undef op + + //op reg32,reg16 + #define 
op(code) \ + emit.rex(0, rt & 8, 0, rs & 8); \ + emit.byte(0x0f, code); \ + emit.modrm(3, rt & 7, rs & 7); + alwaysinline auto movsx(reg32 rt, reg16 rs) { op(0xbf); } + alwaysinline auto movzx(reg32 rt, reg16 rs) { op(0xb7); } + #undef op + + alwaysinline auto movsxd(reg64 rt, reg32 rs) { + emit.rex(1, rt & 8, 0, rs & 8); + emit.byte(0x63); + emit.modrm(3, rt & 7, rs & 7); + } + + //incd [reg64+imm8] + alwaysinline auto incd(dis8 dt) { + emit.rex(0, 0, 0, dt.reg & 8); + emit.byte(0xff); + emit.modrm(1, 0, dt.reg & 7); + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); + emit.byte(dt.imm); + } + + //decd [reg64+imm8] + alwaysinline auto decd(dis8 dt) { + emit.rex(0, 0, 0, dt.reg & 8); + emit.byte(0xff); + emit.modrm(1, 1, dt.reg & 7); + if(dt.reg == rsp || dt.reg == r12) emit.sib(0, 4, 4); + emit.byte(dt.imm); + } + + //inc reg32 + alwaysinline auto inc(reg32 rt) { + emit.rex(0, 0, 0, rt & 8); + emit.byte(0xff); + emit.modrm(3, 0, rt & 7); + } + + //dec reg32 + alwaysinline auto dec(reg32 rt) { + emit.rex(0, 0, 0, rt & 8); + emit.byte(0xff); + emit.modrm(3, 1, rt & 7); + } + + //inc reg64 + alwaysinline auto inc(reg64 rt) { + emit.rex(1, 0, 0, rt & 8); + emit.byte(0xff); + emit.modrm(3, 0, rt & 7); + } + + //dec reg64 + alwaysinline auto dec(reg64 rt) { + emit.rex(1, 0, 0, rt & 8); + emit.byte(0xff); + emit.modrm(3, 1, rt & 7); + } + + #define op(code) \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0xd0); \ + emit.modrm(3, code, rt & 7); + alwaysinline auto rol(reg8 rt) { op(0); } + alwaysinline auto ror(reg8 rt) { op(1); } + alwaysinline auto rcl(reg8 rt) { op(2); } + alwaysinline auto rcr(reg8 rt) { op(3); } + alwaysinline auto shl(reg8 rt) { op(4); } + alwaysinline auto shr(reg8 rt) { op(5); } + alwaysinline auto sal(reg8 rt) { op(6); } + alwaysinline auto sar(reg8 rt) { op(7); } + #undef op + + #define op(code) \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0xd1); \ + emit.modrm(3, code, rt & 7); + alwaysinline auto rol(reg32 rt) { op(0); } + alwaysinline auto 
ror(reg32 rt) { op(1); } + alwaysinline auto rcl(reg32 rt) { op(2); } + alwaysinline auto rcr(reg32 rt) { op(3); } + alwaysinline auto shl(reg32 rt) { op(4); } + alwaysinline auto shr(reg32 rt) { op(5); } + alwaysinline auto sal(reg32 rt) { op(6); } + alwaysinline auto sar(reg32 rt) { op(7); } + #undef op + + #define op(code) \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0xc1); \ + emit.modrm(3, code, rt & 7); \ + emit.byte(is.data); + alwaysinline auto rol(reg32 rt, imm8 is) { op(0); } + alwaysinline auto ror(reg32 rt, imm8 is) { op(1); } + alwaysinline auto rcl(reg32 rt, imm8 is) { op(2); } + alwaysinline auto rcr(reg32 rt, imm8 is) { op(3); } + alwaysinline auto shl(reg32 rt, imm8 is) { op(4); } + alwaysinline auto shr(reg32 rt, imm8 is) { op(5); } + alwaysinline auto sal(reg32 rt, imm8 is) { op(6); } + alwaysinline auto sar(reg32 rt, imm8 is) { op(7); } + #undef op + + #define op(code) \ + if(unlikely(rs != cl)) throw; \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0xd3); \ + emit.modrm(3, code, rt & 7); + alwaysinline auto rol(reg32 rt, reg8 rs) { op(0); } + alwaysinline auto ror(reg32 rt, reg8 rs) { op(1); } + alwaysinline auto rcl(reg32 rt, reg8 rs) { op(2); } + alwaysinline auto rcr(reg32 rt, reg8 rs) { op(3); } + alwaysinline auto shl(reg32 rt, reg8 rs) { op(4); } + alwaysinline auto shr(reg32 rt, reg8 rs) { op(5); } + alwaysinline auto sal(reg32 rt, reg8 rs) { op(6); } + alwaysinline auto sar(reg32 rt, reg8 rs) { op(7); } + #undef op + + #define op(code) \ + emit.rex(1, 0, 0, rt & 8); \ + emit.byte(0xc1); \ + emit.modrm(3, code, rt & 7); \ + emit.byte(is.data); + alwaysinline auto rol(reg64 rt, imm8 is) { op(0); } + alwaysinline auto ror(reg64 rt, imm8 is) { op(1); } + alwaysinline auto rcl(reg64 rt, imm8 is) { op(2); } + alwaysinline auto rcr(reg64 rt, imm8 is) { op(3); } + alwaysinline auto shl(reg64 rt, imm8 is) { op(4); } + alwaysinline auto shr(reg64 rt, imm8 is) { op(5); } + alwaysinline auto sal(reg64 rt, imm8 is) { op(6); } + alwaysinline auto 
sar(reg64 rt, imm8 is) { op(7); } + #undef op + + #define op(code) \ + if(unlikely(rs != cl)) throw; \ + emit.rex(1, 0, 0, rt & 8); \ + emit.byte(0xd3); \ + emit.modrm(3, code, rt & 7); + alwaysinline auto rol(reg64 rt, reg8 rs) { op(0); } + alwaysinline auto ror(reg64 rt, reg8 rs) { op(1); } + alwaysinline auto rcl(reg64 rt, reg8 rs) { op(2); } + alwaysinline auto rcr(reg64 rt, reg8 rs) { op(3); } + alwaysinline auto shl(reg64 rt, reg8 rs) { op(4); } + alwaysinline auto shr(reg64 rt, reg8 rs) { op(5); } + alwaysinline auto sal(reg64 rt, reg8 rs) { op(6); } + alwaysinline auto sar(reg64 rt, reg8 rs) { op(7); } + #undef op + + //push reg + alwaysinline auto push(reg64 rt) { + emit.rex(0, 0, 0, rt & 8); + emit.byte(0x50 | rt & 7); + } + + //pop reg + alwaysinline auto pop(reg64 rt) { + emit.rex(0, 0, 0, rt & 8); + emit.byte(0x58 | rt & 7); + } + + #define op(code) \ + emit.rex(0, rs & 8, 0, rt & 8); \ + emit.byte(code); \ + emit.modrm(3, rs & 7, rt & 7); + alwaysinline auto adc (reg8 rt, reg8 rs) { op(0x10); } + alwaysinline auto add (reg8 rt, reg8 rs) { op(0x00); } + alwaysinline auto and (reg8 rt, reg8 rs) { op(0x20); } + alwaysinline auto cmp (reg8 rt, reg8 rs) { op(0x38); } + alwaysinline auto mov (reg8 rt, reg8 rs) { op(0x88); } + alwaysinline auto or (reg8 rt, reg8 rs) { op(0x08); } + alwaysinline auto sbb (reg8 rt, reg8 rs) { op(0x18); } + alwaysinline auto sub (reg8 rt, reg8 rs) { op(0x28); } + alwaysinline auto test(reg8 rt, reg8 rs) { op(0x84); } + alwaysinline auto xor (reg8 rt, reg8 rs) { op(0x30); } + #undef op + + #define op(code) \ + emit.byte(0x66); \ + emit.rex(0, rs & 8, 0, rt & 8); \ + emit.byte(code); \ + emit.modrm(3, rs & 7, rt & 7); + alwaysinline auto adc (reg16 rt, reg16 rs) { op(0x11); } + alwaysinline auto add (reg16 rt, reg16 rs) { op(0x01); } + alwaysinline auto and (reg16 rt, reg16 rs) { op(0x21); } + alwaysinline auto cmp (reg16 rt, reg16 rs) { op(0x39); } + alwaysinline auto mov (reg16 rt, reg16 rs) { op(0x89); } + alwaysinline auto or 
(reg16 rt, reg16 rs) { op(0x09); } + alwaysinline auto sbb (reg16 rt, reg16 rs) { op(0x19); } + alwaysinline auto sub (reg16 rt, reg16 rs) { op(0x29); } + alwaysinline auto test(reg16 rt, reg16 rs) { op(0x85); } + alwaysinline auto xor (reg16 rt, reg16 rs) { op(0x31); } + #undef op + + #define op(code) \ + emit.rex(0, rs & 8, 0, rt & 8); \ + emit.byte(code); \ + emit.modrm(3, rs & 7, rt & 7); + alwaysinline auto adc (reg32 rt, reg32 rs) { op(0x11); } + alwaysinline auto add (reg32 rt, reg32 rs) { op(0x01); } + alwaysinline auto and (reg32 rt, reg32 rs) { op(0x21); } + alwaysinline auto cmp (reg32 rt, reg32 rs) { op(0x39); } + alwaysinline auto mov (reg32 rt, reg32 rs) { op(0x89); } + alwaysinline auto or (reg32 rt, reg32 rs) { op(0x09); } + alwaysinline auto sbb (reg32 rt, reg32 rs) { op(0x19); } + alwaysinline auto sub (reg32 rt, reg32 rs) { op(0x29); } + alwaysinline auto test(reg32 rt, reg32 rs) { op(0x85); } + alwaysinline auto xor (reg32 rt, reg32 rs) { op(0x31); } + #undef op + + #define op(code) \ + emit.rex(1, rs & 8, 0, rt & 8); \ + emit.byte(code); \ + emit.modrm(3, rs & 7, rt & 7); + alwaysinline auto adc (reg64 rt, reg64 rs) { op(0x11); } + alwaysinline auto add (reg64 rt, reg64 rs) { op(0x01); } + alwaysinline auto and (reg64 rt, reg64 rs) { op(0x21); } + alwaysinline auto cmp (reg64 rt, reg64 rs) { op(0x39); } + alwaysinline auto mov (reg64 rt, reg64 rs) { op(0x89); } + alwaysinline auto or (reg64 rt, reg64 rs) { op(0x09); } + alwaysinline auto sbb (reg64 rt, reg64 rs) { op(0x19); } + alwaysinline auto sub (reg64 rt, reg64 rs) { op(0x29); } + alwaysinline auto test(reg64 rt, reg64 rs) { op(0x85); } + alwaysinline auto xor (reg64 rt, reg64 rs) { op(0x31); } + #undef op + + #define op(code) \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0x83); \ + emit.modrm(3, code, rt & 7); \ + emit.byte(is.data); + alwaysinline auto adc(reg32 rt, imm8 is) { op(2); } + alwaysinline auto add(reg32 rt, imm8 is) { op(0); } + alwaysinline auto and(reg32 rt, imm8 is) { 
op(4); } + alwaysinline auto cmp(reg32 rt, imm8 is) { op(7); } + alwaysinline auto or (reg32 rt, imm8 is) { op(1); } + alwaysinline auto sbb(reg32 rt, imm8 is) { op(3); } + alwaysinline auto sub(reg32 rt, imm8 is) { op(5); } + alwaysinline auto xor(reg32 rt, imm8 is) { op(6); } + #undef op + + #define op(code) \ + emit.rex(1, 0, 0, rt & 8); \ + emit.byte(0x83); \ + emit.modrm(3, code, rt & 7); \ + emit.byte(is.data); + alwaysinline auto adc(reg64 rt, imm8 is) { op(2); } + alwaysinline auto add(reg64 rt, imm8 is) { op(0); } + alwaysinline auto and(reg64 rt, imm8 is) { op(4); } + alwaysinline auto cmp(reg64 rt, imm8 is) { op(7); } + alwaysinline auto or (reg64 rt, imm8 is) { op(1); } + alwaysinline auto sbb(reg64 rt, imm8 is) { op(3); } + alwaysinline auto sub(reg64 rt, imm8 is) { op(5); } + alwaysinline auto xor(reg64 rt, imm8 is) { op(6); } + #undef op + + #define op(code, group) \ + if(rt == al) { \ + emit.byte(code); \ + emit.byte(is.data); \ + } else { \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0x80); \ + emit.modrm(3, group, rt & 7); \ + emit.byte(is.data); \ + } + alwaysinline auto adc(reg8 rt, imm8 is) { op(0x14, 2); } + alwaysinline auto add(reg8 rt, imm8 is) { op(0x04, 0); } + alwaysinline auto and(reg8 rt, imm8 is) { op(0x24, 4); } + alwaysinline auto cmp(reg8 rt, imm8 is) { op(0x3c, 7); } + alwaysinline auto or (reg8 rt, imm8 is) { op(0x0c, 1); } + alwaysinline auto sbb(reg8 rt, imm8 is) { op(0x1c, 3); } + alwaysinline auto sub(reg8 rt, imm8 is) { op(0x2c, 5); } + alwaysinline auto xor(reg8 rt, imm8 is) { op(0x34, 6); } + #undef op + + #define op(code, group) \ + if(rt == eax) { \ + emit.byte(code); \ + emit.dword(is.data); \ + } else { \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0x81); \ + emit.modrm(3, group, rt & 7); \ + emit.dword(is.data); \ + } + alwaysinline auto adc(reg32 rt, imm32 is) { op(0x15, 2); } + alwaysinline auto add(reg32 rt, imm32 is) { op(0x05, 0); } + alwaysinline auto and(reg32 rt, imm32 is) { op(0x25, 4); } + alwaysinline auto 
cmp(reg32 rt, imm32 is) { op(0x3d, 7); } + alwaysinline auto or (reg32 rt, imm32 is) { op(0x0d, 1); } + alwaysinline auto sbb(reg32 rt, imm32 is) { op(0x1d, 3); } + alwaysinline auto sub(reg32 rt, imm32 is) { op(0x2d, 5); } + alwaysinline auto xor(reg32 rt, imm32 is) { op(0x35, 6); } + #undef op + + #define op(code) \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0xf7); \ + emit.modrm(3, code, rt & 7); + alwaysinline auto not (reg32 rt) { op(2); } + alwaysinline auto neg (reg32 rt) { op(3); } + alwaysinline auto mul (reg32 rt) { op(4); } + alwaysinline auto imul(reg32 rt) { op(5); } + alwaysinline auto div (reg32 rt) { op(6); } + alwaysinline auto idiv(reg32 rt) { op(7); } + #undef op + + #define op(code) \ + emit.rex(1, 0, 0, rt & 8); \ + emit.byte(0xf7); \ + emit.modrm(3, code, rt & 7); + alwaysinline auto not (reg64 rt) { op(2); } + alwaysinline auto neg (reg64 rt) { op(3); } + alwaysinline auto mul (reg64 rt) { op(4); } + alwaysinline auto imul(reg64 rt) { op(5); } + alwaysinline auto div (reg64 rt) { op(6); } + alwaysinline auto idiv(reg64 rt) { op(7); } + #undef op + + #define op(code) \ + emit.byte(code); \ + emit.byte(it.data); + #define r imm8 it{resolve(l, 1, 1)} + alwaysinline auto jmp (imm8 it) { op(0xeb); } + alwaysinline auto jmp8(label l) { r; op(0xeb); } + alwaysinline auto jnz (imm8 it) { op(0x75); } + alwaysinline auto jnz8(label l) { r; op(0x75); } + alwaysinline auto jz (imm8 it) { op(0x74); } + alwaysinline auto jz8 (label l) { r; op(0x74); } + #undef r + #undef op + + #define op(code) \ + emit.byte(0x0f); \ + emit.byte(code); \ + emit.dword(it.data); + #define r imm32 it{resolve(l, 2, 4)} + alwaysinline auto jnz(imm32 it) { op(0x85); } + alwaysinline auto jnz(label l) { r; op(0x85); } + alwaysinline auto jz (imm32 it) { op(0x84); } + alwaysinline auto jz (label l) { r; op(0x84); } + #undef r + #undef op + + //op reg8 + #define op(code) \ + emit.rex(0, 0, 0, rt & 8); \ + emit.byte(0x0f); \ + emit.byte(code); \ + emit.modrm(3, 0, rt & 7); + 
alwaysinline auto seta (reg8 rt) { op(0x97); } + alwaysinline auto setbe(reg8 rt) { op(0x96); } + alwaysinline auto setb (reg8 rt) { op(0x92); } + alwaysinline auto setc (reg8 rt) { op(0x92); } + alwaysinline auto setg (reg8 rt) { op(0x9f); } + alwaysinline auto setge(reg8 rt) { op(0x9d); } + alwaysinline auto setl (reg8 rt) { op(0x9c); } + alwaysinline auto setle(reg8 rt) { op(0x9e); } + alwaysinline auto setnc(reg8 rt) { op(0x93); } + alwaysinline auto setno(reg8 rt) { op(0x91); } + alwaysinline auto setnp(reg8 rt) { op(0x9b); } + alwaysinline auto setns(reg8 rt) { op(0x99); } + alwaysinline auto setnz(reg8 rt) { op(0x95); } + alwaysinline auto seto (reg8 rt) { op(0x90); } + alwaysinline auto setp (reg8 rt) { op(0x9a); } + alwaysinline auto sets (reg8 rt) { op(0x98); } + alwaysinline auto setz (reg8 rt) { op(0x94); } + #undef op + + //op [reg64] + #define op(code) \ + emit.rex(0, 0, 0, dt.reg & 8); \ + emit.byte(0x0f); \ + emit.byte(code); \ + if(dt.reg == rsp || dt.reg == r12) { \ + emit.modrm(0, 0, dt.reg & 7); \ + emit.sib(0, 4, 4); \ + } else if(dt.reg == rbp || dt.reg == r13) { \ + emit.modrm(1, 0, dt.reg & 7); \ + emit.byte(0x00); \ + } else { \ + emit.modrm(0, 0, dt.reg & 7); \ + } + alwaysinline auto seta (dis dt) { op(0x97); } + alwaysinline auto setbe(dis dt) { op(0x96); } + alwaysinline auto setb (dis dt) { op(0x92); } + alwaysinline auto setc (dis dt) { op(0x92); } + alwaysinline auto setg (dis dt) { op(0x9f); } + alwaysinline auto setge(dis dt) { op(0x9d); } + alwaysinline auto setl (dis dt) { op(0x9c); } + alwaysinline auto setle(dis dt) { op(0x9e); } + alwaysinline auto setnc(dis dt) { op(0x93); } + alwaysinline auto setno(dis dt) { op(0x91); } + alwaysinline auto setnp(dis dt) { op(0x9b); } + alwaysinline auto setns(dis dt) { op(0x99); } + alwaysinline auto setnz(dis dt) { op(0x95); } + alwaysinline auto seto (dis dt) { op(0x90); } + alwaysinline auto setp (dis dt) { op(0x9a); } + alwaysinline auto sets (dis dt) { op(0x98); } + alwaysinline auto 
setz (dis dt) { op(0x94); } + #undef op + + //op [reg64+imm8] + #define op(code) \ + emit.rex(0, 0, 0, dt.reg & 8); \ + emit.byte(0x0f); \ + emit.byte(code); \ + emit.modrm(1, 0, dt.reg & 7); \ + if(dt.reg == rsp || dt.reg == r12) { \ + emit.sib(0, 4, 4); \ + } \ + emit.byte(dt.imm); + alwaysinline auto seta (dis8 dt) { op(0x97); } + alwaysinline auto setbe(dis8 dt) { op(0x96); } + alwaysinline auto setb (dis8 dt) { op(0x92); } + alwaysinline auto setc (dis8 dt) { op(0x92); } + alwaysinline auto setg (dis8 dt) { op(0x9f); } + alwaysinline auto setge(dis8 dt) { op(0x9d); } + alwaysinline auto setl (dis8 dt) { op(0x9c); } + alwaysinline auto setle(dis8 dt) { op(0x9e); } + alwaysinline auto setnc(dis8 dt) { op(0x93); } + alwaysinline auto setno(dis8 dt) { op(0x91); } + alwaysinline auto setnp(dis8 dt) { op(0x9b); } + alwaysinline auto setns(dis8 dt) { op(0x99); } + alwaysinline auto setnz(dis8 dt) { op(0x95); } + alwaysinline auto seto (dis8 dt) { op(0x90); } + alwaysinline auto setp (dis8 dt) { op(0x9a); } + alwaysinline auto sets (dis8 dt) { op(0x98); } + alwaysinline auto setz (dis8 dt) { op(0x94); } + #undef op + + //call imm64 (pseudo-op) + alwaysinline auto call(imm64 target, reg64 scratch) { + s64 dist = distance(target.data) - 5; + if(dist < INT32_MIN || dist > INT32_MAX) { + mov(scratch, target); + call(scratch); + } else { + call(imm32{dist}); + } + } + + //jmp label (pseudo-op) + alwaysinline auto jmp(label l) { + jmp(imm32{resolve(l, 1, 4)}); + } +//}; diff --git a/waterbox/ares64/ares/nall/recompiler/generic/constants.hpp b/waterbox/ares64/ares/nall/recompiler/generic/constants.hpp new file mode 100644 index 0000000000..dace1f821e --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/generic/constants.hpp @@ -0,0 +1,60 @@ +#pragma once + +//{ + enum set_flags { + set_z = SLJIT_SET_Z, + set_ult = SLJIT_SET_LESS, + set_uge = SLJIT_SET_GREATER_EQUAL, + set_ugt = SLJIT_SET_GREATER, + set_ule = SLJIT_SET_LESS_EQUAL, + set_slt = SLJIT_SET_SIG_LESS, + set_sge 
= SLJIT_SET_SIG_GREATER_EQUAL, + set_sgt = SLJIT_SET_SIG_GREATER, + set_sle = SLJIT_SET_SIG_LESS_EQUAL, + set_o = SLJIT_SET_OVERFLOW, + set_c = SLJIT_SET_CARRY, + }; + + enum flags { + flag_eq = SLJIT_EQUAL, + flag_z = flag_eq, + flag_ne = SLJIT_NOT_EQUAL, + flag_nz = flag_ne, + flag_ult = SLJIT_LESS, + flag_uge = SLJIT_GREATER_EQUAL, + flag_ugt = SLJIT_GREATER, + flag_ule = SLJIT_LESS_EQUAL, + flag_slt = SLJIT_SIG_LESS, + flag_sge = SLJIT_SIG_GREATER_EQUAL, + flag_sgt = SLJIT_SIG_GREATER, + flag_sle = SLJIT_SIG_LESS_EQUAL, + flag_o = SLJIT_OVERFLOW, + flag_no = SLJIT_NOT_OVERFLOW, + }; + + struct op_base { + op_base(sljit_s32 f, sljit_sw s) : fst(f), snd(s) {} + sljit_s32 fst; + sljit_sw snd; + }; + + struct imm : public op_base { + explicit imm(sljit_sw immediate) : op_base(SLJIT_IMM, immediate) {} + }; + + struct reg : public op_base { + explicit reg(sljit_s32 index) : op_base(SLJIT_R(index), 0) {} + }; + + struct sreg : public op_base { + explicit sreg(sljit_s32 index) : op_base(SLJIT_S(index), 0) {} + }; + + struct mem : public op_base { + mem(sreg base, sljit_sw offset) : op_base(SLJIT_MEM1(base.fst), offset) {} + }; + + struct unused : public op_base { + unused() : op_base(SLJIT_UNUSED, 0) {} + }; +//}; diff --git a/waterbox/ares64/ares/nall/recompiler/generic/encoder-calls.hpp b/waterbox/ares64/ares/nall/recompiler/generic/encoder-calls.hpp new file mode 100644 index 0000000000..af4382c497 --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/generic/encoder-calls.hpp @@ -0,0 +1,73 @@ +#pragma once + +//{ + struct imm64 { + explicit imm64(u64 data) : data(data) {} + template explicit imm64(T* pointer) : data((u64)pointer) {} + template explicit imm64(auto (C::*function)(P...) -> R) { + union force_cast_ub { + auto (C::*function)(P...) -> R; + u64 pointer; + } cast{function}; + data = cast.pointer; + } + template explicit imm64(auto (C::*function)(P...) const -> R) { + union force_cast_ub { + auto (C::*function)(P...) 
const -> R; + u64 pointer; + } cast{function}; + data = cast.pointer; + } + u64 data; + }; + + template + alwaysinline auto call(V (C::*function)(P...)) { + static_assert(sizeof...(P) <= 3); + sljit_s32 type = SLJIT_ARG1(SW); + if constexpr(sizeof...(P) >= 1) type |= SLJIT_ARG2(SW); + if constexpr(sizeof...(P) >= 2) type |= SLJIT_ARG3(SW); + if constexpr(sizeof...(P) >= 3) type |= SLJIT_ARG4(SW); + if constexpr(!std::is_void_v) type |= SLJIT_RET(SW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data)); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data); + sljit_s32 type = SLJIT_ARG1(SW); + if constexpr(!std::is_void_v) type |= SLJIT_RET(SW); + sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data)); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data); + sljit_s32 type = SLJIT_ARG1(SW) | SLJIT_ARG2(SW); + if constexpr(!std::is_void_v) type |= SLJIT_RET(SW); + sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data)); + } + + template + alwaysinline auto call(auto (C::*function)(P...) 
-> R, C* object, P0 p0, P1 p1) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64{p1}.data); + sljit_s32 type = SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW); + if constexpr(!std::is_void_v) type |= SLJIT_RET(SW); + sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data)); + } + + template + alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0, P1 p1, P2 p2) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64{p1}.data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, imm64{p2}.data); + sljit_s32 type = SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW); + if constexpr(!std::is_void_v) type |= SLJIT_RET(SW); + sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data)); + } +//}; diff --git a/waterbox/ares64/ares/nall/recompiler/generic/encoder-instructions.hpp b/waterbox/ares64/ares/nall/recompiler/generic/encoder-instructions.hpp new file mode 100644 index 0000000000..90869fc4c6 --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/generic/encoder-instructions.hpp @@ -0,0 +1,156 @@ +#pragma once + +//{ + //0 operand instructions + + auto brk() { + sljit_emit_op0(compiler, SLJIT_BREAKPOINT); + } + + //1 operand instructions + +#define OP1(name, op) \ + template \ + auto name(T x, U y) { \ + sljit_emit_op1(compiler, \ + SLJIT_##op, \ + x.fst, x.snd, \ + y.fst, y.snd); \ + } + + OP1(mov32, MOV32) + OP1(mov64, MOV) + OP1(mov32_u8, MOV32_U8) + OP1(mov64_u8, MOV_U8) + OP1(mov32_s8, MOV32_S8) + OP1(mov64_s8, MOV_S8) + OP1(mov32_u16, MOV32_U16) + OP1(mov64_u16, MOV_U16) + OP1(mov32_s16, 
MOV32_S16) + OP1(mov64_s16, MOV_S16) + OP1(mov64_u32, MOV_U32) + OP1(mov64_s32, MOV_S32) + OP1(not32, NOT32) + OP1(not64, NOT) + OP1(neg32, NEG32) + OP1(neg64, NEG) +#undef OP1 + + //2 operand instructions + +#define OP2(name, op) \ + template \ + auto name(T x, U y, V z, sljit_s32 flags = 0) { \ + sljit_emit_op2(compiler, \ + SLJIT_##op | flags, \ + x.fst, x.snd, \ + y.fst, y.snd, \ + z.fst, z.snd); \ + } + + OP2(add32, ADD32) + OP2(add64, ADD) + OP2(addc32, ADDC32) + OP2(addc64, ADDC) + OP2(sub32, SUB32) + OP2(sub64, SUB) + OP2(subc32, SUBC32) + OP2(subc64, SUBC) + OP2(mul32, MUL32) + OP2(mul64, MUL) + OP2(and32, AND32) + OP2(and64, AND) + OP2(or32, OR32) + OP2(or64, OR) + OP2(xor32, XOR32) + OP2(xor64, XOR) + OP2(shl32, SHL32) + OP2(shl64, SHL) + OP2(lshr32, LSHR32) + OP2(lshr64, LSHR) + OP2(ashr32, ASHR32) + OP2(ashr64, ASHR) +#undef OP2 + + //compare instructions + +#define OPC(name, op) \ + template \ + auto name(T x, U y, sljit_s32 flags) { \ + sljit_emit_op2(compiler, \ + SLJIT_##op | flags, \ + SLJIT_UNUSED, 0, \ + x.fst, x.snd, \ + y.fst, y.snd); \ + } + + OPC(cmp32, SUB32) + OPC(cmp64, SUB) + OPC(test32, AND32) + OPC(test64, AND) +#undef OPC + + template + auto cmp32_jump(T x, U y, sljit_s32 flags) -> sljit_jump* { + return sljit_emit_cmp(compiler, + SLJIT_I32_OP | flags, + x.fst, x.snd, + y.fst, y.snd); + } + + //flag instructions + +#define OPF(name, op) \ + template \ + auto name(T x, sljit_s32 flags) { \ + sljit_emit_op_flags(compiler, \ + SLJIT_##op, \ + x.fst, x.snd, \ + flags); \ + } + + OPF(mov32_f, MOV32) + OPF(mov64_f, MOV) + OPF(and32_f, AND32) + OPF(and64_f, AND) + OPF(or32_f, OR32) + OPF(or64_f, OR) + OPF(xor32_f, XOR32) + OPF(xor64_f, XOR) +#undef OPF + + //meta instructions + + auto mov32_to_c(mem m, int sign) { +#if defined(ARCHITECTURE_AMD64) + cmp32(imm(0), m, set_c); +#elif defined(ARCHITECTURE_ARM64) + if(sign < 0) { + cmp32(imm(0), m, set_c); + } else { + cmp32(m, imm(1), set_c); + } +#else +#error "Unimplemented architecture" 
+#endif + } + + auto mov32_from_c(reg r, int sign) { +#if defined(ARCHITECTURE_AMD64) + mov32(r, imm(0)); + addc32(r, r, r); +#elif defined(ARCHITECTURE_ARM64) + mov32(r, imm(0)); + addc32(r, r, r); + if(sign < 0) { + xor32(r, r, imm(1)); + } +#else +#error "Unimplemented architecture" +#endif + } + + auto lea(reg r, sreg base, sljit_sw offset) { + add64(r, base, imm(offset)); + } +//}; diff --git a/waterbox/ares64/ares/nall/recompiler/generic/generic.hpp b/waterbox/ares64/ares/nall/recompiler/generic/generic.hpp new file mode 100644 index 0000000000..883be7de68 --- /dev/null +++ b/waterbox/ares64/ares/nall/recompiler/generic/generic.hpp @@ -0,0 +1,61 @@ +#pragma once + +#if defined(SLJIT) +namespace nall::recompiler { + struct generic { + bump_allocator& allocator; + sljit_compiler* compiler = nullptr; + sljit_label* epilogue = nullptr; + + generic(bump_allocator& alloc) : allocator(alloc) {} + + auto beginFunction(int args) { + assert(args <= 3); + compiler = sljit_create_compiler(nullptr, &allocator); + + sljit_s32 options = 0; + if(args >= 1) options |= SLJIT_ARG1(SW); + if(args >= 2) options |= SLJIT_ARG2(SW); + if(args >= 3) options |= SLJIT_ARG3(SW); + sljit_emit_enter(compiler, 0, options, 4, 3, 0, 0, 0); + sljit_jump* entry = sljit_emit_jump(compiler, SLJIT_JUMP); + epilogue = sljit_emit_label(compiler); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + sljit_set_label(entry, sljit_emit_label(compiler)); + } + + auto endFunction() -> u8* { + u8* code = (u8*)sljit_generate_code(compiler); + sljit_free_compiler(compiler); + compiler = nullptr; + epilogue = nullptr; + return code; + } + + auto testJumpEpilog() { + sljit_set_label(sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL | SLJIT_I32_OP, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0), epilogue); + } + + auto jumpEpilog() { + sljit_set_label(sljit_emit_jump(compiler, SLJIT_JUMP), epilogue); + } + + auto setLabel(sljit_jump* jump) { + sljit_set_label(jump, sljit_emit_label(compiler)); + } + + auto jump() -> sljit_jump* 
{ + return sljit_emit_jump(compiler, SLJIT_JUMP); + } + + auto jump(sljit_s32 flag) -> sljit_jump* { + return sljit_emit_jump(compiler, flag); + } + + #include "constants.hpp" + #include "encoder-instructions.hpp" + #include "encoder-calls.hpp" + }; +} +#endif diff --git a/waterbox/ares64/ares/nall/reed-solomon.hpp b/waterbox/ares64/ares/nall/reed-solomon.hpp new file mode 100644 index 0000000000..390245b351 --- /dev/null +++ b/waterbox/ares64/ares/nall/reed-solomon.hpp @@ -0,0 +1,218 @@ +#pragma once + +namespace nall { + +//RS(n,k) = ReedSolomon +template +struct ReedSolomon { + enum : u32 { Parity = Length - Inputs }; + static_assert(Length <= 255 && Length > 0); + static_assert(Parity <= 32 && Parity > 0); + + using Field = GaloisField; + template using Polynomial = Matrix; + + template + static auto shift(Polynomial polynomial) -> Polynomial { + for(s32 n = Size - 1; n > 0; n--) polynomial[n] = polynomial[n - 1]; + polynomial[0] = 0; + return polynomial; + } + + template + static auto degree(const Polynomial& polynomial) -> u32 { + for(s32 n = Size; n > 0; n--) { + if(polynomial[n - 1] != 0) return n - 1; + } + return 0; + } + + template + static auto evaluate(const Polynomial& polynomial, Field field) -> Field { + Field sum = 0; + for(u32 n : range(Size)) sum += polynomial[n] * field.pow(n); + return sum; + } + + Polynomial message; + Polynomial syndromes; + Polynomial locators; + + ReedSolomon() = default; + ReedSolomon(const ReedSolomon&) = default; + + ReedSolomon(const initializer_list& source) { + u32 index = 0; + for(auto& value : source) { + if(index >= Length) break; + message[index++] = value; + } + } + + auto operator[](u32 index) -> Field& { return message[index]; } + auto operator[](u32 index) const -> Field { return message[index]; } + + auto calculateSyndromes() -> void { + static const Polynomial bases = [] { + Polynomial bases; + for(u32 n : range(Parity)) { + bases[n] = Field::exp(n); + } + return bases; + }(); + + syndromes = {}; + for(u32 m 
: range(Length)) { + for(u32 p : range(Parity)) { + syndromes[p] *= bases[p]; + syndromes[p] += message[m]; + } + } + } + + auto generateParity() -> void { + static const Polynomial matrix = [] { + Polynomial matrix{}; + for(u32 row : range(Parity)) { + for(u32 col : range(Parity)) { + matrix(row, col) = Field::exp(row * col); + } + } + if(auto result = matrix.invert()) return *result; + throw; //should never occur + }(); + + for(u32 p : range(Parity)) message[Inputs + p] = 0; + calculateSyndromes(); + auto parity = matrix * syndromes; + for(u32 p : range(Parity)) message[Inputs + p] = parity[Parity - (p + 1)]; + } + + auto syndromesAreZero() -> bool { + for(u32 p : range(Parity)) { + if(syndromes[p]) return false; + } + return true; + } + + //algorithm: Berlekamp-Massey + auto calculateLocators() -> void { + Polynomial history{1}; + locators = history; + u32 errors = 0; + + for(u32 n : range(Parity)) { + Field discrepancy = 0; + for(u32 l : range(errors + 1)) { + discrepancy += locators[l] * syndromes[n - l]; + } + + history = shift(history); + if(discrepancy) { + auto located = locators - history * discrepancy; + if(errors * 2 <= n) { + errors = (n + 1) - errors; + history = locators * discrepancy.inv(); + } + locators = located; + } + } + } + + //algorithm: brute force + //todo: implement Chien search here + auto calculateErrors() -> vector { + calculateSyndromes(); + if(syndromesAreZero()) return {}; //no errors detected + calculateLocators(); + vector errors; + for(u32 n : range(Length)) { + if(evaluate(locators, Field{2}.pow(255 - n))) continue; + errors.append(Length - (n + 1)); + } + return errors; + } + + template + static auto calculateErasures(array_view errors) -> maybe> { + Polynomial matrix{}; + for(u32 row : range(Size)) { + for(u32 col : range(Size)) { + u32 index = Length - (errors[col] + 1); + matrix(row, col) = Field::exp(row * index); + } + } + return matrix.invert(); + } + + template + auto correctErasures(array_view errors) -> s32 { + 
calculateSyndromes(); + if(syndromesAreZero()) return 0; //no errors detected + if(auto matrix = calculateErasures(errors)) { + Polynomial factors; + for(u32 n : range(Size)) factors[n] = syndromes[n]; + auto errata = matrix() * factors; + for(u32 m : range(Size)) { + message[errors[m]] += errata[m]; + } + calculateSyndromes(); + if(syndromesAreZero()) return Size; //corrected Size errors + return -Size; //failed to correct Size errors + } + return -Size; //should never occur, but might ... + } + + //note: the erasure matrix is generated as a Polynomial, where N is the number of errors to correct. + //because this is a template parameter, and the actual number of errors may vary, this function is needed. + //the alternative would be to convert Matrix to a dynamically sized Matrix(Rows, Cols) type, + //but this would require heap memory allocations and would be a massive performance penalty. + auto correctErrata(array_view errors) -> s32 { + if(errors.size() >= Parity) return -errors.size(); //too many errors to be correctable + + switch(errors.size()) { + case 0: return 0; + case 1: return correctErasures< 1>(errors); + case 2: return correctErasures< 2>(errors); + case 3: return correctErasures< 3>(errors); + case 4: return correctErasures< 4>(errors); + case 5: return correctErasures< 5>(errors); + case 6: return correctErasures< 6>(errors); + case 7: return correctErasures< 7>(errors); + case 8: return correctErasures< 8>(errors); + case 9: return correctErasures< 9>(errors); + case 10: return correctErasures<10>(errors); + case 11: return correctErasures<11>(errors); + case 12: return correctErasures<12>(errors); + case 13: return correctErasures<13>(errors); + case 14: return correctErasures<14>(errors); + case 15: return correctErasures<15>(errors); + case 16: return correctErasures<16>(errors); + case 17: return correctErasures<17>(errors); + case 18: return correctErasures<18>(errors); + case 19: return correctErasures<19>(errors); + case 20: return 
correctErasures<20>(errors); + case 21: return correctErasures<21>(errors); + case 22: return correctErasures<22>(errors); + case 23: return correctErasures<23>(errors); + case 24: return correctErasures<24>(errors); + case 25: return correctErasures<25>(errors); + case 26: return correctErasures<26>(errors); + case 27: return correctErasures<27>(errors); + case 28: return correctErasures<28>(errors); + case 29: return correctErasures<29>(errors); + case 30: return correctErasures<30>(errors); + case 31: return correctErasures<31>(errors); + case 32: return correctErasures<32>(errors); + } + return -errors.size(); //it's possible to correct more errors if the above switch were extended ... + } + + //convenience function for when erasures aren't needed + auto correctErrors() -> s32 { + auto errors = calculateErrors(); + return correctErrata(errors); + } +}; + +} diff --git a/waterbox/ares64/ares/nall/run.hpp b/waterbox/ares64/ares/nall/run.hpp new file mode 100644 index 0000000000..14f92ec490 --- /dev/null +++ b/waterbox/ares64/ares/nall/run.hpp @@ -0,0 +1,214 @@ +#pragma once + +//auto execute(const string& name, const string& args...) -> string; +//[[synchronous]] +//executes program, waits for completion, and returns data written to stdout + +//auto invoke(const string& name, const string& args...) 
-> void; +//[[asynchronous]] +//if a program is specified, it is executed with the arguments provided +//if a file is specified, the file is opened using the program associated with said file type +//if a folder is specified, the folder is opened using the associated file explorer +//if a URL is specified, the default web browser is opened and pointed at the URL requested + +#include +#include + +namespace nall { + +struct execute_result_t { + explicit operator bool() const { return code == EXIT_SUCCESS; } + + int code = EXIT_FAILURE; + string output; + string error; +}; + +#if defined(PLATFORM_MACOS) || defined(PLATFORM_LINUX) || defined(PLATFORM_BSD) + +template inline auto execute(const string& name, P&&... p) -> execute_result_t { + int fdout[2]; + int fderr[2]; + if(pipe(fdout) == -1) return {}; + if(pipe(fderr) == -1) return {}; + + pid_t pid = fork(); + if(pid == 0) { + const char* argv[1 + sizeof...(p) + 1]; + const char** argp = argv; + vector argl(forward

(p)...); + *argp++ = (const char*)name; + for(auto& arg : argl) *argp++ = (const char*)arg; + *argp++ = nullptr; + + dup2(fdout[1], STDOUT_FILENO); + dup2(fderr[1], STDERR_FILENO); + close(fdout[0]); + close(fderr[0]); + close(fdout[1]); + close(fderr[1]); + execvp(name, (char* const*)argv); + //this is called only if execvp fails: + //use _exit instead of exit, to avoid destroying key shared file descriptors + _exit(EXIT_FAILURE); + } else { + close(fdout[1]); + close(fderr[1]); + + char buffer[256]; + execute_result_t result; + + while(true) { + auto size = read(fdout[0], buffer, sizeof(buffer)); + if(size <= 0) break; + + auto offset = result.output.size(); + result.output.resize(offset + size); + memory::copy(result.output.get() + offset, buffer, size); + } + + while(true) { + auto size = read(fderr[0], buffer, sizeof(buffer)); + if(size <= 0) break; + + auto offset = result.error.size(); + result.error.resize(offset + size); + memory::copy(result.error.get() + offset, buffer, size); + } + + close(fdout[0]); + close(fderr[0]); + + int status = 0; + waitpid(pid, &status, 0); + if(!WIFEXITED(status)) return {}; + result.code = WEXITSTATUS(status); + return result; + } +} + +template inline auto invoke(const string& name, P&&... p) -> void { + pid_t pid = fork(); + if(pid == 0) { + const char* argv[1 + sizeof...(p) + 1]; + const char** argp = argv; + vector argl(forward

(p)...); + *argp++ = (const char*)name; + for(auto& arg : argl) *argp++ = (const char*)arg; + *argp++ = nullptr; + + if(execvp(name, (char* const*)argv) < 0) { + #if defined(PLATFORM_MACOS) + execlp("open", "open", (const char*)name, nullptr); + #else + execlp("xdg-open", "xdg-open", (const char*)name, nullptr); + #endif + } + exit(0); + } +} + +#elif defined(PLATFORM_WINDOWS) + +template inline auto execute(const string& name, P&&... p) -> execute_result_t { + vector argl(name, forward

(p)...); + for(auto& arg : argl) if(arg.find(" ")) arg = {"\"", arg, "\""}; + string arguments = argl.merge(" "); + + SECURITY_ATTRIBUTES sa; + ZeroMemory(&sa, sizeof(SECURITY_ATTRIBUTES)); + sa.nLength = sizeof(SECURITY_ATTRIBUTES); + sa.bInheritHandle = true; + sa.lpSecurityDescriptor = nullptr; + + HANDLE stdoutRead; + HANDLE stdoutWrite; + if(!CreatePipe(&stdoutRead, &stdoutWrite, &sa, 0)) return {}; + if(!SetHandleInformation(stdoutRead, HANDLE_FLAG_INHERIT, 0)) return {}; + + HANDLE stderrRead; + HANDLE stderrWrite; + if(!CreatePipe(&stderrRead, &stderrWrite, &sa, 0)) return {}; + if(!SetHandleInformation(stderrRead, HANDLE_FLAG_INHERIT, 0)) return {}; + + HANDLE stdinRead; + HANDLE stdinWrite; + if(!CreatePipe(&stdinRead, &stdinWrite, &sa, 0)) return {}; + if(!SetHandleInformation(stdinWrite, HANDLE_FLAG_INHERIT, 0)) return {}; + + STARTUPINFO si; + ZeroMemory(&si, sizeof(STARTUPINFO)); + si.cb = sizeof(STARTUPINFO); + si.hStdOutput = stdoutWrite; + si.hStdError = stderrWrite; + si.hStdInput = stdinRead; + si.dwFlags = STARTF_USESTDHANDLES; + + PROCESS_INFORMATION pi; + ZeroMemory(&pi, sizeof(PROCESS_INFORMATION)); + + if(!CreateProcess( + nullptr, utf16_t(arguments), + nullptr, nullptr, true, CREATE_NO_WINDOW, + nullptr, nullptr, &si, &pi + )) return {}; + + DWORD exitCode = EXIT_FAILURE; + if(WaitForSingleObject(pi.hProcess, INFINITE)) return {}; + if(!GetExitCodeProcess(pi.hProcess, &exitCode)) return {}; + CloseHandle(pi.hThread); + CloseHandle(pi.hProcess); + + char buffer[256]; + execute_result_t result; + result.code = exitCode; + + while(true) { + DWORD read, available, remaining; + if(!PeekNamedPipe(stdoutRead, nullptr, sizeof(buffer), &read, &available, &remaining)) break; + if(read == 0) break; + + if(!ReadFile(stdoutRead, buffer, sizeof(buffer), &read, nullptr)) break; + if(read == 0) break; + + auto offset = result.output.size(); + result.output.resize(offset + read); + memory::copy(result.output.get() + offset, buffer, read); + } + + 
while(true) { + DWORD read, available, remaining; + if(!PeekNamedPipe(stderrRead, nullptr, sizeof(buffer), &read, &available, &remaining)) break; + if(read == 0) break; + + if(!ReadFile(stderrRead, buffer, sizeof(buffer), &read, nullptr)) break; + if(read == 0) break; + + auto offset = result.error.size(); + result.error.resize(offset + read); + memory::copy(result.error.get() + offset, buffer, read); + } + + return result; +} + +template inline auto invoke(const string& name, P&&... p) -> void { + vector argl(forward

(p)...); + for(auto& arg : argl) if(arg.find(" ")) arg = {"\"", arg, "\""}; + string arguments = argl.merge(" "); + string directory = Path::program().replace("/", "\\"); + ShellExecute(nullptr, nullptr, utf16_t(name), utf16_t(arguments), utf16_t(directory), SW_SHOWNORMAL); +} + +#else + +template inline auto execute(const string& name, P&&... p) -> string { + return ""; +} + +template inline auto invoke(const string& name, P&&... p) -> void { +} + +#endif + +} diff --git a/waterbox/ares64/ares/nall/serial.hpp b/waterbox/ares64/ares/nall/serial.hpp new file mode 100644 index 0000000000..33c5c09b0f --- /dev/null +++ b/waterbox/ares64/ares/nall/serial.hpp @@ -0,0 +1,113 @@ +#pragma once + +#include +#include +#include + +#if !defined(API_POSIX) + #error "nall/serial: unsupported system" +#endif + +#include +#include +#include +#include + +namespace nall { + +struct serial { + ~serial() { + close(); + } + + auto readable() -> bool { + if(!opened) return false; + fd_set fdset; + FD_ZERO(&fdset); + FD_SET(port, &fdset); + timeval timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + int result = select(FD_SETSIZE, &fdset, nullptr, nullptr, &timeout); + if(result < 1) return false; + return FD_ISSET(port, &fdset); + } + + //-1 on error, otherwise return bytes read + auto read(u8* data, u32 length) -> s32 { + if(!opened) return -1; + return ::read(port, (void*)data, length); + } + + auto writable() -> bool { + if(!opened) return false; + fd_set fdset; + FD_ZERO(&fdset); + FD_SET(port, &fdset); + timeval timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + int result = select(FD_SETSIZE, nullptr, &fdset, nullptr, &timeout); + if(result < 1) return false; + return FD_ISSET(port, &fdset); + } + + //-1 on error, otherwise return bytes written + auto write(const u8* data, u32 length) -> s32 { + if(!opened) return -1; + return ::write(port, (void*)data, length); + } + + //rate==0: use flow control (synchronous mode) + //rate!=0: baud-rate (asynchronous mode) + auto 
open(string device, u32 rate = 0) -> bool { + close(); + + if(!device) device = "/dev/ttyU0"; //note: default device name is for FreeBSD 10+ + port = ::open(device, O_RDWR | O_NOCTTY | O_NDELAY | O_NONBLOCK); + if(port == -1) return false; + + if(ioctl(port, TIOCEXCL) == -1) { close(); return false; } + if(fcntl(port, F_SETFL, 0) == -1) { close(); return false; } + if(tcgetattr(port, &original_attr) == -1) { close(); return false; } + + termios attr = original_attr; + cfmakeraw(&attr); + cfsetspeed(&attr, rate ? rate : 57600); //rate value has no effect in synchronous mode + + attr.c_lflag &=~ (ECHO | ECHONL | ISIG | ICANON | IEXTEN); + attr.c_iflag &=~ (BRKINT | PARMRK | INPCK | ISTRIP | INLCR | IGNCR | ICRNL | IXON | IXOFF | IXANY); + attr.c_iflag |= (IGNBRK | IGNPAR); + attr.c_oflag &=~ (OPOST); + attr.c_cflag &=~ (CSIZE | CSTOPB | PARENB | CLOCAL); + attr.c_cflag |= (CS8 | CREAD); + if(rate) { + attr.c_cflag &= ~CRTSCTS; + } else { + attr.c_cflag |= CRTSCTS; + } + attr.c_cc[VTIME] = attr.c_cc[VMIN] = 0; + + if(tcsetattr(port, TCSANOW, &attr) == -1) { close(); return false; } + return opened = true; + } + + auto close() -> void { + if(port != -1) { + tcdrain(port); + if(opened) { + tcsetattr(port, TCSANOW, &original_attr); + opened = false; + } + ::close(port); + port = -1; + } + } + +private: + int port = -1; + bool opened = false; + termios original_attr; +}; + +} diff --git a/waterbox/ares64/ares/nall/serializer.hpp b/waterbox/ares64/ares/nall/serializer.hpp new file mode 100644 index 0000000000..4e7c994682 --- /dev/null +++ b/waterbox/ares64/ares/nall/serializer.hpp @@ -0,0 +1,179 @@ +#pragma once + +//serializer: a class designed to save and restore the state of classes. +// +//benefits: +//- data() will be portable in size (it is not necessary to specify type sizes.) +//- data() will be portable in endianness (always stored internally as little-endian.) +//- one serialize function can both save and restore class states. 
+// +//caveats: +//- only plain-old-data can be stored. complex classes must provide serialize(serializer&); +//- floating-point usage is not portable across different implementations + +#include +#include +#include +#include +#include +#include + +namespace nall { + +struct serializer; + +template +struct has_serialize { + template static auto test(decltype(std::declval().serialize(std::declval()))*) -> char; + template static auto test(...) -> long; + static constexpr bool value = sizeof(test(0)) == sizeof(char); +}; +template constexpr bool has_serialize_v = has_serialize::value; + +struct serializer { + explicit operator bool() const { + return _size; + } + + auto reading() const -> bool { + return _mode == 0; + } + + auto writing() const -> bool { + return _mode == 1; + } + + auto setReading() -> void { + _mode = 0; + _size = 0; + } + + auto setWriting() -> void { + _mode = 1; + _size = 0; + } + + auto data() const -> const u8* { + return _data; + } + + auto size() const -> u32 { + return _size; + } + + auto capacity() const -> u32 { + return _capacity; + } + + auto reserve(u32 size) -> void { + if(size > _capacity) { + auto data = new u8[bit::round(size)](); + memory::copy(data, _data, _capacity); + delete[] _data; + _data = data; + _capacity = bit::round(size); + } + } + + template auto operator()(T& value) -> serializer& { + static_assert(has_serialize_v || is_integral_v || is_floating_point_v); + if constexpr(has_serialize_v) { + value.serialize(*this); + } else if constexpr(is_integral_v) { + integer(value); + } else if constexpr(is_floating_point_v) { + real(value); + } + return *this; + } + + template auto operator()(T (&array)[N]) -> serializer& { + for(auto& value : array) operator()(value); + return *this; + } + + template auto operator()(array_span array) -> serializer& { + for(auto& value : array) operator()(value); + return *this; + } + + auto operator=(const serializer& s) -> serializer& { + if(_data) delete[] _data; + + _mode = s._mode; + _data 
= new u8[s._capacity]; + _size = s._size; + _capacity = s._capacity; + + memory::copy(_data, s._data, s._capacity); + return *this; + } + + auto operator=(serializer&& s) -> serializer& { + if(_data) delete[] _data; + + _mode = s._mode; + _data = s._data; + _size = s._size; + _capacity = s._capacity; + + s._data = nullptr; + return *this; + } + + serializer(const serializer& s) { operator=(s); } + serializer(serializer&& s) { operator=(move(s)); } + + serializer() { + setWriting(); + _data = new u8[1024 * 1024](); + _size = 0; + _capacity = 1024 * 1024; + } + + serializer(const u8* data, u32 capacity) { + setReading(); + _data = new u8[capacity](); + _size = 0; + _capacity = capacity; + memory::copy(_data, data, capacity); + } + + ~serializer() { + if(_data) delete[] _data; + } + +private: + template auto integer(T& value) -> serializer& { + enum : u32 { size = std::is_same::value ? 1 : sizeof(T) }; + reserve(_size + size); + if(writing()) { + for(u32 n : range(size)) _data[_size++] = value >> (n << 3); + } else if(reading()) { + value = 0; + for(u32 n : range(size)) value |= (T)_data[_size++] << (n << 3); + } + return *this; + } + + template auto real(T& value) -> serializer& { + enum : u32 { size = sizeof(T) }; + reserve(_size + size); + //this is rather dangerous, and not cross-platform safe; + //but there is no standardized way to export floating point values + auto p = (u8*)&value; + if(writing()) { + for(u32 n : range(size)) _data[_size++] = p[n]; + } else if(reading()) { + for(u32 n : range(size)) p[n] = _data[_size++]; + } + return *this; + } + + bool _mode = 0; + u8* _data = nullptr; + u32 _size = 0; + u32 _capacity = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/service.hpp b/waterbox/ares64/ares/nall/service.hpp new file mode 100644 index 0000000000..3d3c902499 --- /dev/null +++ b/waterbox/ares64/ares/nall/service.hpp @@ -0,0 +1,13 @@ +#pragma once + +//service model template built on top of shared-memory + +#include + +#if defined(API_POSIX) + 
#include +#endif + +#if defined(API_WINDOWS) + #include +#endif diff --git a/waterbox/ares64/ares/nall/set.hpp b/waterbox/ares64/ares/nall/set.hpp new file mode 100644 index 0000000000..297a8c3fdb --- /dev/null +++ b/waterbox/ares64/ares/nall/set.hpp @@ -0,0 +1,266 @@ +#pragma once + +//set +//implementation: red-black tree +// +//search: O(log n) average; O(log n) worst +//insert: O(log n) average; O(log n) worst +//remove: O(log n) average; O(log n) worst +// +//requirements: +// bool T::operator==(const T&) const; +// bool T::operator< (const T&) const; + +#include +#include + +namespace nall { + +template struct set { + struct node_t { + T value; + bool red = 1; + node_t* link[2] = {nullptr, nullptr}; + node_t() = default; + node_t(const T& value) : value(value) {} + }; + + node_t* root = nullptr; + u32 nodes = 0; + + set() = default; + set(const set& source) { operator=(source); } + set(set&& source) { operator=(move(source)); } + set(std::initializer_list list) { for(auto& value : list) insert(value); } + ~set() { reset(); } + + auto operator=(const set& source) -> set& { + if(this == &source) return *this; + reset(); + copy(root, source.root); + nodes = source.nodes; + return *this; + } + + auto operator=(set&& source) -> set& { + if(this == &source) return *this; + root = source.root; + nodes = source.nodes; + source.root = nullptr; + source.nodes = 0; + return *this; + } + + explicit operator bool() const { return nodes; } + auto size() const -> u32 { return nodes; } + + auto reset() -> void { + reset(root); + nodes = 0; + } + + auto find(const T& value) -> maybe { + if(node_t* node = find(root, value)) return node->value; + return nothing; + } + + auto find(const T& value) const -> maybe { + if(node_t* node = find(root, value)) return node->value; + return nothing; + } + + auto insert(const T& value) -> maybe { + u32 count = size(); + node_t* v = insert(root, value); + root->red = 0; + if(size() == count) return nothing; + return v->value; + } + + 
template auto insert(const T& value, P&&... p) -> bool { + bool result = insert(value); + insert(forward

(p)...) | result; + return result; + } + + auto remove(const T& value) -> bool { + u32 count = size(); + bool done = 0; + remove(root, &value, done); + if(root) root->red = 0; + return size() < count; + } + + template auto remove(const T& value, P&&... p) -> bool { + bool result = remove(value); + return remove(forward

(p)...) | result; + } + + struct base_iterator { + auto operator!=(const base_iterator& source) const -> bool { return position != source.position; } + + auto operator++() -> base_iterator& { + if(++position >= source.size()) { position = source.size(); return *this; } + + if(stack.right()->link[1]) { + stack.append(stack.right()->link[1]); + while(stack.right()->link[0]) stack.append(stack.right()->link[0]); + } else { + node_t* child; + do child = stack.takeRight(); + while(child == stack.right()->link[1]); + } + + return *this; + } + + base_iterator(const set& source, u32 position) : source(source), position(position) { + node_t* node = source.root; + while(node) { + stack.append(node); + node = node->link[0]; + } + } + + protected: + const set& source; + u32 position; + vector stack; + }; + + struct iterator : base_iterator { + iterator(const set& source, u32 position) : base_iterator(source, position) {} + auto operator*() const -> T& { return base_iterator::stack.right()->value; } + }; + + auto begin() -> iterator { return iterator(*this, 0); } + auto end() -> iterator { return iterator(*this, size()); } + + struct const_iterator : base_iterator { + const_iterator(const set& source, u32 position) : base_iterator(source, position) {} + auto operator*() const -> const T& { return base_iterator::stack.right()->value; } + }; + + auto begin() const -> const const_iterator { return const_iterator(*this, 0); } + auto end() const -> const const_iterator { return const_iterator(*this, size()); } + +private: + auto reset(node_t*& node) -> void { + if(!node) return; + if(node->link[0]) reset(node->link[0]); + if(node->link[1]) reset(node->link[1]); + delete node; + node = nullptr; + } + + auto copy(node_t*& target, const node_t* source) -> void { + if(!source) return; + target = new node_t(source->value); + target->red = source->red; + copy(target->link[0], source->link[0]); + copy(target->link[1], source->link[1]); + } + + auto find(node_t* node, const T& value) const 
-> node_t* { + if(node == nullptr) return nullptr; + if(node->value == value) return node; + return find(node->link[node->value < value], value); + } + + auto red(node_t* node) const -> bool { return node && node->red; } + auto black(node_t* node) const -> bool { return !red(node); } + + auto rotate(node_t*& a, bool dir) -> void { + node_t*& b = a->link[!dir]; + node_t*& c = b->link[dir]; + a->red = 1, b->red = 0; + std::swap(a, b); + std::swap(b, c); + } + + auto rotateTwice(node_t*& node, bool dir) -> void { + rotate(node->link[!dir], !dir); + rotate(node, dir); + } + + auto insert(node_t*& node, const T& value) -> node_t* { + if(!node) { nodes++; node = new node_t(value); return node; } + if(node->value == value) { node->value = value; return node; } //prevent duplicate entries + + bool dir = node->value < value; + node_t* v = insert(node->link[dir], value); + if(black(node->link[dir])) return v; + + if(red(node->link[!dir])) { + node->red = 1; + node->link[0]->red = 0; + node->link[1]->red = 0; + } else if(red(node->link[dir]->link[dir])) { + rotate(node, !dir); + } else if(red(node->link[dir]->link[!dir])) { + rotateTwice(node, !dir); + } + + return v; + } + + auto balance(node_t*& node, bool dir, bool& done) -> void { + node_t* p = node; + node_t* s = node->link[!dir]; + if(!s) return; + + if(red(s)) { + rotate(node, dir); + s = p->link[!dir]; + } + + if(black(s->link[0]) && black(s->link[1])) { + if(red(p)) done = 1; + p->red = 0, s->red = 1; + } else { + bool save = p->red; + bool head = node == p; + + if(red(s->link[!dir])) rotate(p, dir); + else rotateTwice(p, dir); + + p->red = save; + p->link[0]->red = 0; + p->link[1]->red = 0; + + if(head) node = p; + else node->link[dir] = p; + + done = 1; + } + } + + auto remove(node_t*& node, const T* value, bool& done) -> void { + if(!node) { done = 1; return; } + + if(node->value == *value) { + if(!node->link[0] || !node->link[1]) { + node_t* save = node->link[!node->link[0]]; + + if(red(node)) done = 1; + else 
if(red(save)) save->red = 0, done = 1; + + nodes--; + delete node; + node = save; + return; + } else { + node_t* heir = node->link[0]; + while(heir->link[1]) heir = heir->link[1]; + node->value = heir->value; + value = &heir->value; + } + } + + bool dir = node->value < *value; + remove(node->link[dir], value, done); + if(!done) balance(node, dir, done); + } +}; + +} diff --git a/waterbox/ares64/ares/nall/shared-memory.hpp b/waterbox/ares64/ares/nall/shared-memory.hpp new file mode 100644 index 0000000000..9d40bca6a6 --- /dev/null +++ b/waterbox/ares64/ares/nall/shared-memory.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +#if defined(API_POSIX) + #include +#endif + +#if defined(API_WINDOWS) + #include +#endif diff --git a/waterbox/ares64/ares/nall/shared-pointer.hpp b/waterbox/ares64/ares/nall/shared-pointer.hpp new file mode 100644 index 0000000000..9ac271f3ae --- /dev/null +++ b/waterbox/ares64/ares/nall/shared-pointer.hpp @@ -0,0 +1,291 @@ +#pragma once + +#include +#include +#include +#include + +namespace nall { + +template struct shared_pointer; + +struct shared_pointer_manager { + void* pointer = nullptr; + function deleter; + u32 strong = 0; + u32 weak = 0; + + shared_pointer_manager(void* pointer) : pointer(pointer) { + } +}; + +template struct shared_pointer; +template struct shared_pointer_weak; +template struct shared_pointer_this; +struct shared_pointer_this_base{}; + +template +struct shared_pointer { + template static auto create(P&&... p) { + return shared_pointer{new T{forward

(p)...}}; + } + + using type = T; + shared_pointer_manager* manager = nullptr; + + template + struct is_compatible { + static constexpr bool value = is_base_of::value || is_base_of::value; + }; + + shared_pointer() { + } + + shared_pointer(T* source) { + operator=(source); + } + + shared_pointer(T* source, const function& deleter) { + operator=(source); + manager->deleter = function([=](void* p) { + deleter((T*)p); + }); + } + + shared_pointer(const shared_pointer& source) { + operator=(source); + } + + shared_pointer(shared_pointer&& source) { + operator=(move(source)); + } + + template::value>> + shared_pointer(const shared_pointer& source) { + operator=(source); + } + + template::value>> + shared_pointer(shared_pointer&& source) { + operator=(move(source)); + } + + template::value>> + shared_pointer(const shared_pointer_weak& source) { + operator=(source); + } + + template::value>> + shared_pointer(const shared_pointer& source, T* pointer) { + if((bool)source && (T*)source.manager->pointer == pointer) { + manager = source.manager; + manager->strong++; + } + } + + ~shared_pointer() { + reset(); + } + + auto operator=(T* source) -> shared_pointer& { + reset(); + if(source) { + manager = new shared_pointer_manager((void*)source); + manager->strong++; + if constexpr(is_base_of_v) { + source->weak = *this; + } + } + return *this; + } + + auto operator=(const shared_pointer& source) -> shared_pointer& { + if(this != &source) { + reset(); + if((bool)source) { + manager = source.manager; + manager->strong++; + } + } + return *this; + } + + auto operator=(shared_pointer&& source) -> shared_pointer& { + if(this != &source) { + reset(); + manager = source.manager; + source.manager = nullptr; + } + return *this; + } + + template::value>> + auto operator=(const shared_pointer& source) -> shared_pointer& { + if((uintptr)this != (uintptr)&source) { + reset(); + if((bool)source) { + manager = source.manager; + manager->strong++; + } + } + return *this; + } + + template::value>> 
+ auto operator=(shared_pointer&& source) -> shared_pointer& { + if((uintptr)this != (uintptr)&source) { + reset(); + manager = source.manager; + source.manager = nullptr; + } + return *this; + } + + template::value>> + auto operator=(const shared_pointer_weak& source) -> shared_pointer& { + reset(); + if((bool)source) { + manager = source.manager; + manager->strong++; + } + return *this; + } + + auto data() -> T* { + if(manager) return (T*)manager->pointer; + return nullptr; + } + + auto data() const -> const T* { + if(manager) return (T*)manager->pointer; + return nullptr; + } + + auto operator->() -> T* { return data(); } + auto operator->() const -> const T* { return data(); } + + auto operator*() -> T& { return *data(); } + auto operator*() const -> const T& { return *data(); } + + auto operator()() -> T& { return *data(); } + auto operator()() const -> const T& { return *data(); } + + template + auto operator==(const shared_pointer& source) const -> bool { + return manager == source.manager; + } + + template + auto operator!=(const shared_pointer& source) const -> bool { + return manager != source.manager; + } + + explicit operator bool() const { + return manager && manager->strong; + } + + auto unique() const -> bool { + return manager && manager->strong == 1; + } + + auto references() const -> u32 { + return manager ? 
manager->strong : 0; + } + + auto reset() -> void { + if(manager && manager->strong) { + //pointer may contain weak references; if strong==0 it may destroy manager + //as such, we must destroy strong before decrementing it to zero + if(manager->strong == 1) { + if(manager->deleter) { + manager->deleter(manager->pointer); + } else { + delete (T*)manager->pointer; + } + manager->pointer = nullptr; + } + if(--manager->strong == 0) { + if(manager->weak == 0) { + delete manager; + } + } + } + manager = nullptr; + } + + template + auto cast() -> shared_pointer { + if(auto pointer = dynamic_cast(data())) { + return {*this, pointer}; + } + return {}; + } +}; + +template +struct shared_pointer_weak { + using type = T; + shared_pointer_manager* manager = nullptr; + + shared_pointer_weak() { + } + + shared_pointer_weak(const shared_pointer& source) { + operator=(source); + } + + auto operator=(const shared_pointer& source) -> shared_pointer_weak& { + reset(); + if(manager = source.manager) manager->weak++; + return *this; + } + + ~shared_pointer_weak() { + reset(); + } + + auto operator==(const shared_pointer_weak& source) const -> bool { + return manager == source.manager; + } + + auto operator!=(const shared_pointer_weak& source) const -> bool { + return manager != source.manager; + } + + explicit operator bool() const { + return manager && manager->strong; + } + + auto acquire() const -> shared_pointer { + return shared_pointer(*this); + } + + auto reset() -> void { + if(manager && --manager->weak == 0) { + if(manager->strong == 0) { + delete manager; + } + } + manager = nullptr; + } +}; + +template +struct shared_pointer_this : shared_pointer_this_base { + shared_pointer_weak weak; + auto shared() -> shared_pointer { return weak; } + auto shared() const -> shared_pointer { return weak; } +}; + +template +auto shared_pointer_make(P&&... p) -> shared_pointer { + return shared_pointer{new T{forward

(p)...}}; +} + +template +struct shared_pointer_new : shared_pointer { + shared_pointer_new(const shared_pointer& source) : shared_pointer(source) {} + template shared_pointer_new(P&&... p) : shared_pointer(new T(forward

(p)...)) {} +}; + +} diff --git a/waterbox/ares64/ares/nall/smtp.hpp b/waterbox/ares64/ares/nall/smtp.hpp new file mode 100644 index 0000000000..82f2d7d2c4 --- /dev/null +++ b/waterbox/ares64/ares/nall/smtp.hpp @@ -0,0 +1,314 @@ +#pragma once + +#include +#include +#include + +#if !defined(PLATFORM_WINDOWS) + #include + #include + #include + #include +#else + #include + #include +#endif + +namespace nall { + +struct SMTP { + enum class Format : u32 { Plain, HTML }; + + auto server(string server, u16 port = 25) -> void; + auto from(string mail, string name = "") -> void; + auto to(string mail, string name = "") -> void; + auto cc(string mail, string name = "") -> void; + auto bcc(string mail, string name = "") -> void; + auto attachment(const u8* data, u32 size, string name) -> void; + auto attachment(string filename, string name = "") -> bool; + auto subject(string subject) -> void; + auto body(string body, Format format = Format::Plain) -> void; + + auto send() -> bool; + auto message() -> string; + auto response() -> string; + + #if defined(API_WINDOWS) + auto close(s32) -> s32; + SMTP(); + #endif + +private: + struct Information { + string server; + u16 port; + struct Contact { + string mail; + string name; + }; + Contact from; + vector to; + vector cc; + vector bcc; + struct Attachment { + vector buffer; + string name; + }; + string subject; + string body; + Format format = Format::Plain; + vector attachments; + + string message; + string response; + } info; + + auto send(s32 sock, const string& text) -> bool; + auto recv(s32 sock) -> string; + auto boundary() -> string; + auto filename(const string& filename) -> string; + auto contact(const Information::Contact& contact) -> string; + auto contacts(const vector& contacts) -> string; + auto split(const string& text) -> string; +}; + +inline auto SMTP::server(string server, u16 port) -> void { + info.server = server; + info.port = port; +} + +inline auto SMTP::from(string mail, string name) -> void { + info.from 
= {mail, name}; +} + +inline auto SMTP::to(string mail, string name) -> void { + info.to.append({mail, name}); +} + +inline auto SMTP::cc(string mail, string name) -> void { + info.cc.append({mail, name}); +} + +inline auto SMTP::bcc(string mail, string name) -> void { + info.bcc.append({mail, name}); +} + +inline auto SMTP::attachment(const u8* data, u32 size, string name) -> void { + vector buffer; + buffer.resize(size); + memcpy(buffer.data(), data, size); + info.attachments.append({std::move(buffer), name}); +} + +inline auto SMTP::attachment(string filename, string name) -> bool { + if(!file::exists(filename)) return false; + if(name == "") name = notdir(filename); + auto buffer = file::read(filename); + info.attachments.append({std::move(buffer), name}); + return true; +} + +inline auto SMTP::subject(string subject) -> void { + info.subject = subject; +} + +inline auto SMTP::body(string body, Format format) -> void { + info.body = body; + info.format = format; +} + +inline auto SMTP::send() -> bool { + info.message.append("From: =?UTF-8?B?", Base64::encode(contact(info.from)), "?=\r\n"); + info.message.append("To: =?UTF-8?B?", Base64::encode(contacts(info.to)), "?=\r\n"); + info.message.append("Cc: =?UTF-8?B?", Base64::encode(contacts(info.cc)), "?=\r\n"); + info.message.append("Subject: =?UTF-8?B?", Base64::encode(info.subject), "?=\r\n"); + + string uniqueID = boundary(); + + info.message.append("MIME-Version: 1.0\r\n"); + info.message.append("Content-Type: multipart/mixed; boundary=", uniqueID, "\r\n"); + info.message.append("\r\n"); + + string format = (info.format == Format::Plain ? 
"text/plain" : "text/html"); + + info.message.append("--", uniqueID, "\r\n"); + info.message.append("Content-Type: ", format, "; charset=UTF-8\r\n"); + info.message.append("Content-Transfer-Encoding: base64\r\n"); + info.message.append("\r\n"); + info.message.append(split(Base64::encode(info.body)), "\r\n"); + info.message.append("\r\n"); + + for(auto& attachment : info.attachments) { + info.message.append("--", uniqueID, "\r\n"); + info.message.append("Content-Type: application/octet-stream\r\n"); + info.message.append("Content-Transfer-Encoding: base64\r\n"); + info.message.append("Content-Disposition: attachment; size=", attachment.buffer.size(), "; filename*=UTF-8''", filename(attachment.name), "\r\n"); + info.message.append("\r\n"); + info.message.append(split(Base64::encode(attachment.buffer)), "\r\n"); + info.message.append("\r\n"); + } + + info.message.append("--", uniqueID, "--\r\n"); + + addrinfo hints; + memset(&hints, 0, sizeof(addrinfo)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + addrinfo* serverinfo; + s32 status = getaddrinfo(info.server, string(info.port), &hints, &serverinfo); + if(status != 0) return false; + + s32 sock = socket(serverinfo->ai_family, serverinfo->ai_socktype, serverinfo->ai_protocol); + if(sock == -1) return false; + + s32 result = connect(sock, serverinfo->ai_addr, serverinfo->ai_addrlen); + if(result == -1) return false; + + string response; + info.response.append(response = recv(sock)); + if(!response.beginswith("220 ")) { close(sock); return false; } + + send(sock, {"HELO ", info.server, "\r\n"}); + info.response.append(response = recv(sock)); + if(!response.beginswith("250 ")) { close(sock); return false; } + + send(sock, {"MAIL FROM: <", info.from.mail, ">\r\n"}); + info.response.append(response = recv(sock)); + if(!response.beginswith("250 ")) { close(sock); return false; } + + for(auto& contact : info.to) { + send(sock, {"RCPT TO: <", contact.mail, ">\r\n"}); + 
info.response.append(response = recv(sock)); + if(!response.beginswith("250 ")) { close(sock); return false; } + } + + for(auto& contact : info.cc) { + send(sock, {"RCPT TO: <", contact.mail, ">\r\n"}); + info.response.append(response = recv(sock)); + if(!response.beginswith("250 ")) { close(sock); return false; } + } + + for(auto& contact : info.bcc) { + send(sock, {"RCPT TO: <", contact.mail, ">\r\n"}); + info.response.append(response = recv(sock)); + if(!response.beginswith("250 ")) { close(sock); return false; } + } + + send(sock, {"DATA\r\n"}); + info.response.append(response = recv(sock)); + if(!response.beginswith("354 ")) { close(sock); return false; } + + send(sock, {info.message, "\r\n", ".\r\n"}); + info.response.append(response = recv(sock)); + if(!response.beginswith("250 ")) { close(sock); return false; } + + send(sock, {"QUIT\r\n"}); + info.response.append(response = recv(sock)); +//if(!response.beginswith("221 ")) { close(sock); return false; } + + close(sock); + return true; +} + +inline auto SMTP::message() -> string { + return info.message; +} + +inline auto SMTP::response() -> string { + return info.response; +} + +inline auto SMTP::send(s32 sock, const string& text) -> bool { + const char* data = text.data(); + u32 size = text.size(); + while(size) { + s32 length = ::send(sock, (const char*)data, size, 0); + if(length == -1) return false; + data += length; + size -= length; + } + return true; +} + +inline auto SMTP::recv(s32 sock) -> string { + vector buffer; + while(true) { + char c; + if(::recv(sock, &c, sizeof(char), 0) < 1) break; + buffer.append(c); + if(c == '\n') break; + } + buffer.append(0); + return buffer; +} + +inline auto SMTP::boundary() -> string { + random_lfsr random; + random.seed(time(0)); + string boundary; + for(u32 n = 0; n < 16; n++) boundary.append(hex<2>(random())); + return boundary; +} + +inline auto SMTP::filename(const string& filename) -> string { + string result; + for(auto& n : filename) { + if(n <= 32 || n >= 
127) result.append("%", hex<2>(n)); + else result.append(n); + } + return result; +} + +inline auto SMTP::contact(const Information::Contact& contact) -> string { + if(!contact.name) return contact.mail; + return {"\"", contact.name, "\" <", contact.mail, ">"}; +} + +inline auto SMTP::contacts(const vector& contacts) -> string { + string result; + for(auto& contact : contacts) { + result.append(this->contact(contact), "; "); + } + result.trimRight("; ", 1L); + return result; +} + +inline auto SMTP::split(const string& text) -> string { + string result; + + u32 offset = 0; + while(offset < text.size()) { + u32 length = min(76, text.size() - offset); + if(length < 76) { + result.append(text.slice(offset)); + } else { + result.append(text.slice(offset, 76), "\r\n"); + } + offset += length; + } + + return result; +} + +#if defined(API_WINDOWS) +inline auto SMTP::close(s32 sock) -> s32 { + return closesocket(sock); +} + +inline SMTP::SMTP() { + s32 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if(sock == INVALID_SOCKET && WSAGetLastError() == WSANOTINITIALISED) { + WSADATA wsaData; + if(WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) { + WSACleanup(); + return; + } + } else { + close(sock); + } +} +#endif + +} diff --git a/waterbox/ares64/ares/nall/stdint.hpp b/waterbox/ares64/ares/nall/stdint.hpp new file mode 100644 index 0000000000..9cee334c93 --- /dev/null +++ b/waterbox/ares64/ares/nall/stdint.hpp @@ -0,0 +1,83 @@ +#pragma once + +#if defined(_MSC_VER) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef signed long long int64_t; + typedef int64_t intmax_t; + #if defined(_WIN64) + typedef int64_t intptr_t; + #else + typedef int32_t intptr_t; + #endif + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + typedef unsigned long long uint64_t; + typedef uint64_t uintmax_t; + #if defined(_WIN64) + typedef uint64_t uintptr_t; + #else + typedef uint32_t uintptr_t; + #endif 
+#else + #include +#endif + +#if defined(__SIZEOF_INT128__) + using int128_t = signed __int128; + using uint128_t = unsigned __int128; +#endif + +using intmax = intmax_t; +using uintmax = uintmax_t; + +using intptr = intptr_t; +using uintptr = uintptr_t; + +using float32_t = float; +using float64_t = double; +//note: long double size is not reliable across platforms +//using float80_t = long double; + +static_assert(sizeof( int8_t) == 1, "int8_t is not of the correct size" ); +static_assert(sizeof(int16_t) == 2, "int16_t is not of the correct size"); +static_assert(sizeof(int32_t) == 4, "int32_t is not of the correct size"); +static_assert(sizeof(int64_t) == 8, "int64_t is not of the correct size"); + +static_assert(sizeof( uint8_t) == 1, "int8_t is not of the correct size" ); +static_assert(sizeof(uint16_t) == 2, "int16_t is not of the correct size"); +static_assert(sizeof(uint32_t) == 4, "int32_t is not of the correct size"); +static_assert(sizeof(uint64_t) == 8, "int64_t is not of the correct size"); + +static_assert(sizeof(float) >= 4, "float32_t is not of the correct size"); +static_assert(sizeof(double) >= 8, "float64_t is not of the correct size"); +//static_assert(sizeof(long double) >= 10, "float80_t is not of the correct size"); + +using sint = signed int; +using uint = unsigned int; +using real32_t = float; +using real64_t = double; + +//shorthand +using s8 = int8_t; +using s08 = int8_t; +using s16 = int16_t; +using s32 = int32_t; +using s64 = int64_t; + +using u8 = uint8_t; +using u08 = uint8_t; +using u16 = uint16_t; +using u32 = uint32_t; +using u64 = uint64_t; + +using f32 = float32_t; +using f64 = float64_t; + +#if defined(__SIZEOF_INT128__) + using s128 = int128_t; + using u128 = uint128_t; +#endif diff --git a/waterbox/ares64/ares/nall/string.hpp b/waterbox/ares64/ares/nall/string.hpp new file mode 100644 index 0000000000..a8784adbaf --- /dev/null +++ b/waterbox/ares64/ares/nall/string.hpp @@ -0,0 +1,370 @@ +#pragma once + +#include +#include 
+#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nall { + +struct string; +struct string_format; + +struct string_view { + using type = string_view; + + //view.hpp + string_view(); + string_view(const string_view& source); + string_view(string_view&& source); + string_view(const char* data); + string_view(const char* data, u32 size); + string_view(const string& source); + template string_view(P&&... p); + ~string_view(); + + auto operator=(const string_view& source) -> type&; + auto operator=(string_view&& source) -> type&; + + auto operator==(const char* source) const -> bool { return strcmp(data(), source) == 0; } + auto operator!=(const char* source) const -> bool { return strcmp(data(), source) != 0; } + + explicit operator bool() const; + operator const char*() const; + auto data() const -> const char*; + auto size() const -> u32; + + auto begin() const { return &_data[0]; } + auto end() const { return &_data[size()]; } + +protected: + string* _string; + const char* _data; + mutable s32 _size; +}; + +//adaptive (SSO + COW) is by far the best choice, the others exist solely to: +//1) demonstrate the performance benefit of combining SSO + COW +//2) rule out allocator bugs by trying different allocators when needed +#define NALL_STRING_ALLOCATOR_ADAPTIVE +//#define NALL_STRING_ALLOCATOR_COPY_ON_WRITE +//#define NALL_STRING_ALLOCATOR_SMALL_STRING_OPTIMIZATION +//#define NALL_STRING_ALLOCATOR_VECTOR + +//cast.hpp +template struct stringify; + +//format.hpp +template auto print(P&&...) -> void; +template auto print(FILE*, P&&...) 
-> void; +template auto pad(const T& value, long precision = 0, char padchar = ' ') -> string; +template auto hex(T value, long precision = 0, char padchar = '0') -> string; +template auto octal(T value, long precision = 0, char padchar = '0') -> string; +template auto binary(T value, long precision = 0, char padchar = '0') -> string; + +//match.hpp +auto tokenize(const char* s, const char* p) -> bool; +auto tokenize(vector& list, const char* s, const char* p) -> bool; + +//utf8.hpp +auto characters(string_view self, s32 offset = 0, s32 length = -1) -> u32; + +//utility.hpp +auto slice(string_view self, s32 offset = 0, s32 length = -1) -> string; +template auto fromInteger(char* result, T value) -> char*; +template auto fromNatural(char* result, T value) -> char*; +template auto fromHex(char* result, T value) -> char*; +template auto fromReal(char* str, T value) -> u32; + +struct string { + using type = string; + +protected: + #if defined(NALL_STRING_ALLOCATOR_ADAPTIVE) + enum : u32 { SSO = 24 }; + union { + struct { //copy-on-write + char* _data; + u32* _refs; + }; + struct { //small-string-optimization + char _text[SSO]; + }; + }; + auto _allocate() -> void; + auto _copy() -> void; + auto _resize() -> void; + #endif + + #if defined(NALL_STRING_ALLOCATOR_COPY_ON_WRITE) + char* _data; + mutable u32* _refs; + auto _allocate() -> char*; + auto _copy() -> char*; + #endif + + #if defined(NALL_STRING_ALLOCATOR_SMALL_STRING_OPTIMIZATION) + enum : u32 { SSO = 24 }; + union { + char* _data; + char _text[SSO]; + }; + #endif + + #if defined(NALL_STRING_ALLOCATOR_VECTOR) + char* _data; + #endif + + u32 _capacity; + u32 _size; + +public: + string(); + string(string& source) : string() { operator=(source); } + string(const string& source) : string() { operator=(source); } + string(string&& source) : string() { operator=(move(source)); } + template auto get() -> T*; + template auto data() const -> const T*; + template auto size() const -> u32 { return _size / sizeof(T); } + 
template auto capacity() const -> u32 { return _capacity / sizeof(T); } + auto reset() -> type&; + auto reserve(u32) -> type&; + auto resize(u32) -> type&; + auto operator=(const string&) -> type&; + auto operator=(string&&) -> type&; + + template string(T&& s, P&&... p) : string() { + append(forward(s), forward

(p)...); + } + ~string() { reset(); } + + explicit operator bool() const { return _size; } + operator const char*() const { return (const char*)data(); } + operator array_span() { return {(char*)get(), size()}; } + operator array_view() const { return {(const char*)data(), size()}; } + operator array_span() { return {(u8*)get(), size()}; } + operator array_view() const { return {(const u8*)data(), size()}; } + + auto operator==(const string& source) const -> bool { + return size() == source.size() && memory::compare(data(), source.data(), size()) == 0; + } + auto operator!=(const string& source) const -> bool { + return size() != source.size() || memory::compare(data(), source.data(), size()) != 0; + } + + auto operator==(const char* source) const -> bool { return strcmp(data(), source) == 0; } + auto operator!=(const char* source) const -> bool { return strcmp(data(), source) != 0; } + + auto operator==(string_view source) const -> bool { return compare(source) == 0; } + auto operator!=(string_view source) const -> bool { return compare(source) != 0; } + auto operator< (string_view source) const -> bool { return compare(source) < 0; } + auto operator<=(string_view source) const -> bool { return compare(source) <= 0; } + auto operator> (string_view source) const -> bool { return compare(source) > 0; } + auto operator>=(string_view source) const -> bool { return compare(source) >= 0; } + + auto begin() -> char* { return &get()[0]; } + auto end() -> char* { return &get()[size()]; } + auto begin() const -> const char* { return &data()[0]; } + auto end() const -> const char* { return &data()[size()]; } + + //atoi.hpp + auto boolean() const -> bool; + auto integer() const -> s64; + auto natural() const -> u64; + auto hex() const -> u64; + auto real() const -> f64; + + //core.hpp + auto operator[](u32) const -> const char&; + auto operator()(u32, char = 0) const -> char; + template auto assign(P&&...) -> type&; + template auto prepend(const T&, P&&...) 
-> type&; + template auto prepend(const nall::string_format&, P&&...) -> type&; + template auto _prepend(const stringify&) -> type&; + template auto append(const T&, P&&...) -> type&; + template auto append(const nall::string_format&, P&&...) -> type&; + template auto _append(const stringify&) -> type&; + auto length() const -> u32; + + //find.hpp + auto contains(string_view characters) const -> maybe; + + template auto _find(s32, string_view) const -> maybe; + + auto find(string_view source) const -> maybe; + auto ifind(string_view source) const -> maybe; + auto qfind(string_view source) const -> maybe; + auto iqfind(string_view source) const -> maybe; + + auto findFrom(s32 offset, string_view source) const -> maybe; + auto ifindFrom(s32 offset, string_view source) const -> maybe; + + auto findNext(s32 offset, string_view source) const -> maybe; + auto ifindNext(s32 offset, string_view source) const -> maybe; + + auto findPrevious(s32 offset, string_view source) const -> maybe; + auto ifindPrevious(s32 offset, string_view source) const -> maybe; + + //format.hpp + auto format(const nall::string_format& params) -> type&; + + //compare.hpp + template static auto _compare(const char*, u32, const char*, u32) -> s32; + + static auto compare(string_view, string_view) -> s32; + static auto icompare(string_view, string_view) -> s32; + + auto compare(string_view source) const -> s32; + auto icompare(string_view source) const -> s32; + + auto equals(string_view source) const -> bool; + auto iequals(string_view source) const -> bool; + + auto beginsWith(string_view source) const -> bool; + auto ibeginsWith(string_view source) const -> bool; + + auto endsWith(string_view source) const -> bool; + auto iendsWith(string_view source) const -> bool; + + //convert.hpp + auto downcase() -> type&; + auto upcase() -> type&; + + auto qdowncase() -> type&; + auto qupcase() -> type&; + + auto transform(string_view from, string_view to) -> type&; + + //match.hpp + auto match(string_view 
source) const -> bool; + auto imatch(string_view source) const -> bool; + + //replace.hpp + template auto _replace(string_view, string_view, long) -> type&; + auto replace(string_view from, string_view to, long limit = LONG_MAX) -> type&; + auto ireplace(string_view from, string_view to, long limit = LONG_MAX) -> type&; + auto qreplace(string_view from, string_view to, long limit = LONG_MAX) -> type&; + auto iqreplace(string_view from, string_view to, long limit = LONG_MAX) -> type&; + + //split.hpp + auto split(string_view key, long limit = LONG_MAX) const -> vector; + auto isplit(string_view key, long limit = LONG_MAX) const -> vector; + auto qsplit(string_view key, long limit = LONG_MAX) const -> vector; + auto iqsplit(string_view key, long limit = LONG_MAX) const -> vector; + + //trim.hpp + auto trim(string_view lhs, string_view rhs, long limit = LONG_MAX) -> type&; + auto trimLeft(string_view lhs, long limit = LONG_MAX) -> type&; + auto trimRight(string_view rhs, long limit = LONG_MAX) -> type&; + + auto itrim(string_view lhs, string_view rhs, long limit = LONG_MAX) -> type&; + auto itrimLeft(string_view lhs, long limit = LONG_MAX) -> type&; + auto itrimRight(string_view rhs, long limit = LONG_MAX) -> type&; + + auto strip() -> type&; + auto stripLeft() -> type&; + auto stripRight() -> type&; + + //utf8.hpp + auto characters(s32 offset = 0, s32 length = -1) const -> u32; + + //utility.hpp + static auto read(string_view filename) -> string; + static auto repeat(string_view pattern, u32 times) -> string; + auto fill(char fill = ' ') -> type&; + auto hash() const -> u32; + auto remove(u32 offset, u32 length) -> type&; + auto reverse() -> type&; + auto size(s32 length, char fill = ' ') -> type&; + auto slice(s32 offset = 0, s32 length = -1) const -> string; +}; + +template<> struct vector : vector_base { + using type = vector; + using vector_base::vector_base; + + vector(const vector& source) { vector_base::operator=(source); } + vector(vector& source) { 
vector_base::operator=(source); } + vector(vector&& source) { vector_base::operator=(move(source)); } + template vector(P&&... p) { append(forward

(p)...); } + + auto operator=(const vector& source) -> type& { return vector_base::operator=(source), *this; } + auto operator=(vector& source) -> type& { return vector_base::operator=(source), *this; } + auto operator=(vector&& source) -> type& { return vector_base::operator=(move(source)), *this; } + + //vector.hpp + template auto append(const string&, P&&...) -> type&; + auto append() -> type&; + + auto isort() -> type&; + auto find(string_view source) const -> maybe; + auto ifind(string_view source) const -> maybe; + auto match(string_view pattern) const -> vector; + auto merge(string_view separator = "") const -> string; + auto strip() -> type&; + + //split.hpp + template auto _split(string_view, string_view, long) -> type&; +}; + +struct string_format : vector { + using type = string_format; + + template string_format(P&&... p) { reserve(sizeof...(p)); append(forward

(p)...); } + template auto append(const T&, P&&... p) -> type&; + auto append() -> type&; +}; + +inline auto operator"" _s(const char* value, std::size_t) -> string { return {value}; } + +} + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include diff --git a/waterbox/ares64/ares/nall/string/allocator/adaptive.hpp b/waterbox/ares64/ares/nall/string/allocator/adaptive.hpp new file mode 100644 index 0000000000..5c4b8d7117 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/allocator/adaptive.hpp @@ -0,0 +1,123 @@ +#pragma once + +/***** + adaptive allocator + sizeof(string) == SSO + 8 + + aggressively tries to avoid heap allocations + small strings are stored on the stack + large strings are shared via copy-on-write + + SSO alone is very slow on large strings due to copying + SSO alone is very slightly faster than this allocator on small strings + + COW alone is very slow on small strings due to heap allocations + COW alone is very slightly faster than this allocator on large strings + + adaptive is thus very fast for all string sizes +*****/ + +namespace nall { + +inline string::string() : _data(nullptr), _capacity(SSO - 1), _size(0) { +} + +template +inline auto string::get() -> T* { + if(_capacity < SSO) return (T*)_text; + if(*_refs > 1) _copy(); + return (T*)_data; +} + +template +inline auto string::data() const -> const T* { + if(_capacity < SSO) return (const T*)_text; + return (const T*)_data; +} + +inline auto string::reset() -> type& { + if(_capacity >= SSO && !--*_refs) memory::free(_data); + _data = nullptr; + _capacity = SSO - 1; + _size = 0; + return *this; +} + +inline auto string::reserve(u32 capacity) -> type& { + if(capacity <= _capacity) return *this; + capacity = bit::round(capacity + 1) - 1; + if(_capacity < SSO) { + _capacity = 
capacity; + _allocate(); + } else if(*_refs > 1) { + _capacity = capacity; + _copy(); + } else { + _capacity = capacity; + _resize(); + } + return *this; +} + +inline auto string::resize(u32 size) -> type& { + reserve(size); + get()[_size = size] = 0; + return *this; +} + +inline auto string::operator=(const string& source) -> type& { + if(&source == this) return *this; + reset(); + if(source._capacity >= SSO) { + _data = source._data; + _refs = source._refs; + _capacity = source._capacity; + _size = source._size; + ++*_refs; + } else { + memory::copy(_text, source._text, SSO); + _capacity = source._capacity; + _size = source._size; + } + return *this; +} + +inline auto string::operator=(string&& source) -> type& { + if(&source == this) return *this; + reset(); + memory::copy(this, &source, sizeof(string)); + source._data = nullptr; + source._capacity = SSO - 1; + source._size = 0; + return *this; +} + +//SSO -> COW +inline auto string::_allocate() -> void { + char _temp[SSO]; + memory::copy(_temp, _text, SSO); + _data = memory::allocate(_capacity + 1 + sizeof(u32)); + memory::copy(_data, _temp, SSO); + _refs = (u32*)(_data + _capacity + 1); //always aligned by 32 via reserve() + *_refs = 1; +} + +//COW -> Unique +inline auto string::_copy() -> void { + auto _temp = memory::allocate(_capacity + 1 + sizeof(u32)); + memory::copy(_temp, _data, _size = min(_capacity, _size)); + _temp[_size] = 0; + --*_refs; + _data = _temp; + _refs = (u32*)(_data + _capacity + 1); + *_refs = 1; +} + +//COW -> Resize +inline auto string::_resize() -> void { + _data = memory::resize(_data, _capacity + 1 + sizeof(u32)); + _refs = (u32*)(_data + _capacity + 1); + *_refs = 1; +} + +} diff --git a/waterbox/ares64/ares/nall/string/allocator/copy-on-write.hpp b/waterbox/ares64/ares/nall/string/allocator/copy-on-write.hpp new file mode 100644 index 0000000000..c7e0b4217d --- /dev/null +++ b/waterbox/ares64/ares/nall/string/allocator/copy-on-write.hpp @@ -0,0 +1,92 @@ +#pragma once + +namespace 
nall { + +inline string::string() : _data(nullptr), _refs(nullptr), _capacity(0), _size(0) { +} + +template +inline auto string::get() -> T* { + static char _null[] = ""; + if(!_data) return (T*)_null; + if(*_refs > 1) _data = _copy(); //make unique for write operations + return (T*)_data; +} + +template +inline auto string::data() const -> const T* { + static const char _null[] = ""; + if(!_data) return (const T*)_null; + return (const T*)_data; +} + +inline auto string::reset() -> type& { + if(_data && !--*_refs) { + memory::free(_data); + _data = nullptr; //_refs = nullptr; is unnecessary + } + _capacity = 0; + _size = 0; + return *this; +} + +inline auto string::reserve(u32 capacity) -> type& { + if(capacity > _capacity) { + _capacity = bit::round(max(31u, capacity) + 1) - 1; + _data = _data ? _copy() : _allocate(); + } + return *this; +} + +inline auto string::resize(u32 size) -> type& { + reserve(size); + get()[_size = size] = 0; + return *this; +} + +inline auto string::operator=(const string& source) -> string& { + if(&source == this) return *this; + reset(); + if(source._data) { + _data = source._data; + _refs = source._refs; + _capacity = source._capacity; + _size = source._size; + ++*_refs; + } + return *this; +} + +inline auto string::operator=(string&& source) -> string& { + if(&source == this) return *this; + reset(); + _data = source._data; + _refs = source._refs; + _capacity = source._capacity; + _size = source._size; + source._data = nullptr; + source._refs = nullptr; + source._capacity = 0; + source._size = 0; + return *this; +} + +inline auto string::_allocate() -> char* { + auto _temp = memory::allocate(_capacity + 1 + sizeof(u32)); + *_temp = 0; + _refs = (u32*)(_temp + _capacity + 1); //this will always be aligned by 32 via reserve() + *_refs = 1; + return _temp; +} + +inline auto string::_copy() -> char* { + auto _temp = memory::allocate(_capacity + 1 + sizeof(u32)); + memory::copy(_temp, _data, _size = min(_capacity, _size)); + _temp[_size] 
= 0; + --*_refs; + _refs = (u32*)(_temp + _capacity + 1); + *_refs = 1; + return _temp; +} + +} diff --git a/waterbox/ares64/ares/nall/string/allocator/small-string-optimization.hpp b/waterbox/ares64/ares/nall/string/allocator/small-string-optimization.hpp new file mode 100644 index 0000000000..baedeccc10 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/allocator/small-string-optimization.hpp @@ -0,0 +1,95 @@ +#pragma once + +/* +small string optimization (SSO) allocator +sizeof(string) == 8 + string::SSO + +utilizes a union to store small strings directly into text pointer +bypasses the need to allocate heap memory for small strings +requires extra computations, which can be slower for large strings + +pros: +* potential for in-place resize +* no heap allocation when (capacity < SSO) + +cons: +* added overhead to fetch data() +* pass-by-value requires heap allocation when (capacity >= SSO) + +*/ + +namespace nall { + +inline string::string() { + _data = nullptr; + _capacity = SSO - 1; + _size = 0; +} + +template +inline auto string::get() -> T* { + if(_capacity < SSO) return (T*)_text; + return (T*)_data; +} + +template +inline auto string::data() const -> const T* { + if(_capacity < SSO) return (const T*)_text; + return (const T*)_data; +} + +inline auto string::reset() -> type& { + if(_capacity >= SSO) memory::free(_data); + _data = nullptr; + _capacity = SSO - 1; + _size = 0; + return *this; +} + +inline auto string::reserve(u32 capacity) -> type& { + if(capacity <= _capacity) return *this; + capacity = bit::round(capacity + 1) - 1; + if(_capacity < SSO) { + char _temp[SSO]; + memory::copy(_temp, _text, SSO); + _data = memory::allocate(_capacity = capacity + 1); + memory::copy(_data, _temp, SSO); + } else { + _data = memory::resize(_data, _capacity = capacity + 1); + } + return *this; +} + +inline auto string::resize(u32 size) -> type& { + reserve(size); + get()[_size = size] = 0; + return *this; +} + +inline auto string::operator=(const string& source) -> 
type& { + if(&source == this) return *this; + reset(); + if(source._capacity >= SSO) { + _data = memory::allocate(source._capacity + 1); + _capacity = source._capacity; + _size = source._size; + memory::copy(_data, source._data, source._size + 1); + } else { + memory::copy(_text, source._text, SSO); + _capacity = SSO - 1; + _size = source._size; + } + return *this; +} + +inline auto string::operator=(string&& source) -> type& { + if(&source == this) return *this; + reset(); + memory::copy(this, &source, sizeof(string)); + source._data = nullptr; + source._capacity = SSO - 1; + source._size = 0; + return *this; +} + +} diff --git a/waterbox/ares64/ares/nall/string/allocator/vector.hpp b/waterbox/ares64/ares/nall/string/allocator/vector.hpp new file mode 100644 index 0000000000..ef3233948d --- /dev/null +++ b/waterbox/ares64/ares/nall/string/allocator/vector.hpp @@ -0,0 +1,84 @@ +#pragma once + +/* +vector allocator +sizeof(string) == 16 (amd64) + +utilizes a raw string pointer +always allocates memory onto the heap when string is not empty + +pros: +* potential for in-place resize +* simplicity + +cons: +* always allocates heap memory on (capacity > 0) +* pass-by-value requires heap allocation + +*/ + +namespace nall { + +template +inline auto string::get() -> T* { + if(_capacity == 0) reserve(1); + return (T*)_data; +} + +template +inline auto string::data() const -> const T* { + if(_capacity == 0) return (const T*)""; + return (const T*)_data; +} + +inline auto string::reset() -> type& { + if(_data) { memory::free(_data); _data = nullptr; } + _capacity = 0; + _size = 0; + return *this; +} + +inline auto string::reserve(u32 capacity) -> type& { + if(capacity > _capacity) { + _capacity = bit::round(capacity + 1) - 1; + _data = memory::resize(_data, _capacity + 1); + _data[_capacity] = 0; + } + return *this; +} + +inline auto string::resize(u32 size) -> type& { + reserve(size); + get()[_size = size] = 0; + return *this; +} + +inline auto string::operator=(const 
string& source) -> type& { + if(&source == this) return *this; + reset(); + _data = memory::allocate(source._size + 1); + _capacity = source._size; + _size = source._size; + memory::copy(_data, source.data(), source.size() + 1); + return *this; +} + +inline auto string::operator=(string&& source) -> type& { + if(&source == this) return *this; + reset(); + _data = source._data; + _capacity = source._capacity; + _size = source._size; + source._data = nullptr; + source._capacity = 0; + source._size = 0; + return *this; +} + +inline string::string() { + _data = nullptr; + _capacity = 0; + _size = 0; +} + +} diff --git a/waterbox/ares64/ares/nall/string/atoi.hpp b/waterbox/ares64/ares/nall/string/atoi.hpp new file mode 100644 index 0000000000..1b197959b4 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/atoi.hpp @@ -0,0 +1,25 @@ +#pragma once + +namespace nall { + +inline auto string::boolean() const -> bool { + return equals("true"); +} + +inline auto string::integer() const -> s64 { + return toInteger(data()); +} + +inline auto string::natural() const -> u64 { + return toNatural(data()); +} + +inline auto string::hex() const -> u64 { + return toHex(data()); +} + +inline auto string::real() const -> f64 { + return toReal(data()); +} + +} diff --git a/waterbox/ares64/ares/nall/string/cast.hpp b/waterbox/ares64/ares/nall/string/cast.hpp new file mode 100644 index 0000000000..99ba653d9f --- /dev/null +++ b/waterbox/ares64/ares/nall/string/cast.hpp @@ -0,0 +1,302 @@ +#pragma once + +//convert any (supported) type to a const char* without constructing a new nall::string +//this is used inside string{...} to build nall::string values + +namespace nall { + +//booleans + +template<> struct stringify { + stringify(bool value) : _value(value) {} + auto data() const -> const char* { return _value ? "true" : "false"; } + auto size() const -> u32 { return _value ? 
4 : 5; } + bool _value; +}; + +template<> struct stringify { + stringify(bool value) : _value(value) {} + auto data() const -> const char* { return _value ? "true" : "false"; } + auto size() const -> u32 { return _value ? 4 : 5; } + bool _value; +}; + +//characters + +template<> struct stringify { + stringify(char source) { _data[0] = source; _data[1] = 0; } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return 1; } + char _data[2]; +}; + +//signed integers + +template<> struct stringify { + stringify(signed char source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[2 + sizeof(signed char) * 3]; +}; + +template<> struct stringify { + stringify(signed short source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[2 + sizeof(signed short) * 3]; +}; + +template<> struct stringify { + stringify(signed int source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[2 + sizeof(signed int) * 3]; +}; + +template<> struct stringify { + stringify(signed long source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[2 + sizeof(signed long) * 3]; +}; + +template<> struct stringify { + stringify(signed long long source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[2 + sizeof(signed long long) * 3]; +}; + +#if defined(__SIZEOF_INT128__) +template<> struct stringify { + stringify(int128_t source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char 
_data[2 + sizeof(s128) * 3]; +}; +#endif + +template struct stringify> { + stringify(IntegerPrimitive source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[2 + sizeof(s64) * 3]; +}; + +template struct stringify> { + stringify(Integer source) { fromInteger(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[2 + sizeof(s64) * 3]; +}; + +//unsigned integers + +template<> struct stringify { + stringify(unsigned char source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(unsigned char) * 3]; +}; + +template<> struct stringify { + stringify(unsigned short source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(unsigned short) * 3]; +}; + +template<> struct stringify { + stringify(unsigned int source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(unsigned int) * 3]; +}; + +template<> struct stringify { + stringify(unsigned long source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(unsigned long) * 3]; +}; + +template<> struct stringify { + stringify(unsigned long long source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(unsigned long long) * 3]; +}; + +#if defined(__SIZEOF_INT128__) +template<> struct stringify { + stringify(u128 source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; 
} + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(u128) * 3]; +}; +#endif + +template struct stringify> { + stringify(NaturalPrimitive source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(u64) * 3]; +}; + +template struct stringify> { + stringify(Natural source) { fromNatural(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[1 + sizeof(u64) * 3]; +}; + +//floating-point + +template<> struct stringify { + stringify(float source) { fromReal(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[256]; +}; + +template<> struct stringify { + stringify(double source) { fromReal(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[256]; +}; + +template<> struct stringify { + stringify(long double source) { fromReal(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[256]; +}; + +template struct stringify> { + stringify(Real source) { fromReal(_data, source); } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[256]; +}; + +//arrays + +template<> struct stringify> { + stringify(vector source) { + _text.resize(source.size()); + memory::copy(_text.data(), source.data(), source.size()); + } + auto data() const -> const char* { return _text.data(); } + auto size() const -> u32 { return _text.size(); } + vector _text; +}; + +template<> struct stringify&> { + stringify(const vector& source) { + _text.resize(source.size()); + memory::copy(_text.data(), source.data(), source.size()); + } + auto data() const -> const char* { return 
_text.data(); } + auto size() const -> u32 { return _text.size(); } + vector _text; +}; + +//char arrays + +template<> struct stringify { + stringify(char* source) : _data(source ? source : "") {} + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + const char* _data; +}; + +template<> struct stringify { + stringify(const char* source) : _data(source ? source : "") {} + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + const char* _data; +}; + +//strings + +template<> struct stringify { + stringify(const string& source) : _text(source) {} + auto data() const -> const char* { return _text.data(); } + auto size() const -> u32 { return _text.size(); } + const string& _text; +}; + +template<> struct stringify { + stringify(const string& source) : _text(source) {} + auto data() const -> const char* { return _text.data(); } + auto size() const -> u32 { return _text.size(); } + const string& _text; +}; + +template<> struct stringify { + stringify(const string_view& source) : _view(source) {} + auto data() const -> const char* { return _view.data(); } + auto size() const -> u32 { return _view.size(); } + const string_view& _view; +}; + +template<> struct stringify { + stringify(const string_view& source) : _view(source) {} + auto data() const -> const char* { return _view.data(); } + auto size() const -> u32 { return _view.size(); } + const string_view& _view; +}; + +template<> struct stringify> { + stringify(const array_view& source) : _view(source) {} + auto data() const -> const char* { return _view.data(); } + auto size() const -> u32 { return _view.size(); } + const array_view& _view; +}; + +template<> struct stringify&> { + stringify(const array_view& source) : _view(source) {} + auto data() const -> const char* { return _view.data(); } + auto size() const -> u32 { return _view.size(); } + const array_view& _view; +}; + +template<> struct stringify { + 
stringify(const string_pascal& source) : _text(source) {} + auto data() const -> const char* { return _text.data(); } + auto size() const -> u32 { return _text.size(); } + const string_pascal& _text; +}; + +template<> struct stringify { + stringify(const string_pascal& source) : _text(source) {} + auto data() const -> const char* { return _text.data(); } + auto size() const -> u32 { return _text.size(); } + const string_pascal& _text; +}; + +//pointers + +//note: T = char* is matched by stringify +template struct stringify { + stringify(const T* source) { + if(!source) { + memory::copy(_data, "(nullptr)", 10); + } else { + memory::copy(_data, "0x", 2); + fromHex(_data + 2, (uintptr)source); + } + } + auto data() const -> const char* { return _data; } + auto size() const -> u32 { return strlen(_data); } + char _data[256]; +}; + +// + +template inline auto make_string(T value) -> stringify { + return stringify(forward(value)); +} + +} diff --git a/waterbox/ares64/ares/nall/string/compare.hpp b/waterbox/ares64/ares/nall/string/compare.hpp new file mode 100644 index 0000000000..c179d034f9 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/compare.hpp @@ -0,0 +1,58 @@ +#pragma once + +namespace nall { + +template +inline auto string::_compare(const char* target, u32 capacity, const char* source, u32 size) -> s32 { + if(Insensitive) return memory::icompare(target, capacity, source, size); + return memory::compare(target, capacity, source, size); +} + +//size() + 1 includes null-terminator; required to properly compare strings of differing lengths +inline auto string::compare(string_view x, string_view y) -> s32 { + return memory::compare(x.data(), x.size() + 1, y.data(), y.size() + 1); +} + +inline auto string::icompare(string_view x, string_view y) -> s32 { + return memory::icompare(x.data(), x.size() + 1, y.data(), y.size() + 1); +} + +inline auto string::compare(string_view source) const -> s32 { + return memory::compare(data(), size() + 1, source.data(), 
source.size() + 1); +} + +inline auto string::icompare(string_view source) const -> s32 { + return memory::icompare(data(), size() + 1, source.data(), source.size() + 1); +} + +inline auto string::equals(string_view source) const -> bool { + if(size() != source.size()) return false; + return memory::compare(data(), source.data(), source.size()) == 0; +} + +inline auto string::iequals(string_view source) const -> bool { + if(size() != source.size()) return false; + return memory::icompare(data(), source.data(), source.size()) == 0; +} + +inline auto string::beginsWith(string_view source) const -> bool { + if(source.size() > size()) return false; + return memory::compare(data(), source.data(), source.size()) == 0; +} + +inline auto string::ibeginsWith(string_view source) const -> bool { + if(source.size() > size()) return false; + return memory::icompare(data(), source.data(), source.size()) == 0; +} + +inline auto string::endsWith(string_view source) const -> bool { + if(source.size() > size()) return false; + return memory::compare(data() + size() - source.size(), source.data(), source.size()) == 0; +} + +inline auto string::iendsWith(string_view source) const -> bool { + if(source.size() > size()) return false; + return memory::icompare(data() + size() - source.size(), source.data(), source.size()) == 0; +} + +} diff --git a/waterbox/ares64/ares/nall/string/convert.hpp b/waterbox/ares64/ares/nall/string/convert.hpp new file mode 100644 index 0000000000..4056757728 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/convert.hpp @@ -0,0 +1,53 @@ +#pragma once + +namespace nall { + +inline auto string::downcase() -> string& { + char* p = get(); + for(u32 n = 0; n < size(); n++) { + if(p[n] >= 'A' && p[n] <= 'Z') p[n] += 0x20; + } + return *this; +} + +inline auto string::qdowncase() -> string& { + char* p = get(); + for(u32 n = 0, quoted = 0; n < size(); n++) { + if(p[n] == '\"') quoted ^= 1; + if(!quoted && p[n] >= 'A' && p[n] <= 'Z') p[n] += 0x20; + } + return 
*this; +} + +inline auto string::upcase() -> string& { + char* p = get(); + for(u32 n = 0; n < size(); n++) { + if(p[n] >= 'a' && p[n] <= 'z') p[n] -= 0x20; + } + return *this; +} + +inline auto string::qupcase() -> string& { + char* p = get(); + for(u32 n = 0, quoted = 0; n < size(); n++) { + if(p[n] == '\"') quoted ^= 1; + if(!quoted && p[n] >= 'a' && p[n] <= 'z') p[n] -= 0x20; + } + return *this; +} + +inline auto string::transform(string_view from, string_view to) -> string& { + if(from.size() != to.size() || from.size() == 0) return *this; //patterns must be the same length + char* p = get(); + for(u32 n = 0; n < size(); n++) { + for(u32 s = 0; s < from.size(); s++) { + if(p[n] == from[s]) { + p[n] = to[s]; + break; + } + } + } + return *this; +} + +} diff --git a/waterbox/ares64/ares/nall/string/core.hpp b/waterbox/ares64/ares/nall/string/core.hpp new file mode 100644 index 0000000000..a41fa69190 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/core.hpp @@ -0,0 +1,75 @@ +#pragma once + +//only allocators may access _data or modify _size and _capacity +//all other functions must use data(), size(), capacity() + +#if defined(NALL_STRING_ALLOCATOR_ADAPTIVE) + #include +#elif defined(NALL_STRING_ALLOCATOR_COPY_ON_WRITE) + #include +#elif defined(NALL_STRING_ALLOCATOR_SMALL_STRING_OPTIMIZATION) + #include +#elif defined(NALL_STRING_ALLOCATOR_VECTOR) + #include +#endif + +namespace nall { + +inline auto string::operator[](u32 position) const -> const char& { + #ifdef DEBUG + struct out_of_bounds {}; + if(position >= size() + 1) throw out_of_bounds{}; + #endif + return data()[position]; +} + +inline auto string::operator()(u32 position, char fallback) const -> char { + if(position >= size() + 1) return fallback; + return data()[position]; +} + +template inline auto string::assign(P&&... p) -> string& { + resize(0); + return append(forward

(p)...); +} + +template inline auto string::prepend(const T& value, P&&... p) -> string& { + if constexpr(sizeof...(p)) prepend(forward

(p)...); + return _prepend(make_string(value)); +} + +template inline auto string::prepend(const nall::string_format& value, P&&... p) -> string& { + if constexpr(sizeof...(p)) prepend(forward

(p)...); + return format(value); +} + +template inline auto string::_prepend(const stringify& source) -> string& { + resize(source.size() + size()); + memory::move(get() + source.size(), get(), size() - source.size()); + memory::copy(get(), source.data(), source.size()); + return *this; +} + +template inline auto string::append(const T& value, P&&... p) -> string& { + _append(make_string(value)); + if constexpr(sizeof...(p) > 0) append(forward

(p)...); + return *this; +} + +template inline auto string::append(const nall::string_format& value, P&&... p) -> string& { + format(value); + if constexpr(sizeof...(p)) append(forward

(p)...); + return *this; +} + +template inline auto string::_append(const stringify& source) -> string& { + resize(size() + source.size()); + memory::copy(get() + size() - source.size(), source.data(), source.size()); + return *this; +} + +inline auto string::length() const -> u32 { + return strlen(data()); +} + +} diff --git a/waterbox/ares64/ares/nall/string/eval/evaluator.hpp b/waterbox/ares64/ares/nall/string/eval/evaluator.hpp new file mode 100644 index 0000000000..130550870c --- /dev/null +++ b/waterbox/ares64/ares/nall/string/eval/evaluator.hpp @@ -0,0 +1,146 @@ +#pragma once + +namespace nall::Eval { + +inline auto evaluateExpression(Node* node) -> string { + #define p(n) evaluateExpression(node->link[n]) + switch(node->type) { + case Node::Type::Null: return "Null"; + case Node::Type::Literal: return {"Literal:", node->literal}; + case Node::Type::Function: return {"Function(0:", p(0), ", 1:", p(1), ")"}; + case Node::Type::Subscript: return {"Subscript(0:", p(0), ", 1:", p(1), ")"}; + case Node::Type::Member: return {"Member(0:", p(0), ", 1:", p(1), ")"}; + case Node::Type::SuffixIncrement: return {"SuffixIncrement(0:", p(0), ")"}; + case Node::Type::SuffixDecrement: return {"SuffixDecrement(0:", p(0), ")"}; + case Node::Type::Reference: return {"Reference(0:", p(0), ")"}; + case Node::Type::Dereference: return {"Dereference(0:", p(0), ")"}; + case Node::Type::BitwiseNot: return {"Complement(0:", p(0), ")"}; + case Node::Type::PrefixIncrement: return {"PrefixIncrement(0:", p(0), ")"}; + case Node::Type::PrefixDecrement: return {"PrefixDecrement(0:", p(0), ")"}; + case Node::Type::Add: return {"Add(0:", p(0), ", 1:", p(1), ")"}; + case Node::Type::Multiply: return {"Multiply(0:", p(0), ", 1:", p(1), ")"}; + case Node::Type::Concatenate: return {"Concatenate(0:", p(0), ", ", p(1), ")"}; + case Node::Type::Coalesce: return {"Coalesce(0:", p(0), ", ", p(1), ")"}; + case Node::Type::Condition: return {"Condition(0:", p(0), ", ", p(1), ", ", p(2), ")"}; + case 
Node::Type::Assign: return {"Assign(0:", p(0), ", ", p(1), ")"}; + case Node::Type::Separator: { + string result = "Separator("; + for(auto& link : node->link) { + result.append(evaluateExpression(link), ", "); + } + return result.trimRight(", ", 1L).append(")"); + } + } + #undef p + + throw "invalid operator"; +} + +inline auto evaluateInteger(Node* node) -> s64 { + if(node->type == Node::Type::Literal) return toInteger(node->literal); + + #define p(n) evaluateInteger(node->link[n]) + switch(node->type) { + case Node::Type::SuffixIncrement: return p(0); + case Node::Type::SuffixDecrement: return p(0); + case Node::Type::LogicalNot: return !p(0); + case Node::Type::BitwiseNot: return ~p(0); + case Node::Type::Positive: return +p(0); + case Node::Type::Negative: return -p(0); + case Node::Type::PrefixIncrement: return p(0) + 1; + case Node::Type::PrefixDecrement: return p(0) - 1; + case Node::Type::Multiply: return p(0) * p(1); + case Node::Type::Divide: return p(0) / p(1); + case Node::Type::Modulo: return p(0) % p(1); + case Node::Type::Add: return p(0) + p(1); + case Node::Type::Subtract: return p(0) - p(1); + case Node::Type::ShiftLeft: return p(0) << p(1); + case Node::Type::ShiftRight: return p(0) >> p(1); + case Node::Type::BitwiseAnd: return p(0) & p(1); + case Node::Type::BitwiseOr: return p(0) | p(1); + case Node::Type::BitwiseXor: return p(0) ^ p(1); + case Node::Type::Equal: return p(0) == p(1); + case Node::Type::NotEqual: return p(0) != p(1); + case Node::Type::LessThanEqual: return p(0) <= p(1); + case Node::Type::GreaterThanEqual: return p(0) >= p(1); + case Node::Type::LessThan: return p(0) < p(1); + case Node::Type::GreaterThan: return p(0) > p(1); + case Node::Type::LogicalAnd: return p(0) && p(1); + case Node::Type::LogicalOr: return p(0) || p(1); + case Node::Type::Condition: return p(0) ? 
p(1) : p(2); + case Node::Type::Assign: return p(1); + case Node::Type::AssignMultiply: return p(0) * p(1); + case Node::Type::AssignDivide: return p(0) / p(1); + case Node::Type::AssignModulo: return p(0) % p(1); + case Node::Type::AssignAdd: return p(0) + p(1); + case Node::Type::AssignSubtract: return p(0) - p(1); + case Node::Type::AssignShiftLeft: return p(0) << p(1); + case Node::Type::AssignShiftRight: return p(0) >> p(1); + case Node::Type::AssignBitwiseAnd: return p(0) & p(1); + case Node::Type::AssignBitwiseOr: return p(0) | p(1); + case Node::Type::AssignBitwiseXor: return p(0) ^ p(1); + } + #undef p + + throw "invalid operator"; +} + +inline auto integer(const string& expression) -> maybe { + try { + auto tree = new Node; + const char* p = expression; + parse(tree, p, 0); + auto result = evaluateInteger(tree); + delete tree; + return result; + } catch(const char*) { + return nothing; + } +} + +inline auto evaluateReal(Node* node) -> f64 { + if(node->type == Node::Type::Literal) return toReal(node->literal); + + #define p(n) evaluateReal(node->link[n]) + switch(node->type) { + case Node::Type::LogicalNot: return !p(0); + case Node::Type::Positive: return +p(0); + case Node::Type::Negative: return -p(0); + case Node::Type::Multiply: return p(0) * p(1); + case Node::Type::Divide: return p(0) / p(1); + case Node::Type::Add: return p(0) + p(1); + case Node::Type::Subtract: return p(0) - p(1); + case Node::Type::Equal: return p(0) == p(1); + case Node::Type::NotEqual: return p(0) != p(1); + case Node::Type::LessThanEqual: return p(0) <= p(1); + case Node::Type::GreaterThanEqual: return p(0) >= p(1); + case Node::Type::LessThan: return p(0) < p(1); + case Node::Type::GreaterThan: return p(0) > p(1); + case Node::Type::LogicalAnd: return p(0) && p(1); + case Node::Type::LogicalOr: return p(0) || p(1); + case Node::Type::Condition: return p(0) ? 
p(1) : p(2); + case Node::Type::Assign: return p(1); + case Node::Type::AssignMultiply: return p(0) * p(1); + case Node::Type::AssignDivide: return p(0) / p(1); + case Node::Type::AssignAdd: return p(0) + p(1); + case Node::Type::AssignSubtract: return p(0) - p(1); + } + #undef p + + throw "invalid operator"; +} + +inline auto real(const string& expression) -> maybe { + try { + auto tree = new Node; + const char* p = expression; + parse(tree, p, 0); + auto result = evaluateReal(tree); + delete tree; + return result; + } catch(const char*) { + return nothing; + } +} + +} diff --git a/waterbox/ares64/ares/nall/string/eval/literal.hpp b/waterbox/ares64/ares/nall/string/eval/literal.hpp new file mode 100644 index 0000000000..becd35b4b7 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/eval/literal.hpp @@ -0,0 +1,102 @@ +#pragma once + +namespace nall::Eval { + +inline auto isLiteral(const char*& s) -> bool { + char n = s[0]; + return (n >= 'A' && n <= 'Z') + || (n >= 'a' && n <= 'z') + || (n >= '0' && n <= '9') + || (n == '%' || n == '$' || n == '_' || n == '.') + || (n == '\'' || n == '\"'); +} + +inline auto literalNumber(const char*& s) -> string { + const char* p = s; + + //binary + if(p[0] == '%' || (p[0] == '0' && p[1] == 'b')) { + u32 prefix = 1 + (p[0] == '0'); + p += prefix; + while(p[0] == '\'' || p[0] == '0' || p[0] == '1') p++; + if(p - s <= prefix) throw "invalid binary literal"; + string result = slice(s, 0, p - s); + s = p; + return result; + } + + //octal + if(p[0] == '0' && p[1] == 'o') { + u32 prefix = 1 + (p[0] == '0'); + p += prefix; + while(p[0] == '\'' || (p[0] >= '0' && p[0] <= '7')) p++; + if(p - s <= prefix) throw "invalid octal literal"; + string result = slice(s, 0, p - s); + s = p; + return result; + } + + //hex + if(p[0] == '$' || (p[0] == '0' && p[1] == 'x')) { + u32 prefix = 1 + (p[0] == '0'); + p += prefix; + while(p[0] == '\'' || (p[0] >= '0' && p[0] <= '9') || (p[0] >= 'A' && p[0] <= 'F') || (p[0] >= 'a' && p[0] <= 'f')) p++; + if(p 
- s <= prefix) throw "invalid hex literal"; + string result = slice(s, 0, p - s); + s = p; + return result; + } + + //decimal + while(p[0] == '\'' || (p[0] >= '0' && p[0] <= '9')) p++; + if(p[0] != '.') { + string result = slice(s, 0, p - s); + s = p; + return result; + } + + //floating-point + p++; + while(p[0] == '\'' || (p[0] >= '0' && p[0] <= '9')) p++; + string result = slice(s, 0, p - s); + s = p; + return result; +} + +inline auto literalString(const char*& s) -> string { + const char* p = s; + char escape = *p++; + + while(p[0] && p[0] != escape) { + if(p[0] == '\\') p++; + p++; + } + if(*p++ != escape) throw "unclosed string literal"; + + string result = slice(s, 0, p - s); + s = p; + return result; +} + +inline auto literalVariable(const char*& s) -> string { + const char* p = s; + + while(p[0] == '_' || p[0] == '.' || (p[0] >= 'A' && p[0] <= 'Z') || (p[0] >= 'a' && p[0] <= 'z') || (p[0] >= '0' && p[0] <= '9')) p++; + + string result = slice(s, 0, p - s); + s = p; + return result; +} + +inline auto literal(const char*& s) -> string { + const char* p = s; + + if(p[0] >= '0' && p[0] <= '9') return literalNumber(s); + if(p[0] == '%' || p[0] == '$') return literalNumber(s); + if(p[0] == '\'' || p[0] == '\"') return literalString(s); + if(p[0] == '_' || p[0] == '.' 
|| (p[0] >= 'A' && p[0] <= 'Z') || (p[0] >= 'a' && p[0] <= 'z')) return literalVariable(s); + + throw "invalid literal"; +} + +} diff --git a/waterbox/ares64/ares/nall/string/eval/node.hpp b/waterbox/ares64/ares/nall/string/eval/node.hpp new file mode 100644 index 0000000000..d79d4920e3 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/eval/node.hpp @@ -0,0 +1,37 @@ +#pragma once + +namespace nall::Eval { + +struct Node { + enum class Type : u32 { + Null, + Literal, + Function, Subscript, Member, SuffixIncrement, SuffixDecrement, + Reference, Dereference, LogicalNot, BitwiseNot, Positive, Negative, PrefixIncrement, PrefixDecrement, + Multiply, Divide, Modulo, + Add, Subtract, + RotateLeft, RotateRight, ShiftLeft, ShiftRight, + BitwiseAnd, BitwiseOr, BitwiseXor, + Concatenate, + Equal, NotEqual, LessThanEqual, GreaterThanEqual, LessThan, GreaterThan, + LogicalAnd, LogicalOr, + Coalesce, Condition, + Assign, Create, //all assignment operators have the same precedence + AssignMultiply, AssignDivide, AssignModulo, + AssignAdd, AssignSubtract, + AssignRotateLeft, AssignRotateRight, AssignShiftLeft, AssignShiftRight, + AssignBitwiseAnd, AssignBitwiseOr, AssignBitwiseXor, + AssignConcatenate, + Separator, + }; + + Type type; + string literal; + vector link; + + Node() : type(Type::Null) {} + Node(Type type) : type(type) {} + ~Node() { for(auto& node : link) delete node; } +}; + +} diff --git a/waterbox/ares64/ares/nall/string/eval/parser.hpp b/waterbox/ares64/ares/nall/string/eval/parser.hpp new file mode 100644 index 0000000000..47bbbd539a --- /dev/null +++ b/waterbox/ares64/ares/nall/string/eval/parser.hpp @@ -0,0 +1,195 @@ +#pragma once + +namespace nall::Eval { + +inline auto whitespace(char n) -> bool { + return n == ' ' || n == '\t' || n == '\r' || n == '\n'; +} + +//operator associativity and precedence: +// right: a[] a() a.b a++ a-- +// left: &a *a !a ~a +a -a ++a --a +// left: a*b a/b a%b +// left: a+b a-b +// left: a<<>>b a<>b +// left: a&b +// left: a^b +// 
left: a|b +// left: a~b +// left: a<=b a>=b ab +// left: a==b a!=b +// left: a&&b +// left: a||b +// right: a??b a?b:c +// right: a=b a:=b a*=b a/=b a%=b a+=b a-=b a<<<=b a>>>=b a<<=b a>>=b a&=b a^=b a|=b a~=b +// left: a,b +//differences from C++17: +// a&b, a^b, a|b precedence was elevated above comparison operators +// a?b:c precedence was elevated above assignment operators +// a<<>>b a<<<=b a>>>=b rotation operators were added +// a~b a~=b concatenation operators were added +// a??b coalesce operator was added +inline auto parse(Node*& node, const char*& s, u32 depth) -> void { + auto unaryPrefix = [&](Node::Type type, u32 seek, u32 depth) { + auto parent = new Node(type); + parse(parent->link(0) = new Node, s += seek, depth); + node = parent; + }; + + auto unarySuffix = [&](Node::Type type, u32 seek, u32 depth) { + auto parent = new Node(type); + parent->link(0) = node; + parse(parent, s += seek, depth); + node = parent; + }; + + auto binary = [&](Node::Type type, u32 seek, u32 depth) { + auto parent = new Node(type); + parent->link(0) = node; + parse(parent->link(1) = new Node, s += seek, depth); + node = parent; + }; + + auto ternary = [&](Node::Type type, u32 seek, u32 depth) { + auto parent = new Node(type); + parent->link(0) = node; + parse(parent->link(1) = new Node, s += seek, depth); + if(s[0] != ':') throw "mismatched ternary"; + parse(parent->link(2) = new Node, s += seek, depth); + node = parent; + }; + + auto separator = [&](Node::Type type, u32 seek, u32 depth) { + if(node->type != Node::Type::Separator) return binary(type, seek, depth); + u32 n = node->link.size(); + parse(node->link(n) = new Node, s += seek, depth); + }; + + while(whitespace(s[0])) s++; + if(!s[0]) return; + + if(s[0] == '(' && !node->link) { + parse(node, s += 1, 1); + if(*s++ != ')') throw "mismatched group"; + } + + if(isLiteral(s)) { + node->type = Node::Type::Literal; + node->literal = literal(s); + } + + #define p() (!node->literal && !node->link) + while(true) { + 
while(whitespace(s[0])) s++; + if(!s[0]) return; + + if(depth >= 17) break; //left associativity + if(s[0] == '(' && !p()) { + binary(Node::Type::Function, 1, 1); + if(*s++ != ')') throw "mismatched function"; + continue; + } + if(s[0] == '[') { + binary(Node::Type::Subscript, 1, 1); + if(*s++ != ']') throw "mismatched subscript"; + continue; + } + if(s[0] == '.') { binary(Node::Type::Member, 1, 17); continue; } + if(s[0] == '+' && s[1] == '+' && !p()) { unarySuffix(Node::Type::SuffixIncrement, 2, 17); continue; } + if(s[0] == '-' && s[1] == '-' && !p()) { unarySuffix(Node::Type::SuffixDecrement, 2, 17); continue; } + + if(s[0] == '&' && p()) { unaryPrefix(Node::Type::Reference, 1, 16); continue; } + if(s[0] == '*' && p()) { unaryPrefix(Node::Type::Dereference, 1, 16); continue; } + if(s[0] == '!' && p()) { unaryPrefix(Node::Type::LogicalNot, 1, 16); continue; } + if(s[0] == '~' && p()) { unaryPrefix(Node::Type::BitwiseNot, 1, 16); continue; } + if(s[0] == '+' && s[1] != '+' && p()) { unaryPrefix(Node::Type::Positive, 1, 16); continue; } + if(s[0] == '-' && s[1] != '-' && p()) { unaryPrefix(Node::Type::Negative, 1, 16); continue; } + if(s[0] == '+' && s[1] == '+' && p()) { unaryPrefix(Node::Type::PrefixIncrement, 2, 16); continue; } + if(s[0] == '-' && s[1] == '-' && p()) { unaryPrefix(Node::Type::PrefixDecrement, 2, 16); continue; } + if(depth >= 16) break; //right associativity + + if(depth >= 15) break; //left associativity + if(s[0] == '*' && s[1] != '=') { binary(Node::Type::Multiply, 1, 15); continue; } + if(s[0] == '/' && s[1] != '=') { binary(Node::Type::Divide, 1, 15); continue; } + if(s[0] == '%' && s[1] != '=') { binary(Node::Type::Modulo, 1, 15); continue; } + + if(depth >= 14) break; //left associativity + if(s[0] == '+' && s[1] != '=') { binary(Node::Type::Add, 1, 14); continue; } + if(s[0] == '-' && s[1] != '=') { binary(Node::Type::Subtract, 1, 14); continue; } + + if(depth >= 13) break; //left associativity + if(s[0] == '<' && s[1] == '<' && s[2] 
== '<' && s[3] != '=') { binary(Node::Type::RotateLeft, 3, 13); continue; } + if(s[0] == '>' && s[1] == '>' && s[2] == '>' && s[3] != '=') { binary(Node::Type::RotateRight, 3, 13); continue; } + if(s[0] == '<' && s[1] == '<' && s[2] != '=') { binary(Node::Type::ShiftLeft, 2, 13); continue; } + if(s[0] == '>' && s[1] == '>' && s[2] != '=') { binary(Node::Type::ShiftRight, 2, 13); continue; } + + if(depth >= 12) break; //left associativity + if(s[0] == '&' && s[1] != '&' && s[1] != '=') { binary(Node::Type::BitwiseAnd, 1, 12); continue; } + + if(depth >= 11) break; //left associativity + if(s[0] == '^' && s[1] != '^' && s[1] != '=') { binary(Node::Type::BitwiseXor, 1, 11); continue; } + + if(depth >= 10) break; //left associativity + if(s[0] == '|' && s[1] != '|' && s[1] != '=') { binary(Node::Type::BitwiseOr, 1, 10); continue; } + + if(depth >= 9) break; //left associativity + if(s[0] == '~' && s[1] != '=') { binary(Node::Type::Concatenate, 1, 9); continue; } + + if(depth >= 8) break; //left associativity + if(s[0] == '<' && s[1] == '=') { binary(Node::Type::LessThanEqual, 2, 8); continue; } + if(s[0] == '>' && s[1] == '=') { binary(Node::Type::GreaterThanEqual, 2, 8); continue; } + if(s[0] == '<') { binary(Node::Type::LessThan, 1, 8); continue; } + if(s[0] == '>') { binary(Node::Type::GreaterThan, 1, 8); continue; } + + if(depth >= 7) break; //left associativity + if(s[0] == '=' && s[1] == '=') { binary(Node::Type::Equal, 2, 7); continue; } + if(s[0] == '!' && s[1] == '=') { binary(Node::Type::NotEqual, 2, 7); continue; } + + if(depth >= 6) break; //left associativity + if(s[0] == '&' && s[1] == '&') { binary(Node::Type::LogicalAnd, 2, 6); continue; } + + if(depth >= 5) break; //left associativity + if(s[0] == '|' && s[1] == '|') { binary(Node::Type::LogicalOr, 2, 5); continue; } + + if(s[0] == '?' && s[1] == '?') { binary(Node::Type::Coalesce, 2, 4); continue; } + if(s[0] == '?' 
&& s[1] != '?') { ternary(Node::Type::Condition, 1, 4); continue; } + if(depth >= 4) break; //right associativity + + if(s[0] == '=') { binary(Node::Type::Assign, 1, 3); continue; } + if(s[0] == ':' && s[1] == '=') { binary(Node::Type::Create, 2, 3); continue; } + if(s[0] == '*' && s[1] == '=') { binary(Node::Type::AssignMultiply, 2, 3); continue; } + if(s[0] == '/' && s[1] == '=') { binary(Node::Type::AssignDivide, 2, 3); continue; } + if(s[0] == '%' && s[1] == '=') { binary(Node::Type::AssignModulo, 2, 3); continue; } + if(s[0] == '+' && s[1] == '=') { binary(Node::Type::AssignAdd, 2, 3); continue; } + if(s[0] == '-' && s[1] == '=') { binary(Node::Type::AssignSubtract, 2, 3); continue; } + if(s[0] == '<' && s[1] == '<' && s[2] == '<' && s[3] == '=') { binary(Node::Type::AssignRotateLeft, 4, 3); continue; } + if(s[0] == '>' && s[1] == '>' && s[2] == '>' && s[3] == '=') { binary(Node::Type::AssignRotateRight, 4, 3); continue; } + if(s[0] == '<' && s[1] == '<' && s[2] == '=') { binary(Node::Type::AssignShiftLeft, 3, 3); continue; } + if(s[0] == '>' && s[1] == '>' && s[2] == '=') { binary(Node::Type::AssignShiftRight, 3, 3); continue; } + if(s[0] == '&' && s[1] == '=') { binary(Node::Type::AssignBitwiseAnd, 2, 3); continue; } + if(s[0] == '^' && s[1] == '=') { binary(Node::Type::AssignBitwiseXor, 2, 3); continue; } + if(s[0] == '|' && s[1] == '=') { binary(Node::Type::AssignBitwiseOr, 2, 3); continue; } + if(s[0] == '~' && s[1] == '=') { binary(Node::Type::AssignConcatenate, 2, 3); continue; } + if(depth >= 3) break; //right associativity + + if(depth >= 2) break; //left associativity + if(s[0] == ',') { separator(Node::Type::Separator, 1, 2); continue; } + + if(depth >= 1 && (s[0] == ')' || s[0] == ']')) break; + + while(whitespace(s[0])) s++; + if(!s[0]) break; + + throw "unrecognized terminal"; + } + #undef p +} + +inline auto parse(const string& expression) -> Node* { + auto result = new Node; + const char* p = expression; + parse(result, p, 0); + return result; 
+} + +} diff --git a/waterbox/ares64/ares/nall/string/find.hpp b/waterbox/ares64/ares/nall/string/find.hpp new file mode 100644 index 0000000000..e0beb1d28e --- /dev/null +++ b/waterbox/ares64/ares/nall/string/find.hpp @@ -0,0 +1,65 @@ +#pragma once + +namespace nall { + +inline auto string::contains(string_view characters) const -> maybe { + for(u32 x : range(size())) { + for(char y : characters) { + if(operator[](x) == y) return x; + } + } + return nothing; +} + +template inline auto string::_find(s32 offset, string_view source) const -> maybe { + if(source.size() == 0) return nothing; + auto p = data(); + for(u32 n = offset, quoted = 0; n < size();) { + if(Quoted) { if(p[n] == '\"') { quoted ^= 1; n++; continue; } if(quoted) { n++; continue; } } + if(_compare(p + n, size() - n, source.data(), source.size())) { n++; continue; } + return n - offset; + } + return nothing; +} + +inline auto string::find(string_view source) const -> maybe { return _find<0, 0>(0, source); } +inline auto string::ifind(string_view source) const -> maybe { return _find<1, 0>(0, source); } +inline auto string::qfind(string_view source) const -> maybe { return _find<0, 1>(0, source); } +inline auto string::iqfind(string_view source) const -> maybe { return _find<1, 1>(0, source); } + +inline auto string::findFrom(s32 offset, string_view source) const -> maybe { return _find<0, 0>(offset, source); } +inline auto string::ifindFrom(s32 offset, string_view source) const -> maybe { return _find<1, 0>(offset, source); } + +inline auto string::findNext(s32 offset, string_view source) const -> maybe { + if(source.size() == 0) return nothing; + for(s32 n = offset + 1; n < size(); n++) { + if(memory::compare(data() + n, size() - n, source.data(), source.size()) == 0) return n; + } + return nothing; +} + +inline auto string::ifindNext(s32 offset, string_view source) const -> maybe { + if(source.size() == 0) return nothing; + for(s32 n = offset + 1; n < size(); n++) { + if(memory::icompare(data() + n, 
size() - n, source.data(), source.size()) == 0) return n; + } + return nothing; +} + +inline auto string::findPrevious(s32 offset, string_view source) const -> maybe { + if(source.size() == 0) return nothing; + for(s32 n = offset - 1; n >= 0; n--) { + if(memory::compare(data() + n, size() - n, source.data(), source.size()) == 0) return n; + } + return nothing; +} + +inline auto string::ifindPrevious(s32 offset, string_view source) const -> maybe { + if(source.size() == 0) return nothing; + for(s32 n = offset - 1; n >= 0; n--) { + if(memory::icompare(data() + n, size() - n, source.data(), source.size()) == 0) return n; + } + return nothing; +} + +} diff --git a/waterbox/ares64/ares/nall/string/format.hpp b/waterbox/ares64/ares/nall/string/format.hpp new file mode 100644 index 0000000000..1117409d5f --- /dev/null +++ b/waterbox/ares64/ares/nall/string/format.hpp @@ -0,0 +1,153 @@ +#pragma once + +namespace nall { + +//nall::format is a vector of parameters that can be applied to a string +//each {#} token will be replaced with its appropriate format parameter + +inline auto string::format(const nall::string_format& params) -> type& { + auto size = (s32)this->size(); + auto data = memory::allocate(size); + memory::copy(data, this->data(), size); + + s32 x = 0; + while(x < size - 2) { //2 = minimum tag length + if(data[x] != '{') { x++; continue; } + + s32 y = x + 1; + while(y < size - 1) { //-1 avoids going out of bounds on test after this loop + if(data[y] != '}') { y++; continue; } + break; + } + + if(data[y++] != '}') { x++; continue; } + + static auto isNumeric = [](char* s, char* e) -> bool { + if(s == e) return false; //ignore empty tags: {} + while(s < e) { + if(*s >= '0' && *s <= '9') { s++; continue; } + return false; + } + return true; + }; + if(!isNumeric(&data[x + 1], &data[y - 1])) { x++; continue; } + + u32 index = toNatural(&data[x + 1]); + if(index >= params.size()) { x++; continue; } + + u32 sourceSize = y - x; + u32 targetSize = params[index].size(); 
+ u32 remaining = size - x; + + if(sourceSize > targetSize) { + u32 difference = sourceSize - targetSize; + memory::move(&data[x], &data[x + difference], remaining - difference); + size -= difference; + } else if(targetSize > sourceSize) { + u32 difference = targetSize - sourceSize; + data = (char*)realloc(data, size + difference); + size += difference; + memory::move(&data[x + difference], &data[x], remaining); + } + memory::copy(&data[x], params[index].data(), targetSize); + x += targetSize; + } + + resize(size); + memory::copy(get(), data, size); + memory::free(data); + return *this; +} + +template inline auto string_format::append(const T& value, P&&... p) -> string_format& { + vector::append(value); + return append(forward

(p)...); +} + +inline auto string_format::append() -> string_format& { + return *this; +} + +template inline auto print(P&&... p) -> void { + string s{forward

(p)...}; + fwrite(s.data(), 1, s.size(), stdout); + fflush(stdout); +} + +template inline auto print(FILE* fp, P&&... p) -> void { + string s{forward

(p)...}; + fwrite(s.data(), 1, s.size(), fp); + if(fp == stdout || fp == stderr) fflush(fp); +} + +template inline auto pad(const T& value, long precision, char padchar) -> string { + string buffer{value}; + if(precision) buffer.size(precision, padchar); + return buffer; +} + +template inline auto hex(T value, long precision, char padchar) -> string { + string buffer; + buffer.resize(sizeof(T) * 2); + char* p = buffer.get(); + + //create a mask to clear the upper four bits after shifting right in case T is a signed type + T mask = 1; + mask <<= sizeof(T) * 8 - 4; + mask -= 1; + + u32 size = 0; + do { + u32 n = value & 15; + p[size++] = n < 10 ? '0' + n : 'a' + n - 10; + value = value >> 4 & mask; + } while(value); + buffer.resize(size); + buffer.reverse(); + if(precision) buffer.size(precision, padchar); + return buffer; +} + +template inline auto octal(T value, long precision, char padchar) -> string { + string buffer; + buffer.resize(sizeof(T) * 3); + char* p = buffer.get(); + + //create a mask to clear the upper three bits + T mask = 1; + mask <<= sizeof(T) * 8 - 3; + mask -= 1; + + u32 size = 0; + do { + p[size++] = '0' + (value & 7); + value = value >> 3 & mask; + } while(value); + buffer.resize(size); + buffer.reverse(); + if(precision) buffer.size(precision, padchar); + return buffer; +} + +template inline auto binary(T value, long precision, char padchar) -> string { + string buffer; + buffer.resize(sizeof(T) * 8); + char* p = buffer.get(); + + //create a mask to clear the upper one bit + T mask = 1; + mask <<= sizeof(T) * 8 - 1; + mask -= 1; + + u32 size = 0; + do { + p[size++] = '0' + (value & 1); + value = value >> 1 & mask; + } while(value); + buffer.resize(size); + buffer.reverse(); + if(precision) buffer.size(precision, padchar); + return buffer; +} + +} diff --git a/waterbox/ares64/ares/nall/string/markup/bml.hpp b/waterbox/ares64/ares/nall/string/markup/bml.hpp new file mode 100644 index 0000000000..52cc19947b --- /dev/null +++ 
b/waterbox/ares64/ares/nall/string/markup/bml.hpp @@ -0,0 +1,189 @@ +#pragma once + +//BML v1.0 parser +//revision 0.04 + +namespace nall::BML { + +//metadata is used to store nesting level + +struct ManagedNode; +using SharedNode = shared_pointer; + +struct ManagedNode : Markup::ManagedNode { +protected: + //test to verify if a valid character for a node name + auto valid(char p) const -> bool { //A-Z, a-z, 0-9, -. + return p - 'A' < 26u || p - 'a' < 26u || p - '0' < 10u || p - '-' < 2u; + } + + //determine indentation level, without incrementing pointer + auto readDepth(const char* p) -> u32 { + u32 depth = 0; + while(p[depth] == '\t' || p[depth] == ' ') depth++; + return depth; + } + + //determine indentation level + auto parseDepth(const char*& p) -> u32 { + u32 depth = readDepth(p); + p += depth; + return depth; + } + + //read name + auto parseName(const char*& p) -> void { + u32 length = 0; + while(valid(p[length])) length++; + if(length == 0) throw "Invalid node name"; + _name = slice(p, 0, length); + p += length; + } + + auto parseData(const char*& p, string_view spacing) -> void { + if(*p == '=' && *(p + 1) == '\"') { + u32 length = 2; + while(p[length] && p[length] != '\n' && p[length] != '\"') length++; + if(p[length] != '\"') throw "Unescaped value"; + _value = {slice(p, 2, length - 2), "\n"}; + p += length + 1; + } else if(*p == '=') { + u32 length = 1; + while(p[length] && p[length] != '\n' && p[length] != '\"' && p[length] != ' ') length++; + if(p[length] == '\"') throw "Illegal character in value"; + _value = {slice(p, 1, length - 1), "\n"}; + p += length; + } else if(*p == ':') { + u32 length = 1; + while(p[length] && p[length] != '\n') length++; + _value = {slice(p, 1, length - 1).trimLeft(spacing, 1L), "\n"}; + p += length; + } + } + + //read all attributes for a node + auto parseAttributes(const char*& p, string_view spacing) -> void { + while(*p && *p != '\n') { + if(*p != ' ') throw "Invalid node name"; + while(*p == ' ') p++; //skip excess 
spaces + if(*(p + 0) == '/' && *(p + 1) == '/') break; //skip comments + + SharedNode node(new ManagedNode); + u32 length = 0; + while(valid(p[length])) length++; + if(length == 0) throw "Invalid attribute name"; + node->_name = slice(p, 0, length); + node->parseData(p += length, spacing); + node->_value.trimRight("\n", 1L); + _children.append(node); + } + } + + //read a node and all of its child nodes + auto parseNode(const vector& text, u32& y, string_view spacing) -> void { + const char* p = text[y++]; + _metadata = parseDepth(p); + parseName(p); + parseData(p, spacing); + parseAttributes(p, spacing); + + while(y < text.size()) { + u32 depth = readDepth(text[y]); + if(depth <= _metadata) break; + + if(text[y][depth] == ':') { + _value.append(slice(text[y++], depth + 1).trimLeft(spacing, 1L), "\n"); + continue; + } + + SharedNode node(new ManagedNode); + node->parseNode(text, y, spacing); + _children.append(node); + } + + _value.trimRight("\n", 1L); + } + + //read top-level nodes + auto parse(string document, string_view spacing) -> void { + //in order to simplify the parsing logic; we do an initial pass to normalize the data + //the below code will turn '\r\n' into '\n'; skip empty lines; and skip comment lines + char* p = document.get(), *output = p; + while(*p) { + char* origin = p; + bool empty = true; + while(*p) { + //scan for first non-whitespace character. 
if it's a line feed or comment; skip the line + if(p[0] == ' ' || p[0] == '\t') { p++; continue; } + empty = p[0] == '\r' || p[0] == '\n' || (p[0] == '/' && p[1] == '/'); + break; + } + while(*p) { + if(p[0] == '\r') p[0] = '\n'; //turns '\r\n' into '\n\n' (second '\n' will be skipped) + if(*p++ == '\n') break; //include '\n' in the output to be copied + } + if(empty) continue; + + memory::move(output, origin, p - origin); + output += p - origin; + } + document.resize(document.size() - (p - output)).trimRight("\n"); + if(document.size() == 0) return; //empty document + + auto text = document.split("\n"); + u32 y = 0; + while(y < text.size()) { + SharedNode node(new ManagedNode); + node->parseNode(text, y, spacing); + if(node->_metadata > 0) throw "Root nodes cannot be indented"; + _children.append(node); + } + } + + friend auto unserialize(const string&, string_view) -> Markup::Node; +}; + +inline auto unserialize(const string& markup, string_view spacing = {}) -> Markup::Node { + SharedNode node(new ManagedNode); + try { + node->parse(markup, spacing); + } catch(const char* error) { + node.reset(); + } + return (Markup::SharedNode&)node; +} + +inline auto serialize(const Markup::Node& node, string_view spacing = {}, u32 depth = 0) -> string { + if(!node.name()) { + string result; + for(auto leaf : node) { + result.append(serialize(leaf, spacing, depth)); + } + return result; + } + + string padding; + padding.resize(depth * 2); + padding.fill(' '); + + vector lines; + if(auto value = node.value()) lines = value.split("\n"); + + string result; + result.append(padding); + result.append(node.name()); + if(lines.size() == 1) result.append(":", spacing, lines[0]); + result.append("\n"); + if(lines.size() > 1) { + padding.append(" "); + for(auto& line : lines) { + result.append(padding, ":", spacing, line, "\n"); + } + } + for(auto leaf : node) { + result.append(serialize(leaf, spacing, depth + 1)); + } + return result; +} + +} diff --git 
a/waterbox/ares64/ares/nall/string/markup/find.hpp b/waterbox/ares64/ares/nall/string/markup/find.hpp new file mode 100644 index 0000000000..de890e63bd --- /dev/null +++ b/waterbox/ares64/ares/nall/string/markup/find.hpp @@ -0,0 +1,144 @@ +#pragma once + +namespace nall::Markup { + +inline auto ManagedNode::_evaluate(string query) const -> bool { + if(!query) return true; + + for(auto& rule : query.split(",")) { + enum class Comparator : u32 { ID, EQ, NE, LT, LE, GT, GE, NF }; + auto comparator = Comparator::ID; + if(rule.match("*!=*")) comparator = Comparator::NE; + else if(rule.match("*<=*")) comparator = Comparator::LE; + else if(rule.match("*>=*")) comparator = Comparator::GE; + else if(rule.match ("*=*")) comparator = Comparator::EQ; + else if(rule.match ("*<*")) comparator = Comparator::LT; + else if(rule.match ("*>*")) comparator = Comparator::GT; + else if(rule.match ("!*")) comparator = Comparator::NF; + + if(comparator == Comparator::ID) { + if(_find(rule).size()) continue; + return false; + } + + if(comparator == Comparator::NF) { + rule.trimLeft("!", 1L); + if(_find(rule).size()) return false; + continue; + } + + vector side; + switch(comparator) { + case Comparator::EQ: side = rule.split ("=", 1L); break; + case Comparator::NE: side = rule.split("!=", 1L); break; + case Comparator::LT: side = rule.split ("<", 1L); break; + case Comparator::LE: side = rule.split("<=", 1L); break; + case Comparator::GT: side = rule.split (">", 1L); break; + case Comparator::GE: side = rule.split(">=", 1L); break; + } + + string data = string{_value}.strip(); + if(side(0)) { + auto result = _find(side(0)); + if(result.size() == 0) return false; + data = result[0].text(); //strips whitespace so rules can match without requiring it + } + + switch(comparator) { + case Comparator::EQ: if(data.match(side(1)) == true) continue; break; + case Comparator::NE: if(data.match(side(1)) == false) continue; break; + case Comparator::LT: if(data.natural() < side(1).natural()) continue; 
break; + case Comparator::LE: if(data.natural() <= side(1).natural()) continue; break; + case Comparator::GT: if(data.natural() > side(1).natural()) continue; break; + case Comparator::GE: if(data.natural() >= side(1).natural()) continue; break; + } + + return false; + } + + return true; +} + +inline auto ManagedNode::_find(const string& query) const -> vector { + vector result; + + auto path = query.split("/"); + string name = path.take(0), rule; + u32 lo = 0u, hi = ~0u; + + if(name.match("*[*]")) { + auto p = name.trimRight("]", 1L).split("[", 1L); + name = p(0); + if(p(1).find("-")) { + p = p(1).split("-", 1L); + lo = !p(0) ? 0u : p(0).natural(); + hi = !p(1) ? ~0u : p(1).natural(); + } else { + lo = hi = p(1).natural(); + } + } + + if(name.match("*(*)")) { + auto p = name.trimRight(")", 1L).split("(", 1L); + name = p(0); + rule = p(1); + } + + u32 position = 0; + for(auto& node : _children) { + if(!node->_name.match(name)) continue; + if(!node->_evaluate(rule)) continue; + + bool inrange = position >= lo && position <= hi; + position++; + if(!inrange) continue; + + if(path.size() == 0) { + result.append(node); + } else for(auto& item : node->_find(path.merge("/"))) { + result.append(item); + } + } + + return result; +} + +//operator[](string) +inline auto ManagedNode::_lookup(const string& path) const -> Node { + auto result = _find(path); + return result ? 
result[0] : Node{}; + +/*//faster, but cannot search + if(auto position = path.find("/")) { + auto name = slice(path, 0, *position); + for(auto& node : _children) { + if(name == node->_name) { + return node->_lookup(slice(path, *position + 1)); + } + } + } else for(auto& node : _children) { + if(path == node->_name) return node; + } + return {}; +*/ +} + +inline auto ManagedNode::_create(const string& path) -> Node { + if(auto position = path.find("/")) { + auto name = slice(path, 0, *position); + for(auto& node : _children) { + if(name == node->_name) { + return node->_create(slice(path, *position + 1)); + } + } + _children.append(new ManagedNode(name)); + return _children.right()->_create(slice(path, *position + 1)); + } + for(auto& node : _children) { + if(path == node->_name) return node; + } + _children.append(new ManagedNode(path)); + return _children.right(); +} + +} diff --git a/waterbox/ares64/ares/nall/string/markup/node.hpp b/waterbox/ares64/ares/nall/string/markup/node.hpp new file mode 100644 index 0000000000..33ed459baf --- /dev/null +++ b/waterbox/ares64/ares/nall/string/markup/node.hpp @@ -0,0 +1,147 @@ +#pragma once + +namespace nall::Markup { + +struct Node; +struct ManagedNode; +using SharedNode = shared_pointer; + +struct ManagedNode { + ManagedNode() = default; + ManagedNode(const string& name) : _name(name) {} + ManagedNode(const string& name, const string& value) : _name(name), _value(value) {} + + auto clone() const -> SharedNode { + SharedNode clone{new ManagedNode(_name, _value)}; + for(auto& child : _children) { + clone->_children.append(child->clone()); + } + return clone; + } + + auto copy(SharedNode source) -> void { + _name = source->_name; + _value = source->_value; + _metadata = source->_metadata; + _children.reset(); + for(auto child : source->_children) { + _children.append(child->clone()); + } + } + +protected: + string _name; + string _value; + uintptr _metadata = 0; + vector _children; + + auto _evaluate(string query) const -> 
bool; + auto _find(const string& query) const -> vector; + auto _lookup(const string& path) const -> Node; + auto _create(const string& path) -> Node; + + friend class Node; +}; + +struct Node { + Node() : shared(new ManagedNode) {} + Node(const SharedNode& source) : shared(source ? source : new ManagedNode) {} + Node(const nall::string& name) : shared(new ManagedNode(name)) {} + Node(const nall::string& name, const nall::string& value) : shared(new ManagedNode(name, value)) {} + + auto unique() const -> bool { return shared.unique(); } + auto clone() const -> Node { return shared->clone(); } + auto copy(Node source) -> void { return shared->copy(source.shared); } + + explicit operator bool() const { return shared->_name || shared->_children; } + auto name() const -> nall::string { return shared->_name; } + auto value() const -> nall::string { return shared->_value; } + + auto value(nall::string& target) const -> bool { if(shared) target = string(); return (bool)shared; } + auto value(bool& target) const -> bool { if(shared) target = boolean(); return (bool)shared; } + auto value(s32& target) const -> bool { if(shared) target = integer(); return (bool)shared; } + auto value(u32& target) const -> bool { if(shared) target = natural(); return (bool)shared; } + auto value(f64& target) const -> bool { if(shared) target = real(); return (bool)shared; } + + auto text() const -> nall::string { return value().strip(); } + auto string() const -> nall::string { return value().strip(); } + auto boolean() const -> bool { return text() == "true"; } + auto integer() const -> s64 { return text().integer(); } + auto natural() const -> u64 { return text().natural(); } + auto real() const -> f64 { return text().real(); } + + auto text(const nall::string& fallback) const -> nall::string { return bool(*this) ? text() : fallback; } + auto string(const nall::string& fallback) const -> nall::string { return bool(*this) ? 
string() : fallback; } + auto boolean(bool fallback) const -> bool { return bool(*this) ? boolean() : fallback; } + auto integer(s64 fallback) const -> s64 { return bool(*this) ? integer() : fallback; } + auto natural(u64 fallback) const -> u64 { return bool(*this) ? natural() : fallback; } + auto real(f64 fallback) const -> f64 { return bool(*this) ? real() : fallback; } + + auto setName(const nall::string& name = "") -> Node& { shared->_name = name; return *this; } + auto setValue(const nall::string& value = "") -> Node& { shared->_value = value; return *this; } + + auto reset() -> void { shared->_children.reset(); } + auto size() const -> u32 { return shared->_children.size(); } + + auto prepend(const Node& node) -> void { shared->_children.prepend(node.shared); } + auto append(const Node& node) -> void { shared->_children.append(node.shared); } + auto remove(const Node& node) -> bool { + for(auto n : range(size())) { + if(node.shared == shared->_children[n]) { + return shared->_children.remove(n), true; + } + } + return false; + } + + auto insert(u32 position, const Node& node) -> bool { + if(position > size()) return false; //used > instead of >= to allow indexed-equivalent of append() + return shared->_children.insert(position, node.shared), true; + } + + auto remove(u32 position) -> bool { + if(position >= size()) return false; + return shared->_children.remove(position), true; + } + + auto swap(u32 x, u32 y) -> bool { + if(x >= size() || y >= size()) return false; + return std::swap(shared->_children[x], shared->_children[y]), true; + } + + auto sort(function comparator = [](auto x, auto y) { + return nall::string::compare(x.shared->_name, y.shared->_name) < 0; + }) -> void { + nall::sort(shared->_children.data(), shared->_children.size(), [&](auto x, auto y) { + return comparator(x, y); //this call converts SharedNode objects to Node objects + }); + } + + auto operator[](s32 position) -> Node { + if(position >= size()) return {}; + return 
shared->_children[position]; + } + + auto operator[](const nall::string& path) const -> Node { return shared->_lookup(path); } + auto operator()(const nall::string& path) -> Node { return shared->_create(path); } + auto find(const nall::string& query) const -> vector { return shared->_find(query); } + + struct iterator { + auto operator*() -> Node { return {source.shared->_children[position]}; } + auto operator!=(const iterator& source) const -> bool { return position != source.position; } + auto operator++() -> iterator& { return position++, *this; } + iterator(const Node& source, u32 position) : source(source), position(position) {} + + private: + const Node& source; + u32 position; + }; + + auto begin() const -> iterator { return iterator(*this, 0); } + auto end() const -> iterator { return iterator(*this, size()); } + +protected: + SharedNode shared; +}; + +} diff --git a/waterbox/ares64/ares/nall/string/markup/xml.hpp b/waterbox/ares64/ares/nall/string/markup/xml.hpp new file mode 100644 index 0000000000..6de2d32755 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/markup/xml.hpp @@ -0,0 +1,217 @@ +#pragma once + +//XML v1.0 subset parser +//revision 0.04 + +namespace nall::XML { + +//metadata: +// 0 = element +// 1 = attribute + +struct ManagedNode; +using SharedNode = shared_pointer; + +struct ManagedNode : Markup::ManagedNode { +protected: + auto escape() const -> string { + string result = _value; + result.replace("&", "&"); + result.replace("<", "<"); + result.replace(">", ">"); + if(_metadata == 1) { + result.replace("\'", "'"); + result.replace("\"", """); + } + return result; + } + + auto isName(char c) const -> bool { + if(c >= 'A' && c <= 'Z') return true; + if(c >= 'a' && c <= 'z') return true; + if(c >= '0' && c <= '9') return true; + if(c == '.' 
|| c == '_') return true; + if(c == '?') return true; + return false; + } + + auto isWhitespace(char c) const -> bool { + if(c == ' ' || c == '\t') return true; + if(c == '\r' || c == '\n') return true; + return false; + } + + //copy part of string from source document into target string; decode markup while copying + auto copy(string& target, const char* source, u32 length) -> void { + target.reserve(length + 1); + + #if defined(NALL_XML_LITERAL) + memory::copy(target.pointer(), source, length); + target[length] = 0; + return; + #endif + + char* output = target.get(); + while(length) { + if(*source == '&') { + if(!memory::compare(source, "<", 4)) { *output++ = '<'; source += 4; length -= 4; continue; } + if(!memory::compare(source, ">", 4)) { *output++ = '>'; source += 4; length -= 4; continue; } + if(!memory::compare(source, "&", 5)) { *output++ = '&'; source += 5; length -= 5; continue; } + if(!memory::compare(source, "'", 6)) { *output++ = '\''; source += 6; length -= 6; continue; } + if(!memory::compare(source, """, 6)) { *output++ = '\"'; source += 6; length -= 6; continue; } + } + + if(_metadata == 0 && source[0] == '<' && source[1] == '!') { + //comment + if(!memory::compare(source, "", 3)) source++, length--; + source += 3, length -= 3; + continue; + } + + //CDATA + if(!memory::compare(source, "", 3)) *output++ = *source++, length--; + source += 3, length -= 3; + continue; + } + } + + *output++ = *source++, length--; + } + *output = 0; + } + + auto parseExpression(const char*& p) -> bool { + if(*(p + 1) != '!') return false; + + //comment + if(!memory::compare(p, "", 3)) p++; + if(!*p) throw "unclosed comment"; + p += 3; + return true; + } + + //CDATA + if(!memory::compare(p, "", 3)) p++; + if(!*p) throw "unclosed CDATA"; + p += 3; + return true; + } + + //DOCTYPE + if(!memory::compare(p, "') counter--; + } while(counter); + return true; + } + + return false; + } + + //returns true if tag closes itself (); false if not () + auto parseHead(const char*& p) 
-> bool { + //parse name + const char* nameStart = ++p; //skip '<' + while(isName(*p)) p++; + const char* nameEnd = p; + copy(_name, nameStart, nameEnd - nameStart); + if(!_name) throw "missing element name"; + + //parse attributes + while(*p) { + while(isWhitespace(*p)) p++; + if(!*p) throw "unclosed attribute"; + if(*p == '?' || *p == '/' || *p == '>') break; + + //parse attribute name + SharedNode attribute(new ManagedNode); + attribute->_metadata = 1; + + const char* nameStart = p; + while(isName(*p)) p++; + const char* nameEnd = p; + copy(attribute->_name, nameStart, nameEnd - nameStart); + if(!attribute->_name) throw "missing attribute name"; + + //parse attribute data + if(*p++ != '=') throw "missing attribute value"; + char terminal = *p++; + if(terminal != '\'' && terminal != '\"') throw "attribute value not quoted"; + const char* dataStart = p; + while(*p && *p != terminal) p++; + if(!*p) throw "missing attribute data terminal"; + const char* dataEnd = p++; //skip closing terminal + + copy(attribute->_value, dataStart, dataEnd - dataStart); + _children.append(attribute); + } + + //parse closure + if(*p == '?' 
&& *(p + 1) == '>') { p += 2; return true; } + if(*p == '/' && *(p + 1) == '>') { p += 2; return true; } + if(*p == '>') { p += 1; return false; } + throw "invalid element tag"; + } + + //parse element and all of its child elements + auto parseElement(const char*& p) -> void { + SharedNode node(new ManagedNode); + if(node->parseHead(p) == false) node->parse(p); + _children.append(node); + } + + //return true if matches this node's name + auto parseClosureElement(const char*& p) -> bool { + if(p[0] != '<' || p[1] != '/') return false; + p += 2; + const char* nameStart = p; + while(*p && *p != '>') p++; + if(*p != '>') throw "unclosed closure element"; + const char* nameEnd = p++; + if(memory::compare(_name.data(), nameStart, nameEnd - nameStart)) throw "closure element name mismatch"; + return true; + } + + //parse contents of an element + auto parse(const char*& p) -> void { + const char* dataStart = p; + const char* dataEnd = p; + + while(*p) { + while(*p && *p != '<') p++; + if(!*p) break; + dataEnd = p; + if(parseClosureElement(p) == true) break; + if(parseExpression(p) == true) continue; + parseElement(p); + } + + copy(_value, dataStart, dataEnd - dataStart); + } + + friend auto unserialize(const string&) -> Markup::SharedNode; +}; + +inline auto unserialize(const string& markup) -> Markup::SharedNode { + auto node = new ManagedNode; + try { + const char* p = markup; + node->parse(p); + } catch(const char* error) { + delete node; + node = nullptr; + } + return node; +} + +} diff --git a/waterbox/ares64/ares/nall/string/match.hpp b/waterbox/ares64/ares/nall/string/match.hpp new file mode 100644 index 0000000000..48af8542b9 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/match.hpp @@ -0,0 +1,90 @@ +#pragma once + +namespace nall { + +//todo: these functions are not binary-safe + +inline auto string::match(string_view source) const -> bool { + const char* s = data(); + const char* p = source.data(); + + const char* cp = nullptr; + const char* mp = nullptr; + 
while(*s && *p != '*') { + if(*p != '?' && *s != *p) return false; + p++, s++; + } + while(*s) { + if(*p == '*') { + if(!*++p) return true; + mp = p, cp = s + 1; + } else if(*p == '?' || *p == *s) { + p++, s++; + } else { + p = mp, s = cp++; + } + } + while(*p == '*') p++; + return !*p; +} + +inline auto string::imatch(string_view source) const -> bool { + static auto chrlower = [](char c) -> char { + return (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c; + }; + + const char* s = data(); + const char* p = source.data(); + + const char* cp = nullptr; + const char* mp = nullptr; + while(*s && *p != '*') { + if(*p != '?' && chrlower(*s) != chrlower(*p)) return false; + p++, s++; + } + while(*s) { + if(*p == '*') { + if(!*++p) return true; + mp = p, cp = s + 1; + } else if(*p == '?' || chrlower(*p) == chrlower(*s)) { + p++, s++; + } else { + p = mp, s = cp++; + } + } + while(*p == '*') p++; + return !*p; +} + +inline auto tokenize(const char* s, const char* p) -> bool { + while(*s) { + if(*p == '*') { + while(*s) if(tokenize(s++, p + 1)) return true; + return !*++p; + } + if(*s++ != *p++) return false; + } + while(*p == '*') p++; + return !*p; +} + +inline auto tokenize(vector& list, const char* s, const char* p) -> bool { + while(*s) { + if(*p == '*') { + const char* b = s; + while(*s) { + if(tokenize(list, s++, p + 1)) { + list.prepend(slice(b, 0, --s - b)); + return true; + } + } + list.prepend(b); + return !*++p; + } + if(*s++ != *p++) return false; + } + while(*p == '*') { list.prepend(s); p++; } + return !*p; +} + +} diff --git a/waterbox/ares64/ares/nall/string/pascal.hpp b/waterbox/ares64/ares/nall/string/pascal.hpp new file mode 100644 index 0000000000..3a332d5354 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/pascal.hpp @@ -0,0 +1,79 @@ +#pragma once + +namespace nall { + +struct string_pascal { + using type = string_pascal; + + string_pascal(const char* text = nullptr) { + if(text && *text) { + u32 size = strlen(text); + _data = 
memory::allocate(sizeof(u32) + size + 1); + ((u32*)_data)[0] = size; + memory::copy(_data + sizeof(u32), text, size); + _data[sizeof(u32) + size] = 0; + } + } + + string_pascal(const string& text) { + if(text.size()) { + _data = memory::allocate(sizeof(u32) + text.size() + 1); + ((u32*)_data)[0] = text.size(); + memory::copy(_data + sizeof(u32), text.data(), text.size()); + _data[sizeof(u32) + text.size()] = 0; + } + } + + string_pascal(const string_pascal& source) { operator=(source); } + string_pascal(string_pascal&& source) { operator=(move(source)); } + + ~string_pascal() { + if(_data) memory::free(_data); + } + + explicit operator bool() const { return _data; } + operator const char*() const { return _data ? _data + sizeof(u32) : nullptr; } + operator string() const { return _data ? string{_data + sizeof(u32)} : ""; } + + auto operator=(const string_pascal& source) -> type& { + if(this == &source) return *this; + if(_data) { memory::free(_data); _data = nullptr; } + if(source._data) { + u32 size = source.size(); + _data = memory::allocate(sizeof(u32) + size); + memory::copy(_data, source._data, sizeof(u32) + size); + } + return *this; + } + + auto operator=(string_pascal&& source) -> type& { + if(this == &source) return *this; + if(_data) memory::free(_data); + _data = source._data; + source._data = nullptr; + return *this; + } + + auto operator==(string_view source) const -> bool { + return size() == source.size() && memory::compare(data(), source.data(), size()) == 0; + } + + auto operator!=(string_view source) const -> bool { + return size() != source.size() || memory::compare(data(), source.data(), size()) != 0; + } + + auto data() const -> char* { + if(!_data) return nullptr; + return _data + sizeof(u32); + } + + auto size() const -> u32 { + if(!_data) return 0; + return ((u32*)_data)[0]; + } + +protected: + char* _data = nullptr; +}; + +} diff --git a/waterbox/ares64/ares/nall/string/replace.hpp b/waterbox/ares64/ares/nall/string/replace.hpp new file 
mode 100644 index 0000000000..4912704fc1 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/replace.hpp @@ -0,0 +1,94 @@ +#pragma once + +namespace nall { + +template +inline auto string::_replace(string_view from, string_view to, long limit) -> string& { + if(limit <= 0 || from.size() == 0) return *this; + + s32 size = this->size(); + s32 matches = 0; + s32 quoted = 0; + + //count matches first, so that we only need to reallocate memory once + //(recording matches would also require memory allocation, so this is not done) + { const char* p = data(); + for(s32 n = 0; n <= size - (s32)from.size();) { + if(Quoted) { if(p[n] == '\"') { quoted ^= 1; n++; continue; } if(quoted) { n++; continue; } } + if(_compare(p + n, size - n, from.data(), from.size())) { n++; continue; } + + if(++matches >= limit) break; + n += from.size(); + } + } + if(matches == 0) return *this; + + //in-place overwrite + if(to.size() == from.size()) { + char* p = get(); + + for(s32 n = 0, remaining = matches, quoted = 0; n <= size - (s32)from.size();) { + if(Quoted) { if(p[n] == '\"') { quoted ^= 1; n++; continue; } if(quoted) { n++; continue; } } + if(_compare(p + n, size - n, from.data(), from.size())) { n++; continue; } + + memory::copy(p + n, to.data(), to.size()); + + if(!--remaining) break; + n += from.size(); + } + } + + //left-to-right shrink + else if(to.size() < from.size()) { + char* p = get(); + s32 offset = 0; + s32 base = 0; + + for(s32 n = 0, remaining = matches, quoted = 0; n <= size - (s32)from.size();) { + if(Quoted) { if(p[n] == '\"') { quoted ^= 1; n++; continue; } if(quoted) { n++; continue; } } + if(_compare(p + n, size - n, from.data(), from.size())) { n++; continue; } + + if(base) memory::move(p + offset, p + base, n - base); + memory::copy(p + offset + (n - base), to.data(), to.size()); + offset += (n - base) + to.size(); + + n += from.size(); + base = n; + if(!--remaining) break; + } + + memory::move(p + offset, p + base, size - base); + resize(size - matches * 
(from.size() - to.size())); + } + + //right-to-left expand + else if(to.size() > from.size()) { + resize(size + matches * (to.size() - from.size())); + char* p = get(); + + s32 offset = this->size(); + s32 base = size; + + for(s32 n = size, remaining = matches; n >= (s32)from.size();) { //quoted reused from parent scope since we are iterating backward + if(Quoted) { if(p[n] == '\"') { quoted ^= 1; n--; continue; } if(quoted) { n--; continue; } } + if(_compare(p + n - from.size(), size - n + from.size(), from.data(), from.size())) { n--; continue; } + + memory::move(p + offset - (base - n), p + base - (base - n), base - n); + memory::copy(p + offset - (base - n) - to.size(), to.data(), to.size()); + offset -= (base - n) + to.size(); + + if(!--remaining) break; + n -= from.size(); + base = n; + } + } + + return *this; +} + +inline auto string::replace(string_view from, string_view to, long limit) -> string& { return _replace<0, 0>(from, to, limit); } +inline auto string::ireplace(string_view from, string_view to, long limit) -> string& { return _replace<1, 0>(from, to, limit); } +inline auto string::qreplace(string_view from, string_view to, long limit) -> string& { return _replace<0, 1>(from, to, limit); } +inline auto string::iqreplace(string_view from, string_view to, long limit) -> string& { return _replace<1, 1>(from, to, limit); } + +}; diff --git a/waterbox/ares64/ares/nall/string/split.hpp b/waterbox/ares64/ares/nall/string/split.hpp new file mode 100644 index 0000000000..b7dbd9251e --- /dev/null +++ b/waterbox/ares64/ares/nall/string/split.hpp @@ -0,0 +1,46 @@ +#pragma once + +namespace nall { + +template +inline auto vector::_split(string_view source, string_view find, long limit) -> type& { + reset(); + if(limit <= 0 || find.size() == 0) return *this; + + const char* p = source.data(); + s32 size = source.size(); + s32 base = 0; + s32 matches = 0; + + for(s32 n = 0, quoted = 0; n <= size - (s32)find.size();) { + if constexpr(Quoted) { + if(quoted && p[n] 
== '\\') { n += 2; continue; } + if(p[n] == '\'' && quoted != 2) { quoted ^= 1; n++; continue; } + if(p[n] == '\"' && quoted != 1) { quoted ^= 2; n++; continue; } + if(quoted) { n++; continue; } + } + if(string::_compare(p + n, size - n, find.data(), find.size())) { n++; continue; } + if(matches >= limit) break; + + string& s = operator()(matches); + s.resize(n - base); + memory::copy(s.get(), p + base, n - base); + + n += find.size(); + base = n; + matches++; + } + + string& s = operator()(matches); + s.resize(size - base); + memory::copy(s.get(), p + base, size - base); + + return *this; +} + +inline auto string::split(string_view on, long limit) const -> vector { return vector()._split<0, 0>(*this, on, limit); } +inline auto string::isplit(string_view on, long limit) const -> vector { return vector()._split<1, 0>(*this, on, limit); } +inline auto string::qsplit(string_view on, long limit) const -> vector { return vector()._split<0, 1>(*this, on, limit); } +inline auto string::iqsplit(string_view on, long limit) const -> vector { return vector()._split<1, 1>(*this, on, limit); } + +} diff --git a/waterbox/ares64/ares/nall/string/transform/cml.hpp b/waterbox/ares64/ares/nall/string/transform/cml.hpp new file mode 100644 index 0000000000..efbf95b773 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/transform/cml.hpp @@ -0,0 +1,120 @@ +#pragma once + +/* CSS Markup Language (CML) v1.0 parser + * revision 0.02 + */ + +#include + +namespace nall { + +struct CML { + auto& setPath(const string& pathname) { settings.path = pathname; return *this; } + auto& setReader(const function& reader) { settings.reader = reader; return *this; } + + auto parse(const string& filename) -> string; + auto parse(const string& filedata, const string& pathname) -> string; + +private: + struct Settings { + string path; + function reader; + } settings; + + struct State { + string output; + } state; + + struct Variable { + string name; + string value; + }; + vector variables; + bool inMedia 
= false; + bool inMediaNode = false; + + auto parseDocument(const string& filedata, const string& pathname, u32 depth) -> bool; +}; + +inline auto CML::parse(const string& filename) -> string { + if(!settings.path) settings.path = Location::path(filename); + string document = settings.reader ? settings.reader(filename) : string::read(filename); + parseDocument(document, settings.path, 0); + return state.output; +} + +inline auto CML::parse(const string& filedata, const string& pathname) -> string { + settings.path = pathname; + parseDocument(filedata, settings.path, 0); + return state.output; +} + +inline auto CML::parseDocument(const string& filedata, const string& pathname, u32 depth) -> bool { + if(depth >= 100) return false; //prevent infinite recursion + + auto vendorAppend = [&](const string& name, const string& value) { + state.output.append(" -moz-", name, ": ", value, ";\n"); + state.output.append(" -webkit-", name, ": ", value, ";\n"); + }; + + for(auto& block : filedata.split("\n\n")) { + auto lines = block.stripRight().split("\n"); + auto name = lines.takeFirst(); + + if(name.beginsWith("include ")) { + name.trimLeft("include ", 1L); + string filename{pathname, name}; + string document = settings.reader ? 
settings.reader(filename) : string::read(filename); + parseDocument(document, Location::path(filename), depth + 1); + continue; + } + + if(name == "variables") { + for(auto& line : lines) { + auto data = line.split(":", 1L).strip(); + variables.append({data(0), data(1)}); + } + continue; + } + + state.output.append(name, " {\n"); + inMedia = name.beginsWith("@media"); + + for(auto& line : lines) { + if(inMedia && !line.find(": ")) { + if(inMediaNode) state.output.append(" }\n"); + state.output.append(line, " {\n"); + inMediaNode = true; + continue; + } + + auto data = line.split(":", 1L).strip(); + auto name = data(0), value = data(1); + while(auto offset = value.find("var(")) { + bool found = false; + if(auto length = value.findFrom(*offset, ")")) { + string name = slice(value, *offset + 4, *length - 4); + for(auto& variable : variables) { + if(variable.name == name) { + value = {slice(value, 0, *offset), variable.value, slice(value, *offset + *length + 1)}; + found = true; + break; + } + } + } + if(!found) break; + } + state.output.append(inMedia ? 
" " : " ", name, ": ", value, ";\n"); + if(name == "box-sizing") vendorAppend(name, value); + } + if(inMediaNode) { + state.output.append(" }\n"); + inMediaNode = false; + } + state.output.append("}\n\n"); + } + + return true; +} + +} diff --git a/waterbox/ares64/ares/nall/string/transform/dml.hpp b/waterbox/ares64/ares/nall/string/transform/dml.hpp new file mode 100644 index 0000000000..f5bc64278e --- /dev/null +++ b/waterbox/ares64/ares/nall/string/transform/dml.hpp @@ -0,0 +1,360 @@ +#pragma once + +/* Document Markup Language (DML) v1.0 parser + * revision 0.06 + */ + +#include + +namespace nall { + +struct DML { + auto content() const -> string { return state.output; } + + auto& setAllowHTML(bool allowHTML) { settings.allowHTML = allowHTML; return *this; } + auto& setHost(const string& hostname) { settings.host = hostname; return *this; } + auto& setPath(const string& pathname) { settings.path = pathname; return *this; } + auto& setReader(const function& reader) { settings.reader = reader; return *this; } + + auto parse(const string& filedata, const string& pathname) -> string; + auto parse(const string& filename) -> string; + + auto attribute(const string& name) const -> string; + +private: + struct Settings { + bool allowHTML = true; + string host = "localhost"; + string path; + function reader; + } settings; + + struct State { + string output; + } state; + + struct Attribute { + string name; + string value; + }; + vector attributes; + + auto parseDocument(const string& filedata, const string& pathname, u32 depth) -> bool; + auto parseBlock(string& block, const string& pathname, u32 depth) -> bool; + auto count(const string& text, char value) -> u32; + + auto address(string text) -> string; + auto escape(const string& text) -> string; + auto anchor(const string& text) -> string; + auto markup(const string& text) -> string; +}; + +inline auto DML::attribute(const string& name) const -> string { + for(auto& attribute : attributes) { + if(attribute.name == 
name) return attribute.value; + } + return {}; +} + +inline auto DML::parse(const string& filedata, const string& pathname) -> string { + state = {}; + settings.path = pathname; + parseDocument(filedata, settings.path, 0); + return state.output; +} + +inline auto DML::parse(const string& filename) -> string { + state = {}; + if(!settings.path) settings.path = Location::path(filename); + string document = settings.reader ? settings.reader(filename) : string::read(filename); + parseDocument(document, settings.path, 0); + return state.output; +} + +inline auto DML::parseDocument(const string& filedata, const string& pathname, u32 depth) -> bool { + if(depth >= 100) return false; //attempt to prevent infinite recursion with reasonable limit + + auto blocks = filedata.split("\n\n"); + for(auto& block : blocks) parseBlock(block, pathname, depth); + return true; +} + +inline auto DML::parseBlock(string& block, const string& pathname, u32 depth) -> bool { + if(!block.stripRight()) return true; + auto lines = block.split("\n"); + + //include + if(block.beginsWith("")) { + string filename{pathname, block.trim("", 1L).strip()}; + string document = settings.reader ? settings.reader(filename) : string::read(filename); + parseDocument(document, Location::path(filename), depth + 1); + } + + //attribute + else if(block.beginsWith("? ")) { + for(auto n : range(lines.size())) { + if(!lines[n].beginsWith("? ")) continue; + auto part = lines[n].trimLeft("? 
", 1L).split(":", 1L); + if(part.size() != 2) continue; + auto name = part[0].strip(); + auto value = part[1].strip(); + attributes.append({name, value}); + } + } + + //html + else if(block.beginsWith("\n") && settings.allowHTML) { + for(auto n : range(lines.size())) { + if(n == 0 || !lines[n].beginsWith(" ")) continue; + state.output.append(lines[n].trimLeft(" ", 1L), "\n"); + } + } + + //header + else if(auto depth = count(block, '#')) { + auto content = slice(lines.takeLeft(), depth + 1); + auto data = markup(content); + auto name = anchor(content); + if(depth <= 5) { + state.output.append("", data); + for(auto& line : lines) { + if(count(line, '#') != depth) continue; + state.output.append("", slice(line, depth + 1), ""); + } + state.output.append("\n"); + } + } + + //navigation + else if(count(block, '-')) { + state.output.append("

\n"); + } + + //list + else if(count(block, '*')) { + u32 level = 0; + for(auto& line : lines) { + if(auto depth = count(line, '*')) { + while(level < depth) level++, state.output.append("
    \n"); + while(level > depth) level--, state.output.append("
\n"); + auto data = markup(slice(line, depth + 1)); + state.output.append("
  • ", data, "
  • \n"); + } + } + while(level--) state.output.append("\n"); + } + + //quote + else if(count(block, '>')) { + u32 level = 0; + for(auto& line : lines) { + if(auto depth = count(line, '>')) { + while(level < depth) level++, state.output.append("
    \n"); + while(level > depth) level--, state.output.append("
    \n"); + auto data = markup(slice(line, depth + 1)); + state.output.append(data, "\n"); + } + } + while(level--) state.output.append("\n"); + } + + //code + else if(block.beginsWith(" ")) { + state.output.append("
    ");
    +    for(auto& line : lines) {
    +      if(!line.beginsWith("  ")) continue;
    +      state.output.append(escape(line.trimLeft("  ", 1L)), "\n");
    +    }
    +    state.output.trimRight("\n", 1L).append("
    \n"); + } + + //divider + else if(block.equals("---")) { + state.output.append("
    \n"); + } + + //paragraph + else { + auto content = markup(block); + if(content.beginsWith("")) { + state.output.append(content, "\n"); + } else { + state.output.append("

    ", content, "

    \n"); + } + } + + return true; +} + +inline auto DML::count(const string& text, char value) -> u32 { + for(u32 n = 0; n < text.size(); n++) { + if(text[n] != value) { + if(text[n] == ' ') return n; + break; + } + } + return 0; +} + +// . => domain +// ./* => domain/* +// ../subdomain => subdomain.domain +// ../subdomain/* => subdomain.domain/* +inline auto DML::address(string s) -> string { + if(s.beginsWith("../")) { + s.trimLeft("../", 1L); + if(auto p = s.find("/")) { + return {"//", s.slice(0, *p), ".", settings.host, s.slice(*p)}; + } else { + return {"//", s, ".", settings.host}; + } + } + if(s.beginsWith("./")) { + s.trimLeft(".", 1L); + return {"//", settings.host, s}; + } + if(s == ".") { + return {"//", settings.host}; + } + return s; +} + +inline auto DML::escape(const string& text) -> string { + string output; + for(auto c : text) { + if(c == '&') { output.append("&"); continue; } + if(c == '<') { output.append("<"); continue; } + if(c == '>') { output.append(">"); continue; } + if(c == '"') { output.append("""); continue; } + output.append(c); + } + return output; +} + +inline auto DML::anchor(const string& text) -> string { + string output; + for(char c : text) { + if(c >= 'a' && c <= 'z') { output.append(c); continue; } + if(c >= 'A' && c <= 'Z') { output.append(char(c + 0x20)); continue; } + if(!output.endsWith("-")) output.append('-'); + } + return output.trim("-", "-"); +} + +inline auto DML::markup(const string& s) -> string { + string t; + + boolean strong; + boolean emphasis; + boolean insertion; + boolean deletion; + boolean code; + + maybe link; + maybe image; + + for(u32 n = 0; n < s.size();) { + char a = s[n]; + char b = s[n + 1]; + + if(!link && !image) { + if(a == '*' && b == '*') { t.append(strong.flip() ? "" : ""); n += 2; continue; } + if(a == '/' && b == '/') { t.append(emphasis.flip() ? "" : ""); n += 2; continue; } + if(a == '_' && b == '_') { t.append(insertion.flip() ? 
"" : ""); n += 2; continue; } + if(a == '~' && b == '~') { t.append(deletion.flip() ? "" : ""); n += 2; continue; } + if(a == '|' && b == '|') { t.append(code.flip() ? "" : ""); n += 2; continue; } + if(a =='\\' && b =='\\') { t.append("
    "); n += 2; continue; } + + if(a == '[' && b == '[') { n += 2; link = n; continue; } + if(a == '{' && b == '{') { n += 2; image = n; continue; } + } + + if(link && !image && a == ']' && b == ']') { + auto list = slice(s, link(), n - link()).split("::", 1L); + string uri = address(list.last()); + string name = list.size() == 2 ? list.first() : uri.split("//", 1L).last(); + + t.append("", escape(name), ""); + + n += 2; + link = nothing; + continue; + } + + if(image && !link && a == '}' && b == '}') { + auto side = slice(s, image(), n - image()).split("}{", 1L); + auto list = side(0).split("::", 1L); + string uri = address(list.last()); + string name = list.size() == 2 ? list.first() : uri.split("//", 1L).last(); + list = side(1).split("; "); + boolean link, title, caption; + string Class, width, height; + for(auto p : list) { + if(p == "link") { link = true; continue; } + if(p == "title") { title = true; continue; } + if(p == "caption") { caption = true; continue; } + if(p.beginsWith("class:")) { p.trimLeft("class:", 1L); Class = p.strip(); continue; } + if(p.beginsWith("width:")) { p.trimLeft("width:", 1L); width = p.strip(); continue; } + if(p.beginsWith("height:")) { p.trimLeft("height:", 1L); height = p.strip(); continue; } + } + + if(caption) { + t.append("
    \n"); + if(link) t.append(""); + t.append("\"",\n"); + if(link) t.append("\n"); + t.append("
    ", escape(name), "
    \n"); + t.append("
    "); + } else { + if(link) t.append(""); + t.append("\"","); + if(link) t.append(""); + } + + n += 2; + image = nothing; + continue; + } + + if(link || image) { n++; continue; } + if(a =='\\') { t.append(b); n += 2; continue; } + if(a == '&') { t.append("&"); n++; continue; } + if(a == '<') { t.append("<"); n++; continue; } + if(a == '>') { t.append(">"); n++; continue; } + if(a == '"') { t.append("""); n++; continue; } + t.append(a); n++; continue; + } + + if(strong) t.append(""); + if(emphasis) t.append(""); + if(insertion) t.append(""); + if(deletion) t.append(""); + if(code) t.append(""); + + return t; +} + +} diff --git a/waterbox/ares64/ares/nall/string/trim.hpp b/waterbox/ares64/ares/nall/string/trim.hpp new file mode 100644 index 0000000000..e767526575 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/trim.hpp @@ -0,0 +1,102 @@ +#pragma once + +namespace nall { + +inline auto string::trim(string_view lhs, string_view rhs, long limit) -> string& { + trimRight(rhs, limit); + trimLeft(lhs, limit); + return *this; +} + +inline auto string::trimLeft(string_view lhs, long limit) -> string& { + if(lhs.size() == 0) return *this; + long matches = 0; + while(matches < limit) { + s32 offset = lhs.size() * matches; + s32 length = (s32)size() - offset; + if(length < (s32)lhs.size()) break; + if(memory::compare(data() + offset, lhs.data(), lhs.size()) != 0) break; + matches++; + } + if(matches) remove(0, lhs.size() * matches); + return *this; +} + +inline auto string::trimRight(string_view rhs, long limit) -> string& { + if(rhs.size() == 0) return *this; + long matches = 0; + while(matches < limit) { + s32 offset = (s32)size() - rhs.size() * (matches + 1); + s32 length = (s32)size() - offset; + if(offset < 0 || length < (s32)rhs.size()) break; + if(memory::compare(data() + offset, rhs.data(), rhs.size()) != 0) break; + matches++; + } + if(matches) resize(size() - rhs.size() * matches); + return *this; +} + +inline auto string::itrim(string_view lhs, string_view 
rhs, long limit) -> string& { + itrimRight(rhs, limit); + itrimLeft(lhs, limit); + return *this; +} + +inline auto string::itrimLeft(string_view lhs, long limit) -> string& { + if(lhs.size() == 0) return *this; + long matches = 0; + while(matches < limit) { + s32 offset = lhs.size() * matches; + s32 length = (s32)size() - offset; + if(length < (s32)lhs.size()) break; + if(memory::icompare(data() + offset, lhs.data(), lhs.size()) != 0) break; + matches++; + } + if(matches) remove(0, lhs.size() * matches); + return *this; +} + +inline auto string::itrimRight(string_view rhs, long limit) -> string& { + if(rhs.size() == 0) return *this; + long matches = 0; + while(matches < limit) { + s32 offset = (s32)size() - rhs.size() * (matches + 1); + s32 length = (s32)size() - offset; + if(offset < 0 || length < (s32)rhs.size()) break; + if(memory::icompare(data() + offset, rhs.data(), rhs.size()) != 0) break; + matches++; + } + if(matches) resize(size() - rhs.size() * matches); + return *this; +} + +inline auto string::strip() -> string& { + stripRight(); + stripLeft(); + return *this; +} + +inline auto string::stripLeft() -> string& { + u32 length = 0; + while(length < size()) { + char input = operator[](length); + if(input != ' ' && input != '\t' && input != '\r' && input != '\n') break; + length++; + } + if(length) remove(0, length); + return *this; +} + +inline auto string::stripRight() -> string& { + u32 length = 0; + while(length < size()) { + bool matched = false; + char input = operator[](size() - length - 1); + if(input != ' ' && input != '\t' && input != '\r' && input != '\n') break; + length++; + } + if(length) resize(size() - length); + return *this; +} + +} diff --git a/waterbox/ares64/ares/nall/string/utf8.hpp b/waterbox/ares64/ares/nall/string/utf8.hpp new file mode 100644 index 0000000000..501ace6a16 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/utf8.hpp @@ -0,0 +1,32 @@ +#pragma once + +namespace nall { + +//note: this function assumes the string 
contains valid UTF-8 characters +//invalid characters will result in an incorrect result from this function +//invalid case 1: byte 1 == 0b'01xxxxxx +//invalid case 2: bytes 2-4 != 0b'10xxxxxx +//invalid case 3: end of string without bytes 2-4 present +inline auto characters(string_view self, s32 offset, s32 length) -> u32 { + u32 characters = 0; + if(offset < 0) offset = self.size() - abs(offset); + if(offset >= 0 && offset < self.size()) { + if(length < 0) length = self.size() - offset; + if(length >= 0) { + for(s32 index = offset; index < offset + length;) { + auto byte = self.data()[index++]; + if((byte & 0b111'00000) == 0b110'00000) index += 1; + if((byte & 0b1111'0000) == 0b1110'0000) index += 2; + if((byte & 0b11111'000) == 0b11110'000) index += 3; + characters++; + } + } + } + return characters; +} + +inline auto string::characters(s32 offset, s32 length) const -> u32 { + return nall::characters(*this, offset, length); +} + +} diff --git a/waterbox/ares64/ares/nall/string/utility.hpp b/waterbox/ares64/ares/nall/string/utility.hpp new file mode 100644 index 0000000000..0417bce6c8 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/utility.hpp @@ -0,0 +1,184 @@ +#pragma once + +namespace nall { + +inline auto string::read(string_view filename) -> string { + #if !defined(_WIN32) + FILE* fp = fopen(filename, "rb"); + #else + FILE* fp = _wfopen(utf16_t(filename), L"rb"); + #endif + + string result; + if(!fp) return result; + + fseek(fp, 0, SEEK_END); + s32 filesize = ftell(fp); + if(filesize < 0) return fclose(fp), result; + + rewind(fp); + result.resize(filesize); + (void)fread(result.get(), 1, filesize, fp); + return fclose(fp), result; +} + +inline auto string::repeat(string_view pattern, u32 times) -> string { + string result; + while(times--) result.append(pattern.data()); + return result; +} + +inline auto string::fill(char fill) -> string& { + memory::fill(get(), size(), fill); + return *this; +} + +inline auto string::hash() const -> u32 { + const char* 
p = data(); + u32 length = size(); + u32 result = 5381; + while(length--) result = (result << 5) + result + *p++; + return result; +} + +inline auto string::remove(u32 offset, u32 length) -> string& { + char* p = get(); + length = min(length, size()); + memory::move(p + offset, p + offset + length, size() - length); + return resize(size() - length); +} + +inline auto string::reverse() -> string& { + char* p = get(); + u32 length = size(); + u32 pivot = length >> 1; + for(s32 x = 0, y = length - 1; x < pivot && y >= 0; x++, y--) std::swap(p[x], p[y]); + return *this; +} + +//+length => insert/delete from start (right justify) +//-length => insert/delete from end (left justify) +inline auto string::size(s32 length, char fill) -> string& { + u32 size = this->size(); + if(size == length) return *this; + + bool right = length >= 0; + length = abs(length); + + if(size < length) { //expand + resize(length); + char* p = get(); + u32 displacement = length - size; + if(right) memory::move(p + displacement, p, size); + else p += size; + while(displacement--) *p++ = fill; + } else { //shrink + char* p = get(); + u32 displacement = size - length; + if(right) memory::move(p, p + displacement, length); + resize(length); + } + + return *this; +} + +inline auto slice(string_view self, s32 offset, s32 length) -> string { + string result; + if(offset < 0) offset = self.size() - abs(offset); + if(offset >= 0 && offset < self.size()) { + if(length < 0) length = self.size() - offset; + if(length >= 0) { + result.resize(length); + memory::copy(result.get(), self.data() + offset, length); + } + } + return result; +} + +inline auto string::slice(s32 offset, s32 length) const -> string { + return nall::slice(*this, offset, length); +} + +template inline auto fromInteger(char* result, T value) -> char* { + bool negative = value < 0; + if(!negative) value = -value; //negate positive integers to support eg INT_MIN + + char buffer[1 + sizeof(T) * 3]; + u32 size = 0; + + do { + s32 n = value % 
10; //-0 to -9 + buffer[size++] = '0' - n; //'0' to '9' + value /= 10; + } while(value); + if(negative) buffer[size++] = '-'; + + for(s32 x = size - 1, y = 0; x >= 0 && y < size; x--, y++) result[x] = buffer[y]; + result[size] = 0; + return result; +} + +template inline auto fromNatural(char* result, T value) -> char* { + char buffer[1 + sizeof(T) * 3]; + u32 size = 0; + + do { + u32 n = value % 10; + buffer[size++] = '0' + n; + value /= 10; + } while(value); + + for(s32 x = size - 1, y = 0; x >= 0 && y < size; x--, y++) result[x] = buffer[y]; + result[size] = 0; + return result; +} + +template inline auto fromHex(char* result, T value) -> char* { + char buffer[1 + sizeof(T) * 2]; + u32 size = 0; + + do { + u32 n = value & 15; + if(n <= 9) { + buffer[size++] = '0' + n; + } else { + buffer[size++] = 'a' + n - 10; + } + value >>= 4; + } while(value); + + for(s32 x = size - 1, y = 0; x >= 0 && y < size; x--, y++) result[x] = buffer[y]; + result[size] = 0; + return result; +} + +//using sprintf is certainly not the most ideal method to convert +//a double to a string ... but attempting to parse a double by +//hand, digit-by-digit, results in subtle rounding errors. +template inline auto fromReal(char* result, T value) -> u32 { + char buffer[256]; + #ifdef _WIN32 + //Windows C-runtime does not support long double via sprintf() + sprintf(buffer, "%f", (double)value); + #else + sprintf(buffer, "%Lf", (long double)value); + #endif + + //remove excess 0's in fraction (2.500000 -> 2.5) + for(char* p = buffer; *p; p++) { + if(*p == '.') { + char* p = buffer + strlen(buffer) - 1; + while(*p == '0') { + if(*(p - 1) != '.') *p = 0; //... but not for eg 1.0 -> 1. 
+ p--; + } + break; + } + } + + u32 length = strlen(buffer); + if(result) strcpy(result, buffer); + return length + 1; +} + +} diff --git a/waterbox/ares64/ares/nall/string/vector.hpp b/waterbox/ares64/ares/nall/string/vector.hpp new file mode 100644 index 0000000000..ace49758c2 --- /dev/null +++ b/waterbox/ares64/ares/nall/string/vector.hpp @@ -0,0 +1,60 @@ +#pragma once + +namespace nall { + +template inline auto vector::append(const string& data, P&&... p) -> type& { + vector_base::append(data); + append(forward

    (p)...); + return *this; +} + +inline auto vector::append() -> type& { + return *this; +} + +inline auto vector::isort() -> type& { + sort([](const string& x, const string& y) { + return memory::icompare(x.data(), x.size(), y.data(), y.size()) < 0; + }); + return *this; +} + +inline auto vector::find(string_view source) const -> maybe { + for(u32 n = 0; n < size(); n++) { + if(operator[](n).equals(source)) return n; + } + return {}; +} + +inline auto vector::ifind(string_view source) const -> maybe { + for(u32 n = 0; n < size(); n++) { + if(operator[](n).iequals(source)) return n; + } + return {}; +} + +inline auto vector::match(string_view pattern) const -> vector { + vector result; + for(u32 n = 0; n < size(); n++) { + if(operator[](n).match(pattern)) result.append(operator[](n)); + } + return result; +} + +inline auto vector::merge(string_view separator) const -> string { + string output; + for(u32 n = 0; n < size(); n++) { + output.append(operator[](n)); + if(n < size() - 1) output.append(separator.data()); + } + return output; +} + +inline auto vector::strip() -> type& { + for(u32 n = 0; n < size(); n++) { + operator[](n).strip(); + } + return *this; +} + +} diff --git a/waterbox/ares64/ares/nall/string/view.hpp b/waterbox/ares64/ares/nall/string/view.hpp new file mode 100644 index 0000000000..148c43e91d --- /dev/null +++ b/waterbox/ares64/ares/nall/string/view.hpp @@ -0,0 +1,90 @@ +#pragma once + +namespace nall { + +inline string_view::string_view() { + _string = nullptr; + _data = ""; + _size = 0; +} + +inline string_view::string_view(const string_view& source) { + if(this == &source) return; + _string = nullptr; + _data = source._data; + _size = source._size; +} + +inline string_view::string_view(string_view&& source) { + if(this == &source) return; + _string = source._string; + _data = source._data; + _size = source._size; + source._string = nullptr; +} + +inline string_view::string_view(const char* data) { + _string = nullptr; + _data = data; + _size 
= -1; //defer length calculation, as it is often unnecessary +} + +//todo: this collides with eg: {"value: ", (u32)0} +inline string_view::string_view(const char* data, u32 size) { + _string = nullptr; + _data = data; + _size = size; +} + +inline string_view::string_view(const string& source) { + _string = nullptr; + _data = source.data(); + _size = source.size(); +} + +template +inline string_view::string_view(P&&... p) { + _string = new string{forward

    (p)...}; + _data = _string->data(); + _size = _string->size(); +} + +inline string_view::~string_view() { + if(_string) delete _string; +} + +inline auto string_view::operator=(const string_view& source) -> type& { + if(this == &source) return *this; + _string = nullptr; + _data = source._data; + _size = source._size; + return *this; +} + +inline auto string_view::operator=(string_view&& source) -> type& { + if(this == &source) return *this; + _string = source._string; + _data = source._data; + _size = source._size; + source._string = nullptr; + return *this; +} + +inline string_view::operator bool() const { + return _size > 0; +} + +inline string_view::operator const char*() const { + return _data; +} + +inline auto string_view::data() const -> const char* { + return _data; +} + +inline auto string_view::size() const -> u32 { + if(_size < 0) _size = strlen(_data); + return _size; +} + +} diff --git a/waterbox/ares64/ares/nall/suffix-array.hpp b/waterbox/ares64/ares/nall/suffix-array.hpp new file mode 100644 index 0000000000..3d157ca71f --- /dev/null +++ b/waterbox/ares64/ares/nall/suffix-array.hpp @@ -0,0 +1,386 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace nall { + +/* + +input: + data = "acaacatat" + 0 "acaacatat" + 1 "caacatat" + 2 "aacatat" + 3 "acatat" + 4 "catat" + 5 "atat" + 6 "tat" + 7 "at" + 8 "t" + 9 "" + +suffix_array: + suffixes = [9,2,0,3,7,5,1,4,8,6] => input + suffixes: + 9 "" + 2 "aacatat" + 0 "acaacatat" + 3 "acatat" + 7 "at" + 5 "atat" + 1 "caacatat" + 4 "catat" + 8 "t" + 6 "tat" + +[auxiliary data structures to represent information lost from suffix trees] + +suffix_array_invert: + inverted = [2,6,1,3,7,5,9,4,8,0] => input + suffixes[inverted]: + 2 "acaacatat" + 6 "caacatat" + 1 "aacatat" + 3 "acatat" + 7 "catat" + 5 "atat" + 9 "tat" + 4 "at" + 8 "t" + 0 "" + +suffix_array_phi: + phi = [2,5,9,0,1,7,8,3,4,0] + +suffix_array_lcp: + prefixes = [0,0,1,3,1,2,0,2,0,1] => lcp[n] == lcp(n, n-1) + "" 0 + "aacatat" 
0 + "acaacatat" 1 "a" + "acatat" 3 "aca" + "at" 1 "a" + "atat" 2 "at" + "caacatat" 0 + "catat" 2 "ca" + "t" 0 + "tat" 1 "t" + +suffix_array_plcp: + plcp = [1,0,0,3,2,2,1,1,0,0] + +suffix_array_lrcp: + llcp = [0,0,0,3,0,2,0,2,0,0] => llcp[m] == lcp(l, m) + rlcp = [0,1,1,1,0,0,0,0,0,0] => rlcp[m] == lcp(m, r) + +suffix_array_lpf: + lengths = [0,0,1,3,2,1,0,2,1,0] + offsets = [0,0,0,0,1,3,4,5,6,2] + "acaacatat" (0,-) + "caacatat" (0,-) + "aacatat" (1,0) at 0, match "a" + "acatat" (3,0) at 0, match "aca" + "catat" (2,1) at 1, match "ca" + "atat" (1,3) at 3, match "a" + "tat" (0,-) + "at" (2,5) at 5, match "at" + "t" (1,6) at 6, match "t" + "" (0,-) + +*/ + +// suffix array via induced sorting +// O(n) +inline auto suffix_array(array_view input) -> vector { + return induced_sort(input); +} + +// inverse +// O(n) +inline auto suffix_array_invert(array_view sa) -> vector { + vector isa; + isa.reallocate(sa.size()); + for(s32 i : range(sa.size())) isa[sa[i]] = i; + return isa; +} + +// auxiliary data structure for plcp and lpf computation +// O(n) +inline auto suffix_array_phi(array_view sa) -> vector { + vector phi; + phi.reallocate(sa.size()); + phi[sa[0]] = 0; + for(s32 i : range(1, sa.size())) phi[sa[i]] = sa[i - 1]; + return phi; +} + +// longest common prefix: lcp(l, r) +// O(n) +inline auto suffix_array_lcp(s32 l, s32 r, array_view sa, array_view input) -> s32 { + s32 i = sa[l], j = sa[r], k = 0, size = input.size(); + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + return k; +} + +// longest common prefix: lcp(i, j, k) +// O(n) +inline auto suffix_array_lcp(s32 i, s32 j, s32 k, array_view input) -> s32 { + s32 size = input.size(); + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + return k; +} + +// longest common prefix: lcp[n] == lcp(n, n-1) +// O(n) +inline auto suffix_array_lcp(array_view sa, array_view isa, array_view input) -> vector { + s32 k = 0, size = input.size(); + vector lcp; + lcp.reallocate(size + 
1); + for(s32 i : range(size)) { + if(isa[i] == size) { k = 0; continue; } //the next substring is empty; ignore it + s32 j = sa[isa[i] + 1]; + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + lcp[1 + isa[i]] = k; + if(k) k--; + } + lcp[0] = 0; + return lcp; +} + +// longest common prefix (from permuted longest common prefix) +// O(n) +inline auto suffix_array_lcp(array_view plcp, array_view sa) -> vector { + vector lcp; + lcp.reallocate(plcp.size()); + for(s32 i : range(plcp.size())) lcp[i] = plcp[sa[i]]; + return lcp; +} + +// permuted longest common prefix +// O(n) +inline auto suffix_array_plcp(array_view phi, array_view input) -> vector { + vector plcp; + plcp.reallocate(phi.size()); + s32 k = 0, size = input.size(); + for(s32 i : range(size)) { + s32 j = phi[i]; + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + plcp[i] = k; + if(k) k--; + } + return plcp; +} + +// permuted longest common prefix (from longest common prefix) +// O(n) +inline auto suffix_array_plcp(array_view lcp, array_view sa) -> vector { + vector plcp; + plcp.reallocate(lcp.size()); + for(s32 i : range(lcp.size())) plcp[sa[i]] = lcp[i]; + return plcp; +} + +// longest common prefixes - left + right +// llcp[m] == lcp(l, m) +// rlcp[m] == lcp(m, r) +// O(n) +// requires: lcp -or- plcp+sa +inline auto suffix_array_lrcp(vector& llcp, vector& rlcp, array_view lcp, array_view plcp, array_view sa, array_view input) -> void { + s32 size = input.size(); + llcp.reset(), llcp.reallocate(size + 1); + rlcp.reset(), rlcp.reallocate(size + 1); + + function recurse = [&](s32 l, s32 r) -> s32 { + if(l >= r - 1) { + if(l >= size) return 0; + if(lcp) return lcp[l]; + return plcp[sa[l]]; + } + s32 m = l + r >> 1; + llcp[m - 1] = recurse(l, m); + rlcp[m - 1] = recurse(m, r); + return min(llcp[m - 1], rlcp[m - 1]); + }; + recurse(1, size + 1); + + llcp[0] = 0; + rlcp[0] = 0; +} + +// longest previous factor +// O(n) +// optional: plcp +inline auto 
suffix_array_lpf(vector& lengths, vector& offsets, array_view phi, array_view plcp, array_view input) -> void { + s32 k = 0, size = input.size(); + lengths.reset(), lengths.resize(size + 1, -1); + offsets.reset(), offsets.resize(size + 1, -1); + + function recurse = [&](s32 i, s32 j, s32 k) -> void { + if(lengths[i] < 0) { + lengths[i] = k; + offsets[i] = j; + } else if(lengths[i] < k) { + if(offsets[i] > j) { + recurse(offsets[i], j, lengths[i]); + } else { + recurse(j, offsets[i], lengths[i]); + } + lengths[i] = k; + offsets[i] = j; + } else { + if(offsets[i] > j) { + recurse(offsets[i], j, k); + } else { + recurse(j, offsets[i], k); + } + } + }; + + for(s32 i : range(size)) { + s32 j = phi[i]; + if(plcp) k = plcp[i]; + else while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + if(i > j) { + recurse(i, j, k); + } else { + recurse(j, i, k); + } + if(k) k--; + } + + lengths[0] = 0; + offsets[0] = 0; +} + +// O(n log m) +inline auto suffix_array_find(s32& length, s32& offset, array_view sa, array_view input, array_view match) -> bool { + length = 0, offset = 0; + s32 l = 0, r = input.size(); + + while(l < r - 1) { + s32 m = l + r >> 1; + s32 s = sa[m]; + + s32 k = 0; + while(k < match.size() && s + k < input.size()) { + if(match[k] != input[s + k]) break; + k++; + } + + if(k > length) { + length = k; + offset = s; + if(k == match.size()) return true; + } + + if(k == match.size() || s + k == input.size()) k--; + + if(match[k] < input[s + k]) { + r = m; + } else { + l = m; + } + } + + return false; +} + +// O(n + log m) +inline auto suffix_array_find(s32& length, s32& offset, array_view llcp, array_view rlcp, array_view sa, array_view input, array_view match) -> bool { + length = 0, offset = 0; + s32 l = 0, r = input.size(), k = 0; + + while(l < r - 1) { + s32 m = l + r >> 1; + s32 s = sa[m]; + + while(k < match.size() && s + k < input.size()) { + if(match[k] != input[s + k]) break; + k++; + } + + if(k > length) { + length = k; + offset = s; + 
if(k == match.size()) return true; + } + + if(k == match.size() || s + k == input.size()) k--; + + if(match[k] < input[s + k]) { + r = m; + k = min(k, llcp[m]); + } else { + l = m; + k = min(k, rlcp[m]); + } + } + + return false; +} + +// + +//there are multiple strategies for building the required auxiliary structures for suffix arrays + +struct SuffixArray { + using type = SuffixArray; + + //O(n) + SuffixArray(array_view input) : input(input) { + sa = suffix_array(input); + } + + //O(n) + auto lrcp() -> type& { + //if(!isa) isa = suffix_array_invert(sa); + //if(!lcp) lcp = suffix_array_lcp(sa, isa, input); + if(!phi) phi = suffix_array_phi(sa); + if(!plcp) plcp = suffix_array_plcp(phi, input); + //if(!lcp) lcp = suffix_array_lcp(plcp, sa); + if(!llcp || !rlcp) suffix_array_lrcp(llcp, rlcp, lcp, plcp, sa, input); + return *this; + } + + //O(n) + auto lpf() -> type& { + if(!phi) phi = suffix_array_phi(sa); + //if(!plcp) plcp = suffix_array_plcp(phi, input); + if(!lengths || !offsets) suffix_array_lpf(lengths, offsets, phi, plcp, input); + return *this; + } + + auto operator[](s32 offset) const -> s32 { + return sa[offset]; + } + + //O(n log m) + //O(n + log m) with lrcp() + auto find(s32& length, s32& offset, array_view match) -> bool { + if(!llcp || !rlcp) return suffix_array_find(length, offset, sa, input, match); //O(n log m) + return suffix_array_find(length, offset, llcp, rlcp, sa, input, match); //O(n + log m) + } + + //O(n) with lpf() + auto previous(s32& length, s32& offset, s32 address) -> void { + length = lengths[address]; + offset = offsets[address]; + } + + //non-owning reference: SuffixArray is invalidated if memory is freed + array_view input; + + //suffix array and auxiliary data structures + vector sa; //suffix array + vector isa; //inverted suffix array + vector phi; //phi + vector plcp; //permuted longest common prefixes + vector lcp; //longest common prefixes + vector llcp; //longest common prefixes - left + vector rlcp; //longest common 
prefixes - right + vector lengths; //longest previous factors + vector offsets; //longest previous factors +}; + +} diff --git a/waterbox/ares64/ares/nall/terminal.hpp b/waterbox/ares64/ares/nall/terminal.hpp new file mode 100644 index 0000000000..9004163914 --- /dev/null +++ b/waterbox/ares64/ares/nall/terminal.hpp @@ -0,0 +1,65 @@ +#pragma once + +#include + +namespace nall::terminal { + +inline auto escapable() -> bool { + #if defined(PLATFORM_WINDOWS) + //todo: colors are supported by Windows 10+ and with alternate terminals (eg msys) + //disabled for now for compatibility with Windows 7 and 8.1's cmd.exe + return false; + #endif + return true; +} + +namespace color { + +template inline auto black(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[30m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto blue(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[94m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto green(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[92m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto cyan(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[96m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto red(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[91m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto magenta(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[95m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto yellow(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[93m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto white(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[97m", string{forward

    (p)...}, "\e[0m"}; +} + +template inline auto gray(P&&... p) -> string { + if(!escapable()) return string{forward

    (p)...}; + return {"\e[37m", string{forward

    (p)...}, "\e[0m"}; +} + +} + +} diff --git a/waterbox/ares64/ares/nall/thread.hpp b/waterbox/ares64/ares/nall/thread.hpp new file mode 100644 index 0000000000..902028d157 --- /dev/null +++ b/waterbox/ares64/ares/nall/thread.hpp @@ -0,0 +1,167 @@ +#pragma once + +//simple thread library +//primary rationale is that std::thread does not support custom stack sizes +//this is highly critical in certain applications such as threaded web servers +//an added bonus is that it avoids licensing issues on Windows +//win32-pthreads (needed for std::thread) is licensed under the GPL only + +#include +#include +#include + +namespace nall { + using mutex = std::mutex; + using recursive_mutex = std::recursive_mutex; + template using lock_guard = std::lock_guard; + template using atomic = std::atomic; +} + +#if defined(API_POSIX) + +#include + +namespace nall { + +struct thread { + thread(const thread&) = delete; + auto operator=(const thread&) -> thread& = delete; + + thread() = default; + thread(thread&&) = default; + auto operator=(thread&&) -> thread& = default; + + auto join() -> void; + + static auto create(const function& callback, uintptr parameter = 0, u32 stacksize = 0) -> thread; + static auto detach() -> void; + static auto exit() -> void; + + struct context { + function void> callback; + uintptr parameter = 0; + }; + +private: + pthread_t handle = (pthread_t)nullptr; +}; + +inline auto _threadCallback(void* parameter) -> void* { + auto context = (thread::context*)parameter; + context->callback(context->parameter); + delete context; + return nullptr; +} + +inline auto thread::join() -> void { + pthread_join(handle, nullptr); +} + +inline auto thread::create(const function& callback, uintptr parameter, u32 stacksize) -> thread { + thread instance; + + auto context = new thread::context; + context->callback = callback; + context->parameter = parameter; + + pthread_attr_t attr; + pthread_attr_init(&attr); + if(stacksize) pthread_attr_setstacksize(&attr, 
max(PTHREAD_STACK_MIN, stacksize)); + + pthread_create(&instance.handle, &attr, _threadCallback, (void*)context); + return instance; +} + +inline auto thread::detach() -> void { + pthread_detach(pthread_self()); +} + +inline auto thread::exit() -> void { + pthread_exit(nullptr); +} + +} + +#elif defined(API_WINDOWS) + +namespace nall { + +struct thread { + thread(const thread&) = delete; + auto operator=(const thread&) -> thread& = delete; + + thread() = default; + thread(thread&& source) { operator=(move(source)); } + + ~thread() { close(); } + + auto operator=(thread&& source) -> thread& { + close(); + handle = source.handle; + source.handle = 0; + return *this; + } + + auto close() -> void; + auto join() -> void; + + static auto create(const function& callback, uintptr parameter = 0, u32 stacksize = 0) -> thread; + static auto detach() -> void; + static auto exit() -> void; + + struct context { + function void> callback; + uintptr parameter = 0; + }; + +private: + HANDLE handle = 0; +}; + +inline auto WINAPI _threadCallback(void* parameter) -> DWORD { + auto context = (thread::context*)parameter; + context->callback(context->parameter); + delete context; + return 0; +} + +inline auto thread::close() -> void { + if(handle) { + CloseHandle(handle); + handle = 0; + } +} + +inline auto thread::join() -> void { + if(handle) { + //wait until the thread has finished executing ... 
+ WaitForSingleObject(handle, INFINITE); + CloseHandle(handle); + handle = 0; + } +} + +inline auto thread::create(const function& callback, uintptr parameter, u32 stacksize) -> thread { + thread instance; + + auto context = new thread::context; + context->callback = callback; + context->parameter = parameter; + + instance.handle = CreateThread(nullptr, stacksize, _threadCallback, (void*)context, 0, nullptr); + return instance; +} + +inline auto thread::detach() -> void { + //Windows threads do not use this concept: + //~thread() frees resources via CloseHandle() + //thread continues to run even after handle is closed +} + +inline auto thread::exit() -> void { + ExitThread(0); +} + +} + +#endif diff --git a/waterbox/ares64/ares/nall/traits.hpp b/waterbox/ares64/ares/nall/traits.hpp new file mode 100644 index 0000000000..2285103800 --- /dev/null +++ b/waterbox/ares64/ares/nall/traits.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include +#include + +//pull all type traits used by nall from std namespace into nall namespace +//this removes the requirement to prefix type traits with std:: within nall + +namespace nall { + using std::add_const; + using std::conditional; + using std::conditional_t; + using std::decay; + using std::declval; + using std::enable_if; + using std::enable_if_t; + using std::false_type; + using std::is_floating_point; + using std::is_floating_point_v; + using std::forward; + using std::initializer_list; + using std::is_array; + using std::is_array_v; + using std::is_base_of; + using std::is_base_of_v; + using std::is_function; + using std::is_integral; + using std::is_integral_v; + using std::is_pointer; + using std::is_pointer_v; + using std::is_same; + using std::is_same_v; + using std::is_signed; + using std::is_signed_v; + using std::is_unsigned; + using std::is_unsigned_v; + using std::move; + using std::nullptr_t; + using std::remove_extent; + using std::remove_extent_t; + using std::remove_reference; + using std::remove_reference_t; + using 
std::swap; + using std::true_type; +} + +namespace std { + #if defined(__SIZEOF_INT128__) + template<> struct is_integral : true_type {}; + template<> struct is_integral : true_type {}; + template<> struct is_signed : true_type {}; + template<> struct is_unsigned : true_type {}; + #endif +} diff --git a/waterbox/ares64/ares/nall/unique-pointer.hpp b/waterbox/ares64/ares/nall/unique-pointer.hpp new file mode 100644 index 0000000000..73a3e28c63 --- /dev/null +++ b/waterbox/ares64/ares/nall/unique-pointer.hpp @@ -0,0 +1,116 @@ +#pragma once + +namespace nall { + +template +struct unique_pointer { + template static auto create(P&&... p) { + return unique_pointer{new T{forward

    (p)...}}; + } + + using type = T; + T* pointer = nullptr; + function deleter; + + unique_pointer(const unique_pointer&) = delete; + auto operator=(const unique_pointer&) -> unique_pointer& = delete; + + unique_pointer(T* pointer = nullptr, const function& deleter = {}) : pointer(pointer), deleter(deleter) {} + ~unique_pointer() { reset(); } + + auto operator=(T* source) -> unique_pointer& { + reset(); + pointer = source; + return *this; + } + + explicit operator bool() const { return pointer; } + + auto operator->() -> T* { return pointer; } + auto operator->() const -> const T* { return pointer; } + + auto operator*() -> T& { return *pointer; } + auto operator*() const -> const T& { return *pointer; } + + auto operator()() -> T& { return *pointer; } + auto operator()() const -> const T& { return *pointer; } + + auto data() -> T* { return pointer; } + auto data() const -> const T* { return pointer; } + + auto release() -> T* { + auto result = pointer; + pointer = nullptr; + return result; + } + + auto reset() -> void { + if(pointer) { + if(deleter) { + deleter(pointer); + } else { + delete pointer; + } + pointer = nullptr; + } + } + + auto swap(unique_pointer& target) -> void { + std::swap(pointer, target.pointer); + std::swap(deleter, target.deleter); + } +}; + +template +struct unique_pointer { + using type = T; + T* pointer = nullptr; + function deleter; + + unique_pointer(const unique_pointer&) = delete; + auto operator=(const unique_pointer&) -> unique_pointer& = delete; + + unique_pointer(T* pointer = nullptr, const function& deleter = {}) : pointer(pointer), deleter(deleter) {} + ~unique_pointer() { reset(); } + + auto operator=(T* source) -> unique_pointer& { + reset(); + pointer = source; + return *this; + } + + explicit operator bool() const { return pointer; } + + auto operator()() -> T* { return pointer; } + auto operator()() const -> T* { return pointer; } + + auto operator[](u64 offset) -> T& { return pointer[offset]; } + auto operator[](u64 
offset) const -> const T& { return pointer[offset]; } + + auto data() -> T* { return pointer; } + auto data() const -> const T* { return pointer; } + + auto release() -> T* { + auto result = pointer; + pointer = nullptr; + return result; + } + + auto reset() -> void { + if(pointer) { + if(deleter) { + deleter(pointer); + } else { + delete[] pointer; + } + pointer = nullptr; + } + } + + auto swap(unique_pointer& target) -> void { + std::swap(pointer, target.pointer); + std::swap(deleter, target.deleter); + } +}; + +} diff --git a/waterbox/ares64/ares/nall/utility.hpp b/waterbox/ares64/ares/nall/utility.hpp new file mode 100644 index 0000000000..bc1209e280 --- /dev/null +++ b/waterbox/ares64/ares/nall/utility.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace nall { + +using std::tuple; + +template struct base_from_member { + base_from_member(T value) : value(value) {} + T value; +}; + +template struct castable { + operator To&() { return (To&)value; } + operator const To&() const { return (const To&)value; } + operator With&() { return value; } + operator const With&() const { return value; } + auto& operator=(const With& value) { return this->value = value; } + With value; +}; + +template inline auto allocate(u64 size, const T& value) -> T* { + T* array = new T[size]; + for(u64 i = 0; i < size; i++) array[i] = value; + return array; +} + +} diff --git a/waterbox/ares64/ares/nall/variant.hpp b/waterbox/ares64/ares/nall/variant.hpp new file mode 100644 index 0000000000..4e65fa99f2 --- /dev/null +++ b/waterbox/ares64/ares/nall/variant.hpp @@ -0,0 +1,148 @@ +#pragma once + +namespace nall { + +template struct variant_size { + static constexpr u32 size = max(sizeof(T), variant_size::size); +}; + +template struct variant_size { + static constexpr u32 size = sizeof(T); +}; + +template struct variant_index { + static constexpr u32 index = is_same_v ? Index : variant_index::index; +}; + +template struct variant_index { + static constexpr u32 index = is_same_v ? 
Index : 0; +}; + +template struct variant_copy { + constexpr variant_copy(u32 index, u32 assigned, void* target, void* source) { + if(index == assigned) new(target) T(*((T*)source)); + else variant_copy(index + 1, assigned, target, source); + } +}; + +template struct variant_copy { + constexpr variant_copy(u32 index, u32 assigned, void* target, void* source) { + if(index == assigned) new(target) T(*((T*)source)); + } +}; + +template struct variant_move { + constexpr variant_move(u32 index, u32 assigned, void* target, void* source) { + if(index == assigned) new(target) T(move(*((T*)source))); + else variant_move(index + 1, assigned, target, source); + } +}; + +template struct variant_move { + constexpr variant_move(u32 index, u32 assigned, void* target, void* source) { + if(index == assigned) new(target) T(move(*((T*)source))); + } +}; + +template struct variant_destruct { + constexpr variant_destruct(u32 index, u32 assigned, void* data) { + if(index == assigned) ((T*)data)->~T(); + else variant_destruct(index + 1, assigned, data); + } +}; + +template struct variant_destruct { + constexpr variant_destruct(u32 index, u32 assigned, void* data) { + if(index == assigned) ((T*)data)->~T(); + } +}; + +template struct variant_equals { + constexpr auto operator()(u32 index, u32 assigned) const -> bool { + if(index == assigned) return is_same_v; + return variant_equals()(index + 1, assigned); + } +}; + +template struct variant_equals { + constexpr auto operator()(u32 index, u32 assigned) const -> bool { + if(index == assigned) return is_same_v; + return false; + } +}; + +template struct variant final { //final as destructor is not virtual + variant() : assigned(0) {} + variant(const variant& source) { operator=(source); } + variant(variant&& source) { operator=(move(source)); } + template variant(const T& value) { operator=(value); } + template variant(T&& value) { operator=(move(value)); } + ~variant() { reset(); } + + explicit operator bool() const { return assigned; } + 
template explicit constexpr operator T&() { return get(); } + template explicit constexpr operator const T&() const { return get(); } + + template constexpr auto is() const -> bool { + return variant_equals()(1, assigned); + } + + template constexpr auto get() -> T& { + static_assert(variant_index<1, T, P...>::index, "type not in variant"); + struct variant_bad_cast{}; + if(!is()) throw variant_bad_cast{}; + return *((T*)data); + } + + template constexpr auto get() const -> const T& { + static_assert(variant_index<1, T, P...>::index, "type not in variant"); + struct variant_bad_cast{}; + if(!is()) throw variant_bad_cast{}; + return *((const T*)data); + } + + template constexpr auto get(const T& fallback) const -> const T& { + if(!is()) return fallback; + return *((const T*)data); + } + + auto reset() -> void { + if(assigned) variant_destruct(1, assigned, (void*)data); + assigned = 0; + } + + auto& operator=(const variant& source) { + reset(); + if(assigned = source.assigned) variant_copy(1, source.assigned, (void*)data, (void*)source.data); + return *this; + } + + auto& operator=(variant&& source) { + reset(); + if(assigned = source.assigned) variant_move(1, source.assigned, (void*)data, (void*)source.data); + source.assigned = 0; + return *this; + } + + template auto& operator=(const T& value) { + static_assert(variant_index<1, T, P...>::index, "type not in variant"); + reset(); + new((void*)&data) T(value); + assigned = variant_index<1, T, P...>::index; + return *this; + } + + template auto& operator=(T&& value) { + static_assert(variant_index<1, T, P...>::index, "type not in variant"); + reset(); + new((void*)&data) T(move(value)); + assigned = variant_index<1, T, P...>::index; + return *this; + } + +private: + alignas(P...) 
char data[variant_size::size]; + u32 assigned; +}; + +} diff --git a/waterbox/ares64/ares/nall/varint.hpp b/waterbox/ares64/ares/nall/varint.hpp new file mode 100644 index 0000000000..b0487a2a5a --- /dev/null +++ b/waterbox/ares64/ares/nall/varint.hpp @@ -0,0 +1,122 @@ +#pragma once + +#include +#include +#include + +namespace nall { + +struct varint { + virtual auto read() -> u8 = 0; + virtual auto write(u8) -> void = 0; + + auto readvu() -> u64 { + u64 data = 0, shift = 1; + while(true) { + u8 x = read(); + data += (x & 0x7f) * shift; + if(x & 0x80) break; + shift <<= 7; + data += shift; + } + return data; + } + + auto readvs() -> s64 { + u64 data = readvu(); + bool negate = data & 1; + data >>= 1; + if(negate) data = ~data; + return data; + } + + auto writevu(u64 data) -> void { + while(true) { + u8 x = data & 0x7f; + data >>= 7; + if(data == 0) return write(0x80 | x); + write(x); + data--; + } + } + + auto writevs(s64 data) -> void { + bool negate = data < 0; + if(negate) data = ~data; + data = (data << 1) | negate; + writevu(data); + } +}; + +struct VariadicNatural { + VariadicNatural() : mask(~0ull) { assign(0); } + template VariadicNatural(const T& value) : mask(~0ull) { assign(value); } + + operator u64() const { return data; } + template auto& operator=(const T& value) { return assign(value); } + + auto operator++(s32) { auto value = data; assign(data + 1); return value; } + auto operator--(s32) { auto value = data; assign(data - 1); return value; } + + auto& operator++() { return assign(data + 1); } + auto& operator--() { return assign(data - 1); } + + auto& operator &=(const u64 value) { return assign(data & value); } + auto& operator |=(const u64 value) { return assign(data | value); } + auto& operator ^=(const u64 value) { return assign(data ^ value); } + auto& operator<<=(const u64 value) { return assign(data << value); } + auto& operator>>=(const u64 value) { return assign(data >> value); } + auto& operator +=(const u64 value) { return assign(data + 
value); } + auto& operator -=(const u64 value) { return assign(data - value); } + auto& operator *=(const u64 value) { return assign(data * value); } + auto& operator /=(const u64 value) { return assign(data / value); } + auto& operator %=(const u64 value) { return assign(data % value); } + + auto resize(u32 bits) { + assert(bits <= 64); + mask = ~0ull >> (64 - bits); + data &= mask; + } + + auto serialize(serializer& s) { + s(data); + s(mask); + } + + struct Reference { + Reference(VariadicNatural& self, u32 lo, u32 hi) : self(self), Lo(lo), Hi(hi) {} + + operator u64() const { + const u64 RangeBits = Hi - Lo + 1; + const u64 RangeMask = (((1ull << RangeBits) - 1) << Lo) & self.mask; + return (self & RangeMask) >> Lo; + } + + auto& operator=(const u64 value) { + const u64 RangeBits = Hi - Lo + 1; + const u64 RangeMask = (((1ull << RangeBits) - 1) << Lo) & self.mask; + self.data = (self.data & ~RangeMask) | ((value << Lo) & RangeMask); + return *this; + } + + private: + VariadicNatural& self; + const u32 Lo; + const u32 Hi; + }; + + auto bits(u32 lo, u32 hi) -> Reference { return {*this, lo < hi ? lo : hi, hi > lo ? 
hi : lo}; } + auto bit(u32 index) -> Reference { return {*this, index, index}; } + auto byte(u32 index) -> Reference { return {*this, index * 8 + 0, index * 8 + 7}; } + +private: + auto assign(u64 value) -> VariadicNatural& { + data = value & mask; + return *this; + } + + u64 data; + u64 mask; +}; + +} diff --git a/waterbox/ares64/ares/nall/vector.hpp b/waterbox/ares64/ares/nall/vector.hpp new file mode 100644 index 0000000000..3bf97d4ff7 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector.hpp @@ -0,0 +1,156 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nall { + +template +struct vector_base { + using type = vector_base; + + //core.hpp + vector_base() = default; + vector_base(const initializer_list& values); + vector_base(const type& source); + vector_base(type&& source); + ~vector_base(); + + explicit operator bool() const; + operator array_span(); + operator array_view() const; + template auto capacity() const -> u64; + template auto size() const -> u64; + template auto data() -> Cast*; + template auto data() const -> const Cast*; + + //assign.hpp + auto operator=(const type& source) -> type&; + auto operator=(type&& source) -> type&; + + //compare.hpp + auto operator==(const type& source) const -> bool; + auto operator!=(const type& source) const -> bool; + + //memory.hpp + auto reset() -> void; + auto acquire(T* data, u64 size, u64 capacity = 0) -> void; + auto release() -> T*; + + auto reserveLeft(u64 capacity) -> bool; + auto reserveRight(u64 capacity) -> bool; + auto reserve(u64 capacity) -> bool { return reserveRight(capacity); } + + auto reallocateLeft(u64 size) -> bool; + auto reallocateRight(u64 size) -> bool; + auto reallocate(u64 size) -> bool { return reallocateRight(size); } + + auto resizeLeft(u64 size, const T& value = T()) -> bool; + auto resizeRight(u64 size, const T& value = T()) -> bool; + auto resize(u64 size, const T& value = T()) -> 
bool { return resizeRight(size, value); } + + //access.hpp + auto operator[](u64 offset) -> T&; + auto operator[](u64 offset) const -> const T&; + + auto operator()(u64 offset) -> T&; + auto operator()(u64 offset, const T& value) const -> const T&; + + auto left() -> T&; + auto first() -> T& { return left(); } + auto left() const -> const T&; + auto first() const -> const T& { return left(); } + + auto right() -> T&; + auto last() -> T& { return right(); } + auto right() const -> const T&; + auto last() const -> const T& { return right(); } + + //modify.hpp + auto prepend(const T& value) -> void; + auto prepend(T&& value) -> void; + auto prepend(const type& values) -> void; + auto prepend(type&& values) -> void; + + auto append(const T& value) -> void; + auto append(T&& value) -> void; + auto append(const type& values) -> void; + auto append(type&& values) -> void; + + auto insert(u64 offset, const T& value) -> void; + + auto removeLeft(u64 length = 1) -> void; + auto removeFirst(u64 length = 1) -> void { return removeLeft(length); } + auto removeRight(u64 length = 1) -> void; + auto removeLast(u64 length = 1) -> void { return removeRight(length); } + auto remove(u64 offset, u64 length = 1) -> void; + auto removeByIndex(u64 offset) -> bool; + auto removeByValue(const T& value) -> bool; + + auto takeLeft() -> T; + auto takeFirst() -> T { return move(takeLeft()); } + auto takeRight() -> T; + auto takeLast() -> T { return move(takeRight()); } + auto take(u64 offset) -> T; + + //iterator.hpp + auto begin() -> iterator { return {data(), 0}; } + auto end() -> iterator { return {data(), size()}; } + + auto begin() const -> iterator_const { return {data(), 0}; } + auto end() const -> iterator_const { return {data(), size()}; } + + auto rbegin() -> reverse_iterator { return {data(), size() - 1}; } + auto rend() -> reverse_iterator { return {data(), (u64)-1}; } + + auto rbegin() const -> reverse_iterator_const { return {data(), size() - 1}; } + auto rend() const -> 
reverse_iterator_const { return {data(), (u64)-1}; } + + //utility.hpp + auto fill(const T& value = {}) -> void; + auto sort(const function& comparator = [](auto& lhs, auto& rhs) { return lhs < rhs; }) -> void; + auto reverse() -> void; + auto find(const function& comparator) -> maybe; + auto find(const T& value) const -> maybe; + auto findSorted(const T& value) const -> maybe; + auto foreach(const function& callback) -> void; + auto foreach(const function& callback) -> void; + +protected: + T* _pool = nullptr; //pointer to first initialized element in pool + u64 _size = 0; //number of initialized elements in pool + u64 _left = 0; //number of allocated elements free on the left of pool + u64 _right = 0; //number of allocated elements free on the right of pool +}; + +} + +#define vector vector_base +#include +#include +#include +#include +#include +#include +#include +#include +#undef vector + +namespace nall { + template struct vector : vector_base { + using vector_base::vector_base; + }; +} + +#include diff --git a/waterbox/ares64/ares/nall/vector/access.hpp b/waterbox/ares64/ares/nall/vector/access.hpp new file mode 100644 index 0000000000..66a14ab315 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/access.hpp @@ -0,0 +1,47 @@ +#pragma once + +namespace nall { + +template auto vector::operator[](u64 offset) -> T& { + #ifdef DEBUG + struct out_of_bounds {}; + if(offset >= size()) throw out_of_bounds{}; + #endif + return _pool[offset]; +} + +template auto vector::operator[](u64 offset) const -> const T& { + #ifdef DEBUG + struct out_of_bounds {}; + if(offset >= size()) throw out_of_bounds{}; + #endif + return _pool[offset]; +} + +template auto vector::operator()(u64 offset) -> T& { + while(offset >= size()) append(T()); + return _pool[offset]; +} + +template auto vector::operator()(u64 offset, const T& value) const -> const T& { + if(offset >= size()) return value; + return _pool[offset]; +} + +template auto vector::left() -> T& { + return _pool[0]; +} + 
+template auto vector::left() const -> const T& { + return _pool[0]; +} + +template auto vector::right() -> T& { + return _pool[_size - 1]; +} + +template auto vector::right() const -> const T& { + return _pool[_size - 1]; +} + +} diff --git a/waterbox/ares64/ares/nall/vector/assign.hpp b/waterbox/ares64/ares/nall/vector/assign.hpp new file mode 100644 index 0000000000..4d4ef76751 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/assign.hpp @@ -0,0 +1,28 @@ +#pragma once + +namespace nall { + +template auto vector::operator=(const vector& source) -> vector& { + if(this == &source) return *this; + _pool = memory::allocate(source._size); + _size = source._size; + _left = 0; + _right = 0; + for(u64 n : range(_size)) new(_pool + n) T(source._pool[n]); + return *this; +} + +template auto vector::operator=(vector&& source) -> vector& { + if(this == &source) return *this; + _pool = source._pool; + _size = source._size; + _left = source._left; + _right = source._right; + source._pool = nullptr; + source._size = 0; + source._left = 0; + source._right = 0; + return *this; +} + +} diff --git a/waterbox/ares64/ares/nall/vector/compare.hpp b/waterbox/ares64/ares/nall/vector/compare.hpp new file mode 100644 index 0000000000..c73c44100d --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/compare.hpp @@ -0,0 +1,18 @@ +#pragma once + +namespace nall { + +template auto vector::operator==(const vector& source) const -> bool { + if(this == &source) return true; + if(size() != source.size()) return false; + for(u64 n = 0; n < size(); n++) { + if(operator[](n) != source[n]) return false; + } + return true; +} + +template auto vector::operator!=(const vector& source) const -> bool { + return !operator==(source); +} + +} diff --git a/waterbox/ares64/ares/nall/vector/core.hpp b/waterbox/ares64/ares/nall/vector/core.hpp new file mode 100644 index 0000000000..b2be4a8226 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/core.hpp @@ -0,0 +1,50 @@ +#pragma once + +namespace nall { + 
+template vector::vector(const initializer_list& values) { + reserveRight(values.size()); + for(auto& value : values) append(value); +} + +template vector::vector(const vector& source) { + operator=(source); +} + +template vector::vector(vector&& source) { + operator=(move(source)); +} + +template vector::~vector() { + reset(); +} + +template vector::operator bool() const { + return _size; +} + +template vector::operator array_span() { + return {data(), size()}; +} + +template vector::operator array_view() const { + return {data(), size()}; +} + +template template auto vector::capacity() const -> u64 { + return (_left + _size + _right) * sizeof(T) / sizeof(Cast); +} + +template template auto vector::size() const -> u64 { + return _size * sizeof(T) / sizeof(Cast); +} + +template template auto vector::data() -> Cast* { + return (Cast*)_pool; +} + +template template auto vector::data() const -> const Cast* { + return (const Cast*)_pool; +} + +} diff --git a/waterbox/ares64/ares/nall/vector/iterator.hpp b/waterbox/ares64/ares/nall/vector/iterator.hpp new file mode 100644 index 0000000000..ea722d5640 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/iterator.hpp @@ -0,0 +1,57 @@ +#pragma once + +namespace nall { + +template +struct vector_iterator { + vector_iterator(vector& self, u64 offset) : self(self), offset(offset) {} + auto operator*() -> T& { return self.operator[](offset); } + auto operator->() -> T* { return self.operator[](offset); } + auto operator!=(const vector_iterator& source) const -> bool { return offset != source.offset; } + auto operator++() -> vector_iterator& { return offset++, *this; } + +private: + vector& self; + u64 offset; +}; + +template +struct vector_iterator_const { + vector_iterator_const(const vector& self, u64 offset) : self(self), offset(offset) {} + auto operator*() -> const T& { return self.operator[](offset); } + auto operator->() -> T* { return self.operator[](offset); } + auto operator!=(const vector_iterator_const& source) 
const -> bool { return offset != source.offset; } + auto operator++() -> vector_iterator_const& { return offset++, *this; } + +private: + const vector& self; + u64 offset; +}; + +template +struct vector_reverse_iterator { + vector_reverse_iterator(vector& self, u64 offset) : self(self), offset(offset) {} + auto operator*() -> T& { return self.operator[](offset); } + auto operator->() -> T* { return self.operator[](offset); } + auto operator!=(const vector_reverse_iterator& source) const -> bool { return offset != source.offset; } + auto operator++() -> vector_reverse_iterator& { return offset--, *this; } + +private: + vector& self; + u64 offset; +}; + +template +struct vector_reverse_iterator_const { + vector_reverse_iterator_const(const vector& self, u64 offset) : self(self), offset(offset) {} + auto operator*() -> const T& { return self.operator[](offset); } + auto operator->() -> T* { return self.operator[](offset); } + auto operator!=(const vector_reverse_iterator_const& source) const -> bool { return offset != source.offset; } + auto operator++() -> vector_reverse_iterator_const& { return offset--, *this; } + +private: + const vector& self; + u64 offset; +}; + +} diff --git a/waterbox/ares64/ares/nall/vector/memory.hpp b/waterbox/ares64/ares/nall/vector/memory.hpp new file mode 100644 index 0000000000..9274452076 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/memory.hpp @@ -0,0 +1,147 @@ +#pragma once + +namespace nall { + +//nall::vector acts internally as a deque (double-ended queue) +//it does this because it's essentially free to do so, only costing an extra integer in sizeof(vector) + +template auto vector::reset() -> void { + if(!_pool) return; + + for(u64 n : range(_size)) _pool[n].~T(); + memory::free(_pool - _left); + + _pool = nullptr; + _size = 0; + _left = 0; + _right = 0; +} + +//acquire ownership of allocated memory + +template auto vector::acquire(T* data, u64 size, u64 capacity) -> void { + reset(); + _pool = data; + _size = size; + _left 
= 0; + _right = capacity ? capacity : size; +} + +//release ownership of allocated memory + +template auto vector::release() -> T* { + auto pool = _pool; + _pool = nullptr; + _size = 0; + _left = 0; + _right = 0; + return pool; +} + +//reserve allocates memory for objects, but does not initialize them +//when the vector desired size is known, this can be used to avoid growing the capacity dynamically +//reserve will not actually shrink the capacity, only expand it +//shrinking the capacity would destroy objects, and break amortized growth with reallocate and resize + +template auto vector::reserveLeft(u64 capacity) -> bool { + if(_size + _left >= capacity) return false; + + u64 left = bit::round(capacity); + auto pool = memory::allocate(left + _right) + (left - _size); + for(u64 n : range(_size)) new(pool + n) T(move(_pool[n])); + memory::free(_pool - _left); + + _pool = pool; + _left = left - _size; + + return true; +} + +template auto vector::reserveRight(u64 capacity) -> bool { + if(_size + _right >= capacity) return false; + + u64 right = bit::round(capacity); + auto pool = memory::allocate(_left + right) + _left; + for(u64 n : range(_size)) new(pool + n) T(move(_pool[n])); + memory::free(_pool - _left); + + _pool = pool; + _right = right - _size; + + return true; +} + +//reallocation is meant for POD types, to avoid the overhead of initialization +//do not use with non-POD types, or they will not be properly constructed or destructed + +template auto vector::reallocateLeft(u64 size) -> bool { + if(size < _size) { //shrink + _pool += _size - size; + _left += _size - size; + _size = size; + return true; + } + if(size > _size) { //grow + reserveLeft(size); + _pool -= size - _size; + _left -= size - _size; + _size = size; + return true; + } + return false; +} + +template auto vector::reallocateRight(u64 size) -> bool { + if(size < _size) { //shrink + _right += _size - size; + _size = size; + return true; + } + if(size > _size) { //grow + reserveRight(size); + 
_right -= size - _size; + _size = size; + return true; + } + return false; +} + +//resize is meant for non-POD types, and will properly construct objects + +template auto vector::resizeLeft(u64 size, const T& value) -> bool { + if(size < _size) { //shrink + for(u64 n : range(_size - size)) _pool[n].~T(); + _pool += _size - size; + _left += _size - size; + _size = size; + return true; + } + if(size > _size) { //grow + reserveLeft(size); + _pool -= size - _size; + for(u64 n : nall::reverse(range(size - _size))) new(_pool + n) T(value); + _left -= size - _size; + _size = size; + return true; + } + return false; +} + +template auto vector::resizeRight(u64 size, const T& value) -> bool { + if(size < _size) { //shrink + for(u64 n : range(size, _size)) _pool[n].~T(); + _right += _size - size; + _size = size; + return true; + } + if(size > _size) { //grow + reserveRight(size); + for(u64 n : range(_size, size)) new(_pool + n) T(value); + _right -= size - _size; + _size = size; + return true; + } + return false; +} + +} diff --git a/waterbox/ares64/ares/nall/vector/modify.hpp b/waterbox/ares64/ares/nall/vector/modify.hpp new file mode 100644 index 0000000000..8838405050 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/modify.hpp @@ -0,0 +1,137 @@ +#pragma once + +namespace nall { + +template auto vector::prepend(const T& value) -> void { + reserveLeft(size() + 1); + new(--_pool) T(value); + _left--; + _size++; +} + +template auto vector::prepend(T&& value) -> void { + reserveLeft(size() + 1); + new(--_pool) T(move(value)); + _left--; + _size++; +} + +template auto vector::prepend(const vector& values) -> void { + reserveLeft(size() + values.size()); + _pool -= values.size(); + for(u64 n : range(values)) new(_pool + n) T(values[n]); + _left -= values.size(); + _size += values.size(); +} + +template auto vector::prepend(vector&& values) -> void { + reserveLeft(size() + values.size()); + _pool -= values.size(); + for(u64 n : range(values)) new(_pool + n) T(move(values[n])); 
+ _left -= values.size(); + _size += values.size(); +} + +// + +template auto vector::append(const T& value) -> void { + reserveRight(size() + 1); + new(_pool + _size) T(value); + _right--; + _size++; +} + +template auto vector::append(T&& value) -> void { + reserveRight(size() + 1); + new(_pool + _size) T(move(value)); + _right--; + _size++; +} + +template auto vector::append(const vector& values) -> void { + reserveRight(size() + values.size()); + for(u64 n : range(values.size())) new(_pool + _size + n) T(values[n]); + _right -= values.size(); + _size += values.size(); +} + +template auto vector::append(vector&& values) -> void { + reserveRight(size() + values.size()); + for(u64 n : range(values.size())) new(_pool + _size + n) T(move(values[n])); + _right -= values.size(); + _size += values.size(); +} + +// + +template auto vector::insert(u64 offset, const T& value) -> void { + if(offset == 0) return prepend(value); + if(offset == size() - 1) return append(value); + reserveRight(size() + 1); + _size++; + for(s64 n = size() - 1; n > offset; n--) { + _pool[n] = move(_pool[n - 1]); + } + new(_pool + offset) T(value); +} + +// + +template auto vector::removeLeft(u64 length) -> void { + if(length > size()) length = size(); + resizeLeft(size() - length); +} + +template auto vector::removeRight(u64 length) -> void { + if(length > size()) length = size(); + resizeRight(size() - length); +} + +template auto vector::remove(u64 offset, u64 length) -> void { + if(offset == 0) return removeLeft(length); + if(offset == size() - 1) return removeRight(length); + + for(u64 n = offset; n < size(); n++) { + if(n + length < size()) { + _pool[n] = move(_pool[n + length]); + } else { + _pool[n].~T(); + } + } + _size -= length; +} + +template auto vector::removeByIndex(u64 index) -> bool { + if(index < size()) return remove(index), true; + return false; +} + +template auto vector::removeByValue(const T& value) -> bool { + if(auto index = find(value)) return remove(*index), true; + 
return false; +} + +// + +template auto vector::takeLeft() -> T { + T value = move(_pool[0]); + removeLeft(); + return value; +} + +template auto vector::takeRight() -> T { + T value = move(_pool[size() - 1]); + removeRight(); + return value; +} + +template auto vector::take(u64 offset) -> T { + if(offset == 0) return takeLeft(); + if(offset == size() - 1) return takeRight(); + + T value = move(_pool[offset]); + remove(offset); + return value; +} + +} diff --git a/waterbox/ares64/ares/nall/vector/specialization/u8.hpp b/waterbox/ares64/ares/nall/vector/specialization/u8.hpp new file mode 100644 index 0000000000..dc3e5f4d18 --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/specialization/u8.hpp @@ -0,0 +1,38 @@ +#pragma once + +namespace nall { + +template<> struct vector : vector_base { + using type = vector; + using vector_base::vector_base; + + template auto appendl(U value, u32 size) -> void { + for(u32 byte : range(size)) append(u8(value >> byte * 8)); + } + + template auto appendm(U value, u32 size) -> void { + for(u32 byte : nall::reverse(range(size))) append(u8(value >> byte * 8)); + } + + //note: string_view is not declared here yet ... 
+ auto appends(array_view memory) -> void { + for(u8 byte : memory) append(byte); + } + + template auto readl(s32 offset, u32 size) -> U { + if(offset < 0) offset = this->size() - abs(offset); + U value = 0; + for(u32 byte : range(size)) value |= (U)operator[](offset + byte) << byte * 8; + return value; + } + + auto view(u32 offset, u32 length) -> array_view { + #ifdef DEBUG + struct out_of_bounds {}; + if(offset + length >= size()) throw out_of_bounds{}; + #endif + return {data() + offset, length}; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/vector/utility.hpp b/waterbox/ares64/ares/nall/vector/utility.hpp new file mode 100644 index 0000000000..4ebad0884d --- /dev/null +++ b/waterbox/ares64/ares/nall/vector/utility.hpp @@ -0,0 +1,47 @@ +#pragma once + +namespace nall { + +template auto vector::fill(const T& value) -> void { + for(u64 n : range(size())) _pool[n] = value; +} + +template auto vector::sort(const function& comparator) -> void { + nall::sort(_pool, _size, comparator); +} + +template auto vector::reverse() -> void { + vector reversed; + for(u64 n : range(size())) reversed.prepend(_pool[n]); + operator=(move(reversed)); +} + +template auto vector::find(const function& comparator) -> maybe { + for(u64 n : range(size())) if(comparator(_pool[n])) return n; + return nothing; +} + +template auto vector::find(const T& value) const -> maybe { + for(u64 n : range(size())) if(_pool[n] == value) return n; + return nothing; +} + +template auto vector::findSorted(const T& value) const -> maybe { + s64 l = 0, r = size() - 1; + while(l <= r) { + s64 m = l + (r - l >> 1); + if(value == _pool[m]) return m; + value < _pool[m] ? 
r = m - 1 : l = m + 1; + } + return nothing; +} + +template auto vector::foreach(const function& callback) -> void { + for(u64 n : range(size())) callback(_pool[n]); +} + +template auto vector::foreach(const function& callback) -> void { + for(u64 n : range(size())) callback(n, _pool[n]); +} + +} diff --git a/waterbox/ares64/ares/nall/vfs.hpp b/waterbox/ares64/ares/nall/vfs.hpp new file mode 100644 index 0000000000..80396785dd --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include diff --git a/waterbox/ares64/ares/nall/vfs/attribute.hpp b/waterbox/ares64/ares/nall/vfs/attribute.hpp new file mode 100644 index 0000000000..8578faaec9 --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/attribute.hpp @@ -0,0 +1,12 @@ +namespace nall::vfs { + +struct attribute { + attribute(const string& name, const any& value = {}) : name(name), value(value) {} + auto operator==(const attribute& source) const -> bool { return name == source.name; } + auto operator< (const attribute& source) const -> bool { return name < source.name; } + + string name; + any value; +}; + +} diff --git a/waterbox/ares64/ares/nall/vfs/cdrom.hpp b/waterbox/ares64/ares/nall/vfs/cdrom.hpp new file mode 100644 index 0000000000..7b2c14894b --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/cdrom.hpp @@ -0,0 +1,177 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace nall::vfs { + +struct cdrom : file { + static auto open(const string& cueLocation) -> shared_pointer { + auto instance = shared_pointer{new cdrom}; + if(instance->load(cueLocation)) return instance; + return {}; + } + + auto writable() const -> bool override { return false; } + auto data() const -> const u8* override { return _image.data(); } + auto data() -> u8* override { return _image.data(); } + auto size() const -> u64 override { return _image.size(); } + auto offset() const -> u64 override { return _offset; } + + auto resize(u64 size) -> bool override { + 
//unsupported + return false; + } + + auto seek(s64 offset, index mode) -> void override { + if(mode == index::absolute) _offset = (u64)offset; + if(mode == index::relative) _offset += (s64)offset; + } + + auto read() -> u8 override { + if(_offset >= _image.size()) return 0x00; + return _image[_offset++]; + } + + auto write(u8 data) -> void override { + //CD-ROMs are read-only; but allow writing anyway if needed, since the image is in memory + if(_offset >= _image.size()) return; + _image[_offset++] = data; + } + +private: + auto load(const string& cueLocation) -> bool { + Decode::CUE cuesheet; + if(!cuesheet.load(cueLocation)) return false; + + CD::Session session; + session.leadIn.lba = -LeadInSectors; + session.leadIn.end = -1; + s32 lbaFileBase = 0; + + // add 2 sec pregap to 1st track + if(!cuesheet.files[0].tracks[0].pregap) + cuesheet.files[0].tracks[0].pregap = Track1Pregap; + else + cuesheet.files[0].tracks[0].pregap = Track1Pregap + cuesheet.files[0].tracks[0].pregap(); + + if(cuesheet.files[0].tracks[0].indices[0].number == 1) { + session.tracks[1].indices[0].lba = 0; + session.tracks[1].indices[0].end = + cuesheet.files[0].tracks[0].pregap() + cuesheet.files[0].tracks[0].indices[0].lba - 1; + } + + s32 lbaIndex = 0; + for(auto& file : cuesheet.files) { + for(auto& track : file.tracks) { + session.tracks[track.number].control = track.type == "audio" ? 
0b0000 : 0b0100; + if(track.pregap) lbaFileBase += track.pregap(); + for(auto& index : track.indices) { + if(index.lba >= 0) { + session.tracks[track.number].indices[index.number].lba = lbaFileBase + index.lba; + session.tracks[track.number].indices[index.number].end = lbaFileBase + index.end; + if(index.number == 0 && track.pregap) { + session.tracks[track.number].indices[index.number].lba -= track.pregap(); + session.tracks[track.number].indices[index.number].end -= track.pregap(); + } + } else { + // insert gap + session.tracks[track.number].indices[index.number].lba = lbaIndex; + if(index.number == 0) + session.tracks[track.number].indices[index.number].end = lbaIndex + track.pregap() - 1; + else + session.tracks[track.number].indices[index.number].end = lbaIndex + track.postgap() - 1; + } + lbaIndex = session.tracks[track.number].indices[index.number].end + 1; + } + if(track.postgap) lbaFileBase += track.postgap(); + } + lbaFileBase = lbaIndex; + } + session.leadOut.lba = lbaFileBase; + session.leadOut.end = lbaFileBase + LeadOutSectors - 1; + + // determine track and index ranges + session.firstTrack = 0xff; + for(u32 track : range(100)) { + if(!session.tracks[track]) continue; + if(session.firstTrack > 99) session.firstTrack = track; + // find first index + for(u32 indexID : range(100)) { + auto& index = session.tracks[track].indices[indexID]; + if(index) { session.tracks[track].firstIndex = indexID; break; } + } + // find last index + for(u32 indexID : reverse(range(100))) { + auto& index = session.tracks[track].indices[indexID]; + if(index) { session.tracks[track].lastIndex = indexID; break; } + } + session.lastTrack = track; + } + + _image.resize(2448 * (LeadInSectors + lbaFileBase + LeadOutSectors)); + + lbaFileBase = 0; + for(auto& file : cuesheet.files) { + auto location = string{Location::path(cueLocation), file.name}; + auto filedata = nall::file::open(location, nall::file::mode::read); + if(file.type == "wave") filedata.seek(44); //skip RIFF header 
+ for(auto& track : file.tracks) { + if(track.pregap) lbaFileBase += track.pregap(); + for(auto& index : track.indices) { + if(index.lba < 0) continue; // ignore gaps (not in file) + for(s32 sector : range(index.sectorCount())) { + auto target = _image.data() + 2448ull * (LeadInSectors + lbaFileBase + index.lba + sector); + auto length = track.sectorSize(); + if(length == 2048) { + //ISO: generate header + parity data + memory::assign(target + 0, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff); //sync + memory::assign(target + 6, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00); //sync + auto [minute, second, frame] = CD::MSF(lbaFileBase + index.lba + sector); + target[12] = CD::BCD::encode(minute); + target[13] = CD::BCD::encode(second); + target[14] = CD::BCD::encode(frame); + target[15] = 0x01; //mode + filedata.read({target + 16, length}); + CD::RSPC::encodeMode1({target, 2352}); + } + if(length == 2352) { + //BIN + WAV: direct copy + filedata.read({target, length}); + } + } + } + if(track.postgap) lbaFileBase += track.postgap(); + } + lbaFileBase += file.tracks.last().indices.last().end + 1; + } + + auto subchannel = session.encode(LeadInSectors + session.leadOut.end + 1); + if(auto overlay = nall::file::read({Location::notsuffix(cueLocation), ".sub"})) { + auto target = subchannel.data() + 96 * (LeadInSectors + Track1Pregap); + auto length = (s64)subchannel.size() - 96 * (LeadInSectors + Track1Pregap); + memory::copy(target, length, overlay.data(), overlay.size()); + } + + for(u64 sector : range(size() / 2448)) { + auto source = subchannel.data() + sector * 96; + auto target = _image.data() + sector * 2448 + 2352; + memory::copy(target, source, 96); + } + + return true; + } + + vector _image; + u64 _offset = 0; + + static constexpr s32 LeadInSectors = 7500; + static constexpr s32 Track1Pregap = 150; + static constexpr s32 LeadOutSectors = 6750; +}; + +} diff --git a/waterbox/ares64/ares/nall/vfs/directory.hpp b/waterbox/ares64/ares/nall/vfs/directory.hpp new file mode 100644 index 
0000000000..4d62f3428c --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/directory.hpp @@ -0,0 +1,114 @@ +namespace nall::vfs { + +struct directory : node { + auto count() const -> u32 { + return _nodes.size(); + } + + auto find(shared_pointer item) const -> bool { + return (bool)_nodes.find(item); + } + + auto find(const string& name) const -> bool { + for(auto& node : _nodes) { + if(node->name() == name) return true; + } + return false; + } + + template + auto read(const string& name) -> shared_pointer { + for(auto& node : _nodes) { + if(node->name() == name) { + if(auto fp = node.cast()) { + if(!fp->readable()) return {}; + fp->seek(0); + return fp; + } + } + } + return {}; + } + + template + auto write(const string& name) -> shared_pointer { + for(auto& node : _nodes) { + if(node->name() == name) { + if(auto fp = node.cast()) { + if(!fp->writable()) return {}; + fp->seek(0); + return fp; + } + } + } + return {}; + } + + auto append(const string& name, u64 size) -> bool { + if(find(name)) return false; + auto item = memory::create(size); + item->setName(name); + return _nodes.append(item), true; + } + + auto append(const string& name, shared_pointer item) -> bool { + if(!item) return false; + if(_nodes.find(item)) return false; + item->setName(name); + return _nodes.append(item), true; + } + + auto append(const string& name, array_view view) -> bool { + if(find(name)) return false; + auto item = memory::open(view); + item->setName(name); + return _nodes.append(item), true; + } + + auto append(shared_pointer item) -> bool { + if(_nodes.find(item)) return false; + return _nodes.append(item), true; + } + + auto remove(shared_pointer item) -> bool { + if(!_nodes.find(item)) return false; + return _nodes.removeByValue(item), true; + } + + auto files() const -> vector> { + vector> files; + for(auto& node : _nodes) { + if(!node->isFile()) continue; + files.append(node); + } + return files; + } + + auto directories() const -> vector> { + vector> directories; + for(auto& 
node : _nodes) { + if(!node->isDirectory()) continue; + directories.append(node); + } + return directories; + } + + auto begin() { return _nodes.begin(); } + auto end() { return _nodes.end(); } + + auto begin() const { return _nodes.begin(); } + auto end() const { return _nodes.end(); } + +protected: + vector> _nodes; +}; + +inline auto node::isFile() const -> bool { + return dynamic_cast(this); +} + +inline auto node::isDirectory() const -> bool { + return dynamic_cast(this); +} + +} diff --git a/waterbox/ares64/ares/nall/vfs/disk.hpp b/waterbox/ares64/ares/nall/vfs/disk.hpp new file mode 100644 index 0000000000..eb80c305a8 --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/disk.hpp @@ -0,0 +1,59 @@ +#pragma once + +#include + +namespace nall::vfs { + +struct disk : file { + static auto open(string location_, mode mode_) -> shared_pointer { + auto instance = shared_pointer{new disk}; + if(!instance->_open(location_, mode_)) return {}; + return instance; + } + + auto writable() const -> bool override { return _writable; } + auto data() const -> const u8* override { return _data; } + auto data() -> u8* override { return _data; } + auto size() const -> u64 override { return _size; } + auto offset() const -> u64 override { return _offset; } + + auto resize(u64 size) -> bool override { + return false; //todo + } + + auto seek(s64 offset, index mode = index::absolute) -> void override { + if(mode == index::absolute) _offset = (u64)offset; + if(mode == index::relative) _offset += (s64)offset; + } + + auto read() -> u8 override { + if(_offset >= _size) return 0x00; + return _data[_offset++]; + } + + auto write(u8 data) -> void override { + if(_offset >= _size) return; + _data[_offset++] = data; + } + +private: + disk() = default; + disk(const disk&) = delete; + auto operator=(const disk&) -> disk& = delete; + + auto _open(string location_, mode mode_) -> bool { + if(!_fp.open(location_, (u32)mode_)) return false; + _data = _fp.data(); + _size = _fp.size(); + _writable = 
mode_ == mode::write; + return true; + } + + file_map _fp; + u8* _data = nullptr; + u64 _size = 0; + u64 _offset = 0; + bool _writable = false; +}; + +} diff --git a/waterbox/ares64/ares/nall/vfs/file.hpp b/waterbox/ares64/ares/nall/vfs/file.hpp new file mode 100644 index 0000000000..6d59bde24b --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/file.hpp @@ -0,0 +1,63 @@ +namespace nall::vfs { + +struct file : node { + virtual auto readable() const -> bool { return true; } + virtual auto writable() const -> bool { return false; } + + virtual auto data() const -> const u8* = 0; + virtual auto data() -> u8* = 0; + virtual auto size() const -> u64 = 0; + virtual auto offset() const -> u64 = 0; + virtual auto resize(u64 size) -> bool = 0; + + virtual auto seek(s64 offset, index = index::absolute) -> void = 0; + virtual auto read() -> u8 = 0; + virtual auto write(u8 data) -> void = 0; + virtual auto flush() -> void {} + + auto end() const -> bool { + return offset() >= size(); + } + + auto read(array_span span) -> void { + while(span) *span++ = read(); + } + + auto readl(u32 bytes) -> u64 { + u64 data = 0; + for(auto n : range(bytes)) data |= (u64)read() << n * 8; + return data; + } + + auto readm(u32 bytes) -> u64 { + u64 data = 0; + for(auto n : range(bytes)) data = data << 8 | read(); + return data; + } + + auto reads() -> string { + seek(0); + string s; + s.resize(size()); + read(s); + return s; + } + + auto write(array_view view) -> void { + while(view) write(*view++); + } + + auto writel(u64 data, u32 bytes) -> void { + for(auto n : range(bytes)) write(data), data >>= 8; + } + + auto writem(u64 data, u32 bytes) -> void { + for(auto n : reverse(range(bytes))) write(data >> n * 8); + } + + auto writes(const string& s) -> void { + write(s); + } +}; + +} diff --git a/waterbox/ares64/ares/nall/vfs/memory.hpp b/waterbox/ares64/ares/nall/vfs/memory.hpp new file mode 100644 index 0000000000..3bcdd52fe4 --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/memory.hpp @@ -0,0 
+1,71 @@ +#pragma once + +#include +#include + +namespace nall::vfs { + +struct memory : file { + ~memory() { nall::memory::free(_data); } + + static auto create(u64 size = 0) -> shared_pointer { + auto instance = shared_pointer{new memory}; + instance->_create(size); + return instance; + } + + static auto open(array_view view) -> shared_pointer { + auto instance = shared_pointer{new memory}; + instance->_open(view.data(), view.size()); + return instance; + } + + auto writable() const -> bool override { return true; } + auto data() const -> const u8* override { return _data; } + auto data() -> u8* override { return _data; } + auto size() const -> u64 override { return _size; } + auto offset() const -> u64 override { return _offset; } + + auto resize(u64 size) -> bool override { + _data = nall::memory::resize(_data, size); + _size = size; + return true; + } + + auto seek(s64 offset, index mode = index::absolute) -> void override { + if(mode == index::absolute) _offset = (u64)offset; + if(mode == index::relative) _offset += (s64)offset; + } + + auto read() -> u8 override { + if(_offset >= _size) return 0x00; + return _data[_offset++]; + } + + auto write(u8 data) -> void override { + if(_offset >= _size) return; + _data[_offset++] = data; + } + +private: + memory() = default; + memory(const file&) = delete; + auto operator=(const memory&) -> memory& = delete; + + auto _create(u64 size) -> void { + _size = size; + _data = nall::memory::allocate(size, 0x00); + } + + auto _open(const u8* data, u64 size) -> void { + _size = size; + _data = nall::memory::allocate(size); + nall::memory::copy(_data, data, size); + } + + u8* _data = nullptr; + u64 _size = 0; + u64 _offset = 0; +}; + +} diff --git a/waterbox/ares64/ares/nall/vfs/node.hpp b/waterbox/ares64/ares/nall/vfs/node.hpp new file mode 100644 index 0000000000..912a8676fb --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/node.hpp @@ -0,0 +1,52 @@ +namespace nall::vfs { + +enum class mode : u32 { read, write }; +static 
constexpr auto read = mode::read; +static constexpr auto write = mode::write; + +enum class index : u32 { absolute, relative }; +static constexpr auto absolute = index::absolute; +static constexpr auto relative = index::relative; + +struct node { + virtual ~node() = default; + + auto isFile() const -> bool; + auto isDirectory() const -> bool; + + auto name() const -> string { return _name; } + auto setName(const string& name) -> void { _name = name; } + + template + auto attribute(const string& name) const -> T { + if(auto attribute = _attributes.find(name)) { + if(attribute->value.is()) return attribute->value.get(); + } + return {}; + } + + template + auto hasAttribute(const string& name) const -> bool { + if(auto attribute = _attributes.find(name)) { + if(attribute->value.is()) return true; + } + return false; + } + + template + auto setAttribute(const string& name, const U& value = {}) -> void { + if constexpr(is_same_v && !is_same_v) return setAttribute(name, string{value}); + if(auto attribute = _attributes.find(name)) { + if((const T&)value) attribute->value = (const T&)value; + else _attributes.remove(*attribute); + } else { + if((const T&)value) _attributes.insert({name, (const T&)value}); + } + } + +protected: + string _name; + set _attributes; +}; + +} diff --git a/waterbox/ares64/ares/nall/vfs/vfs.hpp b/waterbox/ares64/ares/nall/vfs/vfs.hpp new file mode 100644 index 0000000000..d89c00fed8 --- /dev/null +++ b/waterbox/ares64/ares/nall/vfs/vfs.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/waterbox/ares64/ares/nall/view.hpp b/waterbox/ares64/ares/nall/view.hpp new file mode 100644 index 0000000000..ce874ce49b --- /dev/null +++ b/waterbox/ares64/ares/nall/view.hpp @@ -0,0 +1,7 @@ +#pragma once + +namespace nall { + +template struct view; + +} diff --git a/waterbox/ares64/ares/nall/windows/detour.hpp 
b/waterbox/ares64/ares/nall/windows/detour.hpp new file mode 100644 index 0000000000..b80b8d9dce --- /dev/null +++ b/waterbox/ares64/ares/nall/windows/detour.hpp @@ -0,0 +1,189 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace nall { + +#define Copy 0 +#define RelNear 1 + +struct detour { + static auto insert(const string& moduleName, const string& functionName, void*& source, void* target) -> bool; + static auto remove(const string& moduleName, const string& functionName, void*& source) -> bool; + +protected: + static auto length(const u8* function) -> u32; + static auto mirror(u8* target, const u8* source) -> u32; + + struct opcode { + u16 prefix; + u32 length; + u32 mode; + u16 modify; + }; + static opcode opcodes[]; +}; + +//TODO: +//* fs:, gs: should force another opcode copy +//* conditional branches within +5-byte range should fail +detour::opcode detour::opcodes[] = { + {0x50, 1}, //push eax + {0x51, 1}, //push ecx + {0x52, 1}, //push edx + {0x53, 1}, //push ebx + {0x54, 1}, //push esp + {0x55, 1}, //push ebp + {0x56, 1}, //push esi + {0x57, 1}, //push edi + {0x58, 1}, //pop eax + {0x59, 1}, //pop ecx + {0x5a, 1}, //pop edx + {0x5b, 1}, //pop ebx + {0x5c, 1}, //pop esp + {0x5d, 1}, //pop ebp + {0x5e, 1}, //pop esi + {0x5f, 1}, //pop edi + {0x64, 1}, //fs: + {0x65, 1}, //gs: + {0x68, 5}, //push dword + {0x6a, 2}, //push byte + {0x74, 2, RelNear, 0x0f84}, //je near -> je far + {0x75, 2, RelNear, 0x0f85}, //jne near -> jne far + {0x89, 2}, //mov reg,reg + {0x8b, 2}, //mov reg,reg + {0x90, 1}, //nop + {0xa1, 5}, //mov eax,[dword] + {0xeb, 2, RelNear, 0xe9}, //jmp near -> jmp far +}; + +inline auto detour::insert(const string& moduleName, const string& functionName, void*& source, void* target) -> bool { + HMODULE module = GetModuleHandleW(utf16_t(moduleName)); + if(!module) return false; + + u8* sourceData = (u8*)GetProcAddress(module, functionName); + if(!sourceData) return false; + + u32 sourceLength = 
detour::length(sourceData); + if(sourceLength < 5) { + //unable to clone enough bytes to insert hook + #if 1 + string output = {"detour::insert(", moduleName, "::", functionName, ") failed: "}; + for(u32 n = 0; n < 16; n++) output.append(hex<2>(sourceData[n]), " "); + output.trimRight(" ", 1L); + MessageBoxA(0, output, "nall::detour", MB_OK); + #endif + return false; + } + + auto mirrorData = new u8[512](); + detour::mirror(mirrorData, sourceData); + + DWORD privileges; + VirtualProtect((void*)mirrorData, 512, PAGE_EXECUTE_READWRITE, &privileges); + VirtualProtect((void*)sourceData, 256, PAGE_EXECUTE_READWRITE, &privileges); + u64 address = (u64)target - ((u64)sourceData + 5); + sourceData[0] = 0xe9; //jmp target + sourceData[1] = address >> 0; + sourceData[2] = address >> 8; + sourceData[3] = address >> 16; + sourceData[4] = address >> 24; + VirtualProtect((void*)sourceData, 256, privileges, &privileges); + + source = (void*)mirrorData; + return true; +} + +inline auto detour::remove(const string& moduleName, const string& functionName, void*& source) -> bool { + HMODULE module = GetModuleHandleW(utf16_t(moduleName)); + if(!module) return false; + + auto sourceData = (u8*)GetProcAddress(module, functionName); + if(!sourceData) return false; + + auto mirrorData = (u8*)source; + if(mirrorData == sourceData) return false; //hook was never installed + + u32 length = detour::length(256 + mirrorData); + if(length < 5) return false; + + DWORD privileges; + VirtualProtect((void*)sourceData, 256, PAGE_EXECUTE_READWRITE, &privileges); + for(u32 n = 0; n < length; n++) sourceData[n] = mirrorData[256 + n]; + VirtualProtect((void*)sourceData, 256, privileges, &privileges); + + source = (void*)sourceData; + delete[] mirrorData; + return true; +} + +inline auto detour::length(const u8* function) -> u32 { + u32 length = 0; + while(length < 5) { + detour::opcode *opcode = 0; + foreach(op, detour::opcodes) { + if(function[length] == op.prefix) { + opcode = &op; + break; + } + } + 
if(opcode == 0) break; + length += opcode->length; + } + return length; +} + +inline auto detour::mirror(u8* target, const u8* source) -> u32 { + const u8* entryPoint = source; + for(u32 n = 0; n < 256; n++) target[256 + n] = source[n]; + + u32 size = detour::length(source); + while(size) { + detour::opcode* opcode = nullptr; + foreach(op, detour::opcodes) { + if(*source == op.prefix) { + opcode = &op; + break; + } + } + + switch(opcode->mode) { + case Copy: + for(u32 n = 0; n < opcode->length; n++) *target++ = *source++; + break; + case RelNear: { + source++; + u64 sourceAddress = (u64)source + 1 + (s8)*source; + *target++ = opcode->modify; + if(opcode->modify >> 8) *target++ = opcode->modify >> 8; + u64 targetAddress = (u64)target + 4; + u64 address = sourceAddress - targetAddress; + *target++ = address >> 0; + *target++ = address >> 8; + *target++ = address >> 16; + *target++ = address >> 24; + source += 2; + } break; + } + + size -= opcode->length; + } + + u64 address = (entryPoint + detour::length(entryPoint)) - (target + 5); + *target++ = 0xe9; //jmp entryPoint + *target++ = address >> 0; + *target++ = address >> 8; + *target++ = address >> 16; + *target++ = address >> 24; + + return source - entryPoint; +} + +#undef Implied +#undef RelNear + +} diff --git a/waterbox/ares64/ares/nall/windows/guard.hpp b/waterbox/ares64/ares/nall/windows/guard.hpp new file mode 100644 index 0000000000..147c567624 --- /dev/null +++ b/waterbox/ares64/ares/nall/windows/guard.hpp @@ -0,0 +1,32 @@ +#ifndef NALL_WINDOWS_GUARD_HPP +#define NALL_WINDOWS_GUARD_HPP + +#define boolean WindowsBoolean +#define interface WindowsInterface + +#undef UNICODE +#undef WINVER +#undef WIN32_LEAN_AND_LEAN +#undef _WIN32_WINNT +#undef _WIN32_IE +#undef NOMINMAX +#undef PATH_MAX + +#define UNICODE +#define WINVER 0x0601 +#define WIN32_LEAN_AND_MEAN +#define _WIN32_WINNT WINVER +#define _WIN32_IE WINVER +#define NOMINMAX +#define PATH_MAX 260 + +#else +#undef NALL_WINDOWS_GUARD_HPP + +#undef boolean 
+#undef interface + +#undef far +#undef near + +#endif diff --git a/waterbox/ares64/ares/nall/windows/guid.hpp b/waterbox/ares64/ares/nall/windows/guid.hpp new file mode 100644 index 0000000000..314683d490 --- /dev/null +++ b/waterbox/ares64/ares/nall/windows/guid.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace nall { + +inline auto guid() -> string { + GUID guidInstance; + CoCreateGuid(&guidInstance); + + wchar_t guidString[39]; + StringFromGUID2(guidInstance, guidString, 39); + + return (char*)utf8_t(guidString); +} + +} diff --git a/waterbox/ares64/ares/nall/windows/launcher.hpp b/waterbox/ares64/ares/nall/windows/launcher.hpp new file mode 100644 index 0000000000..0d09faaf7e --- /dev/null +++ b/waterbox/ares64/ares/nall/windows/launcher.hpp @@ -0,0 +1,91 @@ +#pragma once + +namespace nall { + +//launch a new process and inject specified DLL into it + +inline auto launch(const char* applicationName, const char* libraryName, u32 entryPoint) -> bool { + //if a launcher does not send at least one message, a wait cursor will appear + PostThreadMessage(GetCurrentThreadId(), WM_USER, 0, 0); + MSG msg; + GetMessage(&msg, 0, 0, 0); + + STARTUPINFOW si; + PROCESS_INFORMATION pi; + + memset(&si, 0, sizeof(STARTUPINFOW)); + BOOL result = CreateProcessW( + utf16_t(applicationName), GetCommandLineW(), NULL, NULL, TRUE, + DEBUG_PROCESS | DEBUG_ONLY_THIS_PROCESS, //do not break if application creates its own processes + NULL, NULL, &si, &pi + ); + if(result == false) return false; + + u8 entryData[1024], entryHook[1024] = { + 0x68, 0x00, 0x00, 0x00, 0x00, //push libraryName + 0xb8, 0x00, 0x00, 0x00, 0x00, //mov eax,LoadLibraryW + 0xff, 0xd0, //call eax + 0xcd, 0x03, //int 3 + }; + + entryHook[1] = (u8)((entryPoint + 14) >> 0); + entryHook[2] = (u8)((entryPoint + 14) >> 8); + entryHook[3] = (u8)((entryPoint + 14) >> 16); + entryHook[4] = (u8)((entryPoint + 14) >> 24); + + auto pLoadLibraryW = (u32)GetProcAddress(GetModuleHandleW(L"kernel32"), "LoadLibraryW"); + 
entryHook[6] = pLoadLibraryW >> 0; + entryHook[7] = pLoadLibraryW >> 8; + entryHook[8] = pLoadLibraryW >> 16; + entryHook[9] = pLoadLibraryW >> 24; + + utf16_t buffer = utf16_t(libraryName); + memcpy(entryHook + 14, buffer, 2 * wcslen(buffer) + 2); + + while(true) { + DEBUG_EVENT event; + WaitForDebugEvent(&event, INFINITE); + + if(event.dwDebugEventCode == EXIT_PROCESS_DEBUG_EVENT) break; + + if(event.dwDebugEventCode == EXCEPTION_DEBUG_EVENT) { + if(event.u.Exception.ExceptionRecord.ExceptionCode == EXCEPTION_BREAKPOINT) { + if(event.u.Exception.ExceptionRecord.ExceptionAddress == (void*)(entryPoint + 14 - 1)) { + HANDLE hProcess = OpenProcess(0, FALSE, event.dwProcessId); + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, event.dwThreadId); + + CONTEXT context; + context.ContextFlags = CONTEXT_FULL; + GetThreadContext(hThread, &context); + + WriteProcessMemory(pi.hProcess, (void*)entryPoint, (void*)&entryData, sizeof entryData, NULL); + context.Eip = entryPoint; + SetThreadContext(hThread, &context); + + CloseHandle(hThread); + CloseHandle(hProcess); + } + + ContinueDebugEvent(event.dwProcessId, event.dwThreadId, DBG_CONTINUE); + continue; + } + + ContinueDebugEvent(event.dwProcessId, event.dwThreadId, DBG_EXCEPTION_NOT_HANDLED); + continue; + } + + if(event.dwDebugEventCode == CREATE_PROCESS_DEBUG_EVENT) { + ReadProcessMemory(pi.hProcess, (void*)entryPoint, (void*)&entryData, sizeof entryData, NULL); + WriteProcessMemory(pi.hProcess, (void*)entryPoint, (void*)&entryHook, sizeof entryHook, NULL); + + ContinueDebugEvent(event.dwProcessId, event.dwThreadId, DBG_CONTINUE); + continue; + } + + ContinueDebugEvent(event.dwProcessId, event.dwThreadId, DBG_CONTINUE); + } + + return true; +} + +} diff --git a/waterbox/ares64/ares/nall/windows/registry.hpp b/waterbox/ares64/ares/nall/windows/registry.hpp new file mode 100644 index 0000000000..e63e037db4 --- /dev/null +++ b/waterbox/ares64/ares/nall/windows/registry.hpp @@ -0,0 +1,119 @@ +#pragma once + +#include 
+#include + +#include +#undef interface +#ifndef KEY_WOW64_64KEY + #define KEY_WOW64_64KEY 0x0100 +#endif +#ifndef KEY_WOW64_32KEY + #define KEY_WOW64_32KEY 0x0200 +#endif + +#ifndef NWR_FLAGS + #define NWR_FLAGS KEY_WOW64_64KEY +#endif + +#ifndef NWR_SIZE + #define NWR_SIZE 4096 +#endif + +namespace nall { + +struct registry { + static auto exists(const string& name) -> bool { + auto part = name.split("\\"); + HKEY handle, rootKey = root(part.takeLeft()); + string node = part.takeRight(); + string path = part.merge("\\"); + if(RegOpenKeyExW(rootKey, utf16_t(path), 0, NWR_FLAGS | KEY_READ, &handle) == ERROR_SUCCESS) { + wchar_t data[NWR_SIZE] = L""; + DWORD size = NWR_SIZE * sizeof(wchar_t); + LONG result = RegQueryValueExW(handle, utf16_t(node), nullptr, nullptr, (LPBYTE)&data, (LPDWORD)&size); + RegCloseKey(handle); + if(result == ERROR_SUCCESS) return true; + } + return false; + } + + static auto read(const string& name) -> string { + auto part = name.split("\\"); + HKEY handle, rootKey = root(part.takeLeft()); + string node = part.takeRight(); + string path = part.merge("\\"); + if(RegOpenKeyExW(rootKey, utf16_t(path), 0, NWR_FLAGS | KEY_READ, &handle) == ERROR_SUCCESS) { + wchar_t data[NWR_SIZE] = L""; + DWORD size = NWR_SIZE * sizeof(wchar_t); + LONG result = RegQueryValueExW(handle, utf16_t(node), nullptr, nullptr, (LPBYTE)&data, (LPDWORD)&size); + RegCloseKey(handle); + if(result == ERROR_SUCCESS) return (const char*)utf8_t(data); + } + return ""; + } + + static auto write(const string& name, const string& data = "") -> void { + auto part = name.split("\\"); + HKEY handle, rootKey = root(part.takeLeft()); + string node = part.takeRight(), path; + DWORD disposition; + for(u32 n = 0; n < part.size(); n++) { + path.append(part[n]); + if(RegCreateKeyExW(rootKey, utf16_t(path), 0, nullptr, 0, NWR_FLAGS | KEY_ALL_ACCESS, nullptr, &handle, &disposition) == ERROR_SUCCESS) { + if(n == part.size() - 1) { + RegSetValueExW(handle, utf16_t(node), 0, REG_SZ, 
(BYTE*)(wchar_t*)utf16_t(data), (data.length() + 1) * sizeof(wchar_t)); + } + RegCloseKey(handle); + } + path.append("\\"); + } + } + + static auto remove(const string& name) -> bool { + auto part = name.split("\\"); + HKEY rootKey = root(part.takeLeft()); + string node = part.takeRight(); + string path = part.merge("\\"); + if(!node) return SHDeleteKeyW(rootKey, utf16_t(path)) == ERROR_SUCCESS; + return SHDeleteValueW(rootKey, utf16_t(path), utf16_t(node)) == ERROR_SUCCESS; + } + + static auto contents(const string& name) -> vector { + vector result; + auto part = name.split("\\"); + HKEY handle, rootKey = root(part.takeLeft()); + part.removeRight(); + string path = part.merge("\\"); + if(RegOpenKeyExW(rootKey, utf16_t(path), 0, NWR_FLAGS | KEY_READ, &handle) == ERROR_SUCCESS) { + DWORD folders, nodes; + RegQueryInfoKey(handle, nullptr, nullptr, nullptr, &folders, nullptr, nullptr, &nodes, nullptr, nullptr, nullptr, nullptr); + for(u32 n = 0; n < folders; n++) { + wchar_t name[NWR_SIZE] = L""; + DWORD size = NWR_SIZE * sizeof(wchar_t); + RegEnumKeyEx(handle, n, (wchar_t*)&name, &size, nullptr, nullptr, nullptr, nullptr); + result.append(string{(const char*)utf8_t(name), "\\"}); + } + for(u32 n = 0; n < nodes; n++) { + wchar_t name[NWR_SIZE] = L""; + DWORD size = NWR_SIZE * sizeof(wchar_t); + RegEnumValueW(handle, n, (wchar_t*)&name, &size, nullptr, nullptr, nullptr, nullptr); + result.append((const char*)utf8_t(name)); + } + RegCloseKey(handle); + } + return result; + } + +private: + static auto root(const string& name) -> HKEY { + if(name == "HKCR") return HKEY_CLASSES_ROOT; + if(name == "HKCC") return HKEY_CURRENT_CONFIG; + if(name == "HKCU") return HKEY_CURRENT_USER; + if(name == "HKLM") return HKEY_LOCAL_MACHINE; + if(name == "HKU" ) return HKEY_USERS; + return nullptr; + } +}; + +} diff --git a/waterbox/ares64/ares/nall/windows/service.hpp b/waterbox/ares64/ares/nall/windows/service.hpp new file mode 100644 index 0000000000..fa5d87f9f5 --- /dev/null +++ 
b/waterbox/ares64/ares/nall/windows/service.hpp @@ -0,0 +1,13 @@ +#pragma once + +namespace nall { + +struct service { + explicit operator bool() const { return false; } + auto command(const string& name, const string& command) -> bool { return false; } + auto receive() -> string { return ""; } + auto name() const -> string { return ""; } + auto stop() const -> bool { return false; } +}; + +} diff --git a/waterbox/ares64/ares/nall/windows/shared-memory.hpp b/waterbox/ares64/ares/nall/windows/shared-memory.hpp new file mode 100644 index 0000000000..f0b162d4e2 --- /dev/null +++ b/waterbox/ares64/ares/nall/windows/shared-memory.hpp @@ -0,0 +1,27 @@ +#pragma once + +namespace nall { + +struct shared_memory { + shared_memory() = default; + shared_memory(const shared_memory&) = delete; + auto operator=(const shared_memory&) -> shared_memory& = delete; + + ~shared_memory() { + reset(); + } + + explicit operator bool() const { return false; } + auto empty() const -> bool { return true; } + auto size() const -> u32 { return 0; } + auto acquired() const -> bool { return false; } + auto acquire() -> u8* { return nullptr; } + auto release() -> void {} + auto reset() -> void {} + auto create(const string& name, u32 size) -> bool { return false; } + auto remove() -> void {} + auto open(const string& name, u32 size) -> bool { return false; } + auto close() -> void {} +}; + +} diff --git a/waterbox/ares64/ares/nall/windows/utf8.hpp b/waterbox/ares64/ares/nall/windows/utf8.hpp new file mode 100644 index 0000000000..b13d260adb --- /dev/null +++ b/waterbox/ares64/ares/nall/windows/utf8.hpp @@ -0,0 +1,84 @@ +#pragma once + +namespace nall { + //UTF-8 to UTF-16 + struct utf16_t { + utf16_t(const char* s = "") { operator=(s); } + ~utf16_t() { reset(); } + + utf16_t(const utf16_t&) = delete; + auto operator=(const utf16_t&) -> utf16_t& = delete; + + auto operator=(const char* s) -> utf16_t& { + reset(); + if(!s) s = ""; + length = MultiByteToWideChar(CP_UTF8, 0, s, -1, nullptr, 0); + 
buffer = new wchar_t[length + 1]; + MultiByteToWideChar(CP_UTF8, 0, s, -1, buffer, length); + buffer[length] = 0; + return *this; + } + + operator wchar_t*() { return buffer; } + operator const wchar_t*() const { return buffer; } + + auto reset() -> void { + delete[] buffer; + length = 0; + } + + auto data() -> wchar_t* { return buffer; } + auto data() const -> const wchar_t* { return buffer; } + + auto size() const -> u32 { return length; } + + private: + wchar_t* buffer = nullptr; + u32 length = 0; + }; + + //UTF-16 to UTF-8 + struct utf8_t { + utf8_t(const wchar_t* s = L"") { operator=(s); } + ~utf8_t() { reset(); } + + utf8_t(const utf8_t&) = delete; + auto operator=(const utf8_t&) -> utf8_t& = delete; + + auto operator=(const wchar_t* s) -> utf8_t& { + reset(); + if(!s) s = L""; + length = WideCharToMultiByte(CP_UTF8, 0, s, -1, nullptr, 0, nullptr, nullptr); + buffer = new char[length + 1]; + WideCharToMultiByte(CP_UTF8, 0, s, -1, buffer, length, nullptr, nullptr); + buffer[length] = 0; + return *this; + } + + auto reset() -> void { + delete[] buffer; + length = 0; + } + + operator char*() { return buffer; } + operator const char*() const { return buffer; } + + auto data() -> char* { return buffer; } + auto data() const -> const char* { return buffer; } + + auto size() const -> u32 { return length; } + + private: + char* buffer = nullptr; + u32 length = 0; + }; + + inline auto utf8_arguments(int& argc, char**& argv) -> void { + wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &argc); + argv = new char*[argc + 1](); + for(u32 i = 0; i < argc; i++) { + argv[i] = new char[PATH_MAX]; + strcpy(argv[i], nall::utf8_t(wargv[i])); + } + } +} diff --git a/waterbox/ares64/ares/nall/xorg/clipboard.hpp b/waterbox/ares64/ares/nall/xorg/clipboard.hpp new file mode 100644 index 0000000000..03dc5af025 --- /dev/null +++ b/waterbox/ares64/ares/nall/xorg/clipboard.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace nall::Clipboard { + +inline auto clear() -> void { 
+ XDisplay display; + if(auto atom = XInternAtom(display, "CLIPBOARD", XlibTrue)) { + XSetSelectionOwner(display, atom, XlibNone, XlibCurrentTime); + } +} + +} diff --git a/waterbox/ares64/ares/nall/xorg/guard.hpp b/waterbox/ares64/ares/nall/xorg/guard.hpp new file mode 100644 index 0000000000..2ff49b7501 --- /dev/null +++ b/waterbox/ares64/ares/nall/xorg/guard.hpp @@ -0,0 +1,51 @@ +#ifndef NALL_XORG_GUARD_HPP +#define NALL_XORG_GUARD_HPP + +#define Atom XlibAtom +#define Display XlibDisplay +#define Font XlibFont +#define Screen XlibScreen +#define Window XlibWindow + +#else +#undef NALL_XORG_GUARD_HPP + +#undef Atom +#undef Display +#undef Font +#undef Screen +#undef Window + +#undef Above +#undef Below +#undef Bool + +#ifndef NALL_XORG_GUARD_CONSTANTS +#define NALL_XORG_GUARD_CONSTANTS +enum XlibConstants : int { + XlibButton1 = Button1, + XlibButton2 = Button2, + XlibButton3 = Button3, + XlibButton4 = Button4, + XlibButton5 = Button5, + XlibCurrentTime = CurrentTime, + XlibFalse = False, + XlibNone = None, + XlibTrue = True, +}; +#endif + +#undef Button1 +#undef Button2 +#undef Button3 +#undef Button4 +#undef Button5 +#undef CurrentTime +#undef False +#undef None +#undef True + +#undef MAX +#undef MIN + +#endif diff --git a/waterbox/ares64/ares/nall/xorg/xorg.hpp b/waterbox/ares64/ares/nall/xorg/xorg.hpp new file mode 100644 index 0000000000..b93eef0561 --- /dev/null +++ b/waterbox/ares64/ares/nall/xorg/xorg.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +struct XDisplay { + XDisplay() { _display = XOpenDisplay(nullptr); } + ~XDisplay() { XCloseDisplay(_display); } + operator XlibDisplay*() const { return _display; } + +private: + XlibDisplay* _display; +}; diff --git a/waterbox/ares64/ares/thirdparty/mame/devices/video/poly.h b/waterbox/ares64/ares/thirdparty/mame/devices/video/poly.h new file mode 100644 index 0000000000..92cdefee2f --- /dev/null +++ 
b/waterbox/ares64/ares/thirdparty/mame/devices/video/poly.h @@ -0,0 +1,1472 @@ +// license:BSD-3-Clause +// copyright-holders:Ville Linde, Aaron Giles +/*************************************************************************** + + poly.h + + Polygon helper routines. + +**************************************************************************** + + Pixel model: + + (0.0,0.0) (1.0,0.0) (2.0,0.0) (3.0,0.0) + +---------------+---------------+---------------+ + | | | | + | | | | + | (0.5,0.5) | (1.5,0.5) | (2.5,0.5) | + | * | * | * | + | | | | + | | | | + (0.0,1.0) (1.0,1.0) (2.0,1.0) (3.0,1.0) + +---------------+---------------+---------------+ + | | | | + | | | | + | (0.5,1.5) | (1.5,1.5) | (2.5,1.5) | + | * | * | * | + | | | | + | | | | + | | | | + +---------------+---------------+---------------+ + (0.0,2.0) (1.0,2.0) (2.0,2.0) (3.0,2.0) + +***************************************************************************/ + +#ifndef MAME_VIDEO_POLY_H +#define MAME_VIDEO_POLY_H + +#pragma once + +#include +#include + + +#define KEEP_POLY_STATISTICS 0 +#define TRACK_POLY_WAITS 0 + + + +//************************************************************************** +// CONSTANTS +//************************************************************************** + +static constexpr u8 POLY_FLAG_NO_WORK_QUEUE = 0x01; +static constexpr u8 POLY_FLAG_NO_CLIPPING = 0x02; + + +//************************************************************************** +// TYPE DEFINITIONS +//************************************************************************** + +// base class for poly_array +class poly_array_base +{ +public: + // construction + poly_array_base() { } + + // destruction + virtual ~poly_array_base() { } + + // reset + virtual void reset() = 0; +}; + + +// class for managing an array of items +template +class poly_array : public poly_array_base +{ +public: + // this is really architecture-specific, but 64 is a reasonable + // value for most modern x64/ARM architectures + static 
constexpr size_t CACHE_LINE_SHIFT = 6; + static constexpr size_t CACHE_LINE_SIZE = 1 << CACHE_LINE_SHIFT; + static constexpr uintptr_t CACHE_LINE_MASK = ~uintptr_t(0) << CACHE_LINE_SHIFT; + + // size of an item, rounded up to the cache line size + static constexpr size_t ITEM_SIZE = ((sizeof(ArrayType) + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) * CACHE_LINE_SIZE; + + // items are allocated in a 64k chunks + static constexpr size_t CHUNK_GRANULARITY = 65536; + + // number of items in a chunk + static constexpr u32 ITEMS_PER_CHUNK = CHUNK_GRANULARITY / ITEM_SIZE; + + // construction + poly_array() : + m_base(nullptr), + m_next(0), + m_max(0), + m_allocated(0) + { + for (int index = 0; index < TrackingCount; index++) + m_last[index] = nullptr; + + // allocate one chunk to start with + realloc(ITEMS_PER_CHUNK); + } + + // destruction + virtual ~poly_array() { m_base = nullptr; } + + // getters + u32 count() const { return m_next; } + u32 max() const { return m_max; } + size_t itemsize() const { return ITEM_SIZE; } + u32 allocated() const { return m_allocated; } + + // return an item by index + ArrayType &byindex(u32 index) + { + assert(index < m_next); + if (index < m_allocated) + return *item_ptr(index); + assert(m_chain); + return m_chain->byindex(index - m_allocated); + } + + // return a contiguous chunk of items + ArrayType *contiguous(u32 index, u32 count, u32 &chunk) + { + assert(index < m_next); + assert(index + count <= m_next); + if (index < m_allocated) + { + chunk = std::min(count, m_allocated - index); + return item_ptr(index); + } + assert(m_chain); + return m_chain->contiguous(index - m_allocated, count, chunk); + } + + // compute the index + int indexof(ArrayType &item) const + { + u32 result = (reinterpret_cast(&item) - m_base) / ITEM_SIZE; + if (result < m_allocated) + return result; + assert(m_chain); + return m_allocated + m_chain->indexof(item); + } + + // operations + virtual void reset() override + { + m_next = 0; + + // if we didn't have a chain, 
just repopulate + if (!m_chain) + repopulate(); + else + { + // otherwise, reallocate and get rid of the chain + realloc(m_max); + m_chain.reset(); + } + } + + // allocate a return a new item + ArrayType &next(int tracking_index = 0) + { + // track the maximum + if (m_next > m_max) + m_max = m_next; + + // fast case: fits within our array + ArrayType *item; + if (m_next < m_allocated) + item = new(item_ptr(m_next)) ArrayType; + + // otherwise, allocate from the chain + else + { + if (!m_chain) + m_chain = std::make_unique>(); + item = &m_chain->next(); + } + + // set the last item + m_next++; + if (TrackingCount > 0) + { + assert(tracking_index < TrackingCount); + m_last[tracking_index] = item; + } + return *item; + } + + // return the last + ArrayType &last(int tracking_index = 0) const + { + assert(tracking_index < TrackingCount); + assert(m_last[tracking_index] != nullptr); + return *m_last[tracking_index]; + } + +private: + // internal helper to make size pointers + ArrayType *item_ptr(u32 index) + { + assert(index < m_allocated); + return reinterpret_cast(m_base + index * ITEM_SIZE); + } + + // reallocate to the given size + void realloc(u32 count) + { + // round the count up to a chunk size + count = ((count + ITEMS_PER_CHUNK - 1) / ITEMS_PER_CHUNK) * ITEMS_PER_CHUNK; + + // allocate a fresh new array + std::unique_ptr new_alloc = std::make_unique(ITEM_SIZE * count + CACHE_LINE_SIZE); + std::fill_n(&new_alloc[0], ITEM_SIZE * count + CACHE_LINE_SIZE, 0); + + // align the base to a cache line + m_base = reinterpret_cast((uintptr_t(new_alloc.get()) + CACHE_LINE_SIZE - 1) & CACHE_LINE_MASK); + + // repopulate last items into the base of the new array + repopulate(); + + // replace the old allocation with the new one + m_alloc = std::move(new_alloc); + m_allocated = count; + } + + // repopulate items + void repopulate() + { + for (int tracking_index = 0; tracking_index < TrackingCount; tracking_index++) + if (m_last[tracking_index] != nullptr) + { + if 
(m_last[tracking_index] == item_ptr(m_next)) + m_next++; + else + next(tracking_index) = *m_last[tracking_index]; + } + } + + // internal state + u8 *m_base; + u32 m_next; + u32 m_max; + u32 m_allocated; + std::unique_ptr m_alloc; + std::unique_ptr> m_chain; + std::array m_last; +}; + + +// poly_manager is a template class +template +class poly_manager +{ +public: + // each vertex has an X/Y coordinate and a set of parameters + struct vertex_t + { + vertex_t() { } + vertex_t(BaseType _x, BaseType _y) { x = _x; y = _y; } + + BaseType x, y; // X, Y coordinates + std::array p; // iterated parameters + }; + + // a single extent describes a span and a list of parameter extents + struct extent_t + { + struct param_t + { + BaseType start; // parameter value at start + BaseType dpdx; // dp/dx relative to start + }; + int16_t startx, stopx; // starting (inclusive)/ending (exclusive) endpoints + std::array param; // array of parameter start/delays + void *userdata; // custom per-span data + }; + + // delegate type for scanline callbacks + using render_delegate = delegate; + + // poly_array of object data + using objectdata_array = poly_array; + + // construction/destruction + poly_manager(running_machine &machine); + virtual ~poly_manager(); + + // synchronization + void wait(char const *debug_reason = "general"); + + // return a reference to our ObjectType poly_array + objectdata_array &object_data() { return m_object; } + + // register a poly_array to be reset after a wait + void register_poly_array(poly_array_base &array) { m_arrays.push_back(&array); } + + // tiles + template + uint32_t render_tile(rectangle const &cliprect, render_delegate callback, vertex_t const &v1, vertex_t const &v2); + + // triangles + template + uint32_t render_triangle(rectangle const &cliprect, render_delegate callback, vertex_t const &v1, vertex_t const &v2, vertex_t const &v3); + template + uint32_t render_triangle_fan(rectangle const &cliprect, render_delegate callback, int numverts, 
vertex_t const *v); + template + uint32_t render_triangle_strip(rectangle const &cliprect, render_delegate callback, int numverts, vertex_t const *v); + + // polygons + template + uint32_t render_polygon(rectangle const &cliprect, render_delegate callback, vertex_t const *v); + + // direct custom extents + template + uint32_t render_extents(rectangle const &cliprect, render_delegate callback, int startscanline, int numscanlines, extent_t const *extents); + + // public helpers + template + int zclip_if_less(int numverts, vertex_t const *v, vertex_t *outv, BaseType clipval); + +private: + // number of profiling ticks before we consider a wait "long" + static constexpr osd_ticks_t POLY_LOG_WAIT_THRESHOLD = 1000; + + static constexpr int SCANLINES_PER_BUCKET = 32; + static constexpr int TOTAL_BUCKETS = (512 / SCANLINES_PER_BUCKET); + + // primitive_info describes a single primitive + struct primitive_info + { + poly_manager * m_owner; // pointer back to the poly manager + ObjectType * m_object; // object data pointer + render_delegate m_callback; // callback to handle a scanline's worth of work + }; + + // internal unit of work + struct work_unit + { + work_unit &operator=(work_unit const &rhs) + { + // this is just to satisfy the compiler; we don't actually copy + fatalerror("Attempt to copy work_unit"); + } + + std::atomic count_next; // number of scanlines and index of next item to process + primitive_info * primitive; // pointer to primitive + int32_t scanline; // starting scanline + uint32_t previtem; // index of previous item in the same bucket + extent_t extent[SCANLINES_PER_BUCKET]; // array of scanline extents + }; + + // internal array types + using primitive_array = poly_array; + using unit_array = poly_array; + + // round in a cross-platform consistent manner + inline int32_t round_coordinate(BaseType value) + { + int32_t result = int32_t(std::floor(value)); + if (value > 0 && result < 0) + return INT_MAX - 1; + return result + (value - BaseType(result) > 
BaseType(0.5)); + } + + // internal helpers + primitive_info &primitive_alloc(int minx, int maxx, int miny, int maxy, render_delegate callback) + { + // return and initialize the next one + primitive_info &primitive = m_primitive.next(); + primitive.m_owner = this; + primitive.m_object = &m_object.last(); + primitive.m_callback = callback; + return primitive; + } + + // enqueue work items in contiguous chunks + void queue_items(u32 start) + { + // do nothing if no queue; items will be processed on the next wait + if (m_queue == nullptr) + return; + + // enqueue the items in contiguous chunks + while (start < m_unit.count()) + { + u32 chunk; + work_unit *base = m_unit.contiguous(start, m_unit.count() - start, chunk); + osd_work_item_queue_multiple(m_queue, work_item_callback, chunk, base, m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE); + start += chunk; + } + } + + static void *work_item_callback(void *param, int threadid); + void presave() { wait("pre-save"); } + + // queue management + osd_work_queue *m_queue; // work queue + + // arrays + primitive_array m_primitive; // array of primitives + objectdata_array m_object; // array of object data + unit_array m_unit; // array of work units + std::vector m_arrays; // list of arrays we are managing + + // buckets + uint32_t m_unit_bucket[TOTAL_BUCKETS]; // buckets for tracking unit usage + + // statistics + uint32_t m_tiles; // number of tiles queued + uint32_t m_triangles; // number of triangles queued + uint32_t m_polygons; // number of polygons queued + uint64_t m_pixels; // number of pixels rendered +#if KEEP_POLY_STATISTICS + uint32_t m_conflicts[WORK_MAX_THREADS] = { 0 }; // number of conflicts found, per thread + uint32_t m_resolved[WORK_MAX_THREADS] = { 0 }; // number of conflicts resolved, per thread +#endif +#if TRACK_POLY_WAITS + static std::string friendly_number(u64 number); + struct wait_tracker + { + void update(int items, osd_ticks_t time) + { + total_waits++; + if (items > 0) + { + 
total_actual_waits++; + total_cycles += time; + if (time < 100) + bucket_waits[0]++; + else if (time < 1000) + bucket_waits[1]++; + else if (time < 10000) + bucket_waits[2]++; + else + bucket_waits[3]++; + } + } + + u32 total_waits = 0; + u32 total_actual_waits = 0; + u32 bucket_waits[4] = { 0 }; + u64 total_cycles = 0; + }; + using waitmap_t = std::unordered_map; + waitmap_t m_waitmap; +#endif +}; + + +//------------------------------------------------- +// poly_manager - constructor +//------------------------------------------------- + +template +poly_manager::poly_manager(running_machine &machine) : + m_queue(nullptr), + m_tiles(0), + m_triangles(0), + m_polygons(0), + m_pixels(0) +{ + // create the work queue + if (!(Flags & POLY_FLAG_NO_WORK_QUEUE)) + m_queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ); + + // initialize the buckets to empty + std::fill_n(&m_unit_bucket[0], std::size(m_unit_bucket), 0xffffffff); + + // register our arrays for reset + register_poly_array(m_primitive); + register_poly_array(m_object); + register_poly_array(m_unit); + +#if !defined(MAME_RDP) + // request a pre-save callback for synchronization + machine.save().register_presave(save_prepost_delegate(FUNC(poly_manager::presave), this)); +#endif +} + + +//------------------------------------------------- +// ~poly_manager - destructor +//------------------------------------------------- + +#if TRACK_POLY_WAITS +template +inline std::string poly_manager::friendly_number(u64 number) +{ + static char const s_suffixes[] = " kmbtqisp"; + double value = double(number); + int suffixnum = 0; + + if (number < 1000000) + return string_format("%6d ", int(number)); + while (value >= 1000) + { + value /= 1000.0; + suffixnum++; + } + if (value >= 100) + return string_format("%6.1f%c", value, s_suffixes[suffixnum]); + if (value >= 10) + return string_format("%6.2f%c", value, s_suffixes[suffixnum]); + return string_format("%6.3f%c", value, s_suffixes[suffixnum]); +} 
+#endif + +template +poly_manager::~poly_manager() +{ +#if KEEP_POLY_STATISTICS +{ + // accumulate stats over the entire collection + int conflicts = 0, resolved = 0; + for (int i = 0; i < std::size(m_conflicts); i++) + { + conflicts += m_conflicts[i]; + resolved += m_resolved[i]; + } + + // output global stats + osd_printf_info("Total triangles = %d\n", m_triangles); + osd_printf_info("Total polygons = %d\n", m_polygons); + if (m_pixels > 1000000000) + osd_printf_info("Total pixels = %d%09d\n", uint32_t(m_pixels / 1000000000), uint32_t(m_pixels % 1000000000)); + else + osd_printf_info("Total pixels = %d\n", uint32_t(m_pixels)); + + osd_printf_info("Conflicts: %d resolved, %d total\n", resolved, conflicts); + osd_printf_info("Units: %5d used, %5d allocated, %4d bytes each, %7d total\n", m_unit.max(), m_unit.allocated(), int(m_unit.itemsize()), int(m_unit.allocated() * m_unit.itemsize())); + osd_printf_info("Primitives: %5d used, %5d allocated, %4d bytes each, %7d total\n", m_primitive.max(), m_primitive.allocated(), int(m_primitive.itemsize()), int(m_primitive.allocated() * m_primitive.itemsize())); + osd_printf_info("Object data: %5d used, %5d allocated, %4d bytes each, %7d total\n", m_object.max(), m_object.allocated(), int(m_object.itemsize()), int(m_object.allocated() * m_object.itemsize())); +} +#endif +#if TRACK_POLY_WAITS +{ + osd_printf_info("Wait summary:\n"); + osd_printf_info("Cause Cycles Waits Actuals Average <100 100-1k 1k-10k 10k+\n"); + osd_printf_info("-------------------------- ------- ------- ------- ------- ------- ------- ------- -------\n"); + while (1) + { + typename waitmap_t::value_type *biggest = nullptr; + for (auto &item : m_waitmap) + if (item.second.total_cycles > 0) + if (biggest == nullptr || item.second.total_cycles > biggest->second.total_cycles) + biggest = &item; + + if (biggest == nullptr) + break; + + osd_printf_info("%-28s%-7s %-7s %-7s %-7s %-7s %-7s %-7s %-7s\n", + biggest->first.c_str(), + 
friendly_number(biggest->second.total_cycles).c_str(), + friendly_number(biggest->second.total_waits).c_str(), + friendly_number(biggest->second.total_actual_waits).c_str(), + (biggest->second.total_actual_waits == 0) ? "n/a" : friendly_number(biggest->second.total_cycles / biggest->second.total_actual_waits).c_str(), + friendly_number(biggest->second.bucket_waits[0]).c_str(), + friendly_number(biggest->second.bucket_waits[1]).c_str(), + friendly_number(biggest->second.bucket_waits[2]).c_str(), + friendly_number(biggest->second.bucket_waits[3]).c_str()); + + biggest->second.total_cycles = 0; + } +} +#endif + + // free the work queue + if (m_queue != nullptr) + osd_work_queue_free(m_queue); +} + + +//------------------------------------------------- +// work_item_callback - process a work item +//------------------------------------------------- + +template +void *poly_manager::work_item_callback(void *param, int threadid) +{ + while (1) + { + work_unit &unit = *(work_unit *)param; + primitive_info &primitive = *unit.primitive; + int count = unit.count_next & 0xff; + uint32_t orig_count_next; + + // if our previous item isn't done yet, enqueue this item to the end and proceed + if (unit.previtem != 0xffffffff) + { + work_unit &prevunit = primitive.m_owner->m_unit.byindex(unit.previtem); + if (prevunit.count_next != 0) + { + uint32_t unitnum = primitive.m_owner->m_unit.indexof(unit); + uint32_t new_count_next; + + // attempt to atomically swap in this new value + do + { + orig_count_next = prevunit.count_next; + new_count_next = orig_count_next | (unitnum << 8); + } while (!prevunit.count_next.compare_exchange_weak(orig_count_next, new_count_next, std::memory_order_release, std::memory_order_relaxed)); + +#if KEEP_POLY_STATISTICS + // track resolved conflicts + primitive.m_owner->m_conflicts[threadid]++; + if (orig_count_next != 0) + primitive.m_owner->m_resolved[threadid]++; +#endif + // if we succeeded, skip out early so we can do other work + if (orig_count_next 
!= 0) + break; + } + } + + // iterate over extents + for (int curscan = 0; curscan < count; curscan++) + primitive.m_callback(unit.scanline + curscan, unit.extent[curscan], *primitive.m_object, threadid); + + // set our count to 0 and re-fetch the original count value + do + { + orig_count_next = unit.count_next; + } while (!unit.count_next.compare_exchange_weak(orig_count_next, 0, std::memory_order_release, std::memory_order_relaxed)); + + // if we have no more work to do, do nothing + orig_count_next >>= 8; + if (orig_count_next == 0) + break; + param = &primitive.m_owner->m_unit.byindex(orig_count_next); + } + return nullptr; +} + + +//------------------------------------------------- +// wait - stall until all work is complete +//------------------------------------------------- + +template +void poly_manager::wait(char const *debug_reason) +{ + // early out if no units outstanding + if (m_unit.count() == 0) + return; + +#if TRACK_POLY_WAITS + int items = osd_work_queue_items(m_queue); + osd_ticks_t time = get_profile_ticks(); +#endif + + // wait for all pending work items to complete + if (m_queue != nullptr) + osd_work_queue_wait(m_queue, osd_ticks_per_second() * 100); + + // if we don't have a queue, just run the whole list now + else + for (int unitnum = 0; unitnum < m_unit.count(); unitnum++) + work_item_callback(&m_unit.byindex(unitnum), 0); + +#if TRACK_POLY_WAITS + m_waitmap[debug_reason].update(items, get_profile_ticks() - time); +#endif + + // clear the buckets + std::fill_n(&m_unit_bucket[0], std::size(m_unit_bucket), 0xffffffff); + + // reset all the poly arrays + for (auto array : m_arrays) + array->reset(); +} + + +//------------------------------------------------- +// render_tile - render a tile +//------------------------------------------------- + +template +template +uint32_t poly_manager::render_tile(rectangle const &cliprect, render_delegate callback, vertex_t const &_v1, vertex_t const &_v2) +{ + vertex_t const *v1 = &_v1; + vertex_t const 
*v2 = &_v2; + + // first sort by Y + if (v2->y < v1->y) + std::swap(v1, v2); + + // compute some integral X/Y vertex values + int32_t v1y = round_coordinate(v1->y); + int32_t v2y = round_coordinate(v2->y); + + // clip coordinates + int32_t v1yclip = v1y; + int32_t v2yclip = v2y; + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + v1yclip = std::max(v1yclip, cliprect.top()); + v2yclip = std::min(v2yclip, cliprect.bottom() + 1); + if (v2yclip - v1yclip <= 0) + return 0; + } + + // determine total X extents + BaseType minx = v1->x; + BaseType maxx = v2->x; + if (minx > maxx) + return 0; + + // allocate and populate a new primitive + primitive_info &primitive = primitive_alloc(round_coordinate(minx), round_coordinate(maxx), v1yclip, v2yclip, callback); + + // compute parameter deltas + std::array param_dpdx; + std::array param_dpdy; + if (ParamCount > 0) + { + BaseType oox = BaseType(1.0) / (v2->x - v1->x); + BaseType ooy = BaseType(1.0) / (v2->y - v1->y); + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + { + param_dpdx[paramnum] = oox * (v2->p[paramnum] - v1->p[paramnum]); + param_dpdy[paramnum] = ooy * (v2->p[paramnum] - v1->p[paramnum]); + } + } + + // clamp to full pixels + int32_t istartx = round_coordinate(v1->x); + int32_t istopx = round_coordinate(v2->x); + + // force start < stop + if (istartx > istopx) + std::swap(istartx, istopx); + + // apply left/right clipping + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + istartx = std::max(istartx, cliprect.left()); + istopx = std::min(istopx, cliprect.right() + 1); + if (istartx >= istopx) + return 0; + } + + // compute the X extents for each scanline + int32_t pixels = 0; + uint32_t startunit = m_unit.count(); + int32_t scaninc = 1; + for (int32_t curscan = v1yclip; curscan < v2yclip; curscan += scaninc) + { + uint32_t bucketnum = (uint32_t(curscan) / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS; + uint32_t unit_index = m_unit.count(); + work_unit &unit = m_unit.next(); + + // determine how much to advance to hit the 
next bucket + scaninc = SCANLINES_PER_BUCKET - uint32_t(curscan) % SCANLINES_PER_BUCKET; + + // fill in the work unit basics + unit.primitive = &primitive; + unit.count_next = std::min(v2yclip - curscan, scaninc); + unit.scanline = curscan; + unit.previtem = m_unit_bucket[bucketnum]; + m_unit_bucket[bucketnum] = unit_index; + + // iterate over extents + for (int extnum = 0; extnum < unit.count_next; extnum++) + { + // set the extent and update the total pixel count + extent_t &extent = unit.extent[extnum]; + extent.startx = istartx; + extent.stopx = istopx; + pixels += istopx - istartx; + + // fill in the parameters for the extent + if (ParamCount > 0) + { + BaseType fullstartx = BaseType(istartx) + BaseType(0.5); + BaseType fully = BaseType(curscan + extnum) + BaseType(0.5); + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + { + extent.param[paramnum].start = v1->p[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum]; + extent.param[paramnum].dpdx = param_dpdx[paramnum]; + } + } + } + } + + // enqueue the work items + queue_items(startunit); + + // return the total number of pixels in the triangle + m_tiles++; + m_pixels += pixels; + return pixels; +} + + +//------------------------------------------------- +// render_triangle - render a single triangle +// given 3 vertexes +//------------------------------------------------- + +template +template +uint32_t poly_manager::render_triangle(const rectangle &cliprect, render_delegate callback, const vertex_t &_v1, const vertex_t &_v2, const vertex_t &_v3) +{ + vertex_t const *v1 = &_v1; + vertex_t const *v2 = &_v2; + vertex_t const *v3 = &_v3; + + // first sort by Y + if (v2->y < v1->y) + std::swap(v1, v2); + if (v3->y < v2->y) + { + std::swap(v2, v3); + if (v2->y < v1->y) + std::swap(v1, v2); + } + + // compute some integral X/Y vertex values + int32_t v1y = round_coordinate(v1->y); + int32_t v3y = round_coordinate(v3->y); + + // clip coordinates + int32_t v1yclip = v1y; + int32_t 
v3yclip = v3y; + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + v1yclip = std::max(v1yclip, cliprect.top()); + v3yclip = std::min(v3yclip, cliprect.bottom() + 1); + if (v3yclip - v1yclip <= 0) + return 0; + } + + // determine total X extents + BaseType minx = std::min(std::min(v1->x, v2->x), v3->x); + BaseType maxx = std::max(std::max(v1->x, v2->x), v3->x); + + // allocate and populate a new primitive + primitive_info &primitive = primitive_alloc(round_coordinate(minx), round_coordinate(maxx), v1yclip, v3yclip, callback); + + // compute the slopes for each portion of the triangle + BaseType dxdy_v1v2 = (v2->y == v1->y) ? BaseType(0.0) : (v2->x - v1->x) / (v2->y - v1->y); + BaseType dxdy_v1v3 = (v3->y == v1->y) ? BaseType(0.0) : (v3->x - v1->x) / (v3->y - v1->y); + BaseType dxdy_v2v3 = (v3->y == v2->y) ? BaseType(0.0) : (v3->x - v2->x) / (v3->y - v2->y); + + // compute parameter starting points and deltas + std::array param_start; + std::array param_dpdx; + std::array param_dpdy; + if (ParamCount > 0) + { + BaseType a00 = v2->y - v3->y; + BaseType a01 = v3->x - v2->x; + BaseType a02 = v2->x*v3->y - v3->x*v2->y; + BaseType a10 = v3->y - v1->y; + BaseType a11 = v1->x - v3->x; + BaseType a12 = v3->x*v1->y - v1->x*v3->y; + BaseType a20 = v1->y - v2->y; + BaseType a21 = v2->x - v1->x; + BaseType a22 = v1->x*v2->y - v2->x*v1->y; + BaseType det = a02 + a12 + a22; + + if (std::abs(det) < BaseType(0.00001)) + { + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + { + param_dpdx[paramnum] = BaseType(0.0); + param_dpdy[paramnum] = BaseType(0.0); + param_start[paramnum] = v1->p[paramnum]; + } + } + else + { + BaseType idet = BaseType(1.0) / det; + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + { + param_dpdx[paramnum] = idet * (v1->p[paramnum]*a00 + v2->p[paramnum]*a10 + v3->p[paramnum]*a20); + param_dpdy[paramnum] = idet * (v1->p[paramnum]*a01 + v2->p[paramnum]*a11 + v3->p[paramnum]*a21); + param_start[paramnum] = idet * (v1->p[paramnum]*a02 + 
v2->p[paramnum]*a12 + v3->p[paramnum]*a22); + } + } + } + + // compute the X extents for each scanline + int32_t pixels = 0; + uint32_t startunit = m_unit.count(); + int32_t scaninc = 1; + for (int32_t curscan = v1yclip; curscan < v3yclip; curscan += scaninc) + { + uint32_t bucketnum = (uint32_t(curscan) / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS; + uint32_t unit_index = m_unit.count(); + work_unit &unit = m_unit.next(); + + // determine how much to advance to hit the next bucket + scaninc = SCANLINES_PER_BUCKET - uint32_t(curscan) % SCANLINES_PER_BUCKET; + + // fill in the work unit basics + unit.primitive = &primitive; + unit.count_next = std::min(v3yclip - curscan, scaninc); + unit.scanline = curscan; + unit.previtem = m_unit_bucket[bucketnum]; + m_unit_bucket[bucketnum] = unit_index; + + // iterate over extents + for (int extnum = 0; extnum < unit.count_next; extnum++) + { + // compute the ending X based on which part of the triangle we're in + BaseType fully = BaseType(curscan + extnum) + BaseType(0.5); + BaseType startx = v1->x + (fully - v1->y) * dxdy_v1v3; + BaseType stopx; + if (fully < v2->y) + stopx = v1->x + (fully - v1->y) * dxdy_v1v2; + else + stopx = v2->x + (fully - v2->y) * dxdy_v2v3; + + // clamp to full pixels + int32_t istartx = round_coordinate(startx); + int32_t istopx = round_coordinate(stopx); + + // force start < stop + if (istartx > istopx) + std::swap(istartx, istopx); + + // apply left/right clipping + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + istartx = std::max(istartx, cliprect.left()); + istopx = std::min(istopx, cliprect.right() + 1); + } + + // set the extent and update the total pixel count + if (istartx >= istopx) + istartx = istopx = 0; + extent_t &extent = unit.extent[extnum]; + extent.startx = istartx; + extent.stopx = istopx; + pixels += istopx - istartx; + + // fill in the parameters for the extent + BaseType fullstartx = BaseType(istartx) + BaseType(0.5); + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + { + 
extent.param[paramnum].start = param_start[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum]; + extent.param[paramnum].dpdx = param_dpdx[paramnum]; + } + } + } + + // enqueue the work items + queue_items(startunit); + + // return the total number of pixels in the triangle + m_triangles++; + m_pixels += pixels; + return pixels; +} + + +//------------------------------------------------- +// render_triangle_fan - render a set of +// triangles in a fan +//------------------------------------------------- + +template +template +uint32_t poly_manager::render_triangle_fan(rectangle const &cliprect, render_delegate callback, int numverts, vertex_t const *v) +{ + // iterate over vertices + uint32_t pixels = 0; + for (int vertnum = 2; vertnum < numverts; vertnum++) + pixels += render_triangle(cliprect, callback, v[0], v[vertnum - 1], v[vertnum]); + return pixels; +} + + +//------------------------------------------------- +// render_triangle_strip - render a set of +// triangles in a strip +//------------------------------------------------- + +template +template +uint32_t poly_manager::render_triangle_strip(rectangle const &cliprect, render_delegate callback, int numverts, vertex_t const *v) +{ + // iterate over vertices + uint32_t pixels = 0; + for (int vertnum = 2; vertnum < numverts; vertnum++) + pixels += render_triangle(cliprect, callback, v[vertnum - 2], v[vertnum - 1], v[vertnum]); + return pixels; +} + + +//------------------------------------------------- +// render_extents - perform a custom render of +// an object, given specific extents +//------------------------------------------------- + +template +template +uint32_t poly_manager::render_extents(rectangle const &cliprect, render_delegate callback, int startscanline, int numscanlines, extent_t const *extents) +{ + // clip coordinates + int32_t v1yclip = startscanline; + int32_t v3yclip = startscanline + numscanlines; + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + v1yclip = 
std::max(v1yclip, cliprect.top()); + v3yclip = std::min(v3yclip, cliprect.bottom() + 1); + if (v3yclip - v1yclip <= 0) + return 0; + } + + // allocate and populate a new primitive + primitive_info &primitive = primitive_alloc(0, 0, v1yclip, v3yclip, callback); + + // compute the X extents for each scanline + int32_t pixels = 0; + uint32_t startunit = m_unit.count(); + int32_t scaninc = 1; + for (int32_t curscan = v1yclip; curscan < v3yclip; curscan += scaninc) + { + uint32_t bucketnum = (uint32_t(curscan) / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS; + uint32_t unit_index = m_unit.count(); + work_unit &unit = m_unit.next(); + + // determine how much to advance to hit the next bucket + scaninc = SCANLINES_PER_BUCKET - uint32_t(curscan) % SCANLINES_PER_BUCKET; + + // fill in the work unit basics + unit.primitive = &primitive; + unit.count_next = std::min(v3yclip - curscan, scaninc); + unit.scanline = curscan; + unit.previtem = m_unit_bucket[bucketnum]; + m_unit_bucket[bucketnum] = unit_index; + + // iterate over extents + for (int extnum = 0; extnum < unit.count_next; extnum++) + { + extent_t const &srcextent = extents[(curscan + extnum) - startscanline]; + int32_t istartx = srcextent.startx, istopx = srcextent.stopx; + + // apply left/right clipping + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + istartx = std::max(istartx, cliprect.left()); + istartx = std::min(istartx, cliprect.right() + 1); + istopx = std::max(istopx, cliprect.left()); + istopx = std::min(istopx, cliprect.right() + 1); + } + + // set the extent and update the total pixel count + extent_t &extent = unit.extent[extnum]; + extent.startx = istartx; + extent.stopx = istopx; + + // fill in the parameters for the extent + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + { + extent.param[paramnum].start = srcextent.param[paramnum].start; + extent.param[paramnum].dpdx = srcextent.param[paramnum].dpdx; + } + extent.userdata = srcextent.userdata; + + if (istartx < istopx) + pixels += istopx - istartx; + 
else if (istopx < istartx) + pixels += istartx - istopx; + } + } + + // enqueue the work items + queue_items(startunit); + + // return the total number of pixels in the object + m_triangles++; + m_pixels += pixels; + return pixels; +} + + +//------------------------------------------------- +// render_polygon - render a single polygon up +// to 32 vertices +//------------------------------------------------- + +template +template +uint32_t poly_manager::render_polygon(rectangle const &cliprect, render_delegate callback, vertex_t const *v) +{ + // determine min/max Y vertices + BaseType minx = v[0].x; + BaseType maxx = v[0].x; + int minv = 0; + int maxv = 0; + for (int vertnum = 1; vertnum < NumVerts; vertnum++) + { + if (v[vertnum].y < v[minv].y) + minv = vertnum; + else if (v[vertnum].y > v[maxv].y) + maxv = vertnum; + minx = std::min(minx, v[vertnum].x); + maxx = std::max(maxx, v[vertnum].x); + } + + // determine start/end scanlines + int32_t miny = round_coordinate(v[minv].y); + int32_t maxy = round_coordinate(v[maxv].y); + + // clip coordinates + int32_t minyclip = miny; + int32_t maxyclip = maxy; + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + minyclip = std::max(minyclip, cliprect.top()); + maxyclip = std::min(maxyclip, cliprect.bottom() + 1); + if (maxyclip - minyclip <= 0) + return 0; + } + + // allocate a new primitive + primitive_info &primitive = primitive_alloc(round_coordinate(minx), round_coordinate(maxx), minyclip, maxyclip, callback); + + // walk forward to build up the forward edge list + struct poly_edge + { + poly_edge *next; // next edge in sequence + int index; // index of this edge + vertex_t const *v1; // pointer to first vertex + vertex_t const *v2; // pointer to second vertex + BaseType dxdy; // dx/dy along the edge + std::array dpdy; // per-parameter dp/dy values + }; + poly_edge fedgelist[NumVerts - 1]; + poly_edge *edgeptr = &fedgelist[0]; + for (int curv = minv; curv != maxv; curv = (curv == NumVerts - 1) ? 
0 : (curv + 1)) + { + // set the two vertices + edgeptr->v1 = &v[curv]; + edgeptr->v2 = &v[(curv == NumVerts - 1) ? 0 : (curv + 1)]; + + // if horizontal, skip altogether + if (edgeptr->v1->y == edgeptr->v2->y) + continue; + + // need dx/dy always, and parameter deltas as necessary + BaseType ooy = BaseType(1.0) / (edgeptr->v2->y - edgeptr->v1->y); + edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy; + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy; + ++edgeptr; + } + + // walk backward to build up the backward edge list + poly_edge bedgelist[NumVerts - 1]; + edgeptr = &bedgelist[0]; + for (int curv = minv; curv != maxv; curv = (curv == 0) ? (NumVerts - 1) : (curv - 1)) + { + // set the two vertices + edgeptr->v1 = &v[curv]; + edgeptr->v2 = &v[(curv == 0) ? (NumVerts - 1) : (curv - 1)]; + + // if horizontal, skip altogether + if (edgeptr->v1->y == edgeptr->v2->y) + continue; + + // need dx/dy always, and parameter deltas as necessary + BaseType ooy = BaseType(1.0) / (edgeptr->v2->y - edgeptr->v1->y); + edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy; + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy; + ++edgeptr; + } + + // determine which list is left/right: + // if the first vertex is shared, compare the slopes + // if the first vertex is not shared, compare the X coordinates + poly_edge const *ledge, *redge; + if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) || + (fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x)) + { + ledge = fedgelist; + redge = bedgelist; + } + else + { + ledge = bedgelist; + redge = fedgelist; + } + + // compute the X extents for each scanline + int32_t pixels = 0; + uint32_t startunit = m_unit.count(); + int32_t scaninc = 1; + for (int32_t curscan = minyclip; curscan < 
maxyclip; curscan += scaninc) + { + uint32_t bucketnum = (uint32_t(curscan) / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS; + uint32_t unit_index = m_unit.count(); + work_unit &unit = m_unit.next(); + + // determine how much to advance to hit the next bucket + scaninc = SCANLINES_PER_BUCKET - uint32_t(curscan) % SCANLINES_PER_BUCKET; + + // fill in the work unit basics + unit.primitive = &primitive; + unit.count_next = std::min(maxyclip - curscan, scaninc); + unit.scanline = curscan; + unit.previtem = m_unit_bucket[bucketnum]; + m_unit_bucket[bucketnum] = unit_index; + + // iterate over extents + for (int extnum = 0; extnum < unit.count_next; extnum++) + { + // compute the ending X based on which part of the triangle we're in + BaseType fully = BaseType(curscan + extnum) + BaseType(0.5); + while (fully > ledge->v2->y && fully < v[maxv].y) + ++ledge; + while (fully > redge->v2->y && fully < v[maxv].y) + ++redge; + BaseType startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy; + BaseType stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy; + + // clamp to full pixels + int32_t istartx = round_coordinate(startx); + int32_t istopx = round_coordinate(stopx); + + // compute parameter starting points and deltas + extent_t &extent = unit.extent[extnum]; + if (ParamCount > 0) + { + BaseType ldy = fully - ledge->v1->y; + BaseType rdy = fully - redge->v1->y; + BaseType oox = BaseType(1.0) / (stopx - startx); + + // iterate over parameters + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + { + BaseType lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum]; + BaseType rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum]; + BaseType dpdx = (rparam - lparam) * oox; + + extent.param[paramnum].start = lparam;// - (BaseType(istartx) + 0.5f) * dpdx; + extent.param[paramnum].dpdx = dpdx; + } + } + + // apply left/right clipping + if (!(Flags & POLY_FLAG_NO_CLIPPING)) + { + if (istartx < cliprect.left()) + { + for (int paramnum = 0; paramnum < 
ParamCount; paramnum++) + extent.param[paramnum].start += (cliprect.left() - istartx) * extent.param[paramnum].dpdx; + istartx = cliprect.left(); + } + if (istopx > cliprect.right()) + istopx = cliprect.right() + 1; + } + + // set the extent and update the total pixel count + if (istartx >= istopx) + istartx = istopx = 0; + extent.startx = istartx; + extent.stopx = istopx; + pixels += istopx - istartx; + } + } + + // enqueue the work items + queue_items(startunit); + + // return the total number of pixels in the polygon + m_polygons++; + m_pixels += pixels; + return pixels; +} + + +//------------------------------------------------- +// zclip_if_less - clip a polygon using p[0] as +// a z coordinate +//------------------------------------------------- + +template +template +int poly_manager::zclip_if_less(int numverts, vertex_t const *v, vertex_t *outv, BaseType clipval) +{ + bool prevclipped = (v[numverts - 1].p[0] < clipval); + vertex_t *nextout = outv; + + // iterate over vertices + for (int vertnum = 0; vertnum < numverts; vertnum++) + { + bool thisclipped = (v[vertnum].p[0] < clipval); + + // if we switched from clipped to non-clipped, interpolate a vertex + if (thisclipped != prevclipped) + { + vertex_t const &v1 = v[(vertnum == 0) ? 
(numverts - 1) : (vertnum - 1)]; + vertex_t const &v2 = v[vertnum]; + BaseType frac = (clipval - v1.p[0]) / (v2.p[0] - v1.p[0]); + nextout->x = v1.x + frac * (v2.x - v1.x); + nextout->y = v1.y + frac * (v2.y - v1.y); + for (int paramnum = 0; paramnum < ParamCount; paramnum++) + nextout->p[paramnum] = v1.p[paramnum] + frac * (v2.p[paramnum] - v1.p[paramnum]); + ++nextout; + } + + // if this vertex is not clipped, copy it in + if (!thisclipped) + *nextout++ = v[vertnum]; + + // remember the last state + prevclipped = thisclipped; + } + return nextout - outv; +} + + +template +struct frustum_clip_vertex +{ + BaseType x, y, z, w; // A 3d coordinate already transformed by a projection matrix + std::array p; // Additional parameters to clip +}; + + +template +int frustum_clip_w(frustum_clip_vertex const *v, int num_vertices, frustum_clip_vertex *out) +{ + if (num_vertices <= 0) + return 0; + + const BaseType W_PLANE = 0.000001f; + + frustum_clip_vertex clipv[10]; + int clip_verts = 0; + + int previ = num_vertices - 1; + + for (int i=0; i < num_vertices; i++) + { + int v1_side = (v[i].w < W_PLANE) ? -1 : 1; + int v2_side = (v[previ].w < W_PLANE) ? 
-1 : 1; + + if ((v1_side * v2_side) < 0) // edge goes through W plane + { + // insert vertex at intersection point + BaseType wdiv = v[previ].w - v[i].w; + if (wdiv == 0.0f) // 0 edge means degenerate polygon + return 0; + + BaseType t = fabs((W_PLANE - v[previ].w) / wdiv); + + clipv[clip_verts].x = v[previ].x + ((v[i].x - v[previ].x) * t); + clipv[clip_verts].y = v[previ].y + ((v[i].y - v[previ].y) * t); + clipv[clip_verts].z = v[previ].z + ((v[i].z - v[previ].z) * t); + clipv[clip_verts].w = v[previ].w + ((v[i].w - v[previ].w) * t); + + // Interpolate the rest of the parameters + for (int pi = 0; pi < MaxParams; pi++) + clipv[clip_verts].p[pi] = v[previ].p[pi] + ((v[i].p[pi] - v[previ].p[pi]) * t); + + ++clip_verts; + } + if (v1_side > 0) // current point is inside + { + clipv[clip_verts] = v[i]; + ++clip_verts; + } + + previ = i; + } + + memcpy(&out[0], &clipv[0], sizeof(out[0]) * clip_verts); + return clip_verts; +} + + +template +int frustum_clip(frustum_clip_vertex const *v, int num_vertices, frustum_clip_vertex *out, int axis, int sign) +{ + if (num_vertices <= 0) + return 0; + + frustum_clip_vertex clipv[10]; + int clip_verts = 0; + + int previ = num_vertices - 1; + + for (int i=0; i < num_vertices; i++) + { + int v1_side, v2_side; + BaseType* v1a = (BaseType*)&v[i]; + BaseType* v2a = (BaseType*)&v[previ]; + + BaseType v1_axis, v2_axis; + + if (sign) // +axis + { + v1_axis = v1a[axis]; + v2_axis = v2a[axis]; + } + else // -axis + { + v1_axis = -v1a[axis]; + v2_axis = -v2a[axis]; + } + + v1_side = (v1_axis <= v[i].w) ? 1 : -1; + v2_side = (v2_axis <= v[previ].w) ? 
1 : -1; + + if ((v1_side * v2_side) < 0) // edge goes through W plane + { + // insert vertex at intersection point + BaseType wdiv = ((v[previ].w - v2_axis) - (v[i].w - v1_axis)); + + if (wdiv == 0.0f) // 0 edge means degenerate polygon + return 0; + + BaseType t = fabs((v[previ].w - v2_axis) / wdiv); + + clipv[clip_verts].x = v[previ].x + ((v[i].x - v[previ].x) * t); + clipv[clip_verts].y = v[previ].y + ((v[i].y - v[previ].y) * t); + clipv[clip_verts].z = v[previ].z + ((v[i].z - v[previ].z) * t); + clipv[clip_verts].w = v[previ].w + ((v[i].w - v[previ].w) * t); + + // Interpolate the rest of the parameters + for (int pi = 0; pi < MaxParams; pi++) + clipv[clip_verts].p[pi] = v[previ].p[pi] + ((v[i].p[pi] - v[previ].p[pi]) * t); + + ++clip_verts; + } + if (v1_side > 0) // current point is inside + { + clipv[clip_verts] = v[i]; + ++clip_verts; + } + + previ = i; + } + + memcpy(&out[0], &clipv[0], sizeof(out[0]) * clip_verts); + return clip_verts; +} + + +template +int frustum_clip_all(frustum_clip_vertex *clip_vert, int num_vertices, frustum_clip_vertex *out) +{ + num_vertices = frustum_clip_w(clip_vert, num_vertices, clip_vert); + num_vertices = frustum_clip(clip_vert, num_vertices, clip_vert, 0, 0); // W <= -X + num_vertices = frustum_clip(clip_vert, num_vertices, clip_vert, 0, 1); // W <= +X + num_vertices = frustum_clip(clip_vert, num_vertices, clip_vert, 1, 0); // W <= -Y + num_vertices = frustum_clip(clip_vert, num_vertices, clip_vert, 1, 1); // W <= +X + num_vertices = frustum_clip(clip_vert, num_vertices, clip_vert, 2, 0); // W <= -Z + num_vertices = frustum_clip(clip_vert, num_vertices, clip_vert, 2, 1); // W <= +Z + out = clip_vert; + return num_vertices; +} + + +#endif // MAME_DEVICES_VIDEO_POLY_H diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/emu.h b/waterbox/ares64/ares/thirdparty/mame/emu/emu.h new file mode 100644 index 0000000000..b68cf64681 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/emu.h @@ -0,0 +1,115 @@ +// 
license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + emu.h + + Core header file to be included by most files. + + NOTE: The contents of this file are designed to meet the needs of + drivers and devices. In addition to this file, you will also need + to include the headers of any CPUs or other devices that are required. + + If you find yourself needing something outside of this file in a + driver or device, think carefully about what you are doing. + +***************************************************************************/ + +#ifndef __EMU_H__ +#define __EMU_H__ + +#include +#include +#include +#include +#include +#include +#include + +// core emulator headers -- must be first (profiler needs attotime, attotime needs xtal) +#include "emucore.h" +#include "osdcore.h" +#include "eminline.h" +#if !defined(MAME_RDP) +#include "xtal.h" +#include "attotime.h" +#include "profiler.h" + +// http interface helpers +#include "http.h" +#endif + +// commonly-referenced utilities imported from lib/util +#include "corealloc.h" +#if !defined(MAME_RDP) +#include "palette.h" +#endif + +// emulator-specific utilities +#include "hash.h" +#if !defined(MAME_RDP) +#include "fileio.h" +#endif +#include "delegate.h" +#if !defined(MAME_RDP) +#include "devdelegate.h" + +// memory and address spaces +#include "emumem.h" + +// machine-wide utilities +#include "romentry.h" +#include "save.h" + +// I/O +#include "input.h" +#include "ioport.h" +#include "output.h" + +// devices and callbacks +#include "device.h" +#include "devfind.h" +#include "addrmap.h" // Needs optional_device<> and required_device<> +#include "distate.h" +#include "dimemory.h" +#include "dipalette.h" +#include "digfx.h" +#include "diimage.h" +#include "dislot.h" +#include "disound.h" +#include "divideo.h" +#include "dinvram.h" +#include "schedule.h" +#include "dinetwork.h" + +// machine and driver configuration +#include "mconfig.h" 
+#include "gamedrv.h" +#include "parameters.h" + +// the running machine +#include "main.h" +#include "machine.h" +#include "driver.h" + +// common device interfaces +#include "diexec.h" +#include "devcpu.h" + +// video-related +#include "drawgfx.h" +#include "video.h" + +// sound-related +#include "sound.h" + +// generic helpers +#include "devcb.h" +#include "bookkeeping.h" +#include "video/generic.h" + +// member templates that don't like incomplete types +#include "device.ipp" +#endif + +#endif // __EMU_H__ diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/emucore.cpp b/waterbox/ares64/ares/thirdparty/mame/emu/emucore.cpp new file mode 100644 index 0000000000..3228709db9 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/emucore.cpp @@ -0,0 +1,43 @@ +// license:BSD-3-Clause +// copyright-holders:Nicola Salmoria, Aaron Giles +/*************************************************************************** + + emucore.cpp + + Simple core functions that are defined in emucore.h and which may + need to be accessed by other MAME-related tools. + +****************************************************************************/ + +#include "emu.h" +#include "emucore.h" +#include "osdcore.h" + +emu_fatalerror::emu_fatalerror(util::format_argument_pack const &args) + : emu_fatalerror(0, args) +{ +#if !defined(MAME_RDP) + osd_break_into_debugger(m_text.c_str()); +#endif +} + +emu_fatalerror::emu_fatalerror(int _exitcode, util::format_argument_pack const &args) + : m_text(util::string_format(args)) + , m_code(_exitcode) +{ +} + + +#if !defined(MAME_RDP) +void report_bad_cast(const std::type_info &src_type, const std::type_info &dst_type) +{ + throw emu_fatalerror("Error: bad downcast<> or device<>. 
Tried to convert a %s to a %s, which are incompatible.\n", + src_type.name(), dst_type.name()); +} + +void report_bad_device_cast(const device_t *dev, const std::type_info &src_type, const std::type_info &dst_type) +{ + throw emu_fatalerror("Error: bad downcast<> or device<>. Tried to convert the device %s (%s) of type %s to a %s, which are incompatible.\n", + dev->tag(), dev->name(), src_type.name(), dst_type.name()); +} +#endif diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/emucore.h b/waterbox/ares64/ares/thirdparty/mame/emu/emucore.h new file mode 100644 index 0000000000..768fa8c728 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/emucore.h @@ -0,0 +1,424 @@ +// license:BSD-3-Clause +// copyright-holders:Nicola Salmoria, Aaron Giles +/*************************************************************************** + + emucore.h + + General core utilities and macros used throughout the emulator. + +***************************************************************************/ + +#ifndef MAME_EMU_EMUCORE_H +#define MAME_EMU_EMUCORE_H + +#pragma once + +// standard C includes +#include +#include +#include +#include +#include +#include + +// some cleanups for Solaris for things defined in stdlib.h +#if defined(__sun__) && defined(__svr4__) +#undef si_status +#undef WWORD +#endif + +// standard C++ includes +#include +#include +#include +#include + +// core system includes +#include "osdcomm.h" +#include "coretmpl.h" +#include "bitmap.h" +#include "endianness.h" +#include "strformat.h" +#include "vecstream.h" + +#include "emufwd.h" + + +//************************************************************************** +// COMPILER-SPECIFIC NASTINESS +//************************************************************************** + +// Suppress warnings about redefining the macro 'PPC' on LinuxPPC. +#undef PPC + +// Suppress warnings about redefining the macro 'ARM' on ARM. 
+#undef ARM + + + +//************************************************************************** +// FUNDAMENTAL TYPES +//************************************************************************** + +// explicitly sized integers +using osd::u8; +using osd::u16; +using osd::u32; +using osd::u64; +using osd::s8; +using osd::s16; +using osd::s32; +using osd::s64; + +// useful utility functions +using util::underlying_value; +using util::enum_value; +using util::make_bitmask; +using util::BIT; +using util::bitswap; +using util::iabs; +using util::string_format; + +using endianness_t = util::endianness; + +using util::BYTE_XOR_BE; +using util::BYTE_XOR_LE; +using util::BYTE4_XOR_BE; +using util::BYTE4_XOR_LE; +using util::WORD_XOR_BE; +using util::WORD_XOR_LE; +using util::BYTE8_XOR_BE; +using util::BYTE8_XOR_LE; +using util::WORD2_XOR_BE; +using util::WORD2_XOR_LE; +using util::DWORD_XOR_BE; +using util::DWORD_XOR_LE; + + +// pen_t is used to represent pixel values in bitmaps +typedef u32 pen_t; + + + +//************************************************************************** +// USEFUL COMPOSITE TYPES +//************************************************************************** + +// PAIR is an endian-safe union useful for representing 32-bit CPU registers +union PAIR +{ +#ifdef LSB_FIRST + struct { u8 l,h,h2,h3; } b; + struct { u16 l,h; } w; + struct { s8 l,h,h2,h3; } sb; + struct { s16 l,h; } sw; +#else + struct { u8 h3,h2,h,l; } b; + struct { s8 h3,h2,h,l; } sb; + struct { u16 h,l; } w; + struct { s16 h,l; } sw; +#endif + u32 d; + s32 sd; +}; + + +// PAIR16 is a 16-bit extension of a PAIR +union PAIR16 +{ +#ifdef LSB_FIRST + struct { u8 l,h; } b; + struct { s8 l,h; } sb; +#else + struct { u8 h,l; } b; + struct { s8 h,l; } sb; +#endif + u16 w; + s16 sw; +}; + + +// PAIR64 is a 64-bit extension of a PAIR +union PAIR64 +{ +#ifdef LSB_FIRST + struct { u8 l,h,h2,h3,h4,h5,h6,h7; } b; + struct { u16 l,h,h2,h3; } w; + struct { u32 l,h; } d; + struct { s8 
l,h,h2,h3,h4,h5,h6,h7; } sb; + struct { s16 l,h,h2,h3; } sw; + struct { s32 l,h; } sd; +#else + struct { u8 h7,h6,h5,h4,h3,h2,h,l; } b; + struct { u16 h3,h2,h,l; } w; + struct { u32 h,l; } d; + struct { s8 h7,h6,h5,h4,h3,h2,h,l; } sb; + struct { s16 h3,h2,h,l; } sw; + struct { s32 h,l; } sd; +#endif + u64 q; + s64 sq; +}; + + + +//************************************************************************** +// COMMON CONSTANTS +//************************************************************************** + +constexpr endianness_t ENDIANNESS_LITTLE = util::endianness::little; +constexpr endianness_t ENDIANNESS_BIG = util::endianness::big; +constexpr endianness_t ENDIANNESS_NATIVE = util::endianness::native; + + +// M_PI is not part of the C/C++ standards and is not present on +// strict ANSI compilers or when compiling under GCC with -ansi +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + + +/// \name Image orientation flags +/// \{ + +constexpr int ORIENTATION_FLIP_X = 0x0001; ///< Mirror horizontally (in the X direction) +constexpr int ORIENTATION_FLIP_Y = 0x0002; ///< Mirror vertically (in the Y direction) +constexpr int ORIENTATION_SWAP_XY = 0x0004; ///< Mirror along the top-left/bottom-right diagonal + +constexpr int ROT0 = 0; +constexpr int ROT90 = ORIENTATION_SWAP_XY | ORIENTATION_FLIP_X; ///< Rotate 90 degrees clockwise +constexpr int ROT180 = ORIENTATION_FLIP_X | ORIENTATION_FLIP_Y; ///< Rotate 180 degrees +constexpr int ROT270 = ORIENTATION_SWAP_XY | ORIENTATION_FLIP_Y; ///< Rotate 90 degrees anti-clockwise (270 degrees clockwise) + +/// \} + + +// these are UTF-8 encoded strings for common characters +#define UTF8_NBSP "\xc2\xa0" /* non-breaking space */ + +#define UTF8_MULTIPLY "\xc3\x97" /* multiplication sign */ +#define UTF8_DIVIDE "\xc3\xb7" /* division sign */ +#define UTF8_SQUAREROOT "\xe2\x88\x9a" /* square root symbol */ +#define UTF8_PLUSMINUS "\xc2\xb1" /* plusminus symbol */ + +#define UTF8_POW_2 "\xc2\xb2" /* superscript 2 */ +#define 
UTF8_POW_X "\xcb\xa3" /* superscript x */ +#define UTF8_POW_Y "\xca\xb8" /* superscript y */ +#define UTF8_PRIME "\xca\xb9" /* prime symbol */ +#define UTF8_DEGREES "\xc2\xb0" /* degrees symbol */ + +#define UTF8_SMALL_PI "\xcf\x80" /* Greek small letter pi */ +#define UTF8_CAPITAL_SIGMA "\xce\xa3" /* Greek capital letter sigma */ +#define UTF8_CAPITAL_DELTA "\xce\x94" /* Greek capital letter delta */ + +#define UTF8_MACRON "\xc2\xaf" /* macron symbol */ +#define UTF8_NONSPACE_MACRON "\xcc\x84" /* nonspace macron, use after another char */ + +#define a_RING "\xc3\xa5" /* small a with a ring */ +#define a_UMLAUT "\xc3\xa4" /* small a with an umlaut */ +#define o_UMLAUT "\xc3\xb6" /* small o with an umlaut */ +#define u_UMLAUT "\xc3\xbc" /* small u with an umlaut */ +#define e_ACUTE "\xc3\xa9" /* small e with an acute */ +#define n_TILDE "\xc3\xb1" /* small n with a tilde */ + +#define A_RING "\xc3\x85" /* capital A with a ring */ +#define A_UMLAUT "\xc3\x84" /* capital A with an umlaut */ +#define O_UMLAUT "\xc3\x96" /* capital O with an umlaut */ +#define U_UMLAUT "\xc3\x9c" /* capital U with an umlaut */ +#define E_ACUTE "\xc3\x89" /* capital E with an acute */ +#define N_TILDE "\xc3\x91" /* capital N with a tilde */ + +#define UTF8_LEFT "\xe2\x86\x90" /* cursor left */ +#define UTF8_RIGHT "\xe2\x86\x92" /* cursor right */ +#define UTF8_UP "\xe2\x86\x91" /* cursor up */ +#define UTF8_DOWN "\xe2\x86\x93" /* cursor down */ + + + +//************************************************************************** +// COMMON MACROS +//************************************************************************** + +// macro for defining a copy constructor and assignment operator to prevent copying +#define DISABLE_COPYING(TYPE) \ + TYPE(const TYPE &) = delete; \ + TYPE &operator=(const TYPE &) = delete + +// macro for declaring enumeration operators that increment/decrement like plain old C +#define DECLARE_ENUM_INCDEC_OPERATORS(TYPE) \ +inline TYPE &operator++(TYPE &value) { 
return value = TYPE(std::underlying_type_t(value) + 1); } \ +inline TYPE &operator--(TYPE &value) { return value = TYPE(std::underlying_type_t(value) - 1); } \ +inline TYPE operator++(TYPE &value, int) { TYPE const old(value); ++value; return old; } \ +inline TYPE operator--(TYPE &value, int) { TYPE const old(value); --value; return old; } + +// macro for declaring bitwise operators for an enumerated type +#define DECLARE_ENUM_BITWISE_OPERATORS(TYPE) \ +constexpr TYPE operator~(TYPE value) { return TYPE(~std::underlying_type_t(value)); } \ +constexpr TYPE operator&(TYPE a, TYPE b) { return TYPE(std::underlying_type_t(a) & std::underlying_type_t(b)); } \ +constexpr TYPE operator|(TYPE a, TYPE b) { return TYPE(std::underlying_type_t(a) | std::underlying_type_t(b)); } \ +inline TYPE &operator&=(TYPE &a, TYPE b) { return a = a & b; } \ +inline TYPE &operator|=(TYPE &a, TYPE b) { return a = a | b; } + + +// this macro passes an item followed by a string version of itself as two consecutive parameters +#define NAME(x) x, #x + +// this macro wraps a function 'x' and can be used to pass a function followed by its name +#define FUNC(x) &x, #x + + +// macros to convert radians to degrees and degrees to radians +template constexpr auto RADIAN_TO_DEGREE(T const &x) { return (180.0 / M_PI) * x; } +template constexpr auto DEGREE_TO_RADIAN(T const &x) { return (M_PI / 180.0) * x; } + + +//************************************************************************** +// EXCEPTION CLASSES +//************************************************************************** + +// emu_exception is the base class for all emu-related exceptions +class emu_exception : public std::exception { }; + + +// emu_fatalerror is a generic fatal exception that provides an error string +class emu_fatalerror : public emu_exception +{ +public: + emu_fatalerror(util::format_argument_pack const &args); + emu_fatalerror(int _exitcode, util::format_argument_pack const &args); + + template + emu_fatalerror(Format 
const &fmt, Params &&... args) + : emu_fatalerror(static_cast const &>(util::make_format_argument_pack(fmt, std::forward(args)...))) + { + } + template + emu_fatalerror(int _exitcode, Format const &fmt, Params &&... args) + : emu_fatalerror(_exitcode, static_cast const &>(util::make_format_argument_pack(fmt, std::forward(args)...))) + { + } + + virtual char const *what() const noexcept override { return m_text.c_str(); } + int exitcode() const noexcept { return m_code; } + +private: + std::string m_text; + int m_code; +}; + +class tag_add_exception +{ +public: + tag_add_exception(const char *tag) : m_tag(tag) { } + const char *tag() const { return m_tag.c_str(); } +private: + std::string m_tag; +}; + + +//************************************************************************** +// CASTING TEMPLATES +//************************************************************************** + +[[noreturn]] void report_bad_cast(const std::type_info &src_type, const std::type_info &dst_type); +[[noreturn]] void report_bad_device_cast(const device_t *dev, const std::type_info &src_type, const std::type_info &dst_type); + +template +inline void report_bad_cast(Source *src) +{ + if constexpr (std::is_base_of_v) + { + if (src) report_bad_device_cast(src, typeid(Source), typeid(Dest)); + else report_bad_cast(typeid(Source), typeid(Dest)); + } + else + { + device_t const *dev(dynamic_cast(src)); + if (dev) report_bad_device_cast(dev, typeid(Source), typeid(Dest)); + else report_bad_cast(typeid(Source), typeid(Dest)); + } +} + +// template function for casting from a base class to a derived class that is checked +// in debug builds and fast in release builds +template +inline Dest downcast(Source *src) +{ +#if defined(MAME_DEBUG) + Dest const chk(dynamic_cast(src)); + if (chk != src) report_bad_cast, Source>(src); +#endif + return static_cast(src); +} + +template +inline Dest downcast(Source &src) +{ +#if defined(MAME_DEBUG) + std::remove_reference_t *const chk(dynamic_cast *>(&src)); + 
if (chk != &src) report_bad_cast, Source>(&src); +#endif + return static_cast(src); +} + + + +//************************************************************************** +// INLINE FUNCTIONS +//************************************************************************** + +template +[[noreturn]] inline void fatalerror(T &&... args) +{ + throw emu_fatalerror(std::forward(args)...); +} + + +// convert a series of 32 bits into a float +inline float u2f(u32 v) +{ + union { + float ff; + u32 vv; + } u; + u.vv = v; + return u.ff; +} + + +// convert a float into a series of 32 bits +inline u32 f2u(float f) +{ + union { + float ff; + u32 vv; + } u; + u.ff = f; + return u.vv; +} + + +// convert a series of 64 bits into a double +inline double u2d(u64 v) +{ + union { + double dd; + u64 vv; + } u; + u.vv = v; + return u.dd; +} + + +// convert a double into a series of 64 bits +inline u64 d2u(double d) +{ + union { + double dd; + u64 vv; + } u; + u.dd = d; + return u.vv; +} + +#endif // MAME_EMU_EMUCORE_H diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/emufwd.h b/waterbox/ares64/ares/thirdparty/mame/emu/emufwd.h new file mode 100644 index 0000000000..8b8e59b925 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/emufwd.h @@ -0,0 +1,255 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/********************************************************************** + * emufwd.h + * + * Forward declarations for MAME famework. + * + * Please place forward declarations here rather than littering them + * throughout headers in src/emu. It makes it much easier to update + * them and remove obsolete ones. 
+ **********************************************************************/ +#ifndef MAME_EMU_EMUFWD_H +#define MAME_EMU_EMUFWD_H + +#pragma once + +#include + + +//---------------------------------- +// 3rdparty +//---------------------------------- + +// declared in expat.h +struct XML_ParserStruct; + + + +//---------------------------------- +// osd +//---------------------------------- + +// declared in modules/output/output_module.h +class output_module; + +// declared in osdepend.h +class osd_font; +class osd_interface; + + + +//---------------------------------- +// lib/util +//---------------------------------- + +// declared in aviio.h +class avi_file; + +// declared in chd.h +class chd_file; + +// declared in unzip.h +namespace util { class archive_file; } + +// declared in xmlfile.h +namespace util::xml { class data_node; class file; } + + + +//---------------------------------- +// emu +//---------------------------------- + +// declared in addrmap.h +class address_map; +class address_map_entry; + +// declared in bookkeeping.h +class bookkeeping_manager; + +// declared in config.h +enum class config_type : int; +enum class config_level : int; +class configuration_manager; + +// declared in crsshair.h +class crosshair_manager; + +// declared in debug/debugcmd.h +class debugger_commands; + +// declared in debug/debugcon.h +class debugger_console; + +// declared in debug/debugcpu.h +class debugger_cpu; +class device_debug; + +// declared in debug/debugvw.h +class debug_view; +class debug_view_manager; + +// declared in debug/express.h +class parsed_expression; +class symbol_table; + +// declared in debug/points.h +class debug_breakpoint; +class debug_watchpoint; +class debug_registerpoint; + +// declared in debugger.h +class debugger_manager; + +// declared in devcb.h +class devcb_base; +template DefaultMask> class devcb_write; + +// declared in devfind.h +class finder_base; +template class device_finder; + +// declared in device.h +class device_interface; 
+class device_t; + +// declared in didisasm.h +class device_disasm_interface; + +// declared in diexec.h +class device_execute_interface; + +// declared in digfx.h +struct gfx_decode_entry; + +// declared in diimage.h +class device_image_interface; + +// declared in dimemory.h +class device_memory_interface; + +// declared in dipalette.h +class device_palette_interface; + +// declared in distate.h +class device_state_interface; + +// declared in drawgfx.h +class gfx_element; + +// declared in driver.h +class driver_device; + +// declared in emumem.h +class address_space; +class memory_bank; +class memory_manager; +class memory_region; +class memory_share; +class memory_view; + +// declared in emuopts.h +class emu_options; + +// declared in gamedrv.h +class game_driver; + +// declared in input.h +class input_manager; + +// declared in inputdev.h +class input_class; +class input_device; +class input_device_item; + +// declared in image.h +class image_manager; + +// declared in ioport.h +class analog_field; +struct input_device_default; +class ioport_field; +struct ioport_field_live; +class ioport_list; +class ioport_manager; +class ioport_port; +struct ioport_port_live; + +// declared in machine.h +class running_machine; + +// declared in mconfig.h +namespace emu::detail { class machine_config_replace; } +class machine_config; + +// declared in natkeyboard.h +class natural_keyboard; + +// declared in network.h +class network_manager; + +// declared in output.h +class output_manager; + +// declared in render.h +class render_container; +class render_manager; +class render_target; +class render_texture; + +// declared in rendfont.h +class render_font; + +// declared in rendlay.h +class layout_element; +class layout_view_item; +class layout_view; +class layout_file; + +// declared in romentry.h +class rom_entry; + +// declared in romload.h +class rom_load_manager; + +// declared in schedule.h +class device_scheduler; +class emu_timer; + +// declared in screen.h +class 
screen_device; + +// declared in softlist.h +class software_info; +class software_part; + +// declared in softlist_dev.h +class software_list_device; +class software_list_loader; + +// declared in sound.h +class sound_manager; +class sound_stream; + +// declared in speaker.h +class speaker_device; + +// declared in tilemap.h +class tilemap_device; +class tilemap_manager; +class tilemap_t; + +// declared in ui/uimain.h +class ui_manager; + +// declared in uiinput.h +class ui_input_manager; + +// declared in validity.h +class validity_checker; + +// declared in video.h +class video_manager; + +#endif // MAME_EMU_EMUFWD_H diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbgen.h b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbgen.h new file mode 100644 index 0000000000..e460208d9f --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbgen.h @@ -0,0 +1,445 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb, Ryan Holtz +/*************************************************************************** + + rgbgen.h + + General RGB utilities. 
+ +***************************************************************************/ + +#ifndef MAME_EMU_VIDEO_RGBGEN_H +#define MAME_EMU_VIDEO_RGBGEN_H + + +/*************************************************************************** + TYPE DEFINITIONS +***************************************************************************/ + +class rgbaint_t +{ +public: + rgbaint_t(): m_a(0), m_r(0), m_g(0), m_b(0) { } + explicit rgbaint_t(u32 rgba) { set(rgba); } + rgbaint_t(s32 a, s32 r, s32 g, s32 b) { set(a, r, g, b); } + explicit rgbaint_t(const rgb_t& rgba) { set(rgba); } + + rgbaint_t(const rgbaint_t& other) = default; + rgbaint_t &operator=(const rgbaint_t& other) = default; + + void set(const rgbaint_t& other) { set(other.m_a, other.m_r, other.m_g, other.m_b); } + void set(u32 rgba) { set((rgba >> 24) & 0xff, (rgba >> 16) & 0xff, (rgba >> 8) & 0xff, rgba & 0xff); } + void set(s32 a, s32 r, s32 g, s32 b) + { + m_a = a; + m_r = r; + m_g = g; + m_b = b; + } + void set(const rgb_t& rgba) { set(rgba.a(), rgba.r(), rgba.g(), rgba.b()); } + // This function sets all elements to the same val + void set_all(const s32& val) { set(val, val, val, val); } + // This function zeros all elements + void zero() { set_all(0); } + // This function zeros only the alpha element + void zero_alpha() { m_a = 0; } + + rgb_t to_rgba() const { return rgb_t(get_a(), get_r(), get_g(), get_b()); } + + rgb_t to_rgba_clamp() const + { + const u8 a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a; + const u8 r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r; + const u8 g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g; + const u8 b = (m_b < 0) ? 0 : (m_b > 255) ? 
255 : m_b; + return rgb_t(a, r, g, b); + } + + void set_a16(const s32 value) { m_a = value; } + void set_a(const s32 value) { m_a = value; } + void set_r(const s32 value) { m_r = value; } + void set_g(const s32 value) { m_g = value; } + void set_b(const s32 value) { m_b = value; } + + u8 get_a() const { return u8(u32(m_a)); } + u8 get_r() const { return u8(u32(m_r)); } + u8 get_g() const { return u8(u32(m_g)); } + u8 get_b() const { return u8(u32(m_b)); } + + s32 get_a32() const { return m_a; } + s32 get_r32() const { return m_r; } + s32 get_g32() const { return m_g; } + s32 get_b32() const { return m_b; } + + // These selects return an rgbaint_t with all fields set to the element choosen (a, r, g, or b) + rgbaint_t select_alpha32() const { return rgbaint_t(get_a32(), get_a32(), get_a32(), get_a32()); } + rgbaint_t select_red32() const { return rgbaint_t(get_r32(), get_r32(), get_r32(), get_r32()); } + rgbaint_t select_green32() const { return rgbaint_t(get_g32(), get_g32(), get_g32(), get_g32()); } + rgbaint_t select_blue32() const { return rgbaint_t(get_b32(), get_b32(), get_b32(), get_b32()); } + + inline void add(const rgbaint_t& color) + { + add_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); + } + + inline void add_imm(const s32 imm) + { + add_imm_rgba(imm, imm, imm, imm); + } + + inline void add_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_a += a; + m_r += r; + m_g += g; + m_b += b; + } + + inline void sub(const rgbaint_t& color) + { + sub_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); + } + + inline void sub_imm(const s32 imm) + { + sub_imm_rgba(imm, imm, imm, imm); + } + + inline void sub_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_a -= a; + m_r -= r; + m_g -= g; + m_b -= b; + } + + inline void subr(const rgbaint_t& color) + { + subr_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); + } + + inline void subr_imm(const s32 imm) + { + subr_imm_rgba(imm, imm, imm, imm); + } + + inline void 
subr_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_a = a - m_a; + m_r = r - m_r; + m_g = g - m_g; + m_b = b - m_b; + } + + inline void mul(const rgbaint_t& color) + { + mul_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); + } + + inline void mul_imm(const s32 imm) + { + mul_imm_rgba(imm, imm, imm, imm); + } + + inline void mul_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_a *= a; + m_r *= r; + m_g *= g; + m_b *= b; + } + + inline void shl(const rgbaint_t& shift) + { + m_a <<= shift.m_a; + m_r <<= shift.m_r; + m_g <<= shift.m_g; + m_b <<= shift.m_b; + } + + inline void shl_imm(const u8 shift) + { + if (shift == 0) + return; + + m_a <<= shift; + m_r <<= shift; + m_g <<= shift; + m_b <<= shift; + } + + inline void shr(const rgbaint_t& shift) + { + m_a = s32(u32(m_a) >> shift.m_a); + m_r = s32(u32(m_r) >> shift.m_r); + m_g = s32(u32(m_g) >> shift.m_g); + m_b = s32(u32(m_b) >> shift.m_b); + } + + inline void shr_imm(const u8 shift) + { + if (shift == 0) + return; + + m_a = s32(u32(m_a) >> shift); + m_r = s32(u32(m_r) >> shift); + m_g = s32(u32(m_g) >> shift); + m_b = s32(u32(m_b) >> shift); + } + + inline void sra(const rgbaint_t& shift) + { + m_a >>= shift.m_a; + if (m_a & (1 << (31 - shift.m_a))) + m_a |= ~0 << (32 - shift.m_a); + + m_r >>= shift.m_r; + if (m_r & (1 << (31 - shift.m_r))) + m_r |= ~0 << (32 - shift.m_r); + + m_g >>= shift.m_g; + if (m_g & (1 << (31 - shift.m_g))) + m_g |= ~0 << (32 - shift.m_g); + + m_b >>= shift.m_b; + if (m_b & (1 << (31 - shift.m_b))) + m_b |= ~0 << (32 - shift.m_b); + } + + inline void sra_imm(const u8 shift) + { + const u32 high_bit = 1 << (31 - shift); + const u32 high_mask = ~0 << (32 - shift); + + m_a >>= shift; + if (m_a & high_bit) + m_a |= high_mask; + + m_r >>= shift; + if (m_r & high_bit) + m_r |= high_mask; + + m_g >>= shift; + if (m_g & high_bit) + m_g |= high_mask; + + m_b >>= shift; + if (m_b & high_bit) + m_b |= high_mask; + } + + void or_reg(const rgbaint_t& color) { 
or_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } + void and_reg(const rgbaint_t& color) { and_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } + void xor_reg(const rgbaint_t& color) { xor_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } + + void andnot_reg(const rgbaint_t& color) { and_imm_rgba(~color.m_a, ~color.m_r, ~color.m_g, ~color.m_b); } + + void or_imm(s32 imm) { or_imm_rgba(imm, imm, imm, imm); } + void and_imm(s32 imm) { and_imm_rgba(imm, imm, imm, imm); } + void xor_imm(s32 imm) { xor_imm_rgba(imm, imm, imm, imm); } + + inline void or_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_a |= a; + m_r |= r; + m_g |= g; + m_b |= b; + } + + inline void and_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_a &= a; + m_r &= r; + m_g &= g; + m_b &= b; + } + + inline void xor_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_a ^= a; + m_r ^= r; + m_g ^= g; + m_b ^= b; + } + + inline void clamp_and_clear(const u32 sign) + { + if (m_a & sign) m_a = 0; + if (m_r & sign) m_r = 0; + if (m_g & sign) m_g = 0; + if (m_b & sign) m_b = 0; + + clamp_to_uint8(); + } + + inline void clamp_to_uint8() + { + m_a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a; + m_r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r; + m_g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g; + m_b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b; + } + + inline void sign_extend(const u32 compare, const u32 sign) + { + if ((m_a & compare) == compare) + m_a |= sign; + + if ((m_r & compare) == compare) + m_r |= sign; + + if ((m_g & compare) == compare) + m_g |= sign; + + if ((m_b & compare) == compare) + m_b |= sign; + } + + inline void min(const s32 value) + { + m_a = (m_a > value) ? value : m_a; + m_r = (m_r > value) ? value : m_r; + m_g = (m_g > value) ? value : m_g; + m_b = (m_b > value) ? value : m_b; + } + + inline void max(const s32 value) + { + m_a = (m_a < value) ? value : m_a; + m_r = (m_r < value) ? value : m_r; + m_g = (m_g < value) ? 
value : m_g; + m_b = (m_b < value) ? value : m_b; + } + + void blend(const rgbaint_t& other, u8 factor); + + void scale_and_clamp(const rgbaint_t& scale); + void scale_imm_and_clamp(const s32 scale); + void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2); + void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other); + + void cmpeq(const rgbaint_t& value) { cmpeq_imm_rgba(value.m_a, value.m_r, value.m_g, value.m_b); } + void cmpgt(const rgbaint_t& value) { cmpgt_imm_rgba(value.m_a, value.m_r, value.m_g, value.m_b); } + void cmplt(const rgbaint_t& value) { cmplt_imm_rgba(value.m_a, value.m_r, value.m_g, value.m_b); } + + void cmpeq_imm(s32 value) { cmpeq_imm_rgba(value, value, value, value); } + void cmpgt_imm(s32 value) { cmpgt_imm_rgba(value, value, value, value); } + void cmplt_imm(s32 value) { cmplt_imm_rgba(value, value, value, value); } + + void cmpeq_imm_rgba(s32 a, s32 r, s32 g, s32 b) + { + m_a = (m_a == a) ? 0xffffffff : 0; + m_r = (m_r == r) ? 0xffffffff : 0; + m_g = (m_g == g) ? 0xffffffff : 0; + m_b = (m_b == b) ? 0xffffffff : 0; + } + + void cmpgt_imm_rgba(s32 a, s32 r, s32 g, s32 b) + { + m_a = (m_a > a) ? 0xffffffff : 0; + m_r = (m_r > r) ? 0xffffffff : 0; + m_g = (m_g > g) ? 0xffffffff : 0; + m_b = (m_b > b) ? 0xffffffff : 0; + } + + void cmplt_imm_rgba(s32 a, s32 r, s32 g, s32 b) + { + m_a = (m_a < a) ? 0xffffffff : 0; + m_r = (m_r < r) ? 0xffffffff : 0; + m_g = (m_g < g) ? 0xffffffff : 0; + m_b = (m_b < b) ? 
0xffffffff : 0; + } + + void merge_alpha16(const rgbaint_t& alpha) + { + m_a = alpha.m_a; + } + + void merge_alpha(const rgbaint_t& alpha) + { + m_a = alpha.m_a; + } + + rgbaint_t& operator+=(const rgbaint_t& other) + { + add_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b); + return *this; + } + + rgbaint_t& operator+=(const s32 other) + { + add_imm_rgba(other, other, other, other); + return *this; + } + + rgbaint_t &operator-=(const rgbaint_t& other) + { + sub_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b); + return *this; + } + + rgbaint_t& operator*=(const rgbaint_t& other) + { + mul_imm_rgba(other.m_a, other.m_r, other.m_g, other.m_b); + return *this; + } + + rgbaint_t& operator*=(const s32 other) + { + mul_imm_rgba(other, other, other, other); + return *this; + } + + rgbaint_t& operator>>=(const s32 shift) + { + sra_imm(shift); + return *this; + } + + static u32 bilinear_filter(u32 rgb00, u32 rgb01, u32 rgb10, u32 rgb11, u8 u, u8 v) + { + u32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); + u32 rb1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8); + + rgb00 >>= 8; + rgb01 >>= 8; + rgb10 >>= 8; + rgb11 >>= 8; + + u32 ag0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); + u32 ag1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8); + + rb0 = (rb0 & 0x00ff00ff) + ((((rb1 & 0x00ff00ff) - (rb0 & 0x00ff00ff)) * v) >> 8); + ag0 = (ag0 & 0x00ff00ff) + ((((ag1 & 0x00ff00ff) - (ag0 & 0x00ff00ff)) * v) >> 8); + + return ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff); + } + + void bilinear_filter_rgbaint(u32 rgb00, u32 rgb01, u32 rgb10, u32 rgb11, u8 u, u8 v) + { + u32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); + u32 rb1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8); + + rgb00 >>= 8; + rgb01 >>= 8; + rgb10 >>= 8; + rgb11 >>= 8; + + u32 
ag0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); + u32 ag1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8); + + rb0 = (rb0 & 0x00ff00ff) + ((((rb1 & 0x00ff00ff) - (rb0 & 0x00ff00ff)) * v) >> 8); + ag0 = (ag0 & 0x00ff00ff) + ((((ag1 & 0x00ff00ff) - (ag0 & 0x00ff00ff)) * v) >> 8); + + u32 result = ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff); + this->set(result); + } + +protected: + s32 m_a; + s32 m_r; + s32 m_g; + s32 m_b; +}; + +#endif // MAME_EMU_VIDEO_RGBGEN_H diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbsse.h b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbsse.h new file mode 100644 index 0000000000..ea3bc1e208 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbsse.h @@ -0,0 +1,502 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb, Ryan Holtz +/*************************************************************************** + + rgbsse.h + + SSE optimized RGB utilities. + + WARNING: This code assumes SSE2 or greater capability. 
+ +***************************************************************************/ + +#ifndef MAME_EMU_VIDEO_RGBSSE_H +#define MAME_EMU_VIDEO_RGBSSE_H + +#pragma once + +#include +#ifdef __SSE4_1__ +#include +#endif + + +/*************************************************************************** + TYPE DEFINITIONS +***************************************************************************/ + +class rgbaint_t +{ +public: + rgbaint_t() { } + explicit rgbaint_t(u32 rgba) { set(rgba); } + rgbaint_t(s32 a, s32 r, s32 g, s32 b) { set(a, r, g, b); } + explicit rgbaint_t(const rgb_t& rgb) { set(rgb); } + explicit rgbaint_t(__m128i rgba) { m_value = rgba; } + + rgbaint_t(const rgbaint_t& other) = default; + rgbaint_t &operator=(const rgbaint_t& other) = default; + + void set(const rgbaint_t& other) { m_value = other.m_value; } + void set(const u32& rgba) { m_value = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(rgba), _mm_setzero_si128()), _mm_setzero_si128()); } + void set(s32 a, s32 r, s32 g, s32 b) { m_value = _mm_set_epi32(a, r, g, b); } + void set(const rgb_t& rgb) { set((const u32&) rgb); } + // This function sets all elements to the same val + void set_all(const s32& val) { m_value = _mm_set1_epi32(val); } + // This function zeros all elements + void zero() { m_value = _mm_xor_si128(m_value, m_value); } + // This function zeros only the alpha element + void zero_alpha() { m_value = _mm_and_si128(m_value, alpha_mask()); } + + inline rgb_t to_rgba() const + { + return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128())); + } + + inline rgb_t to_rgba_clamp() const + { + return _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(m_value, _mm_setzero_si128()), _mm_setzero_si128())); + } + + void set_a16(const s32 value) { m_value = _mm_insert_epi16(m_value, value, 6); } +#ifdef __SSE4_1__ + void set_a(const s32 value) { m_value = _mm_insert_epi32(m_value, value, 3); } + void set_r(const s32 value) { m_value = 
_mm_insert_epi32(m_value, value, 2); } + void set_g(const s32 value) { m_value = _mm_insert_epi32(m_value, value, 1); } + void set_b(const s32 value) { m_value = _mm_insert_epi32(m_value, value, 0); } +#else + void set_a(const s32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, alpha_mask()), _mm_set_epi32(value, 0, 0, 0)); } + void set_r(const s32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, red_mask()), _mm_set_epi32(0, value, 0, 0)); } + void set_g(const s32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, green_mask()), _mm_set_epi32(0, 0, value, 0)); } + void set_b(const s32 value) { m_value = _mm_or_si128(_mm_and_si128(m_value, blue_mask()), _mm_set_epi32(0, 0, 0, value)); } +#endif + + u8 get_a() const { return u8(unsigned(_mm_extract_epi16(m_value, 6))); } + u8 get_r() const { return u8(unsigned(_mm_extract_epi16(m_value, 4))); } + u8 get_g() const { return u8(unsigned(_mm_extract_epi16(m_value, 2))); } + u8 get_b() const { return u8(unsigned(_mm_cvtsi128_si32(m_value))); } + +#ifdef __SSE4_1__ + s32 get_a32() const { return _mm_extract_epi32(m_value, 3); } + s32 get_r32() const { return _mm_extract_epi32(m_value, 2); } + s32 get_g32() const { return _mm_extract_epi32(m_value, 1); } + s32 get_b32() const { return _mm_extract_epi32(m_value, 0); } +#else + s32 get_a32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 3)))); } + s32 get_r32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 2)))); } + s32 get_g32() const { return (_mm_cvtsi128_si32(_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 1)))); } + s32 get_b32() const { return (_mm_cvtsi128_si32(m_value)); } +#endif + + // These selects return an rgbaint_t with all fields set to the element choosen (a, r, g, or b) + rgbaint_t select_alpha32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, _MM_SHUFFLE(3, 3, 3, 3)); } + rgbaint_t select_red32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, 
_MM_SHUFFLE(2, 2, 2, 2)); } + rgbaint_t select_green32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, _MM_SHUFFLE(1, 1, 1, 1)); } + rgbaint_t select_blue32() const { return (rgbaint_t)_mm_shuffle_epi32(m_value, _MM_SHUFFLE(0, 0, 0, 0)); } + + inline void add(const rgbaint_t& color2) + { + m_value = _mm_add_epi32(m_value, color2.m_value); + } + + inline void add_imm(const s32 imm) + { + m_value = _mm_add_epi32(m_value, _mm_set1_epi32(imm)); + } + + inline void add_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_value = _mm_add_epi32(m_value, _mm_set_epi32(a, r, g, b)); + } + + inline void sub(const rgbaint_t& color2) + { + m_value = _mm_sub_epi32(m_value, color2.m_value); + } + + inline void sub_imm(const s32 imm) + { + m_value = _mm_sub_epi32(m_value, _mm_set1_epi32(imm)); + } + + inline void sub_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_value = _mm_sub_epi32(m_value, _mm_set_epi32(a, r, g, b)); + } + + inline void subr(const rgbaint_t& color2) + { + m_value = _mm_sub_epi32(color2.m_value, m_value); + } + + inline void subr_imm(const s32 imm) + { + m_value = _mm_sub_epi32(_mm_set1_epi32(imm), m_value); + } + + inline void subr_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + m_value = _mm_sub_epi32(_mm_set_epi32(a, r, g, b), m_value); + } + + inline void mul(const rgbaint_t& color) + { + __m128i tmp1 = _mm_mul_epu32(m_value, color.m_value); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(color.m_value, 4)); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); + } + + inline void mul_imm(const s32 imm) + { + __m128i immv = _mm_set1_epi32(imm); + __m128i tmp1 = _mm_mul_epu32(m_value, immv); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 
0, 2, 0))); + } + + inline void mul_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { + __m128i immv = _mm_set_epi32(a, r, g, b); + __m128i tmp1 = _mm_mul_epu32(m_value, immv); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); + } + + inline void shl(const rgbaint_t& shift) + { + rgbaint_t areg(*this); + rgbaint_t rreg(*this); + rgbaint_t greg(*this); + rgbaint_t breg(*this); + rgbaint_t ashift(0, 0, 0, shift.get_a32()); + rgbaint_t rshift(0, 0, 0, shift.get_r32()); + rgbaint_t gshift(0, 0, 0, shift.get_g32()); + rgbaint_t bshift(0, 0, 0, shift.get_b32()); + areg.m_value = _mm_sll_epi32(areg.m_value, ashift.m_value); + rreg.m_value = _mm_sll_epi32(rreg.m_value, rshift.m_value); + greg.m_value = _mm_sll_epi32(greg.m_value, gshift.m_value); + breg.m_value = _mm_sll_epi32(breg.m_value, bshift.m_value); + set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32()); + } + + inline void shl_imm(const u8 shift) + { + m_value = _mm_slli_epi32(m_value, shift); + } + + inline void shr(const rgbaint_t& shift) + { + rgbaint_t areg(*this); + rgbaint_t rreg(*this); + rgbaint_t greg(*this); + rgbaint_t breg(*this); + rgbaint_t ashift(0, 0, 0, shift.get_a32()); + rgbaint_t rshift(0, 0, 0, shift.get_r32()); + rgbaint_t gshift(0, 0, 0, shift.get_g32()); + rgbaint_t bshift(0, 0, 0, shift.get_b32()); + areg.m_value = _mm_srl_epi32(areg.m_value, ashift.m_value); + rreg.m_value = _mm_srl_epi32(rreg.m_value, rshift.m_value); + greg.m_value = _mm_srl_epi32(greg.m_value, gshift.m_value); + breg.m_value = _mm_srl_epi32(breg.m_value, bshift.m_value); + set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32()); + } + + inline void shr_imm(const u8 shift) + { + m_value = _mm_srli_epi32(m_value, shift); + } + + inline void sra(const rgbaint_t& shift) + { + rgbaint_t areg(*this); + rgbaint_t 
rreg(*this); + rgbaint_t greg(*this); + rgbaint_t breg(*this); + rgbaint_t ashift(0, 0, 0, shift.get_a32()); + rgbaint_t rshift(0, 0, 0, shift.get_r32()); + rgbaint_t gshift(0, 0, 0, shift.get_g32()); + rgbaint_t bshift(0, 0, 0, shift.get_b32()); + areg.m_value = _mm_sra_epi32(areg.m_value, ashift.m_value); + rreg.m_value = _mm_sra_epi32(rreg.m_value, rshift.m_value); + greg.m_value = _mm_sra_epi32(greg.m_value, gshift.m_value); + breg.m_value = _mm_sra_epi32(breg.m_value, bshift.m_value); + set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32()); + } + + inline void sra_imm(const u8 shift) + { + m_value = _mm_srai_epi32(m_value, shift); + } + + void or_reg(const rgbaint_t& color2) { m_value = _mm_or_si128(m_value, color2.m_value); } + void and_reg(const rgbaint_t& color2) { m_value = _mm_and_si128(m_value, color2.m_value); } + void xor_reg(const rgbaint_t& color2) { m_value = _mm_xor_si128(m_value, color2.m_value); } + + void andnot_reg(const rgbaint_t& color2) { m_value = _mm_andnot_si128(color2.m_value, m_value); } + + void or_imm(s32 value) { m_value = _mm_or_si128(m_value, _mm_set1_epi32(value)); } + void and_imm(s32 value) { m_value = _mm_and_si128(m_value, _mm_set1_epi32(value)); } + void xor_imm(s32 value) { m_value = _mm_xor_si128(m_value, _mm_set1_epi32(value)); } + + void or_imm_rgba(s32 a, s32 r, s32 g, s32 b) { m_value = _mm_or_si128(m_value, _mm_set_epi32(a, r, g, b)); } + void and_imm_rgba(s32 a, s32 r, s32 g, s32 b) { m_value = _mm_and_si128(m_value, _mm_set_epi32(a, r, g, b)); } + void xor_imm_rgba(s32 a, s32 r, s32 g, s32 b) { m_value = _mm_xor_si128(m_value, _mm_set_epi32(a, r, g, b)); } + + inline void clamp_and_clear(const u32 sign) + { + __m128i vsign = _mm_set1_epi32(sign); + m_value = _mm_and_si128(m_value, _mm_cmpeq_epi32(_mm_and_si128(m_value, vsign), _mm_setzero_si128())); + vsign = _mm_srai_epi32(vsign, 1); + vsign = _mm_xor_si128(vsign, _mm_set1_epi32(0xffffffff)); + __m128i mask = _mm_cmpgt_epi32(m_value, vsign); + m_value 
= _mm_or_si128(_mm_and_si128(vsign, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff)))); + } + + inline void clamp_to_uint8() + { + m_value = _mm_packs_epi32(m_value, _mm_setzero_si128()); + m_value = _mm_packus_epi16(m_value, _mm_setzero_si128()); + m_value = _mm_unpacklo_epi8(m_value, _mm_setzero_si128()); + m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128()); + } + + inline void sign_extend(const u32 compare, const u32 sign) + { + __m128i compare_vec = _mm_set1_epi32(compare); + __m128i compare_mask = _mm_cmpeq_epi32(_mm_and_si128(m_value, compare_vec), compare_vec); + __m128i compared = _mm_and_si128(_mm_set1_epi32(sign), compare_mask); + m_value = _mm_or_si128(m_value, compared); + } + + inline void min(const s32 value) + { + __m128i val = _mm_set1_epi32(value); +#ifdef __SSE4_1__ + m_value = _mm_min_epi32(m_value, val); +#else + __m128i is_greater_than = _mm_cmpgt_epi32(m_value, val); + + __m128i val_to_set = _mm_and_si128(val, is_greater_than); + __m128i keep_mask = _mm_xor_si128(is_greater_than, _mm_set1_epi32(0xffffffff)); + + m_value = _mm_and_si128(m_value, keep_mask); + m_value = _mm_or_si128(val_to_set, m_value); +#endif + } + + inline void max(const s32 value) + { + __m128i val = _mm_set1_epi32(value); +#ifdef __SSE4_1__ + m_value = _mm_max_epi32(m_value, val); +#else + __m128i is_less_than = _mm_cmplt_epi32(m_value, val); + + __m128i val_to_set = _mm_and_si128(val, is_less_than); + __m128i keep_mask = _mm_xor_si128(is_less_than, _mm_set1_epi32(0xffffffff)); + + m_value = _mm_and_si128(m_value, keep_mask); + m_value = _mm_or_si128(val_to_set, m_value); +#endif + } + + void blend(const rgbaint_t& other, u8 factor); + + void scale_and_clamp(const rgbaint_t& scale); + + // Leave this here in case Model3 blows up... 
+ //inline void scale_imm_and_clamp(const s32 scale) + //{ + // mul_imm(scale); + // sra_imm(8); + // clamp_to_uint8(); + //} + + // This version needs absolute value of value and scale to be 11 bits or less + inline void scale_imm_and_clamp(const s16 scale) + { + // Set mult a 16 bit inputs to scale + __m128i immv = _mm_set1_epi16(scale); + // Shift up by 4 + immv = _mm_slli_epi16(immv, 4); + // Pack color into mult b 16 bit inputs + m_value = _mm_packs_epi32(m_value, _mm_setzero_si128()); + // Shift up by 4 + m_value = _mm_slli_epi16(m_value, 4); + // Do the 16 bit multiply, bottom 64 bits will contain 16 bit truncated results + m_value = _mm_mulhi_epi16(m_value, immv); + // Clamp to u8 + m_value = _mm_packus_epi16(m_value, _mm_setzero_si128()); + // Unpack up to s32 + m_value = _mm_unpacklo_epi8(m_value, _mm_setzero_si128()); + m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128()); + } + + // This function needs absolute value of color and scale to be 15 bits or less + inline void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other) + { +#ifdef __SSE4_1__ + m_value = _mm_mullo_epi32(m_value, scale.m_value); +#else + // Mask off the top 16 bits of each 32-bit value + m_value = _mm_and_si128(m_value, _mm_set1_epi32(0x0000ffff)); + // Do 16x16 multiplies and sum into 32-bit pairs; the AND above ensures upper pair is always 0 + m_value = _mm_madd_epi16(m_value, scale.m_value); +#endif + // Arithmetic shift down the result by 8 bits + sra_imm(8); + add(other); + clamp_to_uint8(); + } + + // This function needs absolute value of color and scale to be 15 bits or less + inline void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2) + { + // Pack 32-bit values to 16-bit values in low half, and scales in top half + __m128i tmp1 = _mm_packs_epi32(m_value, scale.m_value); + // Same for other and scale2 + __m128i tmp2 = _mm_packs_epi32(other.m_value, scale2.m_value); + // Interleave the low halves (m_value, 
other) + __m128i tmp3 = _mm_unpacklo_epi16(tmp1, tmp2); + // Interleave the top halves (scale, scale2) + __m128i tmp4 = _mm_unpackhi_epi16(tmp1, tmp2); + // Multiply values by scales and add adjacent pairs + m_value = _mm_madd_epi16(tmp3, tmp4); + // Final shift by 8 + sra_imm(8); + clamp_to_uint8(); + } + + void cmpeq(const rgbaint_t& value) { m_value = _mm_cmpeq_epi32(m_value, value.m_value); } + void cmpgt(const rgbaint_t& value) { m_value = _mm_cmpgt_epi32(m_value, value.m_value); } + void cmplt(const rgbaint_t& value) { m_value = _mm_cmplt_epi32(m_value, value.m_value); } + + void cmpeq_imm(s32 value) { m_value = _mm_cmpeq_epi32(m_value, _mm_set1_epi32(value)); } + void cmpgt_imm(s32 value) { m_value = _mm_cmpgt_epi32(m_value, _mm_set1_epi32(value)); } + void cmplt_imm(s32 value) { m_value = _mm_cmplt_epi32(m_value, _mm_set1_epi32(value)); } + + void cmpeq_imm_rgba(s32 a, s32 r, s32 g, s32 b) { m_value = _mm_cmpeq_epi32(m_value, _mm_set_epi32(a, r, g, b)); } + void cmpgt_imm_rgba(s32 a, s32 r, s32 g, s32 b) { m_value = _mm_cmpgt_epi32(m_value, _mm_set_epi32(a, r, g, b)); } + void cmplt_imm_rgba(s32 a, s32 r, s32 g, s32 b) { m_value = _mm_cmplt_epi32(m_value, _mm_set_epi32(a, r, g, b)); } + + inline rgbaint_t& operator+=(const rgbaint_t& other) + { + m_value = _mm_add_epi32(m_value, other.m_value); + return *this; + } + + inline rgbaint_t& operator+=(const s32 other) + { + m_value = _mm_add_epi32(m_value, _mm_set1_epi32(other)); + return *this; + } + + inline rgbaint_t& operator-=(const rgbaint_t& other) + { + m_value = _mm_sub_epi32(m_value, other.m_value); + return *this; + } + + inline rgbaint_t& operator*=(const rgbaint_t& other) + { + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(_mm_mul_epu32(m_value, other.m_value), _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(_mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(other.m_value, 4)), _MM_SHUFFLE(0, 0, 2, 0))); + return *this; + } + + inline rgbaint_t& operator*=(const s32 other) + { + const __m128i 
immv = _mm_set1_epi32(other); + m_value = _mm_unpacklo_epi32(_mm_shuffle_epi32(_mm_mul_epu32(m_value, immv), _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(_mm_mul_epu32(_mm_srli_si128(m_value, 4), _mm_srli_si128(immv, 4)), _MM_SHUFFLE(0, 0, 2, 0))); + return *this; + } + + inline rgbaint_t& operator>>=(const s32 shift) + { + m_value = _mm_srai_epi32(m_value, shift); + return *this; + } + + inline void merge_alpha16(const rgbaint_t& alpha) + { + m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 6), 6); + } + + inline void merge_alpha(const rgbaint_t& alpha) + { +#ifdef __SSE4_1__ + m_value = _mm_insert_epi32(m_value, _mm_extract_epi32(alpha.m_value, 3), 3); +#else + m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 7), 7); + m_value = _mm_insert_epi16(m_value, _mm_extract_epi16(alpha.m_value, 6), 6); +#endif + } + + static u32 bilinear_filter(u32 rgb00, u32 rgb01, u32 rgb10, u32 rgb11, u8 u, u8 v) + { + __m128i color00 = _mm_cvtsi32_si128(rgb00); + __m128i color01 = _mm_cvtsi32_si128(rgb01); + __m128i color10 = _mm_cvtsi32_si128(rgb10); + __m128i color11 = _mm_cvtsi32_si128(rgb11); + + /* interleave color01 and color00 at the byte level */ + color01 = _mm_unpacklo_epi8(color01, color00); + color11 = _mm_unpacklo_epi8(color11, color10); + color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128()); + color11 = _mm_unpacklo_epi8(color11, _mm_setzero_si128()); + color01 = _mm_madd_epi16(color01, scale_factor(u)); + color11 = _mm_madd_epi16(color11, scale_factor(u)); + color01 = _mm_slli_epi32(color01, 15); + color11 = _mm_srli_epi32(color11, 1); + color01 = _mm_max_epi16(color01, color11); + color01 = _mm_madd_epi16(color01, scale_factor(v)); + color01 = _mm_srli_epi32(color01, 15); + color01 = _mm_packs_epi32(color01, _mm_setzero_si128()); + color01 = _mm_packus_epi16(color01, _mm_setzero_si128()); + return _mm_cvtsi128_si32(color01); + } + + void bilinear_filter_rgbaint(u32 rgb00, u32 rgb01, u32 rgb10, u32 rgb11, u8 u, u8 v) + { + 
__m128i color00 = _mm_cvtsi32_si128(rgb00); + __m128i color01 = _mm_cvtsi32_si128(rgb01); + __m128i color10 = _mm_cvtsi32_si128(rgb10); + __m128i color11 = _mm_cvtsi32_si128(rgb11); + + /* interleave color01 and color00 at the byte level */ + color01 = _mm_unpacklo_epi8(color01, color00); + color11 = _mm_unpacklo_epi8(color11, color10); + color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128()); + color11 = _mm_unpacklo_epi8(color11, _mm_setzero_si128()); + color01 = _mm_madd_epi16(color01, scale_factor(u)); + color11 = _mm_madd_epi16(color11, scale_factor(u)); + color01 = _mm_slli_epi32(color01, 15); + color11 = _mm_srli_epi32(color11, 1); + color01 = _mm_max_epi16(color01, color11); + color01 = _mm_madd_epi16(color01, scale_factor(v)); + m_value = _mm_srli_epi32(color01, 15); + } + +protected: + struct _statics + { + __m128 dummy_for_alignment; + u16 alpha_mask[8]; + u16 red_mask[8]; + u16 green_mask[8]; + u16 blue_mask[8]; + s16 scale_table[256][8]; + }; + + static __m128i alpha_mask() { return *(__m128i *)&statics.alpha_mask[0]; } + static __m128i red_mask() { return *(__m128i *)&statics.red_mask[0]; } + static __m128i green_mask() { return *(__m128i *)&statics.green_mask[0]; } + static __m128i blue_mask() { return *(__m128i *)&statics.blue_mask[0]; } + static __m128i scale_factor(u8 index) { return *(__m128i *)&statics.scale_table[index][0]; } + + __m128i m_value; + + static const _statics statics; + +}; + +#endif /* MAME_EMU_VIDEO_RGBSSE_H */ diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbutil.h b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbutil.h new file mode 100644 index 0000000000..c6ee325c75 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbutil.h @@ -0,0 +1,32 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + rgbutil.h + + Utility definitions for RGB manipulation. 
Allows RGB handling to be + performed in an abstracted fashion and optimized with SIMD. + +***************************************************************************/ + +#ifndef MAME_EMU_VIDEO_RGBUTIL_H +#define MAME_EMU_VIDEO_RGBUTIL_H + +// use SSE on 64-bit implementations, where it can be assumed +#if (!defined(MAME_DEBUG) || defined(__OPTIMIZE__)) && (defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))) + +#define MAME_RGB_HIGH_PRECISION +#include "rgbsse.h" + +#elif defined(__ALTIVEC__) + +#define MAME_RGB_HIGH_PRECISION +#include "rgbvmx.h" + +#else + +#include "rgbgen.h" + +#endif + +#endif // MAME_EMU_VIDEO_RGBUTIL_H diff --git a/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbvmx.h b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbvmx.h new file mode 100644 index 0000000000..05d26cd9e2 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/emu/video/rgbvmx.h @@ -0,0 +1,728 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb, Ryan Holtz +/*************************************************************************** + + rgbvmx.h + + VMX/Altivec optimised RGB utilities. 
+ +***************************************************************************/ + +#ifndef MAME_EMU_VIDEO_RGBVMX_H +#define MAME_EMU_VIDEO_RGBVMX_H + +#pragma once + +#include + +/*************************************************************************** + TYPE DEFINITIONS +***************************************************************************/ + +class rgbaint_t +{ +protected: + typedef __vector signed char VECS8; + typedef __vector unsigned char VECU8; + typedef __vector signed short VECS16; + typedef __vector unsigned short VECU16; + typedef __vector signed int VECS32; + typedef __vector unsigned int VECU32; + +public: + rgbaint_t() { set(0, 0, 0, 0); } + explicit rgbaint_t(u32 rgba) { set(rgba); } + rgbaint_t(s32 a, s32 r, s32 g, s32 b) { set(a, r, g, b); } + explicit rgbaint_t(const rgb_t& rgb) { set(rgb); } + explicit rgbaint_t(VECS32 rgba) : m_value(rgba) { } + + rgbaint_t(const rgbaint_t& other) = default; + rgbaint_t &operator=(const rgbaint_t& other) = default; + + void set(const rgbaint_t& other) { m_value = other.m_value; } + + void set(u32 rgba) + { + const VECU32 zero = { 0, 0, 0, 0 }; +#ifdef __LITTLE_ENDIAN__ + const VECS8 temp = *reinterpret_cast(&rgba); + m_value = VECS32(vec_mergeh(VECS16(vec_mergeh(temp, VECS8(zero))), VECS16(zero))); +#else + const VECS8 temp = VECS8(vec_perm(vec_lde(0, &rgba), zero, vec_lvsl(0, &rgba))); + m_value = VECS32(vec_mergeh(VECS16(zero), VECS16(vec_mergeh(VECS8(zero), temp)))); +#endif + } + + void set(s32 a, s32 r, s32 g, s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 result = { b, g, r, a }; +#else + const VECS32 result = { a, r, g, b }; +#endif + m_value = result; + } + + void set(const rgb_t& rgb) + { + const VECU32 zero = { 0, 0, 0, 0 }; +#ifdef __LITTLE_ENDIAN__ + const VECS8 temp = *reinterpret_cast(rgb.ptr()); + m_value = VECS32(vec_mergeh(VECS16(vec_mergeh(temp, VECS8(zero))), VECS16(zero))); +#else + const VECS8 temp = VECS8(vec_perm(vec_lde(0, rgb.ptr()), zero, vec_lvsl(0, rgb.ptr()))); + 
m_value = VECS32(vec_mergeh(VECS16(zero), VECS16(vec_mergeh(VECS8(zero), temp)))); +#endif + } + + // This function sets all elements to the same val + void set_all(const s32& val) { set(val, val, val, val); } + // This function zeros all elements + void zero() { set_all(0); } + // This function zeros only the alpha element + void zero_alpha() { set_a(0); } + + inline rgb_t to_rgba() const + { + VECU32 temp = VECU32(vec_packs(m_value, m_value)); + temp = VECU32(vec_packsu(VECS16(temp), VECS16(temp))); + u32 result; + vec_ste(temp, 0, &result); + return result; + } + + inline rgb_t to_rgba_clamp() const + { + VECU32 temp = VECU32(vec_packs(m_value, m_value)); + temp = VECU32(vec_packsu(VECS16(temp), VECS16(temp))); + u32 result; + vec_ste(temp, 0, &result); + return result; + } + + void set_a16(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_perm(m_value, temp, alpha_perm); + } + + void set_a(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_perm(m_value, temp, alpha_perm); + } + + void set_r(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_perm(m_value, temp, red_perm); + } + + void set_g(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_perm(m_value, temp, green_perm); + } + + void set_b(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_perm(m_value, temp, blue_perm); + } + + u8 get_a() const + { + u8 result; +#ifdef __LITTLE_ENDIAN__ + vec_ste(vec_splat(VECU8(m_value), 12), 0, &result); +#else + vec_ste(vec_splat(VECU8(m_value), 3), 0, &result); +#endif + return result; + } + + u8 get_r() const + { + u8 result; +#ifdef __LITTLE_ENDIAN__ + vec_ste(vec_splat(VECU8(m_value), 8), 0, &result); +#else + vec_ste(vec_splat(VECU8(m_value), 7), 0, &result); +#endif + return result; + } + + u8 get_g() const + { + u8 result; +#ifdef __LITTLE_ENDIAN__ + 
vec_ste(vec_splat(VECU8(m_value), 4), 0, &result); +#else + vec_ste(vec_splat(VECU8(m_value), 11), 0, &result); +#endif + return result; + } + + u8 get_b() const + { + u8 result; +#ifdef __LITTLE_ENDIAN__ + vec_ste(vec_splat(VECU8(m_value), 0), 0, &result); +#else + vec_ste(vec_splat(VECU8(m_value), 15), 0, &result); +#endif + return result; + } + + s32 get_a32() const + { + s32 result; +#ifdef __LITTLE_ENDIAN__ + vec_ste(vec_splat(m_value, 3), 0, &result); +#else + vec_ste(vec_splat(m_value, 0), 0, &result); +#endif + return result; + } + + s32 get_r32() const + { + s32 result; +#ifdef __LITTLE_ENDIAN__ + vec_ste(vec_splat(m_value, 2), 0, &result); +#else + vec_ste(vec_splat(m_value, 1), 0, &result); +#endif + return result; + } + + s32 get_g32() const + { + s32 result; +#ifdef __LITTLE_ENDIAN__ + vec_ste(vec_splat(m_value, 1), 0, &result); +#else + vec_ste(vec_splat(m_value, 2), 0, &result); +#endif + return result; + } + + s32 get_b32() const + { + s32 result; +#ifdef __LITTLE_ENDIAN__ + vec_ste(vec_splat(m_value, 0), 0, &result); +#else + vec_ste(vec_splat(m_value, 3), 0, &result); +#endif + return result; + } + + // These selects return an rgbaint_t with all fields set to the element choosen (a, r, g, or b) + rgbaint_t select_alpha32() const { return rgbaint_t(get_a32(), get_a32(), get_a32(), get_a32()); } + rgbaint_t select_red32() const { return rgbaint_t(get_r32(), get_r32(), get_r32(), get_r32()); } + rgbaint_t select_green32() const { return rgbaint_t(get_g32(), get_g32(), get_g32(), get_g32()); } + rgbaint_t select_blue32() const { return rgbaint_t(get_b32(), get_b32(), get_b32(), get_b32()); } + + inline void add(const rgbaint_t& color2) + { + m_value = vec_add(m_value, color2.m_value); + } + + inline void add_imm(const s32 imm) + { + const VECS32 temp = { imm, imm, imm, imm }; + m_value = vec_add(m_value, temp); + } + + inline void add_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, 
g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = vec_add(m_value, temp); + } + + inline void sub(const rgbaint_t& color2) + { + m_value = vec_sub(m_value, color2.m_value); + } + + inline void sub_imm(const s32 imm) + { + const VECS32 temp = { imm, imm, imm, imm }; + m_value = vec_sub(m_value, temp); + } + + inline void sub_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = vec_sub(m_value, temp); + } + + inline void subr(const rgbaint_t& color2) + { + m_value = vec_sub(color2.m_value, m_value); + } + + inline void subr_imm(const s32 imm) + { + const VECS32 temp = { imm, imm, imm, imm }; + m_value = vec_sub(temp, m_value); + } + + inline void subr_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = vec_sub(temp, m_value); + } + + inline void mul(const rgbaint_t& color) + { + const VECU32 shift = vec_splat_u32(-16); + const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(color.m_value, shift)), vec_splat_u32(0)); +#ifdef __LITTLE_ENDIAN__ + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(color.m_value)))); +#else + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(color.m_value)))); +#endif + } + + inline void mul_imm(const s32 imm) + { + const VECU32 value = { u32(imm), u32(imm), u32(imm), u32(imm) }; + const VECU32 shift = vec_splat_u32(-16); + const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(value, shift)), vec_splat_u32(0)); +#ifdef __LITTLE_ENDIAN__ + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(value)))); +#else + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(value)))); +#endif + } + + inline void 
mul_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECU32 value = { u32(b), u32(g), u32(r), u32(a) }; +#else + const VECU32 value = { u32(a), u32(r), u32(g), u32(b) }; +#endif + const VECU32 shift = vec_splat_u32(-16); + const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(value, shift)), vec_splat_u32(0)); +#ifdef __LITTLE_ENDIAN__ + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(value)))); +#else + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(value)))); +#endif + } + + inline void shl(const rgbaint_t& shift) + { + const VECU32 limit = { 32, 32, 32, 32 }; + m_value = vec_and(vec_sl(m_value, VECU32(shift.m_value)), vec_cmpgt(limit, VECU32(shift.m_value))); + } + + inline void shl_imm(const u8 shift) + { + const VECU32 temp = { shift, shift, shift, shift }; + m_value = vec_sl(m_value, temp); + } + + inline void shr(const rgbaint_t& shift) + { + const VECU32 limit = { 32, 32, 32, 32 }; + m_value = vec_and(vec_sr(m_value, VECU32(shift.m_value)), vec_cmpgt(limit, VECU32(shift.m_value))); + } + + inline void shr_imm(const u8 shift) + { + const VECU32 temp = { shift, shift, shift, shift }; + m_value = vec_sr(m_value, temp); + } + + inline void sra(const rgbaint_t& shift) + { + const VECU32 limit = { 31, 31, 31, 31 }; + m_value = vec_sra(m_value, vec_min(VECU32(shift.m_value), limit)); + } + + inline void sra_imm(const u8 shift) + { + const VECU32 temp = { shift, shift, shift, shift }; + m_value = vec_sra(m_value, temp); + } + + inline void or_reg(const rgbaint_t& color2) + { + m_value = vec_or(m_value, color2.m_value); + } + + inline void or_imm(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_or(m_value, temp); + } + + inline void or_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; 
+#endif + m_value = vec_or(m_value, temp); + } + + inline void and_reg(const rgbaint_t& color) + { + m_value = vec_and(m_value, color.m_value); + } + + inline void andnot_reg(const rgbaint_t& color) + { + m_value = vec_andc(m_value, color.m_value); + } + + inline void and_imm(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_and(m_value, temp); + } + + inline void and_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = vec_and(m_value, temp); + } + + inline void xor_reg(const rgbaint_t& color2) + { + m_value = vec_xor(m_value, color2.m_value); + } + + inline void xor_imm(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_xor(m_value, temp); + } + + inline void xor_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = vec_xor(m_value, temp); + } + + inline void clamp_and_clear(const u32 sign) + { + const VECS32 vzero = { 0, 0, 0, 0 }; + VECS32 vsign = { s32(sign), s32(sign), s32(sign), s32(sign) }; + m_value = vec_and(m_value, vec_cmpeq(vec_and(m_value, vsign), vzero)); + vsign = vec_nor(vec_sra(vsign, vec_splat_u32(1)), vzero); + const VECS32 mask = VECS32(vec_cmpgt(m_value, vsign)); + m_value = vec_or(vec_and(vsign, mask), vec_and(m_value, vec_nor(mask, vzero))); + } + + inline void clamp_to_uint8() + { + const VECU32 zero = { 0, 0, 0, 0 }; + m_value = VECS32(vec_packs(m_value, m_value)); + m_value = VECS32(vec_packsu(VECS16(m_value), VECS16(m_value))); +#ifdef __LITTLE_ENDIAN__ + m_value = VECS32(vec_mergeh(VECU8(m_value), VECU8(zero))); + m_value = VECS32(vec_mergeh(VECS16(m_value), VECS16(zero))); +#else + m_value = VECS32(vec_mergeh(VECU8(zero), VECU8(m_value))); + m_value = 
VECS32(vec_mergeh(VECS16(zero), VECS16(m_value))); +#endif + } + + inline void sign_extend(const u32 compare, const u32 sign) + { + const VECS32 compare_vec = { s32(compare), s32(compare), s32(compare), s32(compare) }; + const VECS32 compare_mask = VECS32(vec_cmpeq(vec_and(m_value, compare_vec), compare_vec)); + const VECS32 sign_vec = { s32(sign), s32(sign), s32(sign), s32(sign) }; + m_value = vec_or(m_value, vec_and(sign_vec, compare_mask)); + } + + inline void min(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_min(m_value, temp); + } + + inline void max(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = vec_max(m_value, temp); + } + + void blend(const rgbaint_t& other, u8 factor); + + void scale_and_clamp(const rgbaint_t& scale); + void scale_imm_and_clamp(const s32 scale); + + void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other) + { + mul(scale); + sra_imm(8); + add(other); + clamp_to_uint8(); + } + + void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2) + { + rgbaint_t color2(other); + color2.mul(scale2); + + mul(scale); + add(color2); + sra_imm(8); + clamp_to_uint8(); + } + + inline void cmpeq(const rgbaint_t& value) + { + m_value = VECS32(vec_cmpeq(m_value, value.m_value)); + } + + inline void cmpeq_imm(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = VECS32(vec_cmpeq(m_value, temp)); + } + + inline void cmpeq_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = VECS32(vec_cmpeq(m_value, temp)); + } + + inline void cmpgt(const rgbaint_t& value) + { + m_value = VECS32(vec_cmpgt(m_value, value.m_value)); + } + + inline void cmpgt_imm(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = 
VECS32(vec_cmpgt(m_value, temp)); + } + + inline void cmpgt_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = VECS32(vec_cmpgt(m_value, temp)); + } + + inline void cmplt(const rgbaint_t& value) + { + m_value = VECS32(vec_cmplt(m_value, value.m_value)); + } + + inline void cmplt_imm(const s32 value) + { + const VECS32 temp = { value, value, value, value }; + m_value = VECS32(vec_cmplt(m_value, temp)); + } + + inline void cmplt_imm_rgba(const s32 a, const s32 r, const s32 g, const s32 b) + { +#ifdef __LITTLE_ENDIAN__ + const VECS32 temp = { b, g, r, a }; +#else + const VECS32 temp = { a, r, g, b }; +#endif + m_value = VECS32(vec_cmplt(m_value, temp)); + } + + inline rgbaint_t& operator+=(const rgbaint_t& other) + { + m_value = vec_add(m_value, other.m_value); + return *this; + } + + inline rgbaint_t& operator+=(const s32 other) + { + const VECS32 temp = { other, other, other, other }; + m_value = vec_add(m_value, temp); + return *this; + } + + inline rgbaint_t& operator-=(const rgbaint_t& other) + { + m_value = vec_sub(m_value, other.m_value); + return *this; + } + + inline rgbaint_t& operator*=(const rgbaint_t& other) + { + const VECU32 shift = vec_splat_u32(-16); + const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(other.m_value, shift)), vec_splat_u32(0)); +#ifdef __LITTLE_ENDIAN__ + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(other.m_value)))); +#else + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(other.m_value)))); +#endif + return *this; + } + + inline rgbaint_t& operator*=(const s32 other) + { + const VECS32 value = { other, other, other, other }; + const VECU32 shift = vec_splat_u32(-16); + const VECU32 temp = vec_msum(VECU16(m_value), VECU16(vec_rl(value, shift)), vec_splat_u32(0)); +#ifdef __LITTLE_ENDIAN__ + m_value = 
VECS32(vec_add(vec_sl(temp, shift), vec_mule(VECU16(m_value), VECU16(value)))); +#else + m_value = VECS32(vec_add(vec_sl(temp, shift), vec_mulo(VECU16(m_value), VECU16(value)))); +#endif + return *this; + } + + inline rgbaint_t& operator>>=(const s32 shift) + { + const VECU32 temp = { u32(shift), u32(shift), u32(shift), u32(shift) }; + m_value = vec_sra(m_value, temp); + return *this; + } + + inline void merge_alpha16(const rgbaint_t& alpha) + { + m_value = vec_perm(m_value, alpha.m_value, alpha_perm); + } + + inline void merge_alpha(const rgbaint_t& alpha) + { + m_value = vec_perm(m_value, alpha.m_value, alpha_perm); + } + + static u32 bilinear_filter(const u32 &rgb00, const u32 &rgb01, const u32 &rgb10, const u32 &rgb11, u8 u, u8 v) + { + const VECS32 zero = vec_splat_s32(0); + + // put each packed value into first element of a vector register +#ifdef __LITTLE_ENDIAN__ + VECS32 color00 = *reinterpret_cast(&rgb00); + VECS32 color01 = *reinterpret_cast(&rgb01); + VECS32 color10 = *reinterpret_cast(&rgb10); + VECS32 color11 = *reinterpret_cast(&rgb11); +#else + VECS32 color00 = vec_perm(VECS32(vec_lde(0, &rgb00)), zero, vec_lvsl(0, &rgb00)); + VECS32 color01 = vec_perm(VECS32(vec_lde(0, &rgb01)), zero, vec_lvsl(0, &rgb01)); + VECS32 color10 = vec_perm(VECS32(vec_lde(0, &rgb10)), zero, vec_lvsl(0, &rgb10)); + VECS32 color11 = vec_perm(VECS32(vec_lde(0, &rgb11)), zero, vec_lvsl(0, &rgb11)); +#endif + + // interleave color01/color00 and color10/color11 at the byte level then zero-extend + color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(color00))); + color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(color10))); +#ifdef __LITTLE_ENDIAN__ + color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(zero))); + color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(zero))); +#else + color01 = VECS32(vec_mergeh(VECU8(zero), VECU8(color01))); + color11 = VECS32(vec_mergeh(VECU8(zero), VECU8(color11))); +#endif + + color01 = vec_msum(VECS16(color01), scale_table[u], zero); + color11 = 
vec_msum(VECS16(color11), scale_table[u], zero); + color01 = vec_sl(color01, vec_splat_u32(15)); + color11 = vec_sr(color11, vec_splat_u32(1)); + color01 = VECS32(vec_max(VECS16(color01), VECS16(color11))); + color01 = vec_msum(VECS16(color01), scale_table[v], zero); + color01 = vec_sr(color01, vec_splat_u32(15)); + color01 = VECS32(vec_packs(color01, color01)); + color01 = VECS32(vec_packsu(VECS16(color01), VECS16(color01))); + + u32 result; + vec_ste(VECU32(color01), 0, &result); + return result; + } + + void bilinear_filter_rgbaint(const u32 &rgb00, const u32 &rgb01, const u32 &rgb10, const u32 &rgb11, u8 u, u8 v) + { + const VECS32 zero = vec_splat_s32(0); + + // put each packed value into first element of a vector register +#ifdef __LITTLE_ENDIAN__ + VECS32 color00 = *reinterpret_cast(&rgb00); + VECS32 color01 = *reinterpret_cast(&rgb01); + VECS32 color10 = *reinterpret_cast(&rgb10); + VECS32 color11 = *reinterpret_cast(&rgb11); +#else + VECS32 color00 = vec_perm(VECS32(vec_lde(0, &rgb00)), zero, vec_lvsl(0, &rgb00)); + VECS32 color01 = vec_perm(VECS32(vec_lde(0, &rgb01)), zero, vec_lvsl(0, &rgb01)); + VECS32 color10 = vec_perm(VECS32(vec_lde(0, &rgb10)), zero, vec_lvsl(0, &rgb10)); + VECS32 color11 = vec_perm(VECS32(vec_lde(0, &rgb11)), zero, vec_lvsl(0, &rgb11)); +#endif + + // interleave color01/color00 and color10/color11 at the byte level then zero-extend + color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(color00))); + color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(color10))); +#ifdef __LITTLE_ENDIAN__ + color01 = VECS32(vec_mergeh(VECU8(color01), VECU8(zero))); + color11 = VECS32(vec_mergeh(VECU8(color11), VECU8(zero))); +#else + color01 = VECS32(vec_mergeh(VECU8(zero), VECU8(color01))); + color11 = VECS32(vec_mergeh(VECU8(zero), VECU8(color11))); +#endif + + color01 = vec_msum(VECS16(color01), scale_table[u], zero); + color11 = vec_msum(VECS16(color11), scale_table[u], zero); + color01 = vec_sl(color01, vec_splat_u32(15)); + color11 = 
vec_sr(color11, vec_splat_u32(1)); + color01 = VECS32(vec_max(VECS16(color01), VECS16(color11))); + color01 = vec_msum(VECS16(color01), scale_table[v], zero); + m_value = vec_sr(color01, vec_splat_u32(15)); + } + +protected: + VECS32 m_value; + + static const VECU8 alpha_perm; + static const VECU8 red_perm; + static const VECU8 green_perm; + static const VECU8 blue_perm; + static const VECS16 scale_table[256]; +}; + + + +// altivec.h somehow redefines "bool" in a bad way. really. +#ifdef vector +#undef vector +#endif +#ifdef bool +#undef bool +#endif +#ifdef pixel +#undef pixel +#endif + +#endif // MAME_EMU_VIDEO_RGBVMX_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/abi.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/abi.h new file mode 100644 index 0000000000..b9e18f1635 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/abi.h @@ -0,0 +1,117 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/// \file +/// \brief ABI feature macros +/// +/// Macros that are useful for writing ABI-dependent code. +#ifndef MAME_LIB_UTIL_ABI_H +#define MAME_LIB_UTIL_ABI_H + +#pragma once + + +/// \brief Itanium C++ ABI +/// +/// Value of #MAME_ABI_CXX_TYPE when compiled with a variant of the +/// Itanium C++ ABI. +/// \sa MAME_ABI_CXX_TYPE MAME_ABI_CXX_MSVC +#define MAME_ABI_CXX_ITANIUM 0 + +/// \brief Microsoft Visual C++ ABI +/// +/// Value of #MAME_ABI_CXX_TYPE when compiled with a variant of the +/// Microsoft Visual C++ ABI. +/// \sa MAME_ABI_CXX_TYPE MAME_ABI_CXX_ITANIUM +#define MAME_ABI_CXX_MSVC 1 + + +/// \brief Standard Itanium C++ ABI member function pointers +/// +/// Value of #MAME_ABI_CXX_ITANIUM_MFP_TYPE when compiled with a variant +/// of the Itanium C++ ABI using the standard representation of +/// pointers to non-static member functions. 
+/// \sa MAME_ABI_CXX_ITANIUM_MFP_TYPE MAME_ABI_CXX_ITANIUM_MFP_ARM +#define MAME_ABI_CXX_ITANIUM_MFP_STANDARD 0 + +/// \brief ARM Itanium C++ ABI member function pointers +/// +/// Value of #MAME_ABI_CXX_ITANIUM_MFP_TYPE when compiled with a variant +/// of the Itanium C++ ABI using the 32-bit ARM representation of +/// pointers to non-static member functions. +/// \sa MAME_ABI_CXX_ITANIUM_MFP_TYPE MAME_ABI_CXX_ITANIUM_MFP_STANDARD +#define MAME_ABI_CXX_ITANIUM_MFP_ARM 1 + + +/// \def MAME_ABI_FNDESC_SIZE +/// \brief Size of function descriptors +/// +/// Size of function descriptors as a multiple of the size of a pointer, +/// or zero if function pointers point to the function entry point +/// directly. +#if (defined(__ppc64__) || defined(__PPC64__)) && !defined(__APPLE__) && !defined(__LITTLE_ENDIAN__) + #define MAME_ABI_FNDESC_SIZE 3 // entry point (PC), TOC (R2), environment (R11) +#elif defined(__ia64__) + #define MAME_ABI_FNDESC_SIZE 2 // GP, entry point +#else + #define MAME_ABI_FNDESC_SIZE 0 // function pointers point to entry point directly +#endif + + +/// \def MAME_ABI_CXX_TYPE +/// \brief C++ ABI type +/// +/// A constant representing the C++ ABI. +/// \sa MAME_ABI_CXX_ITANIUM MAME_ABI_CXX_MSVC +#if defined(_MSC_VER) + #define MAME_ABI_CXX_TYPE MAME_ABI_CXX_MSVC +#else + #define MAME_ABI_CXX_TYPE MAME_ABI_CXX_ITANIUM +#endif + + +/// \def MAME_ABI_CXX_MEMBER_CALL +/// \brief Member function calling convention qualifier +/// +/// A qualifier for functions and function pointers that may be used to +/// specify that the calling convention for non-static member functions +/// should be used. 
+#if defined(__GNUC__) && defined(__MINGW32__) && !defined(__x86_64__) && defined(__i386__) + #define MAME_ABI_CXX_MEMBER_CALL __thiscall +#else + #define MAME_ABI_CXX_MEMBER_CALL +#endif + + +/// \def MAME_ABI_CXX_VTABLE_FNDESC +/// \brief Whether function descriptors are stored in virtual tables +/// +/// Non-zero if function descriptors are stored in virtual tables +/// directly, or zero if function entries in virtual tables are +/// conventional function pointers. +/// \sa MAME_ABI_FNDESC_SIZE +#if defined(__ia64__) + #define MAME_ABI_CXX_VTABLE_FNDESC 1 // function descriptors stored directly in vtable +#else + #define MAME_ABI_CXX_VTABLE_FNDESC 0 // conventional function pointers in vtable +#endif + + +/// \def MAME_ABI_CXX_ITANIUM_MFP_TYPE +/// Itanium C++ member function representation +/// +/// A constant representing the representation of pointers to non-static +/// member functions in use with the Itanium C++ ABI. Only valid if +/// compiled with a variant of the Itanium C++ ABI. 
+/// \sa MAME_ABI_CXX_ITANIUM_MFP_STANDARD MAME_ABI_CXX_ITANIUM_MFP_ARM +/// MAME_ABI_CXX_TYPE +#if defined(__arm__) || defined(__ARMEL__) || defined(__aarch64__) + #define MAME_ABI_CXX_ITANIUM_MFP_TYPE MAME_ABI_CXX_ITANIUM_MFP_ARM +#elif defined(__MIPSEL__) || defined(__mips_isa_rev) || defined(__mips64) + #define MAME_ABI_CXX_ITANIUM_MFP_TYPE MAME_ABI_CXX_ITANIUM_MFP_ARM +#elif defined(__EMSCRIPTEN__) + #define MAME_ABI_CXX_ITANIUM_MFP_TYPE MAME_ABI_CXX_ITANIUM_MFP_ARM +#else + #define MAME_ABI_CXX_ITANIUM_MFP_TYPE MAME_ABI_CXX_ITANIUM_MFP_STANDARD +#endif + +#endif // MAME_LIB_UTIL_ABI_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/bitmap.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/bitmap.h new file mode 100644 index 0000000000..1b8dee3b5b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/bitmap.h @@ -0,0 +1,415 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + bitmap.h + + Core bitmap routines. 
+ +***************************************************************************/ + +#ifndef MAME_UTIL_BITMAP_H +#define MAME_UTIL_BITMAP_H + +#pragma once + +#include "palette.h" + +#include +#include + + +//************************************************************************** +// TYPE DEFINITIONS +//************************************************************************** + +// bitmap_format describes the various bitmap formats we use +enum bitmap_format +{ + BITMAP_FORMAT_INVALID = 0, // invalid forma + BITMAP_FORMAT_IND8, // 8bpp indexed + BITMAP_FORMAT_IND16, // 16bpp indexed + BITMAP_FORMAT_IND32, // 32bpp indexed + BITMAP_FORMAT_IND64, // 64bpp indexed + BITMAP_FORMAT_RGB32, // 32bpp 8-8-8 RGB + BITMAP_FORMAT_ARGB32, // 32bpp 8-8-8-8 ARGB + BITMAP_FORMAT_YUY16 // 16bpp 8-8 Y/Cb, Y/Cr in sequence +}; + + +// ======================> rectangle + +// rectangles describe a bitmap portion +class rectangle +{ +public: + // construction/destruction + constexpr rectangle() { } + constexpr rectangle(int32_t minx, int32_t maxx, int32_t miny, int32_t maxy) + : min_x(minx), max_x(maxx), min_y(miny), max_y(maxy) + { } + + // getters + constexpr int32_t left() const { return min_x; } + constexpr int32_t right() const { return max_x; } + constexpr int32_t top() const { return min_y; } + constexpr int32_t bottom() const { return max_y; } + + // compute intersection with another rect + rectangle &operator&=(const rectangle &src) + { + if (src.min_x > min_x) min_x = src.min_x; + if (src.max_x < max_x) max_x = src.max_x; + if (src.min_y > min_y) min_y = src.min_y; + if (src.max_y < max_y) max_y = src.max_y; + return *this; + } + + // compute union with another rect + rectangle &operator|=(const rectangle &src) + { + if (src.min_x < min_x) min_x = src.min_x; + if (src.max_x > max_x) max_x = src.max_x; + if (src.min_y < min_y) min_y = src.min_y; + if (src.max_y > max_y) max_y = src.max_y; + return *this; + } + + rectangle operator&(const rectangle &b) + { + rectangle 
a(*this); + a &= b; + return a; + } + + rectangle operator|(const rectangle &b) + { + rectangle a(*this); + a |= b; + return a; + } + + // comparisons + constexpr bool operator==(const rectangle &rhs) const { return min_x == rhs.min_x && max_x == rhs.max_x && min_y == rhs.min_y && max_y == rhs.max_y; } + constexpr bool operator!=(const rectangle &rhs) const { return min_x != rhs.min_x || max_x != rhs.max_x || min_y != rhs.min_y || max_y != rhs.max_y; } + constexpr bool operator>(const rectangle &rhs) const { return min_x < rhs.min_x && min_y < rhs.min_y && max_x > rhs.max_x && max_y > rhs.max_y; } + constexpr bool operator>=(const rectangle &rhs) const { return min_x <= rhs.min_x && min_y <= rhs.min_y && max_x >= rhs.max_x && max_y >= rhs.max_y; } + constexpr bool operator<(const rectangle &rhs) const { return min_x >= rhs.min_x || min_y >= rhs.min_y || max_x <= rhs.max_x || max_y <= rhs.max_y; } + constexpr bool operator<=(const rectangle &rhs) const { return min_x > rhs.min_x || min_y > rhs.min_y || max_x < rhs.max_x || max_y < rhs.max_y; } + + // other helpers + constexpr bool empty() const { return (min_x > max_x) || (min_y > max_y); } + constexpr bool contains(int32_t x, int32_t y) const { return (x >= min_x) && (x <= max_x) && (y >= min_y) && (y <= max_y); } + constexpr bool contains(const rectangle &rect) const { return (min_x <= rect.min_x) && (max_x >= rect.max_x) && (min_y <= rect.min_y) && (max_y >= rect.max_y); } + constexpr int32_t width() const { return max_x + 1 - min_x; } + constexpr int32_t height() const { return max_y + 1 - min_y; } + constexpr int32_t xcenter() const { return (min_x + max_x + 1) / 2; } + constexpr int32_t ycenter() const { return (min_y + max_y + 1) / 2; } + + // setters + void set(int32_t minx, int32_t maxx, int32_t miny, int32_t maxy) { min_x = minx; max_x = maxx; min_y = miny; max_y = maxy; } + void setx(int32_t minx, int32_t maxx) { min_x = minx; max_x = maxx; } + void sety(int32_t miny, int32_t maxy) { min_y = miny; max_y = 
maxy; } + void set_width(int32_t width) { max_x = min_x + width - 1; } + void set_height(int32_t height) { max_y = min_y + height - 1; } + void set_origin(int32_t x, int32_t y) { max_x += x - min_x; max_y += y - min_y; min_x = x; min_y = y; } + void set_size(int32_t width, int32_t height) { set_width(width); set_height(height); } + + // offset helpers + void offset(int32_t xdelta, int32_t ydelta) { min_x += xdelta; max_x += xdelta; min_y += ydelta; max_y += ydelta; } + void offsetx(int32_t delta) { min_x += delta; max_x += delta; } + void offsety(int32_t delta) { min_y += delta; max_y += delta; } + + // internal state + int32_t min_x = 0; // minimum X, or left coordinate + int32_t max_x = 0; // maximum X, or right coordinate (inclusive) + int32_t min_y = 0; // minimum Y, or top coordinate + int32_t max_y = 0; // maximum Y, or bottom coordinate (inclusive) +}; + + +// ======================> bitmap_t + +// bitmaps describe a rectangular array of pixels +class bitmap_t +{ +protected: + // construction/destruction -- subclasses only to ensure type correctness + bitmap_t(const bitmap_t &) = delete; + bitmap_t(bitmap_t &&that); + bitmap_t(bitmap_format format, uint8_t bpp, int width = 0, int height = 0, int xslop = 0, int yslop = 0); + bitmap_t(bitmap_format format, uint8_t bpp, void *base, int width, int height, int rowpixels); + bitmap_t(bitmap_format format, uint8_t bpp, bitmap_t &source, const rectangle &subrect); + virtual ~bitmap_t(); + + // prevent implicit copying + bitmap_t &operator=(const bitmap_t &) = delete; + bitmap_t &operator=(bitmap_t &&that); + +public: + // allocation/deallocation + void reset(); + + // getters + int32_t width() const { return m_width; } + int32_t height() const { return m_height; } + int32_t rowpixels() const { return m_rowpixels; } + int32_t rowbytes() const { return m_rowpixels * m_bpp / 8; } + uint8_t bpp() const { return m_bpp; } + bitmap_format format() const { return m_format; } + bool valid() const { return (m_base != 
nullptr); } + palette_t *palette() const { return m_palette; } + const rectangle &cliprect() const { return m_cliprect; } + + // allocation/sizing + void allocate(int width, int height, int xslop = 0, int yslop = 0); + void resize(int width, int height, int xslop = 0, int yslop = 0); + + // operations + void set_palette(palette_t *palette); + void fill(uint64_t color) { fill(color, m_cliprect); } + void fill(uint64_t color, const rectangle &bounds); + void plot_box(int32_t x, int32_t y, int32_t width, int32_t height, uint64_t color) + { + fill(color, rectangle(x, x + width - 1, y, y + height - 1)); + } + + // pixel access + void *raw_pixptr(int32_t y, int32_t x = 0) { return reinterpret_cast(m_base) + (y * m_rowpixels + x) * m_bpp / 8; } + void const *raw_pixptr(int32_t y, int32_t x = 0) const { return reinterpret_cast(m_base) + (y * m_rowpixels + x) * m_bpp / 8; } + +protected: + // for use by subclasses only to ensure type correctness + template PixelType &pixt(int32_t y, int32_t x = 0) { return *(reinterpret_cast(m_base) + y * m_rowpixels + x); } + template PixelType const &pixt(int32_t y, int32_t x = 0) const { return *(reinterpret_cast(m_base) + y * m_rowpixels + x); } + void wrap(void *base, int width, int height, int rowpixels); + void wrap(bitmap_t &source, const rectangle &subrect); + +private: + // internal helpers + int32_t compute_rowpixels(int width, int xslop); + void compute_base(int xslop, int yslop); + bool valid_format() const; + + // internal state + std::unique_ptr m_alloc; // pointer to allocated pixel memory + uint32_t m_allocbytes; // size of our allocation + void * m_base; // pointer to pixel (0,0) (adjusted for padding) + int32_t m_rowpixels; // pixels per row (including padding) + int32_t m_width; // width of the bitmap + int32_t m_height; // height of the bitmap + bitmap_format m_format; // format of the bitmap + uint8_t m_bpp; // bits per pixel + palette_t * m_palette; // optional palette + rectangle m_cliprect; // a clipping rectangle 
covering the full bitmap +}; + + +// ======================> bitmap_specific, bitmap8_t, bitmap16_t, bitmap32_t, bitmap64_t + +template +class bitmap_specific : public bitmap_t +{ + static constexpr int PIXEL_BITS = 8 * sizeof(PixelType); + +protected: + // construction/destruction -- subclasses only + bitmap_specific(bitmap_specific &&) = default; + bitmap_specific(bitmap_format format, int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap_t(format, PIXEL_BITS, width, height, xslop, yslop) { } + bitmap_specific(bitmap_format format, PixelType *base, int width, int height, int rowpixels) : bitmap_t(format, PIXEL_BITS, base, width, height, rowpixels) { } + bitmap_specific(bitmap_format format, bitmap_specific &source, const rectangle &subrect) : bitmap_t(format, PIXEL_BITS, source, subrect) { } + + bitmap_specific &operator=(bitmap_specific &&) = default; + +public: + using pixel_t = PixelType; + + // getters + uint8_t bpp() const { return PIXEL_BITS; } + + // pixel accessors + PixelType &pix(int32_t y, int32_t x = 0) { return pixt(y, x); } + PixelType const &pix(int32_t y, int32_t x = 0) const { return pixt(y, x); } + + // operations + void fill(PixelType color) { fill(color, cliprect()); } + void fill(PixelType color, const rectangle &bounds) + { + // if we have a cliprect, intersect with that + rectangle fill(bounds); + fill &= cliprect(); + if (!fill.empty()) + { + for (int32_t y = fill.top(); y <= fill.bottom(); y++) + std::fill_n(&pix(y, fill.left()), fill.width(), color); + } + } + void plot_box(int32_t x, int32_t y, int32_t width, int32_t height, PixelType color) + { + fill(color, rectangle(x, x + width - 1, y, y + height - 1)); + } +}; + +// 8bpp bitmaps +using bitmap8_t = bitmap_specific; +extern template class bitmap_specific; + +// 16bpp bitmaps +using bitmap16_t = bitmap_specific; +extern template class bitmap_specific; + +// 32bpp bitmaps +using bitmap32_t = bitmap_specific; +extern template class bitmap_specific; + +// 64bpp bitmaps 
+using bitmap64_t = bitmap_specific; +extern template class bitmap_specific; + + +// ======================> bitmap_ind8, bitmap_ind16, bitmap_ind32, bitmap_ind64 + +// BITMAP_FORMAT_IND8 bitmaps +class bitmap_ind8 : public bitmap8_t +{ + static const bitmap_format k_bitmap_format = BITMAP_FORMAT_IND8; + +public: + // construction/destruction + bitmap_ind8(bitmap_ind8 &&) = default; + bitmap_ind8(int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap8_t(k_bitmap_format, width, height, xslop, yslop) { } + bitmap_ind8(uint8_t *base, int width, int height, int rowpixels) : bitmap8_t(k_bitmap_format, base, width, height, rowpixels) { } + bitmap_ind8(bitmap_ind8 &source, const rectangle &subrect) : bitmap8_t(k_bitmap_format, source, subrect) { } + void wrap(uint8_t *base, int width, int height, int rowpixels) { bitmap_t::wrap(base, width, height, rowpixels); } + void wrap(bitmap_ind8 &source, const rectangle &subrect) { bitmap_t::wrap(static_cast(source), subrect); } + + // getters + bitmap_format format() const { return k_bitmap_format; } + + bitmap_ind8 &operator=(bitmap_ind8 &&) = default; +}; + +// BITMAP_FORMAT_IND16 bitmaps +class bitmap_ind16 : public bitmap16_t +{ + static const bitmap_format k_bitmap_format = BITMAP_FORMAT_IND16; + +public: + // construction/destruction + bitmap_ind16(bitmap_ind16 &&) = default; + bitmap_ind16(int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap16_t(k_bitmap_format, width, height, xslop, yslop) { } + bitmap_ind16(uint16_t *base, int width, int height, int rowpixels) : bitmap16_t(k_bitmap_format, base, width, height, rowpixels) { } + bitmap_ind16(bitmap_ind16 &source, const rectangle &subrect) : bitmap16_t(k_bitmap_format, source, subrect) { } + void wrap(uint16_t *base, int width, int height, int rowpixels) { bitmap_t::wrap(base, width, height, rowpixels); } + void wrap(bitmap_ind8 &source, const rectangle &subrect) { bitmap_t::wrap(static_cast(source), subrect); } + + // getters + bitmap_format 
format() const { return k_bitmap_format; } + + bitmap_ind16 &operator=(bitmap_ind16 &&) = default; +}; + +// BITMAP_FORMAT_IND32 bitmaps +class bitmap_ind32 : public bitmap32_t +{ + static const bitmap_format k_bitmap_format = BITMAP_FORMAT_IND32; + +public: + // construction/destruction + bitmap_ind32(bitmap_ind32 &&) = default; + bitmap_ind32(int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap32_t(k_bitmap_format, width, height, xslop, yslop) { } + bitmap_ind32(uint32_t *base, int width, int height, int rowpixels) : bitmap32_t(k_bitmap_format, base, width, height, rowpixels) { } + bitmap_ind32(bitmap_ind32 &source, const rectangle &subrect) : bitmap32_t(k_bitmap_format, source, subrect) { } + void wrap(uint32_t *base, int width, int height, int rowpixels) { bitmap_t::wrap(base, width, height, rowpixels); } + void wrap(bitmap_ind8 &source, const rectangle &subrect) { bitmap_t::wrap(static_cast(source), subrect); } + + // getters + bitmap_format format() const { return k_bitmap_format; } + + bitmap_ind32 &operator=(bitmap_ind32 &&) = default; +}; + +// BITMAP_FORMAT_IND64 bitmaps +class bitmap_ind64 : public bitmap64_t +{ + static const bitmap_format k_bitmap_format = BITMAP_FORMAT_IND64; + +public: + // construction/destruction + bitmap_ind64(bitmap_ind64 &&) = default; + bitmap_ind64(int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap64_t(k_bitmap_format, width, height, xslop, yslop) { } + bitmap_ind64(uint64_t *base, int width, int height, int rowpixels) : bitmap64_t(k_bitmap_format, base, width, height, rowpixels) { } + bitmap_ind64(bitmap_ind64 &source, const rectangle &subrect) : bitmap64_t(k_bitmap_format, source, subrect) { } + void wrap(uint64_t *base, int width, int height, int rowpixels) { bitmap_t::wrap(base, width, height, rowpixels); } + void wrap(bitmap_ind8 &source, const rectangle &subrect) { bitmap_t::wrap(static_cast(source), subrect); } + + // getters + bitmap_format format() const { return k_bitmap_format; } 
+ + bitmap_ind64 &operator=(bitmap_ind64 &&) = default; +}; + + +// ======================> bitmap_yuy16, bitmap_rgb32, bitmap_argb32 + +// BITMAP_FORMAT_YUY16 bitmaps +class bitmap_yuy16 : public bitmap16_t +{ + static const bitmap_format k_bitmap_format = BITMAP_FORMAT_YUY16; + +public: + // construction/destruction + bitmap_yuy16(bitmap_yuy16 &&) = default; + bitmap_yuy16(int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap16_t(k_bitmap_format, width, height, xslop, yslop) { } + bitmap_yuy16(uint16_t *base, int width, int height, int rowpixels) : bitmap16_t(k_bitmap_format, base, width, height, rowpixels) { } + bitmap_yuy16(bitmap_yuy16 &source, const rectangle &subrect) : bitmap16_t(k_bitmap_format, source, subrect) { } + void wrap(uint16_t *base, int width, int height, int rowpixels) { bitmap_t::wrap(base, width, height, rowpixels); } + void wrap(bitmap_yuy16 &source, const rectangle &subrect) { bitmap_t::wrap(static_cast(source), subrect); } + + // getters + bitmap_format format() const { return k_bitmap_format; } + + bitmap_yuy16 &operator=(bitmap_yuy16 &&) = default; +}; + +// BITMAP_FORMAT_RGB32 bitmaps +class bitmap_rgb32 : public bitmap32_t +{ + static const bitmap_format k_bitmap_format = BITMAP_FORMAT_RGB32; + +public: + // construction/destruction + bitmap_rgb32(bitmap_rgb32 &&) = default; + bitmap_rgb32(int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap32_t(k_bitmap_format, width, height, xslop, yslop) { } + bitmap_rgb32(uint32_t *base, int width, int height, int rowpixels) : bitmap32_t(k_bitmap_format, base, width, height, rowpixels) { } + bitmap_rgb32(bitmap_rgb32 &source, const rectangle &subrect) : bitmap32_t(k_bitmap_format, source, subrect) { } + void wrap(uint32_t *base, int width, int height, int rowpixels) { bitmap_t::wrap(base, width, height, rowpixels); } + void wrap(bitmap_rgb32 &source, const rectangle &subrect) { bitmap_t::wrap(static_cast(source), subrect); } + + // getters + bitmap_format format() 
const { return k_bitmap_format; } + + bitmap_rgb32 &operator=(bitmap_rgb32 &&) = default; +}; + +// BITMAP_FORMAT_ARGB32 bitmaps +class bitmap_argb32 : public bitmap32_t +{ + static const bitmap_format k_bitmap_format = BITMAP_FORMAT_ARGB32; + +public: + // construction/destruction + bitmap_argb32(bitmap_argb32 &&) = default; + bitmap_argb32(int width = 0, int height = 0, int xslop = 0, int yslop = 0) : bitmap32_t(k_bitmap_format, width, height, xslop, yslop) { } + bitmap_argb32(uint32_t *base, int width, int height, int rowpixels) : bitmap32_t(k_bitmap_format, base, width, height, rowpixels) { } + bitmap_argb32(bitmap_argb32 &source, const rectangle &subrect) : bitmap32_t(k_bitmap_format, source, subrect) { } + void wrap(uint32_t *base, int width, int height, int rowpixels) { bitmap_t::wrap(base, width, height, rowpixels); } + void wrap(bitmap_argb32 &source, const rectangle &subrect) { bitmap_t::wrap(static_cast(source), subrect); } + + // getters + bitmap_format format() const { return k_bitmap_format; } + + bitmap_argb32 &operator=(bitmap_argb32 &&) = default; +}; + +#endif // MAME_UTIL_BITMAP_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/corealloc.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/corealloc.h new file mode 100644 index 0000000000..d70a0909f2 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/corealloc.h @@ -0,0 +1,66 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + corealloc.h + + Memory allocation helpers for the helper library. 
+ +***************************************************************************/ + +#ifndef MAME_LIB_UTIL_COREALLOC_H +#define MAME_LIB_UTIL_COREALLOC_H + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + + +// global allocation helpers + +template struct MakeUniqClearT { typedef std::unique_ptr single_object; }; + +template struct MakeUniqClearT { typedef std::unique_ptr array; }; + +template struct MakeUniqClearT { struct invalid_type { }; }; + +/// make_unique_clear for single objects +template +inline typename MakeUniqClearT::single_object make_unique_clear(Params&&... args) +{ + void *const ptr = ::operator new(sizeof(Tp)); // allocate memory + std::memset(ptr, 0, sizeof(Tp)); + return std::unique_ptr(new(ptr) Tp(std::forward(args)...)); +} + +/// make_unique_clear for arrays of unknown bound +template +inline typename MakeUniqClearT::array make_unique_clear(size_t num) +{ + auto size = sizeof(std::remove_extent_t) * num; + unsigned char* ptr = new unsigned char[size]; // allocate memory + std::memset(ptr, 0, size); + return std::unique_ptr(new(ptr) std::remove_extent_t[num]()); +} + +template +inline typename MakeUniqClearT::array make_unique_clear(size_t num) +{ + auto size = sizeof(std::remove_extent_t) * num; + unsigned char* ptr = new unsigned char[size]; // allocate memory + std::memset(ptr, F, size); + return std::unique_ptr(new(ptr) std::remove_extent_t[num]()); +} + +/// Disable make_unique_clear for arrays of known bound +template +inline typename MakeUniqClearT::invalid_type make_unique_clear(Params&&...) 
= delete; + +#endif // MAME_LIB_UTIL_COREALLOC_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/coretmpl.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/coretmpl.h new file mode 100644 index 0000000000..9d033f72d2 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/coretmpl.h @@ -0,0 +1,689 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles, Vas Crabb +/*************************************************************************** + + coretmpl.h + + Core templates for basic non-string types. + +***************************************************************************/ +#ifndef MAME_UTIL_CORETMPL_H +#define MAME_UTIL_CORETMPL_H + +#pragma once + +#include "osdcomm.h" +#include "vecstream.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +// ======================> simple_list + +// a simple_list is a singly-linked list whose 'next' pointer is owned +// by the object +template +class simple_list final +{ +public: + class auto_iterator + { + public: + typedef int difference_type; + typedef ElementType value_type; + typedef ElementType *pointer; + typedef ElementType &reference; + typedef std::forward_iterator_tag iterator_category; + + // construction/destruction + auto_iterator() noexcept : m_current(nullptr) { } + auto_iterator(ElementType *ptr) noexcept : m_current(ptr) { } + + // required operator overloads + bool operator==(const auto_iterator &iter) const noexcept { return m_current == iter.m_current; } + bool operator!=(const auto_iterator &iter) const noexcept { return m_current != iter.m_current; } + ElementType &operator*() const noexcept { return *m_current; } + ElementType *operator->() const noexcept { return m_current; } + // note that ElementType::next() must not return a const ptr + auto_iterator &operator++() noexcept { m_current = m_current->next(); return *this; } + auto_iterator operator++(int) noexcept { auto_iterator result(*this); m_current = m_current->next(); return 
result; } + + private: + // private state + ElementType *m_current; + }; + + // construction/destruction + simple_list() noexcept { } + ~simple_list() noexcept { reset(); } + + // we don't support deep copying + simple_list(const simple_list &) = delete; + simple_list &operator=(const simple_list &) = delete; + + // but we do support cheap swap/move + simple_list(simple_list &&list) noexcept { operator=(std::move(list)); } + simple_list &operator=(simple_list &&list) + { + using std::swap; + swap(m_head, list.m_head); + swap(m_tail, list.m_tail); + swap(m_count, list.m_count); + return *this; + } + + // simple getters + ElementType *first() const noexcept { return m_head; } + ElementType *last() const noexcept { return m_tail; } + int count() const noexcept { return m_count; } + bool empty() const noexcept { return m_count == 0; } + + // range iterators + auto_iterator begin() const noexcept { return auto_iterator(m_head); } + auto_iterator end() const noexcept { return auto_iterator(nullptr); } + + // remove (free) all objects in the list, leaving an empty list + void reset() noexcept + { + while (m_head != nullptr) + remove(*m_head); + } + + // add the given object to the head of the list + ElementType &prepend(ElementType &object) noexcept + { + object.m_next = m_head; + m_head = &object; + if (m_tail == nullptr) + m_tail = m_head; + m_count++; + return object; + } + + // add the given list to the head of the list + void prepend_list(simple_list &list) noexcept + { + int count = list.count(); + if (count == 0) + return; + ElementType *tail = list.last(); + ElementType *head = list.detach_all(); + tail->m_next = m_head; + m_head = head; + if (m_tail == nullptr) + m_tail = tail; + m_count += count; + } + + // add the given object to the tail of the list + ElementType &append(ElementType &object) noexcept + { + object.m_next = nullptr; + if (m_tail != nullptr) + m_tail = m_tail->m_next = &object; + else + m_tail = m_head = &object; + m_count++; + return object; + } 
+ + // add the given list to the tail of the list + void append_list(simple_list &list) noexcept + { + int count = list.count(); + if (count == 0) + return; + ElementType *tail = list.last(); + ElementType *head = list.detach_all(); + if (m_tail != nullptr) + m_tail->m_next = head; + else + m_head = head; + m_tail = tail; + m_count += count; + } + + // insert the given object after a particular object (nullptr means prepend) + ElementType &insert_after(ElementType &object, ElementType *insert_after) noexcept + { + if (insert_after == nullptr) + return prepend(object); + object.m_next = insert_after->m_next; + insert_after->m_next = &object; + if (m_tail == insert_after) + m_tail = &object; + m_count++; + return object; + } + + // insert the given object before a particular object (nullptr means append) + ElementType &insert_before(ElementType &object, ElementType *insert_before) noexcept + { + if (insert_before == nullptr) + return append(object); + for (ElementType **curptr = &m_head; *curptr != nullptr; curptr = &(*curptr)->m_next) + if (*curptr == insert_before) + { + object.m_next = insert_before; + *curptr = &object; + if (m_head == insert_before) + m_head = &object; + m_count++; + return object; + } + return object; + } + + // replace an item in the list at the same location, and remove it + ElementType &replace_and_remove(ElementType &object, ElementType &toreplace) noexcept + { + ElementType *prev = nullptr; + for (ElementType *cur = m_head; cur != nullptr; prev = cur, cur = cur->m_next) + if (cur == &toreplace) + { + if (prev != nullptr) + prev->m_next = &object; + else + m_head = &object; + if (m_tail == &toreplace) + m_tail = &object; + object.m_next = toreplace.m_next; + delete &toreplace; + return object; + } + return append(object); + } + + // detach the head item from the list, but don't free its memory + ElementType *detach_head() noexcept + { + ElementType *result = m_head; + if (result != nullptr) + { + m_head = result->m_next; + m_count--; + if 
(m_head == nullptr) + m_tail = nullptr; + } + return result; + } + + // detach the given item from the list, but don't free its memory + ElementType &detach(ElementType &object) noexcept + { + ElementType *prev = nullptr; + for (ElementType *cur = m_head; cur != nullptr; prev = cur, cur = cur->m_next) + if (cur == &object) + { + if (prev != nullptr) + prev->m_next = object.m_next; + else + m_head = object.m_next; + if (m_tail == &object) + m_tail = prev; + m_count--; + return object; + } + return object; + } + + // detach the entire list, returning the head, but don't free memory + ElementType *detach_all() noexcept + { + ElementType *result = m_head; + m_head = m_tail = nullptr; + m_count = 0; + return result; + } + + // remove the given object and free its memory + void remove(ElementType &object) noexcept + { + delete &detach(object); + } + + // find an object by index in the list + ElementType *find(int index) const noexcept + { + for (ElementType *cur = m_head; cur != nullptr; cur = cur->m_next) + if (index-- == 0) + return cur; + return nullptr; + } + + // return the index of the given object in the list + int indexof(const ElementType &object) const noexcept + { + int index = 0; + for (ElementType *cur = m_head; cur != nullptr; cur = cur->m_next) + { + if (cur == &object) + return index; + index++; + } + return -1; + } + +private: + // internal state + ElementType * m_head = nullptr; // head of the singly-linked list + ElementType * m_tail = nullptr; // tail of the singly-linked list + int m_count = 0; // number of objects in the list +}; + + +// ======================> fixed_allocator + +// a fixed_allocator is a simple class that maintains a free pool of objects +template +class fixed_allocator +{ + // we don't support deep copying + fixed_allocator(const fixed_allocator &); + fixed_allocator &operator=(const fixed_allocator &); + +public: + // construction/destruction + fixed_allocator() { } + + // allocate a new item, either by recycling an old one, or 
by allocating a new one + ItemType *alloc() + { + ItemType *result = m_freelist.detach_head(); + if (result == nullptr) + result = new ItemType; + return result; + } + + // reclaim an item by adding it to the free list + void reclaim(ItemType *item) { if (item != nullptr) m_freelist.append(*item); } + void reclaim(ItemType &item) { m_freelist.append(item); } + + // reclaim all items from a list + void reclaim_all(simple_list &_list) { m_freelist.append_list(_list); } + +private: + // internal state + simple_list m_freelist; // list of free objects +}; + + +// ======================> contiguous_sequence_wrapper + +namespace util { + +using osd::u8; +using osd::u16; +using osd::u32; +using osd::u64; + +using osd::s8; +using osd::s16; +using osd::s32; +using osd::s64; + + +// wraps an existing sequence of values +template +class contiguous_sequence_wrapper +{ +public: + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef T value_type; + typedef T &reference; + typedef const T &const_reference; + typedef T *pointer; + typedef T *iterator; + typedef const T *const_iterator; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + + contiguous_sequence_wrapper(T *ptr, std::size_t size) + : m_begin(ptr) + , m_end(ptr + size) + { + } + + contiguous_sequence_wrapper(const contiguous_sequence_wrapper &that) = default; + + // iteration + iterator begin() { return m_begin; } + const_iterator begin() const { return m_begin; } + const_iterator cbegin() const { return m_begin; } + iterator end() { return m_end; } + const_iterator end() const { return m_end; } + const_iterator cend() const { return m_end; } + + // reverse iteration + reverse_iterator rbegin() { return std::reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return std::reverse_iterator(end()); } + const_reverse_iterator crbegin() const { return std::reverse_iterator(cend()); } + reverse_iterator rend() { return 
std::reverse_iterator(begin()); } + const_reverse_iterator rend() const { return std::reverse_iterator(begin()); } + const_reverse_iterator crend() const { return std::reverse_iterator(begin()); } + + // capacity + size_type size() const { return m_end - m_begin; } + size_type max_size() const { return size(); } + bool empty() const { return size() == 0; } + + // element access + reference front() { return operator[](0); } + const_reference front() const { return operator[](0); } + reference back() { return operator[](size() - 1); } + const_reference back() const { return operator[](size() - 1); } + reference operator[] (size_type n) { return m_begin[n]; } + const_reference operator[] (size_type n) const { return m_begin[n]; } + reference at(size_type n) { check_in_bounds(n); return operator[](n); } + const_reference at(size_type n) const { check_in_bounds(n); return operator[](n); } + +private: + iterator m_begin; + iterator m_end; + + void check_in_bounds(size_type n) + { + if (n < 0 || n >= size()) + throw std::out_of_range("invalid contiguous_sequence_wrapper subscript"); + } +}; + + +template +class fifo : protected std::array +{ +public: + fifo() + : std::array() + , m_head(this->begin()) + , m_tail(this->begin()) + , m_empty(true) + { + static_assert(0U < N, "FIFO must have at least one element"); + } + fifo(fifo const &) = delete; + fifo(fifo &&) = delete; + fifo &operator=(fifo const &) = delete; + fifo &operator=(fifo &&) = delete; + + template + fifo(fifo const &that) + : std::array(that) + , m_head(std::advance(this->begin(), std::distance(that.begin(), that.m_head))) + , m_tail(std::advance(this->begin(), std::distance(that.begin(), that.m_tail))) + , m_empty(that.m_empty) + { + } + + template + fifo(fifo &&that) + : std::array(std::move(that)) + , m_head(std::advance(this->begin(), std::distance(that.begin(), that.m_head))) + , m_tail(std::advance(this->begin(), std::distance(that.begin(), that.m_tail))) + , m_empty(that.m_empty) + { + } + + template 
+ fifo &operator=(fifo const &that) + { + std::array::operator=(that); + m_head = std::advance(this->begin(), std::distance(that.begin(), that.m_head)); + m_tail = std::advance(this->begin(), std::distance(that.begin(), that.m_tail)); + m_empty = that.m_empty; + return *this; + } + + template + fifo &operator=(fifo &&that) + { + std::array::operator=(std::move(that)); + m_head = std::advance(this->begin(), std::distance(that.begin(), that.m_head)); + m_tail = std::advance(this->begin(), std::distance(that.begin(), that.m_tail)); + m_empty = that.m_empty; + return *this; + } + + bool full() const { return !m_empty && (m_head == m_tail); } + bool empty() const { return m_empty; } + + // number of currently enqueued elements + std::size_t queue_length() const + { + if (m_empty) + return 0; + + auto const distance = std::distance(m_head, m_tail); + + return (distance > 0) ? distance : (N + distance); + } + + void enqueue(T const &v) + { + if (WriteWrap || m_empty || (m_head != m_tail)) + { + *m_tail = v; + if (this->end() == ++m_tail) + m_tail = this->begin(); + m_empty = false; + } + } + + void enqueue(T &&v) + { + if (WriteWrap || m_empty || (m_head != m_tail)) + { + *m_tail = std::move(v); + if (this->end() == ++m_tail) + m_tail = this->begin(); + m_empty = false; + } + } + + T const &dequeue() + { + T const &result(*m_head); + if (ReadWrap || !m_empty) + { + if (this->end() == ++m_head) + m_head = this->begin(); + m_empty = (m_head == m_tail); + } + return result; + } + + void poke(T &v) + { + *m_tail = v; + } + + void poke(T &&v) + { + *m_tail = std::move(v); + } + + T const &peek() const + { + return *m_head; + } + + void clear() + { + m_head = m_tail = this->begin(); + m_empty = true; + } + +private: + typename fifo::iterator m_head, m_tail; + bool m_empty; +}; + + +// extract a string_view from an ovectorstream buffer +template +std::basic_string_view buf_to_string_view(basic_ovectorstream &stream) +{ + // this works on the assumption that the value tellp 
returns is the same both before and after vec is called + return std::basic_string_view(&stream.vec()[0], stream.tellp()); +} + + +// For declaring an array of the same dimensions as another array (including multi-dimensional arrays) +template struct equivalent_array_or_type { typedef T type; }; +template struct equivalent_array_or_type { typedef typename equivalent_array_or_type::type type[N]; }; +template using equivalent_array_or_type_t = typename equivalent_array_or_type::type; +template struct equivalent_array { }; +template struct equivalent_array { typedef equivalent_array_or_type_t type[N]; }; +template using equivalent_array_t = typename equivalent_array::type; +#define EQUIVALENT_ARRAY(a, T) util::equivalent_array_t > + + +template +using enable_enum_t = typename std::enable_if_t::value, typename std::underlying_type_t >; + +// template function which takes a strongly typed enumerator and returns its value as a compile-time constant +template +constexpr enable_enum_t underlying_value(E e) noexcept +{ + return static_cast >(e); +} + +// template function which takes an integral value and returns its representation as enumerator (even strongly typed) +template +constexpr typename std::enable_if_t::value && std::is_integral::value, E> enum_value(T value) noexcept +{ + return static_cast(value); +} + + +/// \defgroup bitutils Useful functions for bit shuffling +/// \{ + +/// \brief Generate a right-aligned bit mask +/// +/// Generates a right aligned mask of the specified width. Works with +/// signed and unsigned integer types. +/// \tparam T Desired output type. +/// \tparam U Type of the input (generally resolved by the compiler). +/// \param [in] n Width of the mask to generate in bits. +/// \return Right-aligned mask of the specified width. + +template constexpr T make_bitmask(U n) +{ + return T((n < (8 * sizeof(T)) ? 
(std::make_unsigned_t(1) << n) : std::make_unsigned_t(0)) - 1); +} + + +/// \brief Extract a single bit from an integer +/// +/// Extracts a single bit from an integer into the least significant bit +/// position. +/// +/// \param [in] x The integer to extract the bit from. +/// \param [in] n The bit to extract, where zero is the least +/// significant bit of the input. +/// \return Zero if the specified bit is unset, or one if it is set. +/// \sa bitswap +template constexpr T BIT(T x, U n) noexcept { return (x >> n) & T(1); } + + +/// \brief Extract a bit field from an integer +/// +/// Extracts and right-aligns a bit field from an integer. +/// +/// \param [in] x The integer to extract the bit field from. +/// \param [in] n The least significant bit position of the field to +/// extract, where zero is the least significant bit of the input. +/// \param [in] w The width of the field to extract in bits. +/// \return The field [n..(n+w-1)] from the input. +/// \sa bitswap +template constexpr T BIT(T x, U n, V w) +{ + return (x >> n) & make_bitmask(w); +} + + +/// \brief Extract bits in arbitrary order +/// +/// Extracts bits from an integer. Specify the bits in the order they +/// should be arranged in the output, from most significant to least +/// significant. The extracted bits will be packed into a right-aligned +/// field in the output. +/// +/// \param [in] val The integer to extract bits from. +/// \param [in] b The first bit to extract from the input +/// extract, where zero is the least significant bit of the input. +/// This bit will appear in the most significant position of the +/// right-aligned output field. +/// \param [in] c The remaining bits to extract, where zero is the +/// least significant bit of the input. +/// \return The extracted bits packed into a right-aligned field. +template constexpr T bitswap(T val, U b, V... 
c) noexcept +{ + if constexpr (sizeof...(c) > 0U) + return (BIT(val, b) << sizeof...(c)) | bitswap(val, c...); + else + return BIT(val, b); +} + + +/// \brief Extract bits in arbitrary order with explicit count +/// +/// Extracts bits from an integer. Specify the bits in the order they +/// should be arranged in the output, from most significant to least +/// significant. The extracted bits will be packed into a right-aligned +/// field in the output. The number of bits to extract must be supplied +/// as a template argument. +/// +/// A compile error will be generated if the number of bit positions +/// supplied does not match the specified number of bits to extract, or +/// if the output type is too small to hold the extracted bits. This +/// guards against some simple errors. +/// +/// \tparam B The number of bits to extract. Must match the number of +/// bit positions supplied. +/// \param [in] val The integer to extract bits from. +/// \param [in] b Bits to extract, where zero is the least significant +/// bit of the input. Specify bits in the order they should appear in +/// the output field, from most significant to least significant. +/// \return The extracted bits packed into a right-aligned field. +template T bitswap(T val, U... b) noexcept +{ + static_assert(sizeof...(b) == B, "wrong number of bits"); + static_assert((sizeof(std::remove_reference_t) * 8) >= B, "return type too small for result"); + return bitswap(val, b...); +} + +/// \} + + +// constexpr absolute value of an integer +template +constexpr std::enable_if_t::value, T> iabs(T v) noexcept +{ + return (v < T(0)) ? 
-v : v; +} + + +// reduce a fraction +template +inline void reduce_fraction(M &num, N &den) +{ + auto const div(std::gcd(num, den)); + if (div) + { + num /= div; + den /= div; + } +} + +} // namespace util + +#endif // MAME_UTIL_CORETMPL_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/delegate.cpp b/waterbox/ares64/ares/thirdparty/mame/lib/util/delegate.cpp new file mode 100644 index 0000000000..8949f90f14 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/delegate.cpp @@ -0,0 +1,287 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles,Vas Crabb +/*************************************************************************** + + delegate.cpp + + Templates and classes to enable delegates for callbacks. + +***************************************************************************/ + +#include "delegate.h" + +#include +#include + + +//************************************************************************** +// MACROS +//************************************************************************** + +#if defined(MAME_DELEGATE_LOG_ADJ) + #define LOG(...) printf(__VA_ARGS__) +#else + #define LOG(...) 
do { if (false) printf(__VA_ARGS__); } while (false) +#endif + + + +//************************************************************************** +// LATE BINDING EXCEPTION +//************************************************************************** + +binding_type_exception::binding_type_exception(std::type_info const &target_type, std::type_info const &actual_type) + : m_target_type(&target_type) + , m_actual_type(&actual_type) +{ + std::ostringstream os; + os << "Error performing late bind of function expecting type " << target_type.name() << " to instance of type " << actual_type.name(); + m_what = os.str(); +} + + +char const *binding_type_exception::what() const noexcept +{ + return m_what.c_str(); +} + + + +namespace util::detail { + +//************************************************************************** +// GLOBAL VARIABLES +//************************************************************************** + +const delegate_mfp_compatible::raw_mfp_data delegate_mfp_compatible::s_null_mfp = { { 0 } }; + + + +//************************************************************************** +// INTERNAL DELEGATE HELPERS +//************************************************************************** + +//------------------------------------------------- +// delegate_mfp_itanium::convert_to_generic - +// given an object pointer and member function +// pointer, apply the displacement and get the +// actual function pointer +//------------------------------------------------- + +delegate_generic_function delegate_mfp_itanium::convert_to_generic(delegate_generic_class *&object) const +{ + // apply the "this" delta to the object first - the value is shifted to the left one bit position for the ARM-like variant + LOG("Input this=%p ptr=%p adj=%ld ", reinterpret_cast(object), reinterpret_cast(m_function), long(m_this_delta)); + object = reinterpret_cast( + reinterpret_cast(object) + (m_this_delta >> ((MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) ? 
1 : 0))); + LOG("Calculated this=%p ", reinterpret_cast(object)); + + // test the virtual member function flag - it's the low bit of either the ptr or adj field, depending on the variant + if ((MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) ? !(m_this_delta & 1) : !(m_function & 1)) + { + // conventional function pointer + LOG("ptr=%p\n", reinterpret_cast(m_function)); + return reinterpret_cast(m_function); + } + else + { + // byte index into the vtable to the function + std::uint8_t const *const vtable_ptr = *reinterpret_cast(object) + m_function - ((MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) ? 0 : 1); + delegate_generic_function result; + if (MAME_ABI_CXX_VTABLE_FNDESC) + result = reinterpret_cast(uintptr_t(vtable_ptr)); + else + result = *reinterpret_cast(vtable_ptr); + LOG("ptr=%p (vtable)\n", reinterpret_cast(result)); + return result; + } +} + + +//------------------------------------------------- +// delegate_mfp_msvc::adjust_this_pointer - given +// an object pointer and member function pointer, +// apply the displacement, and walk past +// recognisable thunks +//------------------------------------------------- + +delegate_generic_function delegate_mfp_msvc::adjust_this_pointer(delegate_generic_class *&object) const +{ + LOG("Input this=%p ", reinterpret_cast(object)); + if (sizeof(single_base_equiv) < m_size) + LOG("thisdelta=%d ", m_this_delta); + if (sizeof(unknown_base_equiv) == m_size) + LOG("vptrdelta=%d vindex=%d ", m_vptr_offs, m_vt_index); + std::uint8_t *byteptr = reinterpret_cast(object); + + // test for pointer to member function cast across virtual inheritance relationship + if ((sizeof(unknown_base_equiv) == m_size) && m_vt_index) + { + // add offset from "this" pointer to location of vptr, and add offset to virtual base from vtable + byteptr += m_vptr_offs; + std::uint8_t const *const vptr = *reinterpret_cast(byteptr); + byteptr += *reinterpret_cast(vptr + m_vt_index); + } + + // add "this" pointer 
displacement if present in the pointer to member function + if (sizeof(single_base_equiv) < m_size) + byteptr += m_this_delta; + LOG("Calculated this=%p\n", reinterpret_cast(byteptr)); + object = reinterpret_cast(byteptr); + + // walk past recognisable thunks +#if defined(__x86_64__) || defined(_M_X64) + std::uint8_t const *func = reinterpret_cast(m_function); + while (true) + { + // Assumes Windows calling convention, and doesn't consider that + // the "this" pointer could be in RDX if RCX is a pointer to + // space for an oversize scalar result. Since the result area + // is uninitialised on entry, you won't see something that looks + // like a vtable dispatch through RCX in this case - it won't + // behave badly, it just won't bypass virtual call thunks in the + // rare situations where the return type is an oversize scalar. + if (0xe9 == func[0]) + { + // relative jump with 32-bit displacement (typically a resolved PLT entry) + LOG("Found relative jump at %p ", func); + func += std::ptrdiff_t(5) + *reinterpret_cast(func + 1); + LOG("redirecting to %p\n", func); + continue; + } + else if ((0x48 == func[0]) && (0x8b == func[1]) && (0x01 == func[2])) + { + if ((0xff == func[3]) && ((0x20 == func[4]) || (0x60 == func[4]) || (0xa0 == func[4]))) + { + // MSVC virtual function call thunk - mov rax,QWORD PTR [rcx] ; jmp QWORD PTR [rax+...] + LOG("Found virtual member function thunk at %p ", func); + std::uint8_t const *const vptr = *reinterpret_cast(object); + if (0x20 == func[4]) // no displacement + func = *reinterpret_cast(vptr); + else if (0x60 == func[4]) // 8-bit displacement + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 5)); + else // 32-bit displacement + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 5)); + LOG("redirecting to %p\n", func); + continue; + } + else if ((0x48 == func[3]) && (0x8b == func[4])) + { + // clang virtual function call thunk - mov rax,QWORD PTR [rcx] ; mov rax,QWORD PTR [rax+...] 
; jmp rax + if ((0x00 == func[5]) && (0x48 == func[6]) && (0xff == func[7]) && (0xe0 == func[8])) + { + // no displacement + LOG("Found virtual member function thunk at %p ", func); + std::uint8_t const *const vptr = *reinterpret_cast(object); + func = *reinterpret_cast(vptr); + LOG("redirecting to %p\n", func); + continue; + } + else if ((0x40 == func[5]) && (0x48 == func[7]) && (0xff == func[8]) && (0xe0 == func[9])) + { + // 8-bit displacement + LOG("Found virtual member function thunk at %p ", func); + std::uint8_t const *const vptr = *reinterpret_cast(object); + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 6)); + LOG("redirecting to %p\n", func); + continue; + } + else if ((0x80 == func[5]) && (0x48 == func[10]) && (0xff == func[11]) && (0xe0 == func[12])) + { + // 32-bit displacement + LOG("Found virtual member function thunk at %p ", func); + std::uint8_t const *const vptr = *reinterpret_cast(object); + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 6)); + LOG("redirecting to %p\n", func); + continue; + } + } + } + + // clang uses unoptimised thunks if optimisation is disabled + // Without optimisation, clang produces thunks like: + // 50 push rax + // 48 89 0c 24 mov QWORD PTR [rsp],rcx + // 48 8b 0c 24 mov rcx,QWORD PTR [rsp] + // 48 8b 01 mov rax,QWORD PTR [rcx] + // 48 8b 80 xx xx xx xx mov rax,QWORD PTR [rax+...] + // 41 5a pop r10 + // 48 ff e0 jmp rax + // Trying to decode these thunks likely isn't worth the effort. + // Chasing performance in unoptimised builds isn't very useful, + // and the format of these thunks may be fragile. + + // not something we can easily bypass + break; + } + return reinterpret_cast(std::uintptr_t(func)); +#elif defined(__aarch64__) || defined(_M_ARM64) + std::uint32_t const *func = reinterpret_cast(m_function); + while (true) + { + // Assumes little Endian mode. 
Instructions are always stored + // in little Endian format on AArch64, so if big Endian mode is + // to be supported, the values need to be swapped. + if ((0x90000010 == (func[0] & 0x9f00001f)) && (0x91000210 == (func[1] & 0xffc003ff)) && (0xd61f0200 == func[2])) + { + // page-relative jump with +/-4GB reach - adrp xip0,... ; add xip0,xip0,#... ; br xip0 + LOG("Found page-relative jump at %p ", func); + std::int64_t const page = + (std::uint64_t(func[0] & 0x60000000) >> 17) | + (std::uint64_t(func[0] & 0x00ffffe0) << 9) | + ((func[0] & 0x00800000) ? (~std::uint64_t(0) << 33) : 0); + std::uint32_t const offset = (func[1] & 0x003ffc00) >> 10; + func = reinterpret_cast(((std::uintptr_t(func) + page) & (~std::uintptr_t(0) << 12)) + offset); + LOG("redirecting to %p\n", func); + } + else if ((0xf9400010 == func[0]) && (0xf9400210 == (func[1] & 0xffc003ff)) && (0xd61f0200 == func[2])) + { + // virtual function call thunk - ldr xip0,[x0] ; ldr xip0,[x0,#...] ; br xip0 + LOG("Found virtual member function thunk at %p ", func); + std::uint32_t const *const *const vptr = *reinterpret_cast(object); + func = vptr[(func[1] & 0x003ffc00) >> 10]; + LOG("redirecting to %p\n", func); + } + else + { + // not something we can easily bypass + break; + } + + // clang uses horribly sub-optimal thunks for AArch64 + // Without optimisation, clang produces thunks like: + // d10143ff sub sp,sp,#80 + // f90027e7 str x7,[sp,#72] + // f90023e6 str x6,[sp,#64] + // f9001fe5 str x5,[sp,#56] + // f9001be4 str x4,[sp,#48] + // f90017e3 str x3,[sp,#40] + // f90013e2 str x2,[sp,#32] + // f9000fe1 str x1,[sp,#24] + // f90007e0 str x0,[sp,#8] + // f94007e0 ldr x0,[sp,#8] + // f9400009 ldr x9,[x0] + // f9400129 ldr x9,[x9,#...] 
+ // 910143ff add sp,sp,#80 + // d61f0120 br x9 + // With optimisation, clang produces thunks like: + // d10103ff sub sp,sp,#64 + // a9008be1 stp x1,x2,[sp,#8] + // a90193e3 stp x3,x4,[sp,#24] + // a9029be5 stp x5,x6,[sp,#40] + // f9001fe7 str x7,[sp,#56] + // f9400009 ldr x9,[x0] + // f9400129 ldr x9,[x9,#...] + // 910103ff add sp,sp,#64 + // d61f0120 br x9 + // It's more effort than it's worth to try decoding these + // thunks. + + } + return reinterpret_cast(std::uintptr_t(func)); +#else + return reinterpret_cast(m_function); +#endif +} + +} // namespace util::detail diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/delegate.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/delegate.h new file mode 100644 index 0000000000..df1d6ebcba --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/delegate.h @@ -0,0 +1,972 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles,Couriersud,Miodrag Milanovic,Vas Crabb +/*************************************************************************** + + delegate.h + + Templates and classes to enable delegates for callbacks. + +**************************************************************************** + + There are many implementations of delegate-like functionality for + C++ code, but none of them is a perfect drop-in fit for use in MAME. + In order to be useful in MAME, we need the following properties: + + * No significant overhead; we want to use these for memory + accessors, and memory accessor overhead is already the dominant + performance aspect for most drivers. + + * Existing static functions need to be bound with an additional + pointer parameter as the first argument. All existing + implementations that allow static function binding assume the + same signature as the member functions. + + * We must be able to bind the function separately from the + object. This is to allow configurations to bind functions + before the objects are created. 
+ + Thus, the implementations below are based on existing works but are + really a new implementation that is specific to MAME. + + -------------------------------------------------------------------- + + The "compatible" version of delegates is based on an implementation + from Sergey Ryazanov, found here: + + https://www.codeproject.com/Articles/11015/The-Impossibly-Fast-C-Delegates + + These delegates essentially generate a templated static stub function + for each target function. The static function takes the first + parameter, uses it as the object pointer, and calls through the + member function. For static functions, the stub is compatible with + the signature of a static function, so we just set the stub directly. + + Pros: + * should work with any modern compiler + * static bindings are just as fast as direct calls + + Cons: + * lots of little stub functions generated + * double-hops on member function calls means more overhead + * calling through stub functions repackages parameters + + -------------------------------------------------------------------- + + The "Itanium" version of delegates makes use of the internal + structure of member function pointers in order to convert them at + binding time into simple static function pointers. This only works + on platforms where object->func(p1, p2) is equivalent in calling + convention to func(object, p1, p2). + + Pros: + * as fast as a standard function call in static and member cases + * no stub functions or double-hops needed + + Cons: + * requires internal knowledge of the member function pointer + * only works for two popular variants of the Itanium C++ ABI + + -------------------------------------------------------------------- + + The "MSVC" version of delegates makes use of the internal structure + of member function pointers in order to convert them at binding time + into simple static function pointers. 
This only works on platforms + where object->func(p1, p2) is equivalent in calling convention to + func(object, p1, p2). + + Pros: + * as fast as a standard function call in static and non-virtual + member cases + * no stub functions needed + + Cons: + * requires internal knowledge of the member function pointer + * only works works with MSVC ABI, and not on 32-bit x86 + * does not work for classes with virtual bases + * structure return does not work with member function pointers + * virtual member function lookup cannot be done in advance + + -------------------------------------------------------------------- + + Further reading: + + * http://itanium-cxx-abi.github.io/cxx-abi/abi.html#member-pointers + Formal specification for the most common member function pointer + implementations. + + * https://www.codeproject.com/Articles/7150/Member-Function-Pointers-and-the-Fastest-Possible + Discusses many member function pointer implementations. Based + on reverse-engineering, so not entirely accurate. In particular, + various fields are incorrectly assumed to be int-sized which is + not true in the general case. + + * https://devblogs.microsoft.com/oldnewthing/20040209-00/?p=40713 + Describes the MSVC implementation of pointers to member + functions for classes with single or multiple inheritance. Does + not mention the additional variants for virtual or unknown + inheritance. Incorrectly states that the "this" pointer + displacement is a size_t when in reality it is an int (important + for 64-bit architectures). 
+ +***************************************************************************/ +#ifndef MAME_LIB_UTIL_DELEGATE_H +#define MAME_LIB_UTIL_DELEGATE_H + +#pragma once + +#include "abi.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +//************************************************************************** +// MACROS +//************************************************************************** + +// types of delegates supported +#define MAME_DELEGATE_TYPE_COMPATIBLE 0 +#define MAME_DELEGATE_TYPE_ITANIUM 1 +#define MAME_DELEGATE_TYPE_MSVC 2 + +// select which one we will be using +#if defined(MAME_DELEGATE_FORCE_COMPATIBLE) + #define MAME_DELEGATE_USE_TYPE MAME_DELEGATE_TYPE_COMPATIBLE +#elif defined(__GNUC__) + // 32bit MINGW asks for different convention + #if defined(__MINGW32__) && !defined(__x86_64__) && defined(__i386__) + #define MAME_DELEGATE_USE_TYPE MAME_DELEGATE_TYPE_COMPATIBLE + //#define MAME_DELEGATE_USE_TYPE MAME_DELEGATE_TYPE_ITANIUM + //#define MAME_DELEGATE_DIFFERENT_MEMBER_ABI 1 + #elif defined(__clang__) && defined(__i386__) && defined(_WIN32) + #define MAME_DELEGATE_USE_TYPE MAME_DELEGATE_TYPE_COMPATIBLE + #else + #define MAME_DELEGATE_USE_TYPE MAME_DELEGATE_TYPE_ITANIUM + #define MAME_DELEGATE_DIFFERENT_MEMBER_ABI 0 + #endif +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) + #define MAME_DELEGATE_DIFFERENT_MEMBER_ABI 0 + #define MAME_DELEGATE_USE_TYPE MAME_DELEGATE_TYPE_MSVC +#else + #define MAME_DELEGATE_USE_TYPE MAME_DELEGATE_TYPE_COMPATIBLE +#endif + +#if MAME_DELEGATE_USE_TYPE == MAME_DELEGATE_TYPE_COMPATIBLE + #define MAME_DELEGATE_DIFFERENT_MEMBER_ABI 0 +#endif + + +/// \brief Base for objects used with late binding +/// +/// Default polymorphic class used as base for objects that can be bound +/// to after the target function has already been set. 
+class delegate_late_bind +{ +public: + virtual ~delegate_late_bind() = default; +}; + + +/// \brief Inappropriate late bind object error +/// +/// Thrown as an exception if the object supplied for late binding +/// cannot be cast to the target type for the delegate's function. +class binding_type_exception : public std::bad_cast +{ +public: + binding_type_exception(std::type_info const &target_type, std::type_info const &actual_type); + + virtual char const *what() const noexcept override; + + std::type_info const &target_type() const noexcept { return *m_target_type; } + std::type_info const &actual_type() const noexcept { return *m_actual_type; } + +private: + std::string m_what; + std::type_info const *m_target_type; + std::type_info const *m_actual_type; +}; + + + +namespace util::detail { + +//************************************************************************** +// HELPER CLASSES +//************************************************************************** + +// generic function type +using delegate_generic_function = void(*)(); + + +// ======================> generic_class + +// define a dummy generic class that is just straight single-inheritance +#ifdef _MSC_VER +class delegate_generic_class { }; +#else +class delegate_generic_class; +#endif + + +// ======================> delegate_traits + +// delegate_traits is a meta-template that is used to provide a static function pointer +// and member function pointer of the appropriate type and number of parameters + +template +struct delegate_traits +{ + using static_func_type = ReturnType (*)(ClassType *, Params...); + using static_ref_func_type = ReturnType (*)(ClassType &, Params...); + using member_func_type = ReturnType (ClassType::*)(Params...); + using const_member_func_type = ReturnType (ClassType::*)(Params...) 
const; +}; + + + +/// \brief Maximally compatible member function pointer wrapper +/// +/// Instantiates a static member function template on construction as +/// an adaptor thunk to call the supplied member function with the +/// supplied object. Adds one layer of indirection to calls. +/// +/// This implementation requires the representation of a null member +/// function pointer to be all zeroes. +class delegate_mfp_compatible +{ +public: + // default constructor + delegate_mfp_compatible() + : m_rawdata(s_null_mfp) + , m_realobject(nullptr) + , m_stubfunction(nullptr) + { } + + // copy constructor + delegate_mfp_compatible(const delegate_mfp_compatible &src) = default; + + // construct from any member function pointer + template + delegate_mfp_compatible(MemberFunctionType mfp, MemberFunctionClass *, ReturnType *, StaticFunctionType) + : m_rawdata(s_null_mfp) + , m_realobject(nullptr) + , m_stubfunction(make_generic(&delegate_mfp_compatible::method_stub)) + { + static_assert(sizeof(mfp) <= sizeof(m_rawdata), "Unsupported member function pointer size"); + *reinterpret_cast(&m_rawdata) = mfp; + } + + // comparison helpers + bool operator==(const delegate_mfp_compatible &rhs) const { return m_rawdata == rhs.m_rawdata; } + bool isnull() const { return m_rawdata == s_null_mfp; } + + // getters + delegate_generic_class *real_object(delegate_generic_class *original) const + { + return m_realobject; + } + + // binding helpers + template + void update_after_bind(FunctionType &funcptr, delegate_generic_class *&object); + + template + void update_after_copy(FunctionType &funcptr, delegate_generic_class *&object); + +private: + // helper stubs for calling encased member function pointers + template + static ReturnType method_stub(delegate_generic_class *object, Params ... 
args); + + // helper to convert a function of a given type to a generic function, forcing template + // instantiation to match the source type + template + static delegate_generic_function make_generic(SourceType funcptr) + { + return reinterpret_cast(funcptr); + } + + // FIXME: not properly aligned for storing pointers + struct raw_mfp_data + { +#if defined(__INTEL_COMPILER) && defined(_M_X64) // needed for "Intel(R) C++ Intel(R) 64 Compiler XE for applications running on Intel(R) 64, Version 14.0.2.176 Build 20140130" at least + int data[((sizeof(void *) + 4 * sizeof(int)) + (sizeof(int) - 1)) / sizeof(int)]; +#else // all other cases - for MSVC maximum size is one pointer, plus 3 ints; all other implementations seem to be smaller + int data[((sizeof(void *) + 3 * sizeof(int)) + (sizeof(int) - 1)) / sizeof(int)]; +#endif + bool operator==(const raw_mfp_data &rhs) const { return !std::memcmp(data, rhs.data, sizeof(data)); } + }; + + // internal state + raw_mfp_data m_rawdata; // raw buffer to hold the copy of the function pointer + delegate_generic_class * m_realobject; // pointer to the object used for calling + delegate_generic_function m_stubfunction; // pointer to our matching stub function + + static const raw_mfp_data s_null_mfp; // nullptr mfp +}; + + +template +void delegate_mfp_compatible::update_after_bind(FunctionType &funcptr, delegate_generic_class *&object) +{ + m_realobject = object; + object = reinterpret_cast(this); + funcptr = reinterpret_cast(m_stubfunction); +} + + +template +void delegate_mfp_compatible::update_after_copy(FunctionType &funcptr, delegate_generic_class *&object) +{ + assert(reinterpret_cast(m_stubfunction) == funcptr); + object = reinterpret_cast(this); +} + + +template +ReturnType delegate_mfp_compatible::method_stub(delegate_generic_class *object, Params ... 
args) +{ + using mfptype = ReturnType(FunctionClass::*)(Params...); + delegate_mfp_compatible &_this = *reinterpret_cast(object); + mfptype &mfp = *reinterpret_cast(&_this.m_rawdata); + return (reinterpret_cast(_this.m_realobject)->*mfp)(std::forward(args)...); +} + + + +/// \brief Itanium C++ ABI member function pointer wrapper +/// +/// Supports the two most popular pointer to member function +/// implementations described in the Itanium C++ ABI. Both of these +/// consist of a pointer followed by a ptrdiff_t. +/// +/// The first variant is used when member the least significant bit of a +/// member function pointer need never be set and vtable entry offsets +/// are guaranteed to be even numbers of bytes. If the pointer is even, +/// it is a conventional function pointer to the member function. If +/// the pointer is odd, it is a byte offset into the vtable plus one. +/// The ptrdiff_t is a byte offset to add to the this pointer. A null +/// member function pointer is represented by setting the pointer to a +/// null pointer. +/// +/// The second variant is used when the least significant bit of a +/// pointer to a member function may need to be set or it may not be +/// possible to distinguish between a vtable offset and a null pointer. +/// (This is the case for ARM where the least significant bit of a +/// pointer to a function is set if the function starts in Thumb mode.) +/// If the least significant bit of the ptrdiff_t is clear, the pointer +/// is a conventional function pointer to the member function. If the +/// least significant bit of the ptrdiff_t is set, the pointer is a byte +/// offset into the vtable. The ptrdiff_t must be shifted right one bit +/// position to make a byte offset to add to the this pointer. A null +/// member function pointer is represented by setting the pointer to a +/// null pointer and clearing the least significant bit of the +/// ptrdiff_t. 
+class delegate_mfp_itanium +{ +public: + // default constructor + delegate_mfp_itanium() = default; + + // copy constructor + delegate_mfp_itanium(const delegate_mfp_itanium &src) = default; + + // construct from any member function pointer + template + delegate_mfp_itanium(MemberFunctionType mfp, MemberFunctionClass *, ReturnType *, StaticFunctionType) + { + static_assert(sizeof(mfp) == sizeof(*this), "Unsupported member function pointer size"); + *reinterpret_cast(this) = mfp; + } + + // comparison helpers + bool operator==(const delegate_mfp_itanium &rhs) const + { + return (isnull() && rhs.isnull()) || ((m_function == rhs.m_function) && (m_this_delta == rhs.m_this_delta)); + } + + bool isnull() const + { + if (MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) + return !reinterpret_cast(m_function) && !(m_this_delta & 1); + else + return !reinterpret_cast(m_function); + } + + // getters + static delegate_generic_class *real_object(delegate_generic_class *original) + { + return original; + } + + // binding helpers + template + void update_after_bind(FunctionType &funcptr, delegate_generic_class *&object) + { + funcptr = reinterpret_cast(convert_to_generic(object)); + } + + template + void update_after_copy(FunctionType &funcptr, delegate_generic_class *&object) + { + } + +private: + // extract the generic function and adjust the object pointer + delegate_generic_function convert_to_generic(delegate_generic_class *&object) const; + + // actual state + uintptr_t m_function = reinterpret_cast(static_cast(nullptr)); // function pointer or vtable offset + ptrdiff_t m_this_delta = 0; // delta to apply to the 'this' pointer +}; + + + +/// \brief MSVC member function pointer wrapper +/// +/// MSVC uses space optimisation. A member function pointer is a +/// conventional function pointer followed by zero to three int values, +/// depending on whether the class has single, multiple, virtual or +/// unknown inheritance of base classes. 
The function pointer is always +/// a conventional function pointer (a thunk is used to call virtual +/// member functions through the vtable). +/// +/// If present, the first int value is a byte offset to add to the this +/// pointer before calling the function. +/// +/// For the virtual inheritance case, the offset to the vtable pointer +/// from the location the this pointer points to must be known by the +/// compiler when the member function pointer is called. The second int +/// value is a byte offset into the vtable to an int value containing an +/// additional byte offset to add to the this pointer. +/// +/// For the unknown inheritance case, the second int value is a byte +/// offset to add to the this pointer to obtain a pointer to the vtable +/// pointer, or undefined if not required. If the third int value is +/// not zero, it is a byte offset into the vtable to an int value +/// containing an additional byte offset to add to the this pointer. +/// +/// It is not possible to support the virtual inheritance case without +/// some way of obtaining the offset to the vtable pointer.
+class delegate_mfp_msvc +{ + struct single_base_equiv { delegate_generic_function fptr; }; + struct multi_base_equiv { delegate_generic_function fptr; int thisdisp; }; + struct unknown_base_equiv { delegate_generic_function fptr; int thisdisp, vptrdisp, vtdisp; }; + +public: + // default constructor + delegate_mfp_msvc() = default; + + // copy constructor + delegate_mfp_msvc(const delegate_mfp_msvc &src) = default; + + // construct from any member function pointer + template + delegate_mfp_msvc(MemberFunctionType mfp, MemberFunctionClass *, ReturnType *, StaticFunctionType) + { + // FIXME: this doesn't actually catch the unsupported virtual inheritance case on 64-bit targets + // alignment of the pointer means sizeof gives the same value for multiple inheritance and virtual inheritance cases + static_assert( + (sizeof(mfp) == sizeof(single_base_equiv)) || (sizeof(mfp) == sizeof(multi_base_equiv)) || (sizeof(mfp) == sizeof(unknown_base_equiv)), + "Unsupported member function pointer size"); + static_assert(sizeof(mfp) <= sizeof(*this), "Member function pointer is too large to support"); + *reinterpret_cast(this) = mfp; + m_size = sizeof(mfp); + } + + // comparison helpers + bool operator==(const delegate_mfp_msvc &rhs) const + { + if (m_function != rhs.m_function) + { + return false; + } + else if (sizeof(single_base_equiv) == m_size) + { + return (sizeof(single_base_equiv) == rhs.m_size) || (!rhs.m_this_delta && ((sizeof(multi_base_equiv) == rhs.m_size) || !rhs.m_vt_index)); + } + else if (sizeof(multi_base_equiv) == m_size) + { + if (sizeof(unknown_base_equiv) == rhs.m_size) + return (m_this_delta == rhs.m_this_delta) && !rhs.m_vt_index; + else + return (sizeof(single_base_equiv) == rhs.m_size) ? 
!m_this_delta : (m_this_delta == rhs.m_this_delta); + } + else if (sizeof(unknown_base_equiv) == rhs.m_size) + { + return (m_this_delta == rhs.m_this_delta) && (m_vt_index == rhs.m_vt_index) && (!m_vt_index || (m_vptr_offs == rhs.m_vptr_offs)); + } + else + { + return !m_vt_index && ((sizeof(multi_base_equiv) == rhs.m_size) ? (m_this_delta == rhs.m_this_delta) : !m_this_delta); + } + } + + bool isnull() const + { + return !reinterpret_cast(m_function); + } + + // getters + static delegate_generic_class *real_object(delegate_generic_class *original) { return original; } + + // binding helpers + template + void update_after_bind(FunctionType &funcptr, delegate_generic_class *&object) + { + funcptr = reinterpret_cast(adjust_this_pointer(object)); + } + + template + void update_after_copy(FunctionType &funcptr, delegate_generic_class *&object) + { + } + +private: + // adjust the object pointer and bypass thunks + delegate_generic_function adjust_this_pointer(delegate_generic_class *&object) const; + + // actual state + uintptr_t m_function = 0; // pointer to function or non-virtual thunk for virtual function call + int m_this_delta = 0; // delta to apply to the 'this' pointer for multiple inheritance + int m_vptr_offs = 0; // offset to apply to this pointer to obtain pointer to vptr + int m_vt_index = 0; // offset into vtable to additional delta to apply to the 'this' pointer + + unsigned m_size = 0; // overall size of the pointer to member function representation +}; + + + +#if MAME_DELEGATE_USE_TYPE == MAME_DELEGATE_TYPE_COMPATIBLE + +template +struct delegate_mfp { using type = delegate_mfp_compatible; }; + +#elif MAME_DELEGATE_USE_TYPE == MAME_DELEGATE_TYPE_ITANIUM + +template +struct delegate_mfp { using type = delegate_mfp_itanium; }; + +#elif MAME_DELEGATE_USE_TYPE == MAME_DELEGATE_TYPE_MSVC + +/// \brief Determine whether a type is returned conventionally +/// +/// Under the MSVC C++ ABI with the Microsoft calling convention for +/// x86-64 or AArch64, the 
calling convention for member functions is +/// not quite the same as a free function with the "this" pointer as the +/// first parameter. +/// +/// Conventionally, structure and union values can be returned in +/// registers if they are small enough and are aggregates (trivially +/// constructible, destructible, copyable and assignable). On x86-64, +/// if the value cannot be returned in registers, the pointer to the +/// area for the return value is conventionally passed in RCX and +/// explicit parameters are shifted by one position. On AArch64, if the +/// value cannot be returned in registers, the pointer to the area for +/// the return value is passed in X8 (explicit parameters do not need to +/// be shifted). +/// +/// For member functions, structure and union types are never returned +/// in registers, and the pointer to the area for the return value is +/// passed differently for structures and unions. When a structure or +/// union is to be returned, a pointer to the area for the return value +/// is effectively passed as a second implicit parameter. On x86-64, +/// the "this" pointer is passed in RCX and the pointer to the area for +/// the return value is passed in RDX; on AArch64, the "this" pointer is +/// passed in X0 and the pointer to the area for the return value is +/// passed in X1. Explicit parameters are shifted an additional +/// position to allow for the second implicit parameter. +/// +/// Note that pointer types are returned conventionally from member +/// functions even when they're too large to return in registers (e.g. a +/// pointer to a function member of a class with unknown inheritance). +/// +/// Because of this, we may need to use the #delegate_mfp_compatible +/// class to generate adaptor thunks depending on the return type. This +/// trait doesn't need to reliably be true for types that are returned +/// conventionally from member functions; it only needs to reliably be +/// false for types that aren't. 
Incorrectly yielding true will result +/// in incorrect behaviour while incorrectly yielding false will just +/// cause increased overhead (both compile-time and run-time). +template +using delegate_mfp_conventional_return = std::bool_constant< + std::is_void_v || + std::is_scalar_v || + std::is_reference_v >; + +template +struct delegate_mfp; + +template +struct delegate_mfp::value> > { using type = delegate_mfp_msvc; }; + +template +struct delegate_mfp::value> > { using type = delegate_mfp_compatible; }; + +#endif + +template using delegate_mfp_t = typename delegate_mfp::type; + + + +/// \brief Helper class for generating late bind functions +/// +/// Members of this class don't depend on the delegate's signature. +/// Keeping them here reduces the number of template instantiations as +/// you'll only need one late bind helper for each class used for late +/// binding, not for each class for each delegate signature. +template +class delegate_late_bind_helper +{ +public: + // make it default constructible and copyable + delegate_late_bind_helper() = default; + delegate_late_bind_helper(delegate_late_bind_helper const &) = default; + delegate_late_bind_helper(delegate_late_bind_helper &&) = default; + delegate_late_bind_helper &operator=(delegate_late_bind_helper const &) = default; + delegate_late_bind_helper &operator=(delegate_late_bind_helper &&) = default; + + template + delegate_late_bind_helper(FunctionClass *) + : m_latebinder(&delegate_late_bind_helper::late_bind_helper) + { + } + + delegate_generic_class *operator()(LateBindBase &object) { return m_latebinder(object); } + + explicit operator bool() const noexcept { return bool(m_latebinder); } + +private: + using late_bind_func = delegate_generic_class*(*)(LateBindBase &object); + + template static delegate_generic_class *late_bind_helper(LateBindBase &object); + + late_bind_func m_latebinder = nullptr; +}; + + +template +template +delegate_generic_class 
*delegate_late_bind_helper::late_bind_helper(LateBindBase &object) +{ + FunctionClass *result = dynamic_cast(&object); + if (result) + return reinterpret_cast(result); + throw binding_type_exception(typeid(FunctionClass), typeid(object)); +} + + + +//************************************************************************** +// COMMON DELEGATE BASE CLASS +//************************************************************************** + +template +class delegate_base +{ +public: + // define our traits + template using traits = delegate_traits; + using generic_static_func = typename traits::static_func_type; + typedef MAME_ABI_CXX_MEMBER_CALL generic_static_func generic_member_func; + + // generic constructor + delegate_base() = default; + + // copy constructor + delegate_base(const delegate_base &src) + : m_function(src.m_function) + , m_object(src.m_object) + , m_latebinder(src.m_latebinder) + , m_raw_function(src.m_raw_function) + , m_raw_mfp(src.m_raw_mfp) + { + if (src.object() && is_mfp()) + m_raw_mfp.update_after_copy(m_function, m_object); + } + + // copy constructor with late bind + delegate_base(const delegate_base &src, LateBindBase &object) + : m_function(src.m_function) + , m_latebinder(src.m_latebinder) + , m_raw_function(src.m_raw_function) + , m_raw_mfp(src.m_raw_mfp) + { + late_bind(object); + } + + // construct from member function with object pointer + template + delegate_base(typename traits::member_func_type funcptr, FunctionClass *object) + : m_latebinder(object) + , m_raw_mfp(funcptr, object, static_cast(nullptr), static_cast(nullptr)) + { + bind(object); + } + + // construct from const member function with object pointer + template + delegate_base(typename traits::const_member_func_type funcptr, FunctionClass *object) + : m_latebinder(object) + , m_raw_mfp(funcptr, object, static_cast(nullptr), static_cast(nullptr)) + { + bind(object); + } + + // construct from static reference function with object reference + template + delegate_base(typename 
traits::static_ref_func_type funcptr, FunctionClass *object) + : m_function(reinterpret_cast(funcptr)) + , m_latebinder(object) + , m_raw_function(reinterpret_cast(funcptr)) + { + bind(object); + } + + // copy operator + delegate_base &operator=(const delegate_base &src) + { + if (this != &src) + { + m_function = src.m_function; + m_object = src.m_object; + m_latebinder = src.m_latebinder; + m_raw_function = src.m_raw_function; + m_raw_mfp = src.m_raw_mfp; + + if (src.object() && is_mfp()) + m_raw_mfp.update_after_copy(m_function, m_object); + } + return *this; + } + + // comparison helper + bool operator==(const delegate_base &rhs) const + { + return (m_raw_function == rhs.m_raw_function) && (object() == rhs.object()) && (m_raw_mfp == rhs.m_raw_mfp); + } + + // call the function + ReturnType operator()(Params... args) const + { + if ((MAME_DELEGATE_DIFFERENT_MEMBER_ABI) && is_mfp()) + return (*reinterpret_cast(m_function))(m_object, std::forward(args)...); + else + return (*m_function)(m_object, std::forward(args)...); + } + + // getters + bool has_object() const { return object() != nullptr; } + bool isnull() const { return !m_raw_function && m_raw_mfp.isnull(); } + bool is_mfp() const { return !m_raw_mfp.isnull(); } + + // late binding + void late_bind(LateBindBase &object) + { + if (m_latebinder) + bind(m_latebinder(object)); + } + +protected: + // return the actual object (not the one we use for calling) + delegate_generic_class *object() const { return is_mfp() ? 
m_raw_mfp.real_object(m_object) : m_object; } + + // bind the actual object + template + void bind(FunctionClass *object) + { + m_object = reinterpret_cast(object); + + // if we're wrapping a member function pointer, handle special stuff + if (m_object && is_mfp()) + m_raw_mfp.update_after_bind(m_function, m_object); + } + + // internal state + generic_static_func m_function = nullptr; // resolved static function pointer + delegate_generic_class * m_object = nullptr; // resolved object to the post-cast object + delegate_late_bind_helper m_latebinder; // late binding helper + generic_static_func m_raw_function = nullptr; // raw static function pointer + delegate_mfp_t m_raw_mfp; // raw member function pointer +}; + +} // namespace util::detail + + + +//************************************************************************** +// NATURAL SYNTAX +//************************************************************************** + +// declare the base template +template class delegate; + +template +class delegate : public util::detail::delegate_base +{ +private: + using basetype = util::detail::delegate_base; + using functoid_setter = void (*)(delegate &); + + template struct functoid_type_unwrap { using type = std::remove_reference_t; }; + template struct functoid_type_unwrap > { using type = typename functoid_type_unwrap::type; }; + template using unwrapped_functoid_t = typename functoid_type_unwrap > >::type; + + template static constexpr bool matching_non_const_call(T &&) { return false; } + template static constexpr bool matching_non_const_call(ReturnType (T::*)(Params...)) { return true; } + template static constexpr bool matching_const_call(T &&) { return false; } + template static constexpr bool matching_const_call(ReturnType (T::*)(Params...) 
const) { return true; } + + template static T *unwrap_functoid(T *functoid) { return functoid; } + template static T *unwrap_functoid(std::reference_wrapper *functoid) { return &functoid->get(); } + + template + unwrapped_functoid_t *unwrap_functoid() noexcept + { + return unwrap_functoid(std::any_cast > >(&m_functoid)); + } + + template + static functoid_setter make_functoid_setter() + { + if constexpr (matching_non_const_call(&unwrapped_functoid_t::operator())) + { + return + [] (delegate &obj) + { + obj.basetype::operator=( + basetype( + static_cast::*)(Params...)>(&unwrapped_functoid_t::operator()), + obj.unwrap_functoid())); + }; + } + else if constexpr (matching_const_call(&unwrapped_functoid_t::operator())) + { + return + [] (delegate &obj) + { + obj.basetype::operator=( + basetype( + static_cast::*)(Params...) const>(&unwrapped_functoid_t::operator()), + obj.unwrap_functoid())); + }; + } + else + { + return + [] (delegate &obj) + { + obj.basetype::operator=( + basetype( + [] (unwrapped_functoid_t &f, Params... args) { return ReturnType(f(std::forward(args)...)); }, + obj.unwrap_functoid())); + }; + } + } + + std::any m_functoid; + functoid_setter m_set_functoid = nullptr; + +protected: + template using traits = typename basetype::template traits; + template using member_func_type = typename traits::member_func_type; + template using const_member_func_type = typename traits::const_member_func_type; + template using static_ref_func_type = typename traits::static_ref_func_type; + + template using suitable_functoid = std::is_invocable_r; + +public: + delegate() : basetype() { } + + delegate(delegate const &src) + : basetype(src.m_functoid.has_value() ? static_cast(basetype()) : src) + , m_functoid(src.m_functoid) + , m_set_functoid(src.m_set_functoid) + { + if (m_functoid.has_value()) + m_set_functoid(*this); + } + + delegate(delegate &src) + : delegate(const_cast(src)) + { + } + + delegate(delegate &&src) + : basetype(src.m_functoid.has_value() ? 
basetype() : std::move(src)) + , m_functoid(std::move(src.m_functoid)) + , m_set_functoid(std::move(src.m_set_functoid)) + { + if (m_functoid.has_value()) + m_set_functoid(*this); + } + + delegate(delegate const &src, LateBindBase &object) + : basetype(src.m_functoid.has_value() ? basetype() : basetype(src, object)) + , m_functoid(src.m_functoid) + , m_set_functoid(src.m_set_functoid) + { + if (m_functoid.has_value()) + m_set_functoid(*this); + } + + template delegate(member_func_type funcptr, FunctionClass *object) : basetype(funcptr, object) { } + template delegate(const_member_func_type funcptr, FunctionClass *object) : basetype(funcptr, object) { } + template delegate(static_ref_func_type funcptr, FunctionClass *object) : basetype(funcptr, object) { } + + template + explicit delegate(T &&functoid, std::enable_if_t::value, int> = 0) + : basetype() + , m_functoid(std::forward(functoid)) + , m_set_functoid(make_functoid_setter()) + { + m_set_functoid(*this); + } + + delegate &operator=(std::nullptr_t) noexcept + { + reset(); + return *this; + } + + delegate &operator=(delegate const &src) + { + m_functoid = src.m_functoid; + m_set_functoid = src.m_set_functoid; + if (m_functoid.has_value()) + m_set_functoid(*this); + else + basetype::operator=(src); + return *this; + } + + delegate &operator=(delegate &&src) + { + m_functoid = std::move(src.m_functoid); + m_set_functoid = std::move(src.m_set_functoid); + if (m_functoid.has_value()) + m_set_functoid(*this); + else + basetype::operator=(std::move(src)); + return *this; + } + + void reset() noexcept + { + basetype::operator=(basetype()); + m_functoid.reset(); + m_set_functoid = nullptr; + } +}; + +#endif // MAME_LIB_UTIL_DELEGATE_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/endianness.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/endianness.h new file mode 100644 index 0000000000..cef615fcb8 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/endianness.h @@ -0,0 +1,76 @@ +// 
license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + endianness.h + + Endianness types and utility functions. + +***************************************************************************/ + +#ifndef MAME_LIB_UTIL_ENDIANNESS_H +#define MAME_LIB_UTIL_ENDIANNESS_H + +#pragma once + +#include + + +namespace util { + +//************************************************************************** +// TYPE DEFINITIONS +//************************************************************************** + +// constants for expression endianness +enum class endianness +{ + little, + big, +#ifdef LSB_FIRST + native = little +#else + native = big +#endif +}; + + +//************************************************************************** +// MACROS AND INLINE FUNCTIONS +//************************************************************************** + +constexpr std::string_view endian_to_string_view(endianness e) { using namespace std::literals; return e == endianness::little ? "little"sv : "big"sv; } + +// endian-based value: first value is if native endianness is little-endian, second is if native is big-endian +#define NATIVE_ENDIAN_VALUE_LE_BE(leval,beval) ((util::endianness::native == util::endianness::little) ? 
(leval) : (beval)) + + +// inline functions for accessing bytes and words within larger chunks + +// read/write a byte to a 16-bit space +template constexpr T BYTE_XOR_BE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(1,0); } +template constexpr T BYTE_XOR_LE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(0,1); } + +// read/write a byte to a 32-bit space +template constexpr T BYTE4_XOR_BE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(3,0); } +template constexpr T BYTE4_XOR_LE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(0,3); } + +// read/write a word to a 32-bit space +template constexpr T WORD_XOR_BE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(2,0); } +template constexpr T WORD_XOR_LE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(0,2); } + +// read/write a byte to a 64-bit space +template constexpr T BYTE8_XOR_BE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(7,0); } +template constexpr T BYTE8_XOR_LE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(0,7); } + +// read/write a word to a 64-bit space +template constexpr T WORD2_XOR_BE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(6,0); } +template constexpr T WORD2_XOR_LE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(0,6); } + +// read/write a dword to a 64-bit space +template constexpr T DWORD_XOR_BE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(4,0); } +template constexpr T DWORD_XOR_LE(T a) { return a ^ NATIVE_ENDIAN_VALUE_LE_BE(0,4); } + +} // namespace util + +#endif // MAME_LIB_UTIL_ENDIANNESS_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/hash.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/hash.h new file mode 100644 index 0000000000..b13ac5556a --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/hash.h @@ -0,0 +1,122 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + hash.h + + Function to handle hash functions (checksums) + + Based on original idea by Farfetch'd + 
+***************************************************************************/ + +#ifndef MAME_UTIL_HASH_H +#define MAME_UTIL_HASH_H + +#pragma once + +#include "hashing.h" + + +//************************************************************************** +// MACROS +//************************************************************************** + +// use these to define compile-time internal-format hash strings +#define CRC(x) "R" #x +#define SHA1(x) "S" #x +#define NO_DUMP "!" +#define BAD_DUMP "^" + + +namespace util { +//************************************************************************** +// TYPE DEFINITIONS +//************************************************************************** + + +// ======================> hash_collection + +// a collection of the various supported hashes and flags +class hash_collection +{ +public: + // hash types are identified by non-hex alpha values (G-Z) + static constexpr char HASH_CRC = 'R'; + static constexpr char HASH_SHA1 = 'S'; + + // common combinations for requests + static char const *const HASH_TYPES_CRC; + static char const *const HASH_TYPES_CRC_SHA1; + static char const *const HASH_TYPES_ALL; + + // flags are identified by punctuation marks + static constexpr char FLAG_NO_DUMP = '!'; + static constexpr char FLAG_BAD_DUMP = '^'; + + // construction/destruction + hash_collection(); + hash_collection(std::string_view string); + hash_collection(const hash_collection &src); + ~hash_collection(); + + // operators + hash_collection &operator=(const hash_collection &src); + bool operator==(const hash_collection &rhs) const; + bool operator!=(const hash_collection &rhs) const { return !(*this == rhs); } + + // getters + bool flag(char flag) const { return (m_flags.find_first_of(flag) != std::string::npos); } + std::string hash_types() const; + + // hash manipulators + void reset(); + bool add_from_string(char type, std::string_view string); + bool remove(char type); + + // CRC-specific helpers + bool crc(uint32_t &result) 
const { result = m_crc32; return m_has_crc32; } + void add_crc(uint32_t crc) { m_crc32 = crc; m_has_crc32 = true; } + + // SHA1-specific helpers + bool sha1(sha1_t &result) const { result = m_sha1; return m_has_sha1; } + void add_sha1(sha1_t sha1) { m_has_sha1 = true; m_sha1 = sha1; } + + // string conversion + std::string internal_string() const; + std::string macro_string() const; + std::string attribute_string() const; + bool from_internal_string(std::string_view string); + + // creation + void begin(const char *types = nullptr); + void buffer(const uint8_t *data, uint32_t length); + void end(); + void compute(const uint8_t *data, uint32_t length, const char *types = nullptr) { begin(types); buffer(data, length); end(); } + +private: + // internal helpers + void copyfrom(const hash_collection &src); + + // internal state + std::string m_flags; + bool m_has_crc32; + crc32_t m_crc32; + bool m_has_sha1; + sha1_t m_sha1; + + // creators + struct hash_creator + { + bool m_doing_crc32; + crc32_creator m_crc32_creator; + bool m_doing_sha1; + sha1_creator m_sha1_creator; + }; + hash_creator * m_creator; +}; + + +} // namespace util + +#endif // MAME_UTIL_HASH_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/hashing.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/hashing.h new file mode 100644 index 0000000000..6cd73ac184 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/hashing.h @@ -0,0 +1,314 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles, Vas Crabb +/*************************************************************************** + + hashing.h + + Hashing helper classes. 
+ +***************************************************************************/ +#ifndef MAME_UTIL_HASHING_H +#define MAME_UTIL_HASHING_H + +#pragma once + +#include "md5.h" + +#include +#include +#include +#include +#include +#include + + +namespace util { + +//************************************************************************** +// TYPE DEFINITIONS +//************************************************************************** + + +// ======================> SHA-1 + +// final digest +struct sha1_t +{ + bool operator==(const sha1_t &rhs) const { return memcmp(m_raw, rhs.m_raw, sizeof(m_raw)) == 0; } + bool operator!=(const sha1_t &rhs) const { return memcmp(m_raw, rhs.m_raw, sizeof(m_raw)) != 0; } + operator uint8_t *() { return m_raw; } + bool from_string(std::string_view string); + std::string as_string() const; + uint8_t m_raw[20]; + static const sha1_t null; +}; + +// creation helper +class sha1_creator +{ +public: + // construction/destruction + sha1_creator() { reset(); } + + // reset + void reset(); + + // append data + void append(const void *data, uint32_t length); + + // finalize and compute the final digest + sha1_t finish(); + + // static wrapper to just get the digest from a block + static sha1_t simple(const void *data, uint32_t length) + { + sha1_creator creator; + creator.append(data, length); + return creator.finish(); + } + +protected: + uint64_t m_cnt; + std::array m_st; + uint32_t m_buf[16]; +}; + + + +// ======================> MD5 + +// final digest +struct md5_t +{ + bool operator==(const md5_t &rhs) const { return memcmp(m_raw, rhs.m_raw, sizeof(m_raw)) == 0; } + bool operator!=(const md5_t &rhs) const { return memcmp(m_raw, rhs.m_raw, sizeof(m_raw)) != 0; } + operator uint8_t *() { return m_raw; } + bool from_string(std::string_view string); + std::string as_string() const; + uint8_t m_raw[16]; + static const md5_t null; +}; + +// creation helper +class md5_creator +{ +public: + // construction/destruction + md5_creator() { reset(); } 
+ + // reset + void reset() { MD5Init(&m_context); } + + // append data + void append(const void *data, uint32_t length) { MD5Update(&m_context, reinterpret_cast(data), length); } + + // finalize and compute the final digest + md5_t finish() + { + md5_t result; + MD5Final(result.m_raw, &m_context); + return result; + } + + // static wrapper to just get the digest from a block + static md5_t simple(const void *data, uint32_t length) + { + md5_creator creator; + creator.append(data, length); + return creator.finish(); + } + +protected: + // internal state + struct MD5Context m_context; // internal context +}; + + + +// ======================> CRC-32 + +// final digest +struct crc32_t +{ + crc32_t() { } + constexpr crc32_t(const crc32_t &rhs) = default; + constexpr crc32_t(const uint32_t crc) : m_raw(crc) { } + + constexpr bool operator==(const crc32_t &rhs) const { return m_raw == rhs.m_raw; } + constexpr bool operator!=(const crc32_t &rhs) const { return m_raw != rhs.m_raw; } + + crc32_t &operator=(const crc32_t &rhs) = default; + crc32_t &operator=(const uint32_t crc) { m_raw = crc; return *this; } + + constexpr operator uint32_t() const { return m_raw; } + + bool from_string(std::string_view string); + std::string as_string() const; + + uint32_t m_raw; + + static const crc32_t null; +}; + +// creation helper +class crc32_creator +{ +public: + // construction/destruction + crc32_creator() { reset(); } + + // reset + void reset() { m_accum.m_raw = 0; } + + // append data + void append(const void *data, uint32_t length); + + // finalize and compute the final digest + crc32_t finish() { return m_accum; } + + // static wrapper to just get the digest from a block + static crc32_t simple(const void *data, uint32_t length) + { + crc32_creator creator; + creator.append(data, length); + return creator.finish(); + } + +protected: + // internal state + crc32_t m_accum; // internal accumulator +}; + + + +// ======================> CRC-16 + +// final digest +struct crc16_t +{ + 
crc16_t() { } + constexpr crc16_t(const crc16_t &rhs) = default; + constexpr crc16_t(const uint16_t crc) : m_raw(crc) { } + + constexpr bool operator==(const crc16_t &rhs) const { return m_raw == rhs.m_raw; } + constexpr bool operator!=(const crc16_t &rhs) const { return m_raw != rhs.m_raw; } + + crc16_t &operator=(const crc16_t &rhs) = default; + crc16_t &operator=(const uint16_t crc) { m_raw = crc; return *this; } + + constexpr operator uint16_t() const { return m_raw; } + + bool from_string(std::string_view string); + std::string as_string() const; + + uint16_t m_raw; + + static const crc16_t null; +}; + +// creation helper +class crc16_creator +{ +public: + // construction/destruction + crc16_creator() { reset(); } + + // reset + void reset() { m_accum.m_raw = 0xffff; } + + // append data + void append(const void *data, uint32_t length); + + // finalize and compute the final digest + crc16_t finish() { return m_accum; } + + // static wrapper to just get the digest from a block + static crc16_t simple(const void *data, uint32_t length) + { + crc16_creator creator; + creator.append(data, length); + return creator.finish(); + } + +protected: + // internal state + crc16_t m_accum; // internal accumulator +}; + + + +// ======================> SUM-16 + +// final digest +struct sum16_t +{ + sum16_t() { } + constexpr sum16_t(const sum16_t &rhs) = default; + constexpr sum16_t(const uint16_t sum) : m_raw(sum) { } + + constexpr bool operator==(const sum16_t &rhs) const { return m_raw == rhs.m_raw; } + constexpr bool operator!=(const sum16_t &rhs) const { return m_raw != rhs.m_raw; } + + sum16_t &operator=(const sum16_t &rhs) = default; + sum16_t &operator=(const uint16_t sum) { m_raw = sum; return *this; } + + constexpr operator uint16_t() const { return m_raw; } + + bool from_string(std::string_view string); + std::string as_string() const; + + uint16_t m_raw; + + static const sum16_t null; +}; + +// creation helper +class sum16_creator +{ +public: + // 
construction/destruction + sum16_creator() { reset(); } + + // reset + void reset() { m_accum.m_raw = 0; } + + // append data + void append(const void *data, uint32_t length); + + // finalize and compute the final digest + sum16_t finish() { return m_accum; } + + // static wrapper to just get the digest from a block + static sum16_t simple(const void *data, uint32_t length) + { + sum16_creator creator; + creator.append(data, length); + return creator.finish(); + } + +protected: + // internal state + sum16_t m_accum; // internal accumulator +}; + +} // namespace util + +namespace std { + +template <> struct hash<::util::crc32_t> +{ + typedef ::util::crc32_t argument_type; + typedef std::size_t result_type; + result_type operator()(argument_type const & s) const { return std::hash()(s); } +}; + +template <> struct hash<::util::crc16_t> +{ + typedef ::util::crc16_t argument_type; + typedef std::size_t result_type; + result_type operator()(argument_type const & s) const { return std::hash()(s); } +}; + +} // namespace std + +#endif // MAME_UTIL_HASHING_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/md5.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/md5.h new file mode 100644 index 0000000000..8358e8c1e9 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/md5.h @@ -0,0 +1,43 @@ +// license:Public Domain +// copyright-holders:Colin Plumb +/* + * This is the header file for the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. 
+ * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + * + * Changed so as no longer to depend on Colin Plumb's `usual.h' + * header definitions; now uses stuff from dpkg's config.h + * - Ian Jackson . + * Still in the public domain. + */ + +#ifndef MD5_H +#define MD5_H + +typedef unsigned int UWORD32; + +#define md5byte unsigned char + +struct MD5Context { + UWORD32 buf[4]; + UWORD32 bytes[2]; + UWORD32 in[16]; +}; + +void MD5Init(struct MD5Context *context); +void MD5Update(struct MD5Context *context, md5byte const *buf, unsigned len); +void MD5Final(unsigned char digest[16], struct MD5Context *context); +void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]); + +#endif /* !MD5_H */ diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/palette.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/palette.h new file mode 100644 index 0000000000..d21c8ca812 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/palette.h @@ -0,0 +1,287 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/****************************************************************************** + + palette.h + + Core palette routines. 
+ +***************************************************************************/ + +#ifndef MAME_UTIL_PALETTE_H +#define MAME_UTIL_PALETTE_H + +#pragma once + +#include +#include + + +//************************************************************************** +// TYPE DEFINITIONS +//************************************************************************** + +// forward definitions +class palette_t; + +// an rgb15_t is a single combined 15-bit R,G,B value +typedef uint16_t rgb15_t; + + +// ======================> rgb_t + +// an rgb_t is a single combined R,G,B (and optionally alpha) value +class rgb_t +{ +public: + // construction/destruction + constexpr rgb_t() : m_data(0) { } + constexpr rgb_t(uint32_t data) : m_data(data) { } + constexpr rgb_t(uint8_t r, uint8_t g, uint8_t b) : m_data((255 << 24) | (r << 16) | (g << 8) | b) { } + constexpr rgb_t(uint8_t a, uint8_t r, uint8_t g, uint8_t b) : m_data((a << 24) | (r << 16) | (g << 8) | b) { } + + // getters + constexpr uint8_t a() const { return m_data >> 24; } + constexpr uint8_t r() const { return m_data >> 16; } + constexpr uint8_t g() const { return m_data >> 8; } + constexpr uint8_t b() const { return m_data >> 0; } + constexpr rgb15_t as_rgb15() const { return ((r() >> 3) << 10) | ((g() >> 3) << 5) | ((b() >> 3) << 0); } + constexpr uint8_t brightness() const { return (r() * 222 + g() * 707 + b() * 71) / 1000; } + constexpr uint32_t const *ptr() const { return &m_data; } + void expand_rgb(uint8_t &r, uint8_t &g, uint8_t &b) const { r = m_data >> 16; g = m_data >> 8; b = m_data >> 0; } + void expand_rgb(int &r, int &g, int &b) const { r = (m_data >> 16) & 0xff; g = (m_data >> 8) & 0xff; b = (m_data >> 0) & 0xff; } + + // setters + rgb_t &set_a(uint8_t a) { m_data &= ~0xff000000; m_data |= a << 24; return *this; } + rgb_t &set_r(uint8_t r) { m_data &= ~0x00ff0000; m_data |= r << 16; return *this; } + rgb_t &set_g(uint8_t g) { m_data &= ~0x0000ff00; m_data |= g << 8; return *this; } + rgb_t &set_b(uint8_t b) { 
m_data &= ~0x000000ff; m_data |= b << 0; return *this; } + + // implicit conversion operators + constexpr operator uint32_t() const { return m_data; } + + // operations + rgb_t &scale8(uint8_t scale) { m_data = rgb_t(clamphi((a() * scale) >> 8), clamphi((r() * scale) >> 8), clamphi((g() * scale) >> 8), clamphi((b() * scale) >> 8)); return *this; } + + // assignment operators + rgb_t &operator=(uint32_t rhs) { m_data = rhs; return *this; } + rgb_t &operator+=(const rgb_t &rhs) { m_data = uint32_t(*this + rhs); return *this; } + rgb_t &operator-=(const rgb_t &rhs) { m_data = uint32_t(*this - rhs); return *this; } + + // arithmetic operators + constexpr rgb_t operator+(const rgb_t &rhs) const { return rgb_t(clamphi(a() + rhs.a()), clamphi(r() + rhs.r()), clamphi(g() + rhs.g()), clamphi(b() + rhs.b())); } + constexpr rgb_t operator-(const rgb_t &rhs) const { return rgb_t(clamplo(a() - rhs.a()), clamplo(r() - rhs.r()), clamplo(g() - rhs.g()), clamplo(b() - rhs.b())); } + + // static helpers + static constexpr uint8_t clamp(int32_t value) { return (value < 0) ? 0 : (value > 255) ? 255 : value; } + static constexpr uint8_t clamphi(int32_t value) { return (value > 255) ? 255 : value; } + static constexpr uint8_t clamplo(int32_t value) { return (value < 0) ? 
0 : value; } + + // constant factories + static constexpr rgb_t black() { return rgb_t(0, 0, 0); } + static constexpr rgb_t white() { return rgb_t(255, 255, 255); } + static constexpr rgb_t green() { return rgb_t(0, 255, 0); } + static constexpr rgb_t amber() { return rgb_t(247, 170, 0); } + static constexpr rgb_t transparent() { return rgb_t(0, 0, 0, 0); } + +private: + uint32_t m_data; +}; + + +// ======================> palette_client + +// a single palette client +class palette_client +{ +public: + // construction/destruction + palette_client(palette_t &palette); + ~palette_client(); + + // getters + palette_client *next() const { return m_next; } + palette_t &palette() const { return m_palette; } + const uint32_t *dirty_list(uint32_t &mindirty, uint32_t &maxdirty); + + // dirty marking + void mark_dirty(uint32_t index) { m_live->mark_dirty(index); } + +private: + // internal object to track dirty states + class dirty_state + { + public: + // construction + dirty_state(); + + // operations + const uint32_t *dirty_list(uint32_t &mindirty, uint32_t &maxdirty); + void resize(uint32_t colors); + void mark_dirty(uint32_t index); + void reset(); + + private: + // internal state + std::vector m_dirty; // bitmap of dirty entries + uint32_t m_mindirty; // minimum dirty entry + uint32_t m_maxdirty; // minimum dirty entry + }; + + // internal state + palette_t & m_palette; // reference to the palette + palette_client *m_next; // pointer to next client + dirty_state * m_live; // live dirty state + dirty_state * m_previous; // previous dirty state + dirty_state m_dirty[2]; // two dirty states +}; + + +// ======================> palette_t + +// a palette object +class palette_t +{ + friend class palette_client; + +public: + // static constructor: used to ensure same new/delete is used + static palette_t *alloc(uint32_t numcolors, uint32_t numgroups = 1); + + // reference counting + void ref() { m_refcount++; } + void deref(); + + // getters + int num_colors() const { return 
m_numcolors; } + int num_groups() const { return m_numgroups; } + int max_index() const { return m_numcolors * m_numgroups + 2; } + uint32_t black_entry() const { return m_numcolors * m_numgroups + 0; } + uint32_t white_entry() const { return m_numcolors * m_numgroups + 1; } + + // overall adjustments + void set_brightness(float brightness); + void set_contrast(float contrast); + void set_gamma(float gamma); + + // entry getters + rgb_t entry_color(uint32_t index) const { return (index < m_numcolors) ? m_entry_color[index] : rgb_t::black(); } + rgb_t entry_adjusted_color(uint32_t index) const { return (index < m_numcolors * m_numgroups) ? m_adjusted_color[index] : rgb_t::black(); } + float entry_contrast(uint32_t index) const { return (index < m_numcolors) ? m_entry_contrast[index] : 1.0f; } + + // entry setters + void entry_set_color(uint32_t index, rgb_t rgb); + void entry_set_red_level(uint32_t index, uint8_t level); + void entry_set_green_level(uint32_t index, uint8_t level); + void entry_set_blue_level(uint32_t index, uint8_t level); + void entry_set_contrast(uint32_t index, float contrast); + + // entry list getters + const rgb_t *entry_list_raw() const { return &m_entry_color[0]; } + const rgb_t *entry_list_adjusted() const { return &m_adjusted_color[0]; } + const rgb_t *entry_list_adjusted_rgb15() const { return &m_adjusted_rgb15[0]; } + + // group adjustments + void group_set_brightness(uint32_t group, float brightness); + void group_set_contrast(uint32_t group, float contrast); + + // utilities + void normalize_range(uint32_t start, uint32_t end, int lum_min = 0, int lum_max = 255); + +private: + // construction/destruction + palette_t(uint32_t numcolors, uint32_t numgroups = 1); + ~palette_t(); + + // internal helpers + rgb_t adjust_palette_entry(rgb_t entry, float brightness, float contrast, const uint8_t *gamma_map); + void update_adjusted_color(uint32_t group, uint32_t index); + + // internal state + uint32_t m_refcount; // reference count on the 
palette + uint32_t m_numcolors; // number of colors in the palette + uint32_t m_numgroups; // number of groups in the palette + + float m_brightness; // overall brightness value + float m_contrast; // overall contrast value + float m_gamma; // overall gamma value + uint8_t m_gamma_map[256]; // gamma map + + std::vector m_entry_color; // array of raw colors + std::vector m_entry_contrast; // contrast value for each entry + std::vector m_adjusted_color; // array of adjusted colors + std::vector m_adjusted_rgb15; // array of adjusted colors as RGB15 + + std::vector m_group_bright; // brightness value for each group + std::vector m_group_contrast; // contrast value for each group + + palette_client *m_client_list; // list of clients for this palette +}; + + + +//************************************************************************** +// INLINE FUNCTIONS +//************************************************************************** + +//------------------------------------------------- +// palexpand - expand a palette value to 8 bits +//------------------------------------------------- + +template +constexpr uint8_t palexpand(uint8_t bits) +{ + if (_NumBits == 1) { return (bits & 1) ? 
0xff : 0x00; } + if (_NumBits == 2) { bits &= 3; return (bits << 6) | (bits << 4) | (bits << 2) | bits; } + if (_NumBits == 3) { bits &= 7; return (bits << 5) | (bits << 2) | (bits >> 1); } + if (_NumBits == 4) { bits &= 0xf; return (bits << 4) | bits; } + if (_NumBits == 5) { bits &= 0x1f; return (bits << 3) | (bits >> 2); } + if (_NumBits == 6) { bits &= 0x3f; return (bits << 2) | (bits >> 4); } + if (_NumBits == 7) { bits &= 0x7f; return (bits << 1) | (bits >> 6); } + return bits; +} + + +//------------------------------------------------- +// palxbit - convert an x-bit value to 8 bits +//------------------------------------------------- + +constexpr uint8_t pal1bit(uint8_t bits) { return palexpand<1>(bits); } +constexpr uint8_t pal2bit(uint8_t bits) { return palexpand<2>(bits); } +constexpr uint8_t pal3bit(uint8_t bits) { return palexpand<3>(bits); } +constexpr uint8_t pal4bit(uint8_t bits) { return palexpand<4>(bits); } +constexpr uint8_t pal5bit(uint8_t bits) { return palexpand<5>(bits); } +constexpr uint8_t pal6bit(uint8_t bits) { return palexpand<6>(bits); } +constexpr uint8_t pal7bit(uint8_t bits) { return palexpand<7>(bits); } + + +//------------------------------------------------- +// rgbexpand - expand a 32-bit raw data to 8-bit +// RGB +//------------------------------------------------- + +template +constexpr rgb_t rgbexpand(uint32_t data, uint8_t rshift, uint8_t gshift, uint8_t bshift) +{ + return rgb_t(palexpand<_RBits>(data >> rshift), palexpand<_GBits>(data >> gshift), palexpand<_BBits>(data >> bshift)); +} + +template +constexpr rgb_t argbexpand(uint32_t data, uint8_t ashift, uint8_t rshift, uint8_t gshift, uint8_t bshift) +{ + return rgb_t(palexpand<_ABits>(data >> ashift), palexpand<_RBits>(data >> rshift), palexpand<_GBits>(data >> gshift), palexpand<_BBits>(data >> bshift)); +} + + +//------------------------------------------------- +// palxxx - create an x-x-x color by extracting +// bits from a uint32_t 
+//------------------------------------------------- + +constexpr rgb_t pal332(uint32_t data, uint8_t rshift, uint8_t gshift, uint8_t bshift) { return rgbexpand<3,3,2>(data, rshift, gshift, bshift); } +constexpr rgb_t pal444(uint32_t data, uint8_t rshift, uint8_t gshift, uint8_t bshift) { return rgbexpand<4,4,4>(data, rshift, gshift, bshift); } +constexpr rgb_t pal555(uint32_t data, uint8_t rshift, uint8_t gshift, uint8_t bshift) { return rgbexpand<5,5,5>(data, rshift, gshift, bshift); } +constexpr rgb_t pal565(uint32_t data, uint8_t rshift, uint8_t gshift, uint8_t bshift) { return rgbexpand<5,6,5>(data, rshift, gshift, bshift); } +constexpr rgb_t pal888(uint32_t data, uint8_t rshift, uint8_t gshift, uint8_t bshift) { return rgbexpand<8,8,8>(data, rshift, gshift, bshift); } + +#endif // MAME_UTIL_PALETTE_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/strformat.cpp b/waterbox/ares64/ares/thirdparty/mame/lib/util/strformat.cpp new file mode 100644 index 0000000000..cec14635e1 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/strformat.cpp @@ -0,0 +1,661 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/*************************************************************************** + + strformat.h + + type-safe printf substitutes + +***************************************************************************/ + +#include "strformat.h" + +#include +#include + + +namespace util { + +namespace detail { + +template class format_chars; +template class format_chars; + +template void format_flags::apply(std::ostream &) const; +template void format_flags::apply(std::wostream &) const; +template void format_flags::apply(std::iostream &) const; +template void format_flags::apply(std::wiostream &) const; +template void format_flags::apply(std::ostringstream &) const; +template void format_flags::apply(std::wostringstream &) const; +template void format_flags::apply(std::stringstream &) const; +template void 
format_flags::apply(std::wstringstream &) const; +template void format_flags::apply(ovectorstream &) const; +template void format_flags::apply(wovectorstream &) const; +template void format_flags::apply(vectorstream &) const; +template void format_flags::apply(wvectorstream &) const; + +template class format_argument; +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool 
format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool 
format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const 
*, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +template bool 
format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const 
*, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +template void format_argument::static_output(std::wiostream &, format_flags const &, void 
const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void 
format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template 
void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); 
+template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags 
const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, 
void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, 
void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream 
&, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); 
+template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const 
*); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument; +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, 
void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template bool format_argument::static_make_integer(void const *, int &); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void 
format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); +template void format_argument::static_store_integer(void const *, std::streamoff); + +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; +template class format_argument_pack; + +template std::ostream::off_type stream_format(std::ostream &, format_argument_pack const &); +template std::wostream::off_type stream_format(std::wostream &, format_argument_pack const &); +template std::iostream::off_type stream_format(std::iostream &, format_argument_pack const &); 
+template std::iostream::off_type stream_format(std::iostream &, format_argument_pack const &); +template std::wiostream::off_type stream_format(std::wiostream &, format_argument_pack const &); +template std::wiostream::off_type stream_format(std::wiostream &, format_argument_pack const &); +template std::ostringstream::off_type stream_format(std::ostringstream &, format_argument_pack const &); +template std::ostringstream::off_type stream_format(std::ostringstream &, format_argument_pack const &); +template std::wostringstream::off_type stream_format(std::wostringstream &, format_argument_pack const &); +template std::wostringstream::off_type stream_format(std::wostringstream &, format_argument_pack const &); +template std::stringstream::off_type stream_format(std::stringstream &, format_argument_pack const &); +template std::stringstream::off_type stream_format(std::stringstream &, format_argument_pack const &); +template std::stringstream::off_type stream_format(std::stringstream &, format_argument_pack const &); +template std::wstringstream::off_type stream_format(std::wstringstream &, format_argument_pack const &); +template std::wstringstream::off_type stream_format(std::wstringstream &, format_argument_pack const &); +template std::wstringstream::off_type stream_format(std::wstringstream &, format_argument_pack const &); +template ovectorstream::off_type stream_format(ovectorstream &, format_argument_pack const &); +template ovectorstream::off_type stream_format(ovectorstream &, format_argument_pack const &); +template wovectorstream::off_type stream_format(wovectorstream &, format_argument_pack const &); +template wovectorstream::off_type stream_format(wovectorstream &, format_argument_pack const &); +template vectorstream::off_type stream_format(vectorstream &, format_argument_pack const &); +template vectorstream::off_type stream_format(vectorstream &, format_argument_pack const &); +template vectorstream::off_type stream_format(vectorstream &, 
format_argument_pack const &); +template wvectorstream::off_type stream_format(wvectorstream &, format_argument_pack const &); +template wvectorstream::off_type stream_format(wvectorstream &, format_argument_pack const &); +template wvectorstream::off_type stream_format(wvectorstream &, format_argument_pack const &); + +} // namespace detail + +} // namespace util diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/strformat.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/strformat.h new file mode 100644 index 0000000000..e6d2ee0a0f --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/strformat.h @@ -0,0 +1,2432 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/*************************************************************************** + + strformat.h + + type-safe printf substitutes + + This header provides type-safe printf substitutes that output to + std::ostream- or std::string-like objects. Most format strings + supported by C99, SUS, glibc and MSVCRT are accepted. Not all + features are implemented, and semantics differ in some cases. Any + object with an appropriate stream output operator can be used as + a format argument with the %s conversion. + + Since the functions are implemented using C++ iostream, some + behaviour more closely resembles iostream output operator behaviour + than printf behaviour. You are also exposed to bugs in your C++ + iostream implementation (e.g. hexadecimal scientific format doesn't + work properly on MinGW). + + These functions are designed to be forgiving - using an + inappropriate conversion for an argument's type just results in the + default conversion for the type being used. Inappropriate types or + out-of-range positions for parameterised field width and precision + are treated as if no width/precision was specified. Out-of-range + argument positions result in the format specification being printed. 
+ + Position specifiers for arguments (%123$), field width (*456$) and + precision (.*789$) are supported. Mixing explicit and implied + positions for arguments/widths/precisions is discouraged, although + it does produce deterministic behaviour. + + The following format flags are recognised: + - "#": alternate format - sets showbase/showpoint, and also + boolalpha for bool with s conversion + - "0": pad with zeroes rather than spaces, ignored if '-' flag is + specified or if precision is specified for d/i/u/o/x/X + conversion + - "-": left-align output, overrides '0' + - " ": recognised but not implemented, ignored for u/o/x/X + conversion + - "+": show sign for positive numbers, overrides ' ', ignored for + u/o/x/X conversions + - "'": recognised for SUS compatibility but ignored (digit grouping + is controlled by stream locale) + - "I": recognised for glibc compatibility but ignored (digits are + controlled by stream locale) + + Precision is supported for conversions by setting precision on the + stream. This works as expected for a/A/e/E/f/F/g/G conversions on + floating-point types, and may work for objects with user-defined + stream output operators. Precision for d/i/u/o/x/X conversions + (minimum digits to print) is not supported. Precision for s + conversions (maximum characters to print) is only honoured for + string-like types (output character pointer/array and + std::basic_string). + + Length specifiers are supported but not required for d/i/u/o/x/X + conversions with integer/char/bool arguments. They result in the + value being cast to the desired type before being printed. Length + specifiers are ignored for other conversions. 
+ + The following length specifiers are recognised: + - hh: cast to char/unsigned char for d/i/u/o/x/X + - h: cast to short/unsigned short for d/i/u/o/x/X + - l: cast to long/unsigned long for d/i/u/o/x/X + - ll: cast to long long/unsigned long long for d/i/u/o/x/X + - L: always ignored + - j: cast to intmax_t/uintmax_t for d/i/u/o/x/X + - z: cast to ssize_t/size_t for d/i/u/o/x/X + - t: cast to ptrdiff_t for d/i/u/o/x/X + - I: cast to ssize_t/size_t for d/i/u/o/x/X + - I32: cast to int32_t/uint32_t for d/i/u/o/x/X + - I64: cast to int64_t/uint64_t for d/i/u/o/x/X + - w: always ignored + + The following conversions are recognised: + - d/i: signed decimal for integer/char/bool types + - u: unsigned decimal for integer/char/bool types + - o: unsigned octal for integer/char/bool types + - x/X: lower/uppercase unsigned hexadecimal for integer/char/bool + types or scientific hexadecimal for floating-point types + - e/E: lower/uppercase scientific decimal for floating-point types + - f/F: lower/uppercase fixed-point decimal for floating-point types + - g/G: default stream output format for floating-point types (may + differ from printf behaviour) + - a/A: lower/uppercase scientific hexadecimal for floating-point + types or hexadecimal for integer types + - c/C: cast integer types to stream's character type, no automatic + widening or narrowing + - s/S: default stream output behaviour for argument + - p/P: cast any integer/char/bool/pointer/array to void const * + - n: store characters printed so far, produces no output, argument + must be pointer to type that std::streamoff is convertible to + - m: output of std::strerror(errno), no automatic widening or + narrowing, does not consume an argument + - %: literal %, field width applied, does not consume an argument + + The output stream type for stream_format must be equivalent to a + std::basic_ostream for duck-typing purposes. 
The output string for + string type for string_format must provide value_type, traits_type + and allocator_type declarations, and must be constructible from a + std::basic_string using the same value, traits and allocator types. + + The format string type can be a pointer to a NUL-terminated string, + an array containing a NUL-terminated or non-terminated string, or a + STL contiguous container holding a string (e.g. std::string, + std::string_view, std::vector or std::array). Note that NUL + characters characters are only treated as terminators for pointers + and arrays, they are treated as normal characters for other + containers. Using a non-contiguous container (e.g. std::list or + std::deque) will result in undesirable behaviour likely culminating + in a crash. + + The value type of the format string and the character type of the + output stream/string need to match. You can't use a wchar_t format + to format char output and vice versa. + + The format string encoding must have contiguous decimal digits. The + character encoding must not use shift states or multi-byte sequences + that could result in a format character codepoint appearing as part + of a multi-byte sequence or being interpreted differently. ASCII, + ISO Latin, KOI-8, UTF-8, EUC, EBCDIC, and UTF-EBCDIC encodings meet + these requirements, while ISO-2022, UTF-7, KOI-7 and Shift-JIS + encodings do not. For character types other than char and wchar_t, + the encoding must be a strict superset of the char encoding. + + The following conditions cause assertion failures in debug builds: + - Unsupported conversion specifier + - Out-of-range argument/width/precision position + - Inappropriate type for parameterised width/precision + - Positional width/precision specifier not terminated with $ + - Inappropriate type for n conversion + - Default conversion for type that lacks stream out operator + + Some limitations have been described in passing. 
Major limitations + and bugs include: + - No automatic widening/narrowing support, so no simple way to + output wide characters/strings to narrow streams/strings and vice + versa. + - Precision ignored for d/i/u/o/x/X conversions (should set minimum + digits to print). + - Precision for s/S conversion is only honoured for string-like + types (output character pointer/array and std::basic_string). + - If the output character type is not char, signed char or unsgined + char, printing the a value of this type with d/i/u/o/x/X + conversion and no length specifier causes it to be printed as a + character. Can be worked around by casting to another integer + type or using length specifier. + - Printing with d/i/u/o/x/X conversions may not behave as expected + if the output character type is an integer type other than char, + signed char, unsigned char, or wchar_t. + - Only output character pointer/array is treated as a C string, any + other pointer/array will be printed as a pointer value. The + signed/unsigned/default char are not handled equivalently. + - There is no length specifier to force cast to int/unsigned. Can + be worked around by casting the argument. + - MSVCRT length specifiers I/I32/I64 will not be recognised if no + width or precision is specified, as they will be mistaken for + glibc alternate digits flag. + - The " " flag to prefix positive numbers with a space is not + implemented. + - The "'" and "I" flags are not implemented, as digit grouping and + characters are controlled by the output stream's locale. + - The characters used for space- and zero-padding are not locale- + aware. 
+ +***************************************************************************/ + +#ifndef MAME_UTIL_STRFORMAT_H +#define MAME_UTIL_STRFORMAT_H + +#pragma once + +#include "vecstream.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace util { + +namespace detail { + +//************************************************************************** +// FORMAT CHARACTER DEFINITIONS +//************************************************************************** + +template +class format_chars +{ +public: + typedef Character char_type; + enum : Character { + nul = Character('\0'), + space = Character(' '), + point = Character('.'), + percent = Character('%'), + dollar = Character('$'), + hash = Character('#'), + minus = Character('-'), + plus = Character('+'), + asterisk = Character('*'), + quote = Character('\''), + zero = Character('0'), + nine = Character('9'), + a = Character('a'), + A = Character('A'), + c = Character('c'), + C = Character('C'), + d = Character('d'), + e = Character('e'), + E = Character('E'), + f = Character('f'), + F = Character('F'), + g = Character('g'), + G = Character('G'), + h = Character('h'), + i = Character('i'), + I = Character('I'), + j = Character('j'), + l = Character('l'), + L = Character('L'), + m = Character('m'), + n = Character('n'), + o = Character('o'), + p = Character('p'), + s = Character('s'), + S = Character('S'), + t = Character('t'), + u = Character('u'), + w = Character('w'), + x = Character('x'), + X = Character('X'), + z = Character('z') + }; +}; + +template <> +class format_chars +{ +public: + typedef wchar_t char_type; + enum : wchar_t { + nul = L'\0', + space = L' ', + point = L'.', + percent = L'%', + dollar = L'$', + hash = L'#', + minus = L'-', + plus = L'+', + asterisk = L'*', + quote = L'\'', + zero = L'0', + nine = L'9', + a = L'a', + A = L'A', + c = L'c', + C = L'C', + d = L'd', + e = L'e', + E = L'E', + f = 
L'f', + F = L'F', + g = L'g', + G = L'G', + h = L'h', + i = L'i', + I = L'I', + j = L'j', + l = L'l', + L = L'L', + m = L'm', + n = L'n', + o = L'o', + p = L'p', + s = L's', + S = L'S', + t = L't', + u = L'u', + w = L'w', + x = L'x', + X = L'X', + z = L'z' + }; +}; + + +//************************************************************************** +// FORMAT SPECIFIER ENCAPSULATION +//************************************************************************** + +class format_flags +{ +public: + enum class positive_sign { + none, + space, // ' ' + plus // + + }; + + enum class length { + unspecified, + character, // hh + short_integer, // h + long_integer, // l + long_long_integer, // ll + long_double, // L + integer_maximum, // j + size_type, // z, I + pointer_difference, // t + integer_32, // I32 + integer_64, // I64 + wide_character // w + }; + + enum class conversion { + unspecified, + signed_decimal, // i, d + unsigned_decimal, // u + octal, // o + hexadecimal, // x, X + scientific_decimal, // e, E + fixed_decimal, // f, F + floating_decimal, // g, G + scientific_hexadecimal, // a, A + character, // c, C + string, // s, S + pointer, // p + tell, // n + strerror, // m + percent // % + }; + + format_flags() + : m_alternate_format(false) + , m_zero_pad(false) + , m_left_align(false) + , m_positive_sign(positive_sign::none) + , m_digit_grouping(false) + , m_alternate_digits(false) + , m_field_width(0) + , m_precision(-1) + , m_length(length::unspecified) + , m_uppercase(false) + , m_conversion(conversion::unspecified) + { + } + + template void apply(Stream &stream) const + { + typedef format_chars chars; + + stream.unsetf( + Stream::basefield | + Stream::adjustfield | + Stream::floatfield | + Stream::boolalpha | + Stream::showbase | + Stream::showpoint | + Stream::showpos | + Stream::uppercase); + + if (get_alternate_format()) stream.setf(Stream::showbase | Stream::showpoint); + stream.fill(get_zero_pad() ? chars::zero : chars::space); + stream.setf(get_left_align() ? 
Stream::left : get_zero_pad() ? Stream::internal : Stream::right); + if (positive_sign::plus == get_positive_sign()) stream.setf(Stream::showpos); + stream.precision((get_precision() < 0) ? 6 : get_precision()); + stream.width(get_field_width()); + if (get_uppercase()) stream.setf(Stream::uppercase); + switch (get_conversion()) + { + case conversion::unspecified: + break; + case conversion::signed_decimal: + case conversion::unsigned_decimal: + stream.setf(Stream::dec); + break; + case conversion::octal: + stream.setf(Stream::oct); + break; + case conversion::hexadecimal: + stream.setf(Stream::hex | Stream::scientific | Stream::fixed); + break; + case conversion::scientific_decimal: + stream.setf(Stream::dec | Stream::scientific); + break; + case conversion::fixed_decimal: + stream.setf(Stream::dec | Stream::fixed); + break; + case conversion::floating_decimal: + stream.setf(Stream::dec); + break; + case conversion::scientific_hexadecimal: + stream.setf(Stream::hex | Stream::scientific | Stream::fixed); + break; + case conversion::character: + case conversion::string: + case conversion::pointer: + case conversion::tell: + case conversion::strerror: + case conversion::percent: + break; + } + } + + bool get_alternate_format() const { return m_alternate_format; } + bool get_zero_pad() const { return m_zero_pad; } + bool get_left_align() const { return m_left_align; } + positive_sign get_positive_sign() const { return m_positive_sign; } + bool get_digit_grouping() const { return m_digit_grouping; } + bool get_alternate_digits() const { return m_alternate_digits; } + unsigned get_field_width() const { return m_field_width; } + int get_precision() const { return m_precision; } + length get_length() const { return m_length; } + bool get_uppercase() const { return m_uppercase; } + conversion get_conversion() const { return m_conversion; } + + void set_alternate_format() + { + m_alternate_format = true; + } + + void set_zero_pad() + { + if (!m_left_align) + { + switch 
(m_conversion) + { + case conversion::signed_decimal: + case conversion::unsigned_decimal: + case conversion::octal: + case conversion::hexadecimal: + m_zero_pad = (0 > m_precision); + break; + default: + m_zero_pad = true; + } + } + } + + void set_left_align() + { + m_zero_pad = false; + m_left_align = true; + } + + void set_positive_sign_space() + { + switch (m_conversion) + { + case conversion::unsigned_decimal: + case conversion::octal: + case conversion::hexadecimal: + break; + default: + if (positive_sign::plus != m_positive_sign) + m_positive_sign = positive_sign::space; + } + } + + void set_positive_sign_plus() + { + switch (m_conversion) + { + case conversion::unsigned_decimal: + case conversion::octal: + case conversion::hexadecimal: + break; + default: + m_positive_sign = positive_sign::plus; + } + } + + void set_digit_grouping() + { + m_digit_grouping = true; + } + + void set_alternate_digits() + { + m_alternate_digits = true; + } + + void set_field_width(int value) + { + if (0 > value) + { + set_left_align(); + m_field_width = unsigned(-value); + } + else + { + m_field_width = unsigned(value); + } + } + + void set_precision(int value) + { + m_precision = value; + if (0 <= value) + { + switch (m_conversion) + { + case conversion::signed_decimal: + case conversion::unsigned_decimal: + case conversion::octal: + case conversion::hexadecimal: + m_zero_pad = false; + break; + default: + break; + } + } + } + + void set_length(length value) + { + m_length = value; + } + + void set_uppercase() + { + m_uppercase = true; + } + + void set_conversion(conversion value) + { + m_conversion = value; + switch (value) + { + case conversion::unsigned_decimal: + case conversion::octal: + case conversion::hexadecimal: + m_positive_sign = positive_sign::none; + [[fallthrough]]; + case conversion::signed_decimal: + if (0 <= m_precision) + m_zero_pad = false; + break; + default: + break; + } + } + +private: + bool m_alternate_format; // # + bool m_zero_pad; // 0 + bool 
m_left_align; // - + positive_sign m_positive_sign; // ' ', + + bool m_digit_grouping; // ' + bool m_alternate_digits; // I + unsigned m_field_width; + int m_precision; // . + length m_length; // hh, h, l, ll, L, j, z, I, t, w + bool m_uppercase; // X, E, F, G, A + conversion m_conversion; // i, d, u, o, x, X, e, E, f, F, g, G, a, A, c, C, s, S, p, m, % +}; + + +//************************************************************************** +// FORMAT OUTPUT HELPERS +//************************************************************************** + +template +class format_output +{ +private: + template + struct string_semantics : public std::false_type { }; + template + struct string_semantics > : public std::true_type { }; + template + struct string_semantics > : public std::true_type { }; + template + using signed_integer_semantics = std::bool_constant && std::is_signed_v >; + template + using unsigned_integer_semantics = std::bool_constant && !std::is_signed_v >; + + template + static std::enable_if_t, std::make_signed_t > || std::is_integral_v > apply_signed(Stream &str, U const &value) + { + if constexpr (std::is_same_v, std::make_signed_t >) + str << int(std::make_signed_t(value)); + else if constexpr (!std::is_signed_v || std::is_same_v) + str << std::make_signed_t(value); +#if __cplusplus > 201703L + else if constexpr (!std::is_invocable_v decltype(x << y) { return x << y; }), Stream &, U const &>) + str << std::make_signed_t(value); +#endif + else + str << value; + } + + template + static std::enable_if_t, std::make_unsigned_t > || std::is_integral_v > apply_unsigned(Stream &str, U const &value) + { + if constexpr (std::is_same_v, std::make_unsigned_t >) + str << unsigned(std::make_unsigned_t(value)); + else if constexpr (!std::is_unsigned_v || std::is_same_v) + str << std::make_unsigned_t(value); +#if __cplusplus > 201703L + else if constexpr (!std::is_invocable_v decltype(x << y) { return x << y; }), Stream &, U const &>) + str << std::make_unsigned_t(value); 
+#endif + else + str << value; + } + +public: + template + static void apply(Stream &str, format_flags const &flags, U const &value) + { + if constexpr (string_semantics::value) + { + int const precision(flags.get_precision()); + if ((0 <= precision) && (value.size() > unsigned(precision))) + { + if constexpr (std::is_same_v) + { + unsigned width(flags.get_field_width()); + bool const pad(unsigned(precision) < width); + typename Stream::fmtflags const adjust(str.flags() & Stream::adjustfield); + if (!pad || (Stream::left == adjust)) str.write(&*value.begin(), unsigned(precision)); + if (pad) + { + for (width -= precision; 0U < width; --width) str.put(str.fill()); + if (Stream::left != adjust) str.write(&*value.begin(), unsigned(precision)); + } + str.width(0); + } + else + { + str << value.substr(0, unsigned(precision)); + } + } + else + { + str << value; + } + } + else if constexpr (signed_integer_semantics::value) + { + switch (flags.get_conversion()) + { + case format_flags::conversion::signed_decimal: + switch (flags.get_length()) + { + case format_flags::length::character: + str << int(static_cast(value)); + break; + case format_flags::length::short_integer: + str << short(value); + break; + case format_flags::length::long_integer: + str << long(value); + break; + case format_flags::length::long_long_integer: + str << static_cast(value); + break; + case format_flags::length::integer_maximum: + str << std::intmax_t(value); + break; + case format_flags::length::size_type: + str << std::make_signed_t(value); + break; + case format_flags::length::pointer_difference: + str << std::make_signed_t(value); + break; + case format_flags::length::integer_32: + str << std::uint32_t(std::int32_t(value)); + break; + case format_flags::length::integer_64: + str << std::int64_t(value); + break; + default: + apply_signed(str, value); + } + break; + case format_flags::conversion::unsigned_decimal: + case format_flags::conversion::octal: + case 
format_flags::conversion::hexadecimal: + switch (flags.get_length()) + { + case format_flags::length::character: + str << unsigned(static_cast(static_cast(value))); + break; + case format_flags::length::short_integer: + str << static_cast(short(value)); + break; + case format_flags::length::long_integer: + str << static_cast(long(value)); + break; + case format_flags::length::long_long_integer: + str << static_cast(static_cast(value)); + break; + case format_flags::length::integer_maximum: + str << std::uintmax_t(std::intmax_t(value)); + break; + case format_flags::length::size_type: + str << std::make_unsigned_t(std::make_signed_t(value)); + break; + case format_flags::length::pointer_difference: + str << std::make_unsigned_t(std::make_signed_t(value)); + break; + case format_flags::length::integer_32: + str << std::uint32_t(std::int32_t(value)); + break; + case format_flags::length::integer_64: + str << std::uint64_t(std::int64_t(value)); + break; + default: + apply_unsigned(str, value); + } + break; + case format_flags::conversion::character: + if (std::is_signed::value) + str << typename Stream::char_type(value); + else + str << typename Stream::char_type(std::make_signed_t(value)); + break; + case format_flags::conversion::pointer: + str << reinterpret_cast(std::uintptr_t(std::intptr_t(value))); + break; + default: + str << value; + } + } + else if constexpr (unsigned_integer_semantics::value) + { + switch (flags.get_conversion()) + { + case format_flags::conversion::signed_decimal: + switch (flags.get_length()) + { + case format_flags::length::character: + str << int(static_cast(static_cast(value))); + break; + case format_flags::length::short_integer: + str << short(static_cast(value)); + break; + case format_flags::length::long_integer: + str << long(static_cast(value)); + break; + case format_flags::length::long_long_integer: + str << static_cast(static_cast(value)); + break; + case format_flags::length::integer_maximum: + str << 
std::intmax_t(std::uintmax_t(value)); + break; + case format_flags::length::size_type: + str << std::make_signed_t(std::make_unsigned_t(value)); + break; + case format_flags::length::pointer_difference: + str << std::make_signed_t(std::make_unsigned_t(value)); + break; + case format_flags::length::integer_32: + str << std::int32_t(std::uint32_t(value)); + break; + case format_flags::length::integer_64: + str << std::int64_t(std::uint64_t(value)); + break; + default: + apply_signed(str, value); + } + break; + case format_flags::conversion::unsigned_decimal: + case format_flags::conversion::octal: + case format_flags::conversion::hexadecimal: + switch (flags.get_length()) + { + case format_flags::length::character: + str << unsigned(static_cast(value)); + break; + case format_flags::length::short_integer: + str << static_cast(value); + break; + case format_flags::length::long_integer: + str << static_cast(value); + break; + case format_flags::length::long_long_integer: + str << static_cast(value); + break; + case format_flags::length::integer_maximum: + str << std::uintmax_t(value); + break; + case format_flags::length::size_type: + str << std::make_unsigned_t(value); + break; + case format_flags::length::pointer_difference: + str << std::make_unsigned_t(value); + break; + case format_flags::length::integer_32: + str << std::uint32_t(std::int32_t(value)); + break; + case format_flags::length::integer_64: + str << std::int64_t(value); + break; + default: + apply_unsigned(str, value); + } + break; + case format_flags::conversion::character: + if (std::is_signed::value) + str << typename Stream::char_type(value); + else + str << typename Stream::char_type(std::make_signed_t(value)); + break; + case format_flags::conversion::pointer: + str << reinterpret_cast(std::uintptr_t(value)); + break; + default: +#if __cplusplus > 201703L + if constexpr (!std::is_invocable_v decltype(x << y) { return x << y; }), Stream &, U const &>) + { + assert(false); // stream out operator not 
declared or declared deleted + str << '?'; + } + else +#endif + { + str << value; + } + } + } + else + { + str << value; + } + } + static void apply(Stream &str, format_flags const &flags, bool value) + { + switch (flags.get_conversion()) + { + case format_flags::conversion::signed_decimal: + case format_flags::conversion::unsigned_decimal: + case format_flags::conversion::octal: + case format_flags::conversion::hexadecimal: + case format_flags::conversion::scientific_decimal: + case format_flags::conversion::fixed_decimal: + case format_flags::conversion::floating_decimal: + case format_flags::conversion::scientific_hexadecimal: + case format_flags::conversion::character: + case format_flags::conversion::pointer: + apply(str, flags, unsigned(value)); + break; + default: + if (flags.get_alternate_format()) str.setf(Stream::boolalpha); + str << value; + } + } +}; + +template +class format_output +{ +protected: + template + using string_semantics = std::bool_constant, typename Stream::char_type> >; + +public: + template + static void apply(Stream &str, format_flags const &flags, U const *value) + { + if constexpr (string_semantics::value) + { + switch (flags.get_conversion()) + { + case format_flags::conversion::string: + { + int precision(flags.get_precision()); + if (0 <= flags.get_precision()) + { + std::streamsize cnt(0); + for ( ; (0 < precision) && (U(format_chars::nul) != value[cnt]); --precision, ++cnt) { } + unsigned width(flags.get_field_width()); + bool const pad(std::make_unsigned_t(cnt) < width); + typename Stream::fmtflags const adjust(str.flags() & Stream::adjustfield); + if (!pad || (Stream::left == adjust)) str.write(value, cnt); + if (pad) + { + for (width -= cnt; 0U < width; --width) str.put(str.fill()); + if (Stream::left != adjust) str.write(value, cnt); + } + str.width(0); + } + else + { + str << value; + } + } + break; + case format_flags::conversion::pointer: + str << reinterpret_cast(const_cast *>(value)); + break; + default: + str << value; 
+ } + } + else + { + str << reinterpret_cast(const_cast *>(value)); + } + } +}; + +template +class format_output : protected format_output +{ +public: + template + static void apply(Stream &str, format_flags const &flags, U const *value) + { + static_assert( + !format_output::template string_semantics::value || (N <= size_t(unsigned((std::numeric_limits::max)()))), + "C string array length must not exceed maximum integer value"); + format_flags f(flags); + if (format_output::template string_semantics::value && ((0 > f.get_precision()) || (N < unsigned(f.get_precision())))) + f.set_precision(int(unsigned(N))); + format_output::apply(str, f, value); + } +}; + + +//************************************************************************** +// INTEGER INPUT HELPERS +//************************************************************************** + +template +class format_make_integer +{ +private: + template + using use_unsigned_cast = std::bool_constant && std::is_unsigned_v >; + template + using use_signed_cast = std::bool_constant::value && std::is_convertible_v >; + +public: + template static bool apply(U const &value, int &result) + { + if constexpr (use_unsigned_cast::value) + { + result = int(unsigned(value)); + return true; + } + else if constexpr (use_signed_cast::value) + { + result = int(value); + return true; + } + else + { + return false; + } + } +}; + + +//************************************************************************** +// INTEGER OUTPUT HELPERS +//************************************************************************** + +template +class format_store_integer +{ +private: + template + using is_non_const_ptr = std::bool_constant && !std::is_const_v > >; + template + using is_unsigned_ptr = std::bool_constant && std::is_unsigned_v > >; + template + using use_unsigned_cast = std::bool_constant::value && is_unsigned_ptr::value && std::is_convertible_v, std::remove_pointer_t > >; + template + using use_signed_cast = std::bool_constant::value && 
!use_unsigned_cast::value && std::is_convertible_v > >; + +public: + template static bool apply(U const &value, std::streamoff data) + { + if constexpr (use_unsigned_cast::value) + { + *value = std::remove_pointer_t(std::make_unsigned_t(data)); + return true; + } + else if constexpr (use_signed_cast::value) + { + *value = std::remove_pointer_t(std::make_signed_t(data)); + return true; + } + else + { + assert(false); // inappropriate type for storing characters written so far + return false; + } + } +}; + + +//************************************************************************** +// NON-POLYMORPHIC ARGUMENT WRAPPER +//************************************************************************** + +template +class format_argument +{ +public: + format_argument() + : m_value(nullptr) + , m_output_function(nullptr) + , m_make_integer_function(nullptr) + , m_store_integer_function(nullptr) + { + } + + template + format_argument(T const &value) + : m_value(reinterpret_cast(&value)) + , m_output_function(&static_output) + , m_make_integer_function(&static_make_integer) + , m_store_integer_function(&static_store_integer) + { + } + + void output(Stream &str, format_flags const &flags) const { m_output_function(str, flags, m_value); } + bool make_integer(int &result) const { return m_make_integer_function(m_value, result); } + void store_integer(std::streamoff data) const { m_store_integer_function(m_value, data); } + +private: + typedef void (*output_function)(Stream &str, format_flags const &flags, void const *value); + typedef bool (*make_integer_function)(void const *value, int &result); + typedef void (*store_integer_function)(void const *value, std::streamoff data); + + template static void static_output(Stream &str, format_flags const &flags, void const *value) + { + format_output::apply(str, flags, *reinterpret_cast(value)); + } + + template static bool static_make_integer(void const *value, int &result) + { + return 
format_make_integer::apply(*reinterpret_cast(value), result); + } + + template static void static_store_integer(void const *value, std::streamoff data) + { + format_store_integer::apply(*reinterpret_cast(value), data); + } + + void const *m_value; + output_function m_output_function; + make_integer_function m_make_integer_function; + store_integer_function m_store_integer_function; +}; + + +//************************************************************************** +// NON-POLYMORPHIC ARGUMENT PACK WRAPPER BASE +//************************************************************************** + +template +class format_argument_pack +{ +public: + typedef typename Stream::char_type char_type; + typedef char_type const *iterator; + iterator format_begin() const + { + return m_begin; + } + bool format_at_end(iterator it) const + { + return (m_end && (m_end == it)) || (m_check_nul && (format_chars::nul == *it)); + } + std::size_t argument_count() const + { + return m_argument_count; + } + format_argument const &operator[](std::size_t index) const + { + assert(m_argument_count > index); + return m_arguments[index]; + } + +protected: + template + using handle_char_ptr = std::bool_constant && std::is_same_v >, char_type> >; + template + using handle_char_array = std::bool_constant && std::is_same_v >, char_type> >; + template + using handle_container = std::bool_constant::value && !handle_char_array::value>; + + template + format_argument_pack( + Format &&fmt, + format_argument const *arguments, + std::enable_if_t >::value, std::size_t> argument_count) + : m_begin(fmt) + , m_end(nullptr) + , m_check_nul(true) + , m_arguments(arguments) + , m_argument_count(argument_count) + { + assert(m_begin); + assert(m_end || m_check_nul); + assert(!m_end || (m_end > m_begin)); + assert(m_arguments || !m_argument_count); + } + template + format_argument_pack( + Format &&fmt, + format_argument const *arguments, + std::enable_if_t >::value, std::size_t> argument_count) + : 
m_begin(std::cbegin(fmt)) + , m_end(std::cend(fmt)) + , m_check_nul(true) + , m_arguments(arguments) + , m_argument_count(argument_count) + { + assert(m_begin); + assert(m_end || m_check_nul); + assert(!m_end || (m_end > m_begin)); + assert(m_arguments || !m_argument_count); + } + template + format_argument_pack( + Format &&fmt, + format_argument const *arguments, + std::enable_if_t >::value, std::size_t> argument_count) + : m_begin(fmt.empty() ? nullptr : &*std::cbegin(fmt)) + , m_end(fmt.empty() ? nullptr : (m_begin + std::distance(std::cbegin(fmt), std::cend(fmt)))) + , m_check_nul(true) + , m_arguments(arguments) + , m_argument_count(argument_count) + { + assert(m_begin); + assert(m_end || m_check_nul); + assert(!m_end || (m_end > m_begin)); + assert(m_arguments || !m_argument_count); + } + + format_argument_pack(format_argument_pack const &) = default; + format_argument_pack(format_argument_pack &&) = default; + format_argument_pack &operator=(format_argument_pack const &) = default; + format_argument_pack &operator=(format_argument_pack &&) = default; + +private: + iterator m_begin; + iterator m_end; + bool m_check_nul; + format_argument const *m_arguments; + std::size_t m_argument_count; +}; + + +//************************************************************************** +// ARGUMENT PACK WRAPPER IMPLEMENTATION +//************************************************************************** + +template +class format_argument_pack_impl + : private std::array, Count> + , public format_argument_pack +{ +public: + using typename format_argument_pack::iterator; + using format_argument_pack::operator[]; + + template + format_argument_pack_impl(Format &&fmt, Params &&... args) + : std::array, Count>({ { format_argument(std::forward(args))... } }) + , format_argument_pack(std::forward(fmt), Count ? 
&*this->cbegin() : nullptr, Count) + { + static_assert(sizeof...(Params) == Count, "Wrong number of constructor arguments"); + } + + format_argument_pack_impl(format_argument_pack_impl const &) = default; + format_argument_pack_impl(format_argument_pack_impl &&) = default; + format_argument_pack_impl &operator=(format_argument_pack_impl const &) = default; + format_argument_pack_impl &operator=(format_argument_pack_impl &&) = default; +}; + + +//************************************************************************** +// ARGUMENT PACK CREATOR FUNCTION +//************************************************************************** + +template +inline format_argument_pack_impl make_format_argument_pack(Format &&fmt, Params &&... args) +{ + return format_argument_pack_impl(std::forward(fmt), std::forward(args)...); +} + + +//************************************************************************** +// FORMAT STRING PARSING HELPER +//************************************************************************** + +template +class format_helper : public format_chars +{ +public: + static bool parse_format( + Format const &fmt, + typename Format::iterator &it, + format_flags &flags, + int &next_position, + int &argument_position, + int &width_position, + int &precision_position) + { + static_assert((format_helper::nine - format_helper::zero) == 9, "Digits must be contiguous"); + assert(!fmt.format_at_end(it)); + assert(format_helper::percent == *it); + + int num; + int nxt(next_position); + ++it; + flags = format_flags(); + argument_position = -1; + width_position = -1; + precision_position = -1; + + // Leading zeroes are tricky - they could be a zero-pad flag or part of a position specifier + bool const leading_zero(!fmt.format_at_end(it) && (format_helper::zero == *it)); + while (!fmt.format_at_end(it) && (format_helper::zero == *it)) ++it; + + // Digits encountered at this point could be a field width or a position specifier + num = 0; + bool leading_num(have_digit(fmt, 
it)); + while (have_digit(fmt, it)) add_digit(num, *it++); + if (leading_num && !have_dollar(fmt, it)) + { + // No dollar sign, leading number is field width + if (leading_zero) flags.set_zero_pad(); + flags.set_field_width(num); + } + else + { + // If we hit a dollar sign after a number, that's a position specifier + if ((leading_zero || leading_num) && have_dollar(fmt, it)) + { + argument_position = num; + ++it; + } + else if (leading_zero) + { + flags.set_zero_pad(); + } + + // Parse flag characters + while (!fmt.format_at_end(it)) + { + switch (*it) + { + case format_helper::hash: ++it; flags.set_alternate_format(); continue; + case format_helper::zero: ++it; flags.set_zero_pad(); continue; + case format_helper::minus: ++it; flags.set_left_align(); continue; + case format_helper::space: ++it; flags.set_positive_sign_space(); continue; + case format_helper::plus: ++it; flags.set_positive_sign_plus(); continue; + case format_helper::quote: ++it; flags.set_digit_grouping(); continue; + case format_helper::I: ++it; flags.set_alternate_digits(); continue; + default: break; + } + break; + } + + // Check for literal or parameterised field width + if (!fmt.format_at_end(it)) + { + if (is_digit(*it)) + { + flags.set_field_width(read_number(fmt, it)); + } + else if (format_helper::asterisk == *it) + { + ++it; + if (have_digit(fmt, it)) + { + num = read_number(fmt, it); + assert(have_dollar(fmt, it)); // invalid positional width + if (!have_dollar(fmt, it)) return false; + width_position = num; + nxt = width_position + 1; + ++it; + } + else + { + width_position = nxt++; + } + } + } + } + + // Check for literal or parameterised precision + if (!fmt.format_at_end(it) && (*it == format_helper::point)) + { + ++it; + if (have_digit(fmt, it)) + { + flags.set_precision(read_number(fmt, it)); + } + else if (!fmt.format_at_end(it) && (format_helper::asterisk == *it)) + { + ++it; + if (have_digit(fmt, it)) + { + num = read_number(fmt, it); + assert(have_dollar(fmt, it)); // invalid 
positional precision + if (!have_dollar(fmt, it)) return false; + precision_position = num; + nxt = precision_position + 1; + ++it; + } + else + { + precision_position = nxt++; + } + } + else + { + flags.set_precision(0); + } + } + + // Check for length modifiers + if (!fmt.format_at_end(it)) switch (*it) + { + case format_helper::h: + ++it; + if (!fmt.format_at_end(it) && (format_helper::h == *it)) + { + ++it; + flags.set_length(format_flags::length::character); + } + else + { + flags.set_length(format_flags::length::short_integer); + } + break; + case format_helper::l: + ++it; + if (!fmt.format_at_end(it) && (format_helper::l == *it)) + { + ++it; + flags.set_length(format_flags::length::long_long_integer); + } + else + { + flags.set_length(format_flags::length::long_integer); + } + break; + case format_helper::L: + ++it; + flags.set_length(format_flags::length::long_double); + break; + case format_helper::j: + ++it; + flags.set_length(format_flags::length::integer_maximum); + break; + case format_helper::z: + ++it; + flags.set_length(format_flags::length::size_type); + break; + case format_helper::t: + ++it; + flags.set_length(format_flags::length::pointer_difference); + break; + case format_helper::I: + { + ++it; + format_flags::length length = format_flags::length::size_type; + if (!fmt.format_at_end(it)) + { + if ((typename format_helper::char_type(format_helper::zero) + 3) == *it) + { + typename Format::iterator tmp(it); + ++tmp; + if (!fmt.format_at_end(tmp) && ((typename format_helper::char_type(format_helper::zero) + 2) == *tmp)) + { + length = format_flags::length::integer_32; + it = ++tmp; + } + } + else if ((typename format_helper::char_type(format_helper::zero) + 6) == *it) + { + typename Format::iterator tmp(it); + ++tmp; + if (!fmt.format_at_end(tmp) && ((typename format_helper::char_type(format_helper::zero) + 4) == *tmp)) + { + length = format_flags::length::integer_64; + it = ++tmp; + } + } + } + flags.set_length(length); + } + break; + case 
format_helper::w: + ++it; + flags.set_length(format_flags::length::wide_character); + break; + default: + break; + } + + // Now we should find a conversion specifier + assert(!fmt.format_at_end(it)); // missing conversion + if (fmt.format_at_end(it)) return false; + switch (*it) + { + case format_helper::d: + case format_helper::i: + flags.set_conversion(format_flags::conversion::signed_decimal); + break; + case format_helper::o: + flags.set_conversion(format_flags::conversion::octal); + break; + case format_helper::u: + flags.set_conversion(format_flags::conversion::unsigned_decimal); + break; + case format_helper::X: + flags.set_uppercase(); + [[fallthrough]]; + case format_helper::x: + flags.set_conversion(format_flags::conversion::hexadecimal); + break; + case format_helper::E: + flags.set_uppercase(); + [[fallthrough]]; + case format_helper::e: + flags.set_conversion(format_flags::conversion::scientific_decimal); + break; + case format_helper::F: + flags.set_uppercase(); + [[fallthrough]]; + case format_helper::f: + flags.set_conversion(format_flags::conversion::fixed_decimal); + break; + case format_helper::G: + flags.set_uppercase(); + [[fallthrough]]; + case format_helper::g: + flags.set_conversion(format_flags::conversion::floating_decimal); + break; + case format_helper::A: + flags.set_uppercase(); + [[fallthrough]]; + case format_helper::a: + flags.set_conversion(format_flags::conversion::scientific_hexadecimal); + break; + case format_helper::C: + if (format_flags::length::unspecified == flags.get_length()) + flags.set_length(format_flags::length::long_integer); + [[fallthrough]]; + case format_helper::c: + flags.set_conversion(format_flags::conversion::character); + break; + case format_helper::S: + if (format_flags::length::unspecified == flags.get_length()) + flags.set_length(format_flags::length::long_integer); + [[fallthrough]]; + case format_helper::s: + flags.set_conversion(format_flags::conversion::string); + break; + case format_helper::p: + 
flags.set_conversion(format_flags::conversion::pointer); + break; + case format_helper::n: + flags.set_conversion(format_flags::conversion::tell); + break; + case format_helper::m: + flags.set_conversion(format_flags::conversion::strerror); + break; + case format_helper::percent: + flags.set_conversion(format_flags::conversion::percent); + break; + default: + assert(false); // unsupported conversion + return false; + } + ++it; + + // Finalise argument position + if (argument_position < 0) argument_position = nxt; + next_position = argument_position; + switch (flags.get_conversion()) + { + case format_flags::conversion::strerror: + case format_flags::conversion::percent: + break; + default: + ++next_position; + } + return true; + } + +private: + static bool have_dollar(Format const &fmt, typename Format::iterator const &it) + { + return !fmt.format_at_end(it) && (*it == format_helper::dollar); + } + + static bool have_digit(Format const &fmt, typename Format::iterator const &it) + { + return !fmt.format_at_end(it) && is_digit(*it); + } + + static bool is_digit(typename format_helper::char_type value) + { + return (format_helper::zero <= value) && (format_helper::nine >= value); + } + + static int digit_value(typename format_helper::char_type value) + { + assert(is_digit(value)); + return int(std::make_signed_t(value - format_helper::zero)); + } + + static void add_digit(int &num, typename format_helper::char_type digit) + { + num = (num * 10) + digit_value(digit); + } + + static int read_number(Format const &fmt, typename Format::iterator &it) + { + assert(have_digit(fmt, it)); + int value = 0; + do add_digit(value, *it++); while (have_digit(fmt, it)); + return value; + } +}; + + +//************************************************************************** +// CORE FORMATTING FUNCTION +//************************************************************************** + +template +typename Stream::off_type stream_format(Stream &str, format_argument_pack const &args) +{ + 
typedef format_helper > format_helper; + typedef typename format_argument_pack::iterator iterator; + class stream_preserver + { + public: + stream_preserver(Stream &stream) + : m_stream(stream) + , m_fill(stream.fill()) + , m_flags(stream.flags()) + , m_precision(stream.precision()) + , m_width(stream.width()) + { + } + ~stream_preserver() + { + m_stream.width(m_width); + m_stream.precision(m_precision); + m_stream.flags(m_flags); + m_stream.fill(m_fill); + } + private: + Stream &m_stream; + typename Stream::char_type m_fill; + typename Stream::fmtflags m_flags; + std::streamsize m_precision; + std::streamsize m_width; + }; + + typename Stream::pos_type const begin(str.tellp()); + stream_preserver const preserver(str); + int next_pos(1); + iterator start = args.format_begin(); + for (iterator it = start; !args.format_at_end(start); ) + { + while (!args.format_at_end(it) && (format_helper::percent != *it)) ++it; + if (start != it) + { + str.write(&*start, it - start); + start = it; + } + if (!args.format_at_end(it)) + { + // Try to parse a percent format specification + format_flags flags; + int arg_pos, width_pos, prec_pos; + if (!format_helper::parse_format(args, it, flags, next_pos, arg_pos, width_pos, prec_pos)) + continue; + + // Handle parameterised width + if (0 <= width_pos) + { + assert(flags.get_field_width() == 0U); + assert(0 < width_pos); + assert(args.argument_count() >= unsigned(width_pos)); + if ((0 < width_pos) && (args.argument_count() >= unsigned(width_pos))) + { + int width; + if (args[width_pos - 1].make_integer(width)) + { + if (0 > width) + { + flags.set_left_align(); + flags.set_field_width(unsigned(-width)); + } + else + { + flags.set_field_width(unsigned(width)); + } + } + else + { + assert(false); // inappropriate type passed as width argument + } + } + } + + // Handle parameterised precision + if (0 <= prec_pos) + { + assert(flags.get_precision() < 0); + assert(0 < prec_pos); + assert(args.argument_count() >= unsigned(prec_pos)); + if ((0 
< prec_pos) && (args.argument_count() >= unsigned(prec_pos))) + { + int precision; + if (args[prec_pos - 1].make_integer(precision)) + flags.set_precision(precision); + else + assert(false); // inappropriate type passed as precision argument + } + } + + // Some conversions don't actually take an argument - get them out of the way + flags.apply(str); + if (format_flags::conversion::strerror == flags.get_conversion()) + { + str << std::strerror(errno); + start = it; + } + else if (format_flags::conversion::percent == flags.get_conversion()) + { + str << typename Stream::char_type(format_chars::percent); + start = it; + } + else + { + assert(0 < arg_pos); + assert(args.argument_count() >= unsigned(arg_pos)); + if ((0 >= arg_pos) || (args.argument_count() < unsigned(arg_pos))) + continue; + if (format_flags::conversion::tell == flags.get_conversion()) + { + typename Stream::pos_type const current(str.tellp()); + args[arg_pos - 1].store_integer( + ((typename Stream::pos_type(-1) == begin) || (typename Stream::pos_type(-1) == current)) + ? typename Stream::off_type(-1) + : (current - begin)); + } + else + { + args[arg_pos - 1].output(str, flags); + } + start = it; + } + } + } + typename Stream::pos_type const end(str.tellp()); + return ((typename Stream::pos_type(-1) == begin) || (typename Stream::pos_type(-1) == end)) + ? typename Stream::off_type(-1) + : (end - begin); +} + +} // namespace detail + + +//************************************************************************** +// FORMAT TO STREAM FUNCTIONS +//************************************************************************** + +template +inline typename Stream::off_type stream_format(Stream &str, Format const &fmt, Params &&... 
args) +{ + return detail::stream_format(str, detail::make_format_argument_pack(fmt, std::forward(args)...)); +} + +template +inline typename Stream::off_type stream_format(Stream &str, detail::format_argument_pack const &args) +{ + return detail::stream_format(str, args); +} + +template +inline typename Stream::off_type stream_format(Stream &str, detail::format_argument_pack &&args) +{ + return detail::stream_format(str, args); +} + + +//************************************************************************** +// FORMAT TO NEW STRING FUNCTIONS +//************************************************************************** + +template +inline String string_format(Format &&fmt, Params &&... args) +{ + typedef std::basic_ostringstream ostream; + ostream str; + stream_format(str, fmt, std::forward(args)...); + return str.str(); +}; + +template +inline String string_format(std::locale const &locale, Format &&fmt, Params &&... args) +{ + typedef std::basic_ostringstream ostream; + ostream str; + str.imbue(locale); + stream_format(str, fmt, std::forward(args)...); + return str.str(); +}; + +template +inline String string_format(detail::format_argument_pack const &args) +{ + typedef std::basic_ostringstream ostream; + ostream str; + detail::stream_format(str, args); + return str.str(); +}; + +template +inline String string_format(detail::format_argument_pack &&args) +{ + typedef std::basic_ostringstream ostream; + ostream str; + detail::stream_format(str, std::move(args)); + return str.str(); +}; + +template +inline String string_format(std::locale const &locale, detail::format_argument_pack const &args) +{ + typedef std::basic_ostringstream ostream; + ostream str; + str.imbue(locale); + detail::stream_format(str, args); + return str.str(); +}; + +template +inline String string_format(std::locale const &locale, detail::format_argument_pack &&args) +{ + typedef std::basic_ostringstream ostream; + ostream str; + str.imbue(locale); + detail::stream_format(str, 
std::move(args)); + return str.str(); +}; + + +//************************************************************************** +// CREATING ARGUMENT PACKS +//************************************************************************** + +using detail::format_argument_pack; +using detail::make_format_argument_pack; + +} // namespace util + + +//************************************************************************** +// EXTERNAL TEMPLATE INSTANTIATIONS +//************************************************************************** + +namespace util { + +namespace detail { + +extern template class format_chars; +extern template class format_chars; + +extern template void format_flags::apply(std::ostream &) const; +extern template void format_flags::apply(std::wostream &) const; +extern template void format_flags::apply(std::iostream &) const; +extern template void format_flags::apply(std::wiostream &) const; +extern template void format_flags::apply(std::ostringstream &) const; +extern template void format_flags::apply(std::wostringstream &) const; +extern template void format_flags::apply(std::stringstream &) const; +extern template void format_flags::apply(std::wstringstream &) const; +extern template void format_flags::apply(ovectorstream &) const; +extern template void format_flags::apply(wovectorstream &) const; +extern template void format_flags::apply(vectorstream &) const; +extern template void format_flags::apply(wvectorstream &) const; + +extern template class format_argument; +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, 
format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern 
template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void 
format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool 
format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void 
format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::iostream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template 
bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void 
format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void 
format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wiostream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void 
format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void 
const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::ostringstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const 
*, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template 
void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wostringstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool 
format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, 
std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::stringstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool 
format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void 
format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void 
format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template void format_argument::static_output(std::wstringstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void 
format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern 
template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(ovectorstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void 
format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void 
format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wovectorstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool 
format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern 
template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(vectorstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool 
format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template 
void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument; +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template void format_argument::static_output(wvectorstream &, format_flags const &, void const *); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern 
template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template bool format_argument::static_make_integer(void const *, int &); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void 
format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); +extern template void format_argument::static_store_integer(void const *, std::streamoff); + +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; +extern template class format_argument_pack; + +extern template std::ostream::off_type stream_format(std::ostream &, format_argument_pack const &); +extern template std::wostream::off_type stream_format(std::wostream &, format_argument_pack const &); +extern template std::iostream::off_type stream_format(std::iostream &, format_argument_pack const &); +extern template std::iostream::off_type stream_format(std::iostream &, format_argument_pack const &); +extern template std::wiostream::off_type stream_format(std::wiostream &, format_argument_pack const &); +extern template std::wiostream::off_type stream_format(std::wiostream &, format_argument_pack const &); +extern template std::ostringstream::off_type stream_format(std::ostringstream &, format_argument_pack const &); +extern template std::ostringstream::off_type stream_format(std::ostringstream &, format_argument_pack 
const &); +extern template std::wostringstream::off_type stream_format(std::wostringstream &, format_argument_pack const &); +extern template std::wostringstream::off_type stream_format(std::wostringstream &, format_argument_pack const &); +extern template std::stringstream::off_type stream_format(std::stringstream &, format_argument_pack const &); +extern template std::stringstream::off_type stream_format(std::stringstream &, format_argument_pack const &); +extern template std::stringstream::off_type stream_format(std::stringstream &, format_argument_pack const &); +extern template std::wstringstream::off_type stream_format(std::wstringstream &, format_argument_pack const &); +extern template std::wstringstream::off_type stream_format(std::wstringstream &, format_argument_pack const &); +extern template std::wstringstream::off_type stream_format(std::wstringstream &, format_argument_pack const &); +extern template ovectorstream::off_type stream_format(ovectorstream &, format_argument_pack const &); +extern template ovectorstream::off_type stream_format(ovectorstream &, format_argument_pack const &); +extern template wovectorstream::off_type stream_format(wovectorstream &, format_argument_pack const &); +extern template wovectorstream::off_type stream_format(wovectorstream &, format_argument_pack const &); +extern template vectorstream::off_type stream_format(vectorstream &, format_argument_pack const &); +extern template vectorstream::off_type stream_format(vectorstream &, format_argument_pack const &); +extern template vectorstream::off_type stream_format(vectorstream &, format_argument_pack const &); +extern template wvectorstream::off_type stream_format(wvectorstream &, format_argument_pack const &); +extern template wvectorstream::off_type stream_format(wvectorstream &, format_argument_pack const &); +extern template wvectorstream::off_type stream_format(wvectorstream &, format_argument_pack const &); + +} // namespace detail + +} // namespace util + +#endif 
// MAME_UTIL_STRFORMAT_H diff --git a/waterbox/ares64/ares/thirdparty/mame/lib/util/vecstream.h b/waterbox/ares64/ares/thirdparty/mame/lib/util/vecstream.h new file mode 100644 index 0000000000..1af7dd0cd8 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/lib/util/vecstream.h @@ -0,0 +1,408 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/*************************************************************************** + + vecstream.h + + streams with vector storage + + These types are useful if you want a persistent buffer for formatted + text and you need to use it like a character array or character + pointer, as you get read-only access to it without copying. The + storage is always guaranteed to be contiguous. Writing to the + stream may invalidate pointers to storage. + +***************************************************************************/ + +#ifndef MAME_UTIL_VECSTREAM_H +#define MAME_UTIL_VECSTREAM_H + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace util { + +template , typename Allocator = std::allocator > +class basic_vectorbuf : public std::basic_streambuf +{ +public: + typedef typename std::basic_streambuf::char_type char_type; + typedef typename std::basic_streambuf::int_type int_type; + typedef typename std::basic_streambuf::pos_type pos_type; + typedef typename std::basic_streambuf::off_type off_type; + typedef Allocator allocator_type; + typedef std::vector vector_type; + + basic_vectorbuf(std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) : std::basic_streambuf(), m_mode(mode), m_storage(), m_threshold(nullptr) + { + setup(); + } + + basic_vectorbuf(vector_type const &content, std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) : std::basic_streambuf(), m_mode(mode), m_storage(content), m_threshold(nullptr) + { + setup(); + } + + basic_vectorbuf(vector_type &&content, 
std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) : std::basic_streambuf(), m_mode(mode), m_storage(std::move(content)), m_threshold(nullptr) + { + setup(); + } + + basic_vectorbuf(basic_vectorbuf const &that) : std::basic_streambuf(that), m_mode(that.m_mode), m_storage(that.m_storage), m_threshold(nullptr) + { + adjust(); + } + + basic_vectorbuf(basic_vectorbuf &&that) : std::basic_streambuf(that), m_mode(that.m_mode), m_storage(std::move(that.m_storage)), m_threshold(that.m_threshold) + { + that.clear(); + } + + vector_type const &vec() const + { + if (m_mode & std::ios_base::out) + { + if (this->pptr() > m_threshold) m_threshold = this->pptr(); + auto const base(this->pbase()); + auto const end(m_threshold - base); + if (m_storage.size() > std::make_unsigned_t(end)) + { + m_storage.resize(std::make_unsigned_t(end)); + assert(&m_storage[0] == base); + auto const put_offset(this->pptr() - base); + const_cast(this)->setp(base, base + put_offset); + const_cast(this)->pbump(put_offset); + } + } + return m_storage; + } + + void vec(const vector_type &content) + { + m_storage = content; + setup(); + } + + void vec(vector_type &&content) + { + m_storage = std::move(content); + setup(); + } + + void clear() + { + m_storage.clear(); + setup(); + } + + void swap(basic_vectorbuf &that) + { + using std::swap; + std::basic_streambuf::swap(that); + swap(m_mode, that.m_mode); + swap(m_storage, that.m_storage); + swap(m_threshold, that.m_threshold); + } + + void reserve(typename vector_type::size_type size) + { + if ((m_mode & std::ios_base::out) && (m_storage.capacity() < size)) + { + m_storage.reserve(size); + adjust(); + } + } + + basic_vectorbuf &operator=(basic_vectorbuf const &that) + { + std::basic_streambuf::operator=(that); + m_mode = that.m_mode; + m_storage = that.m_storage; + m_threshold = that.m_threshold; + adjust(); + return *this; + } + + basic_vectorbuf &operator=(basic_vectorbuf &&that) + { + std::basic_streambuf::operator=(that); + m_mode 
= that.m_mode; + m_storage = std::move(that.m_storage); + m_threshold = that.m_threshold; + that.clear(); + return *this; + } + +protected: + virtual pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override + { + bool const in(which & std::ios_base::in); + bool const out(which & std::ios_base::out); + if ((!in && !out) || + (in && out && (std::ios_base::cur == dir)) || + (in && !(m_mode & std::ios_base::in)) || + (out && !(m_mode & std::ios_base::out))) + { + return pos_type(off_type(-1)); + } + maximise_egptr(); + off_type const end((m_mode & std::ios_base::out) ? off_type(m_threshold - this->pbase()) : off_type(m_storage.size())); + switch (dir) + { + case std::ios_base::beg: + break; + case std::ios_base::end: + off += end; + break; + case std::ios_base::cur: + off += off_type(in ? (this->gptr() - this->eback()) : (this->pptr() - this->pbase())); + break; + default: + return pos_type(off_type(-1)); + } + if ((off_type(0) > off) || ((m_mode & std::ios_base::app) && out && (end != off))) return pos_type(off_type(-1)); + if ((out ? 
off_type(this->epptr() - this->pbase()) : end) < off) return pos_type(off_type(-1)); + if (out) + { + this->setp(this->pbase(), this->epptr()); + this->pbump(off); + if (m_threshold < this->pptr()) m_threshold = this->pptr(); + if (m_mode & std::ios_base::in) + { + if (in) this->setg(this->eback(), this->eback() + off, m_threshold); + else if (this->egptr() < m_threshold) this->setg(this->eback(), this->gptr(), m_threshold); + } + } + else if (in) + { + this->setg(this->eback(), this->eback() + off, this->egptr()); + } + return pos_type(off); + } + + virtual pos_type seekpos(pos_type pos, std::ios_base::openmode which = std::ios_base::in |std:: ios_base::out) override + { + return seekoff(off_type(pos), std::ios_base::beg, which); + } + + virtual int_type underflow() override + { + if (!this->gptr()) return Traits::eof(); + maximise_egptr(); + return (this->gptr() < this->egptr()) ? Traits::to_int_type(*this->gptr()) : Traits::eof(); + } + + virtual int_type overflow(int_type ch = Traits::eof()) override + { + if (!(m_mode & std::ios_base::out)) return Traits::eof(); + if (Traits::eq_int_type(ch, Traits::eof())) return Traits::not_eof(ch); + auto const put_offset(this->pptr() - this->pbase() + 1); + auto const threshold_offset((std::max)(m_threshold - this->pbase(), put_offset)); + m_storage.push_back(Traits::to_char_type(ch)); + m_storage.resize(m_storage.capacity()); + auto const base(&m_storage[0]); + this->setp(base, base + m_storage.size()); + m_threshold = base + threshold_offset; + if (m_mode & std::ios_base::in) this->setg(base, base + (this->gptr() - this->eback()), m_threshold); + this->pbump(int(put_offset)); + return ch; + } + + virtual int_type pbackfail(int_type ch = Traits::eof()) override + { + if (this->gptr() != this->eback()) + { + if (Traits::eq_int_type(ch, Traits::eof())) + { + this->gbump(-1); + return Traits::not_eof(ch); + } + else if (Traits::eq(Traits::to_char_type(ch), this->gptr()[-1])) + { + this->gbump(-1); + return ch; + } + else if 
(m_mode & std::ios_base::out) + { + this->gbump(-1); + *this->gptr() = Traits::to_char_type(ch); + return ch; + } + } + return Traits::eof(); + } + +private: + void setup() + { + if (m_mode & std::ios_base::out) + { + auto const end(m_storage.size()); + m_storage.resize(m_storage.capacity()); + if (m_storage.empty()) + { + m_threshold = nullptr; + this->setg(nullptr, nullptr, nullptr); + this->setp(nullptr, nullptr); + } + else + { + auto const base(&m_storage[0]); + m_threshold = base + end; + this->setp(base, base + m_storage.size()); + if (m_mode & std::ios_base::in) this->setg(base, base, m_threshold); + } + if (m_mode & (std::ios_base::app | std::ios_base::ate)) this->pbump(int(unsigned(end))); + } + else if (m_storage.empty()) + { + this->setg(nullptr, nullptr, nullptr); + } + else if (m_mode & std::ios_base::in) + { + auto const base(&m_storage[0]); + this->setg(base, base, base + m_storage.size()); + } + } + + void adjust() + { + auto const put_offset(this->pptr() - this->pbase()); + auto const get_offset(this->gptr() - this->eback()); + setup(); + if (m_mode & std::ios_base::out) + { + this->pbump(int(put_offset)); + m_threshold = this->pptr(); + if (m_mode & std::ios_base::in) + { + auto const base(&m_storage[0]); + this->setg(base, base + get_offset, m_threshold); + } + } + else if (m_mode & std::ios_base::in) + { + this->gbump(int(get_offset)); + } + } + + void maximise_egptr() + { + if (m_mode & std::ios_base::out) + { + if (m_threshold < this->pptr()) m_threshold = this->pptr(); + if ((m_mode & std::ios_base::in) && (this->egptr() < m_threshold)) this->setg(this->eback(), this->gptr(), m_threshold); + } + } + + std::ios_base::openmode m_mode; + mutable vector_type m_storage; + mutable CharT *m_threshold; +}; + +template , typename Allocator = std::allocator > +class basic_ivectorstream : public std::basic_istream +{ +public: + typedef typename basic_vectorbuf::vector_type vector_type; + + basic_ivectorstream(std::ios_base::openmode mode = 
std::ios_base::in) : std::basic_istream(&m_rdbuf), m_rdbuf(mode) { } + basic_ivectorstream(vector_type const &content, std::ios_base::openmode mode = std::ios_base::in) : std::basic_istream(&m_rdbuf), m_rdbuf(content, mode) { } + basic_ivectorstream(vector_type &&content, std::ios_base::openmode mode = std::ios_base::in) : std::basic_istream(&m_rdbuf), m_rdbuf(std::move(content), mode) { } + + basic_vectorbuf *rdbuf() const { return static_cast *>(std::basic_istream::rdbuf()); } + vector_type const &vec() const { return rdbuf()->vec(); } + void vec(const vector_type &content) { rdbuf()->vec(content); } + void vec(vector_type &&content) { rdbuf()->vec(std::move(content)); } + + void swap(basic_ivectorstream &that) { std::basic_istream::swap(that); rdbuf()->swap(*that.rdbuf()); } + +private: + basic_vectorbuf m_rdbuf; +}; + +template , typename Allocator = std::allocator > +class basic_ovectorstream : public std::basic_ostream +{ +public: + typedef typename basic_vectorbuf::vector_type vector_type; + + basic_ovectorstream(std::ios_base::openmode mode = std::ios_base::out) : std::basic_ostream(&m_rdbuf), m_rdbuf(mode) { } + basic_ovectorstream(vector_type const &content, std::ios_base::openmode mode = std::ios_base::out) : std::basic_ostream(&m_rdbuf), m_rdbuf(content, mode) { } + basic_ovectorstream(vector_type &&content, std::ios_base::openmode mode = std::ios_base::out) : std::basic_ostream(&m_rdbuf), m_rdbuf(std::move(content), mode) { } + + basic_vectorbuf *rdbuf() const { return static_cast *>(std::basic_ostream::rdbuf()); } + + vector_type const &vec() const { return rdbuf()->vec(); } + void vec(const vector_type &content) { rdbuf()->vec(content); } + void vec(vector_type &&content) { rdbuf()->vec(std::move(content)); } + basic_ovectorstream &reserve(typename vector_type::size_type size) { rdbuf()->reserve(size); return *this; } + + void swap(basic_ovectorstream &that) { std::basic_ostream::swap(that); rdbuf()->swap(*that.rdbuf()); } + +private: + 
basic_vectorbuf m_rdbuf; +}; + +template , typename Allocator = std::allocator > +class basic_vectorstream : public std::basic_iostream +{ +public: + typedef typename basic_vectorbuf::vector_type vector_type; + + basic_vectorstream(std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) : std::basic_iostream(&m_rdbuf), m_rdbuf(mode) { } + basic_vectorstream(vector_type const &content, std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) : std::basic_iostream(&m_rdbuf), m_rdbuf(content, mode) { } + basic_vectorstream(vector_type &&content, std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) : std::basic_iostream(&m_rdbuf), m_rdbuf(std::move(content), mode) { } + + basic_vectorbuf *rdbuf() const { return static_cast *>(std::basic_iostream::rdbuf()); } + + vector_type const &vec() const { return rdbuf()->vec(); } + void vec(const vector_type &content) { rdbuf()->vec(content); } + void vec(vector_type &&content) { rdbuf()->vec(std::move(content)); } + basic_vectorstream &reserve(typename vector_type::size_type size) { rdbuf()->reserve(size); return *this; } + + void swap(basic_vectorstream &that) { std::basic_iostream::swap(that); rdbuf()->swap(*that.rdbuf()); } + +private: + basic_vectorbuf m_rdbuf; +}; + +typedef basic_ivectorstream ivectorstream; +typedef basic_ivectorstream wivectorstream; +typedef basic_ovectorstream ovectorstream; +typedef basic_ovectorstream wovectorstream; +typedef basic_vectorstream vectorstream; +typedef basic_vectorstream wvectorstream; + +template +void swap(basic_vectorbuf &a, basic_vectorbuf &b) { a.swap(b); } + +template +void swap(basic_ivectorstream &a, basic_ivectorstream &b) { a.swap(b); } +template +void swap(basic_ovectorstream &a, basic_ovectorstream &b) { a.swap(b); } +template +void swap(basic_vectorstream &a, basic_vectorstream &b) { a.swap(b); } + +extern template class basic_ivectorstream; +extern template class basic_ivectorstream; +extern template class 
basic_ovectorstream; +extern template class basic_ovectorstream; +extern template class basic_vectorstream; +extern template class basic_vectorstream; + +} // namespace util + +#endif // MAME_UTIL_VECSTREAM_H diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/includes/n64.h b/waterbox/ares64/ares/thirdparty/mame/mame/includes/n64.h new file mode 100644 index 0000000000..3fa804c8bb --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/includes/n64.h @@ -0,0 +1,470 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +#ifndef MAME_INCLUDES_N64_H +#define MAME_INCLUDES_N64_H + +#pragma once + +#if defined(MAME_RDP) +#include "video/n64.h" + +class running_machine +{ +public: + running_machine(n64_state* state) + : m_state(state), m_rand_seed(0x9d14abd7) + { } + + template DriverClass *driver_data() const; + + u32 rand() + { + m_rand_seed = 1664525 * m_rand_seed + 1013904223; + + // return rotated by 16 bits; the low bits have a short period + // and are frequently used + return (m_rand_seed >> 16) | (m_rand_seed << 16); + } + +private: + n64_state* m_state; + u32 m_rand_seed; // current random number seed +}; + +template <> inline n64_state *running_machine::driver_data() const { return m_state; } + +class n64_state +{ +public: + n64_state(uint32_t* rdram, uint32_t* rsp_dmem, n64_periphs* rcp_periphs) + : m_machine(this), m_rdram(rdram), m_rsp_dmem(rsp_dmem), m_rcp_periphs(rcp_periphs) + { } + + void video_start(); + + // Getters + n64_rdp* rdp() { return m_rdp.get(); } + + running_machine& machine() { return m_machine; } + +protected: + running_machine m_machine; + + uint32_t* m_rdram; + uint32_t* m_rsp_dmem; + + n64_periphs* m_rcp_periphs; + + /* video-related */ + std::unique_ptr m_rdp; +}; + +class n64_periphs +{ +public: + virtual void dp_full_sync() = 0; +}; +#else +#include "cpu/rsp/rsp.h" +#include "cpu/mips/mips3.h" +#include "sound/dmadac.h" +#include "video/n64.h" + +/*----------- driver state -----------*/ + +class n64_rdp; +class 
n64_periphs; + +class n64_state : public driver_device +{ +public: + n64_state(const machine_config &mconfig, device_type type, const char *tag) + : driver_device(mconfig, type, tag) + , m_vr4300(*this, "maincpu") + , m_rsp(*this, "rsp") + , m_sram(*this, "sram") + , m_rdram(*this, "rdram") + , m_rsp_imem(*this, "rsp_imem") + , m_rsp_dmem(*this, "rsp_dmem") + , m_rcp_periphs(*this, "rcp") + { + } + + virtual void machine_start() override; + virtual void machine_reset() override; + virtual void video_start() override; + void n64_machine_stop(); + + uint32_t screen_update_n64(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect); + DECLARE_WRITE_LINE_MEMBER(screen_vblank_n64); + + // Getters + n64_rdp* rdp() { return m_rdp.get(); } + uint32_t* rdram() { return m_rdram; } + uint32_t* sram() { return m_sram; } + +protected: + required_device m_vr4300; + required_device m_rsp; + + optional_shared_ptr m_sram; + required_shared_ptr m_rdram; + required_shared_ptr m_rsp_imem; + required_shared_ptr m_rsp_dmem; + + required_device m_rcp_periphs; + + /* video-related */ + std::unique_ptr m_rdp; +}; + +/*----------- devices -----------*/ + +#define AUDIO_DMA_DEPTH 2 + +struct n64_savable_data_t +{ + uint8_t sram[0x20000]; + uint8_t eeprom[2048]; + uint8_t mempak[2][0x8000]; +}; + +class n64_periphs : public device_t, + public device_video_interface +{ +private: + struct AUDIO_DMA + { + uint32_t address; + uint32_t length; + }; + +public: + // construction/destruction + n64_periphs(const machine_config &mconfig, const char *tag, device_t *owner, uint32_t clock); + + uint32_t is64_r(offs_t offset); + void is64_w(offs_t offset, uint32_t data); + uint32_t open_r(offs_t offset); + void open_w(uint32_t data); + uint32_t rdram_reg_r(offs_t offset, uint32_t mem_mask = ~0); + void rdram_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t mi_reg_r(offs_t offset, uint32_t mem_mask = ~0); + void mi_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask 
= ~0); + uint32_t vi_reg_r(offs_t offset, uint32_t mem_mask = ~0); + void vi_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t ai_reg_r(offs_t offset, uint32_t mem_mask = ~0); + void ai_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t pi_reg_r(offs_t offset, uint32_t mem_mask = ~0); + void pi_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t ri_reg_r(offs_t offset, uint32_t mem_mask = ~0); + void ri_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t si_reg_r(offs_t offset); + void si_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t dd_reg_r(offs_t offset); + void dd_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t pif_ram_r(offs_t offset, uint32_t mem_mask = ~0); + void pif_ram_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + TIMER_CALLBACK_MEMBER(reset_timer_callback); + TIMER_CALLBACK_MEMBER(vi_scanline_callback); + TIMER_CALLBACK_MEMBER(dp_delay_callback); + TIMER_CALLBACK_MEMBER(ai_timer_callback); + TIMER_CALLBACK_MEMBER(pi_dma_callback); + TIMER_CALLBACK_MEMBER(si_dma_callback); + uint32_t dp_reg_r(offs_t offset, uint32_t mem_mask = ~0); + void dp_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + uint32_t sp_reg_r(offs_t offset); + void sp_reg_w(offs_t offset, uint32_t data, uint32_t mem_mask = ~0); + void sp_set_status(uint32_t data); + void signal_rcp_interrupt(int interrupt); + void check_interrupts(); + + void dp_full_sync(); + void ai_timer_tick(); + void pi_dma_tick(); + void si_dma_tick(); + void reset_tick(); + void video_update(bitmap_rgb32 &bitmap); + + // Video Interface (VI) registers + uint32_t vi_width; + uint32_t vi_origin; + uint32_t vi_control; + uint32_t vi_blank; + uint32_t vi_hstart; + uint32_t vi_vstart; + uint32_t vi_xscale; + uint32_t vi_yscale; + uint32_t vi_burst; + uint32_t vi_vsync; + uint32_t vi_hsync; + uint32_t vi_leap; + uint32_t vi_intr; + uint32_t vi_vburst; + uint8_t 
field; + + // nvram-specific for the console + device_t *m_nvram_image; + + n64_savable_data_t m_save_data; + + uint32_t cart_length; + + bool dd_present; + bool disk_present; + bool cart_present; + + // Mouse X2/Y2 for delta position + int mouse_x2[4]; + int mouse_y2[4]; + + void poll_reset_button(bool button); + + uint32_t dp_clock; + +protected: + // device-level overrides + virtual void device_start() override; + virtual void device_reset() override; + +private: + n64_state* m_n64; + address_space *m_mem_map; + required_device m_vr4300; + required_device m_rsp; + required_shared_ptr m_rsp_imem; + required_shared_ptr m_rsp_dmem; + + uint32_t *m_rdram; + uint32_t *m_sram; + + void clear_rcp_interrupt(int interrupt); + + bool reset_held; + emu_timer *reset_timer; + emu_timer *dp_delay_timer; + + uint8_t is64_buffer[0x10000]; + + // Video interface (VI) registers and functions + emu_timer *vi_scanline_timer; + + // Audio Interface (AI) registers and functions + void ai_dma(); + AUDIO_DMA *ai_fifo_get_top(); + void ai_fifo_push(uint32_t address, uint32_t length); + void ai_fifo_pop(); + bool ai_delayed_carry; + + required_device_array ai_dac; + uint32_t ai_dram_addr; + uint32_t ai_len; + uint32_t ai_control; + int ai_dacrate; + int ai_bitrate; + uint32_t ai_status; + + emu_timer *ai_timer; + + AUDIO_DMA ai_fifo[AUDIO_DMA_DEPTH]; + int ai_fifo_wpos; + int ai_fifo_rpos; + int ai_fifo_num; + + // Memory Interface (MI) registers + uint32_t mi_version; + uint32_t mi_interrupt; + uint32_t mi_intr_mask; + uint32_t mi_mode; + + // RDRAM Interface (RI) registers + uint32_t rdram_regs[10]; + uint32_t ri_regs[8]; + + // RSP Interface (SP) registers + void sp_dma(int direction); + + uint32_t sp_mem_addr; + uint32_t sp_dram_addr; + uint32_t sp_mem_addr_start; + uint32_t sp_dram_addr_start; + int sp_dma_length; + int sp_dma_count; + int sp_dma_skip; + uint32_t sp_semaphore; + + // Disk Drive (DD) registers and functions + void dd_set_zone_and_track_offset(); + void 
dd_update_bm(); + void dd_write_sector(); + void dd_read_sector(); + void dd_read_C2(); + uint32_t dd_buffer[256]; + uint32_t dd_sector_data[64]; + uint32_t dd_ram_seq_data[16]; + uint32_t dd_data_reg; + uint32_t dd_status_reg; + uint32_t dd_track_reg; + uint32_t dd_buf_status_reg; + uint32_t dd_sector_err_reg; + uint32_t dd_seq_status_reg; + uint32_t dd_seq_ctrl_reg; + uint32_t dd_sector_reg; + uint32_t dd_reset_reg; + uint32_t dd_current_reg; + bool dd_bm_reset_held; + bool dd_write; + uint8_t dd_int; + uint8_t dd_start_block; + uint8_t dd_start_sector; + uint8_t dd_sectors_per_block; + uint8_t dd_sector_size; + uint8_t dd_zone; + uint32_t dd_track_offset; + + // Peripheral Interface (PI) registers and functions + emu_timer *pi_dma_timer; + uint32_t pi_dram_addr; + uint32_t pi_cart_addr; + uint32_t pi_rd_len; + uint32_t pi_wr_len; + uint32_t pi_status; + uint32_t pi_bsd_dom1_lat; + uint32_t pi_bsd_dom1_pwd; + uint32_t pi_bsd_dom1_pgs; + uint32_t pi_bsd_dom1_rls; + uint32_t pi_bsd_dom2_lat; + uint32_t pi_bsd_dom2_pwd; + uint32_t pi_bsd_dom2_pgs; + uint32_t pi_bsd_dom2_rls; + uint32_t pi_dma_dir; + + // Serial Interface (SI) registers and functions + emu_timer *si_dma_timer; + void pif_dma(int direction); + void handle_pif(); + int pif_channel_handle_command(int channel, int slength, uint8_t *sdata, int rlength, uint8_t *rdata); + uint8_t calc_mempak_crc(uint8_t *buffer, int length); + uint8_t pif_ram[0x40]; + uint8_t pif_cmd[0x40]; + uint32_t si_dram_addr; + uint32_t si_pif_addr; + uint32_t si_pif_addr_rd64b; + uint32_t si_pif_addr_wr64b; + uint32_t si_status_val; + uint32_t si_dma_dir; + uint32_t cic_status; + int cic_type; + + n64_savable_data_t savable_data; + + // Video Interface (VI) functions + void vi_recalculate_resolution(); + void video_update16(bitmap_rgb32 &bitmap); + void video_update32(bitmap_rgb32 &bitmap); + uint8_t random_seed; // %HACK%, adds 19 each time it's read and is more or less random + uint8_t get_random() { return random_seed += 0x13; } 
+ + int32_t m_gamma_table[256]; + int32_t m_gamma_dither_table[0x4000]; + +}; + +// device type definition +DECLARE_DEVICE_TYPE(N64PERIPH, n64_periphs) +#endif + +/*----------- defined in video/n64.c -----------*/ + +#define DACRATE_NTSC (48681812) +#define DACRATE_PAL (49656530) +#define DACRATE_MPAL (48628316) + +/*----------- defined in machine/n64.c -----------*/ + +#define SP_INTERRUPT 0x1 +#define SI_INTERRUPT 0x2 +#define AI_INTERRUPT 0x4 +#define VI_INTERRUPT 0x8 +#define PI_INTERRUPT 0x10 +#define DP_INTERRUPT 0x20 + +#define SP_STATUS_HALT 0x0001 +#define SP_STATUS_BROKE 0x0002 +#define SP_STATUS_DMABUSY 0x0004 +#define SP_STATUS_DMAFULL 0x0008 +#define SP_STATUS_IOFULL 0x0010 +#define SP_STATUS_SSTEP 0x0020 +#define SP_STATUS_INTR_BREAK 0x0040 +#define SP_STATUS_SIGNAL0 0x0080 +#define SP_STATUS_SIGNAL1 0x0100 +#define SP_STATUS_SIGNAL2 0x0200 +#define SP_STATUS_SIGNAL3 0x0400 +#define SP_STATUS_SIGNAL4 0x0800 +#define SP_STATUS_SIGNAL5 0x1000 +#define SP_STATUS_SIGNAL6 0x2000 +#define SP_STATUS_SIGNAL7 0x4000 + +#define DP_STATUS_XBUS_DMA 0x01 +#define DP_STATUS_FREEZE 0x02 +#define DP_STATUS_FLUSH 0x04 +#define DP_STATUS_START_VALID 0x400 + +#define DD_ASIC_STATUS_DISK_CHANGE 0x00010000 +#define DD_ASIC_STATUS_MECHA_ERR 0x00020000 +#define DD_ASIC_STATUS_WRPROTECT_ERR 0x00040000 +#define DD_ASIC_STATUS_HEAD_RETRACT 0x00080000 +#define DD_ASIC_STATUS_MOTOR_OFF 0x00100000 +#define DD_ASIC_STATUS_RESET 0x00400000 +#define DD_ASIC_STATUS_BUSY 0x00800000 +#define DD_ASIC_STATUS_DISK 0x01000000 +#define DD_ASIC_STATUS_MECHA_INT 0x02000000 +#define DD_ASIC_STATUS_BM_INT 0x04000000 +#define DD_ASIC_STATUS_BM_ERROR 0x08000000 +#define DD_ASIC_STATUS_C2_XFER 0x10000000 +#define DD_ASIC_STATUS_DREQ 0x40000000 + +#define DD_TRACK_INDEX_LOCK 0x60000000 + +#define DD_BM_MECHA_INT_RESET 0x01000000 +#define DD_BM_XFERBLOCKS 0x02000000 +#define DD_BM_DISABLE_C1 0x04000000 +#define DD_BM_DISABLE_OR_CHK 0x08000000 +#define DD_BM_RESET 0x10000000 +#define DD_BM_INT_MASK 
0x20000000 +#define DD_BM_MODE 0x40000000 +#define DD_BM_START 0x80000000 + +#define DD_BMST_RUNNING 0x80000000 +#define DD_BMST_ERROR 0x04000000 +#define DD_BMST_MICRO_STATUS 0x02000000 +#define DD_BMST_BLOCKS 0x01000000 +#define DD_BMST_C1_CORRECT 0x00800000 +#define DD_BMST_C1_DOUBLE 0x00400000 +#define DD_BMST_C1_SINGLE 0x00200000 +#define DD_BMST_C1_ERROR 0x00010000 + +#define DD_ASIC_ERR_AM_FAIL 0x80000000 +#define DD_ASIC_ERR_MICRO_FAIL 0x40000000 +#define DD_ASIC_ERR_SPINDLE_FAIL 0x20000000 +#define DD_ASIC_ERR_OVER_RUN 0x10000000 +#define DD_ASIC_ERR_OFFTRACK 0x08000000 +#define DD_ASIC_ERR_NO_DISK 0x04000000 +#define DD_ASIC_ERR_CLOCK_UNLOCK 0x02000000 +#define DD_ASIC_ERR_SELF_STOP 0x01000000 + +#define DD_SEQ_MICRO_INT_MASK 0x80000000 +#define DD_SEQ_MICRO_PC_ENABLE 0x40000000 + +#define SECTORS_PER_BLOCK 85 +#define BLOCKS_PER_TRACK 2 + +const unsigned int ddZoneSecSize[16] = {232,216,208,192,176,160,144,128, + 216,208,192,176,160,144,128,112}; +const unsigned int ddZoneTrackSize[16] = {158,158,149,149,149,149,149,114, + 158,158,149,149,149,149,149,114}; +const unsigned int ddStartOffset[16] = + {0x0,0x5F15E0,0xB79D00,0x10801A0,0x1523720,0x1963D80,0x1D414C0,0x20BBCE0, + 0x23196E0,0x28A1E00,0x2DF5DC0,0x3299340,0x36D99A0,0x3AB70E0,0x3E31900,0x4149200}; + +#endif // MAME_INCLUDES_N64_H diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/n64.cpp b/waterbox/ares64/ares/thirdparty/mame/mame/video/n64.cpp new file mode 100644 index 0000000000..4a88912495 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/n64.cpp @@ -0,0 +1,4415 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +/****************************************************************************** + + + SGI/Nintendo Reality Display Processor + ------------------- + + by Ryan Holtz + based on initial C code by Ville Linde + contains additional improvements from angrylion, Ziggy, Gonetz and Orkin + + 
+******************************************************************************* + +STATUS: + +Much behavior needs verification against real hardware. Many edge cases must +be verified on real hardware as well. + +TODO: + +- Further re-work class structure to avoid dependencies + +*******************************************************************************/ + +#include "emu.h" +#include "includes/n64.h" +#include "video/rdpblend.h" +#include "video/rdptpipe.h" +#if !defined(MAME_RDP) +#include "screen.h" +#endif + +#include + +#define LOG_RDP_EXECUTION 0 +#define DEBUG_RDP_PIXEL 0 +#define DRAW_FRAME_COUNTER 0 + +#if DEBUG_RDP_PIXEL +static bool s_debug_drawing = false; +#endif + +static FILE* rdp_exec; + +uint32_t n64_rdp::s_special_9bit_clamptable[512]; + +bool n64_rdp::rdp_range_check(uint32_t addr) +{ + if(m_misc_state.m_fb_size == 0) return false; + + int32_t fbcount = ((m_misc_state.m_fb_width * m_scissor.m_yl) << (m_misc_state.m_fb_size - 1)) * 3; + int32_t fbaddr = m_misc_state.m_fb_address & 0x007fffff; + if ((addr >= fbaddr) && (addr < (fbaddr + fbcount))) + { + return false; + } + + int32_t zbcount = m_misc_state.m_fb_width * m_scissor.m_yl * 2; + int32_t zbaddr = m_misc_state.m_zb_address & 0x007fffff; + if ((addr >= zbaddr) && (addr < (zbaddr + zbcount))) + { + return false; + } + + printf("Check failed: %08x vs. %08x-%08x, %08x-%08x (%d, %d)\n", addr, fbaddr, fbaddr + fbcount, zbaddr, zbaddr + zbcount, m_misc_state.m_fb_width, m_scissor.m_yl); + fflush(stdout); + return true; +} + +/*****************************************************************************/ + +// The functions in this file should be moved into the parent Processor class. 
+#include "rdpfiltr.hxx" + +int32_t n64_rdp::get_alpha_cvg(int32_t comb_alpha, rdp_span_aux* userdata, const rdp_poly_state &object) +{ + int32_t temp = comb_alpha; + int32_t temp2 = userdata->m_current_pix_cvg; + int32_t temp3 = 0; + + if (object.m_other_modes.cvg_times_alpha) + { + temp3 = (temp * temp2) + 4; + userdata->m_current_pix_cvg = (temp3 >> 8) & 0xf; + } + if (object.m_other_modes.alpha_cvg_select) + { + temp = (m_other_modes.cvg_times_alpha) ? (temp3 >> 3) : (temp2 << 5); + } + if (temp > 0xff) + { + temp = 0xff; + } + return temp; +} + +/*****************************************************************************/ + +void n64_state::video_start() +{ + m_rdp = std::make_unique(*this, m_rdram, m_rsp_dmem); + + m_rdp->set_machine(machine()); + m_rdp->init_internal_state(); + m_rdp->set_n64_periphs(m_rcp_periphs); + + m_rdp->m_blender.set_machine(machine()); + m_rdp->m_blender.set_processor(m_rdp.get()); + + m_rdp->m_tex_pipe.set_machine(machine()); + + m_rdp->m_aux_buf = make_unique_clear(EXTENT_AUX_COUNT); + + if (LOG_RDP_EXECUTION) + { + rdp_exec = fopen("rdp_execute.txt", "wt"); + } +} + +#if !defined(MAME_RDP) +uint32_t n64_state::screen_update_n64(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect) +{ + //uint16_t* frame_buffer = (uint16_t*)&rdram[(m_rcp_periphs->vi_origin & 0xffffff) >> 2]; + //uint8_t* cvg_buffer = &m_rdp.m_hidden_bits[((m_rcp_periphs->vi_origin & 0xffffff) >> 2) >> 1]; + //int32_t vibuffering = ((m_rcp_periphs->vi_control & 2) && fsaa && divot); + + //vibuffering = 0; // Disabled for now + + /* + if (vibuffering && ((m_rcp_periphs->vi_control & 3) == 2)) + { + if (frame_buffer) + { + for (j=0; j < vres; j++) + { + for (i=0; i < hres; i++) + { + uint16_t pix; + pix = frame_buffer[pixels ^ WORD_ADDR_XOR]; + curpixel_cvg = ((pix & 1) << 2) | (cvg_buffer[pixels ^ BYTE_ADDR_XOR] & 3); // Reuse of this variable + if (curpixel_cvg < 7 && i > 1 && j > 1 && i < (hres - 2) && j < (vres - 2) && fsaa) + { + newc = 
video_filter16(&frame_buffer[pixels ^ WORD_ADDR_XOR], &cvg_buffer[pixels ^ BYTE_ADDR_XOR], m_rcp_periphs->vi_width); + ViBuffer[i][j] = newc; + } + else + { + newc.i.r = ((pix >> 8) & 0xf8) | (pix >> 13); + newc.i.g = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + newc.i.b = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + ViBuffer[i][j] = newc; + } + pixels++; + } + pixels += invisiblewidth; + } + } + } + */ + + m_rdp->mark_frame(); + + if (m_rcp_periphs->vi_blank) + { + bitmap.fill(0, screen.visible_area()); + return 0; + } + + m_rcp_periphs->video_update(bitmap); + + return 0; +} + +WRITE_LINE_MEMBER(n64_state::screen_vblank_n64) +{ +} + +void n64_periphs::video_update(bitmap_rgb32 &bitmap) +{ + + if (vi_control & 0x40) /* Interlace */ + { + field ^= 1; + } + else + { + field = 0; + } + + switch (vi_control & 0x3) + { + case PIXEL_SIZE_16BIT: + video_update16(bitmap); + break; + + case PIXEL_SIZE_32BIT: + video_update32(bitmap); + break; + + default: + //fatalerror("Unsupported framebuffer depth: m_fb_size=%d\n", m_misc_state.m_fb_size); + break; + } +} + +void n64_periphs::video_update16(bitmap_rgb32 &bitmap) +{ + //int32_t fsaa = (((n64->vi_control >> 8) & 3) < 2); + //int32_t divot = (n64->vi_control >> 4) & 1; + + //uint32_t prev_cvg = 0; + //uint32_t next_cvg = 0; + //int32_t dither_filter = (n64->vi_control >> 16) & 1; + //int32_t vibuffering = ((n64->vi_control & 2) && fsaa && divot); + + uint16_t* frame_buffer = (uint16_t*)&m_rdram[(vi_origin & 0xffffff) >> 2]; + //uint32_t hb = ((n64->vi_origin & 0xffffff) >> 2) >> 1; + //uint8_t* hidden_buffer = &m_hidden_bits[hb]; + + int32_t hend = vi_hstart & 0x3ff; + int32_t hstart = (vi_hstart >> 16) & 0x3ff; + int32_t hdiff = hend - hstart; + float hcoeff = ((float)(vi_xscale & 0xfff) / (1 << 10)); + uint32_t hres = ((float)hdiff * hcoeff); + + int32_t vend = (vi_vstart & 0x3ff) >> 1; + int32_t vstart = ((vi_vstart >> 16) & 0x3ff) >> 1; + int32_t vdiff = vend - vstart; + float vcoeff = ((float)(vi_yscale & 0xfff) / (1 
<< 10)); + uint32_t vres = ((float)vdiff * vcoeff); + + fflush(stdout); + + if (vdiff <= 0 || hdiff <= 0) + { + return; + } + + if (vres > bitmap.height()) // makes Perfect Dark boot w/o crashing + { + vres = bitmap.height(); + } + +#if DRAW_FRAME_COUNTER + static uint32_t frame_num = 0; + static const uint8_t s_numbers[10][9] = { + { 0x00, 0x3c, 0x66, 0x6e, 0x7e, 0x76, 0x66, 0x3c, 0x00 }, + { 0x00, 0x18, 0x38, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x00 }, + { 0x00, 0x3c, 0x66, 0x06, 0x3c, 0x60, 0x60, 0x7e, 0x00 }, + { 0x00, 0x3c, 0x66, 0x06, 0x0c, 0x06, 0x66, 0x3c, 0x00 }, + { 0x00, 0x66, 0x66, 0x66, 0x7e, 0x06, 0x06, 0x06, 0x00 }, + { 0x00, 0x7e, 0x60, 0x60, 0x7c, 0x06, 0x66, 0x3c, 0x00 }, + { 0x00, 0x3c, 0x66, 0x60, 0x7c, 0x66, 0x66, 0x3c, 0x00 }, + { 0x00, 0x7e, 0x66, 0x06, 0x0c, 0x18, 0x18, 0x18, 0x00 }, + { 0x00, 0x3c, 0x66, 0x66, 0x3c, 0x66, 0x66, 0x3c, 0x00 }, + { 0x00, 0x3c, 0x66, 0x66, 0x3e, 0x06, 0x66, 0x3c, 0x00 } + }; +#endif + + if (frame_buffer) + { +#if DRAW_FRAME_COUNTER + uint32_t digits[4] = { (frame_num / 1000) % 10, (frame_num / 100) % 10, (frame_num / 10) % 10, frame_num % 10 }; + + for (int32_t d = 0; d < 4; d++) + { + for (int32_t y = 0; y < 9; y++) + { + const uint8_t *pixdata = s_numbers[digits[d]]; + for (int32_t x = 0; x < 8; x++) + { + frame_buffer[((y + 16) * vi_width + d * 8 + x + 16) ^ WORD_ADDR_XOR] = BIT(pixdata[y], 7 - x) ? 0x0000 : 0xffff; + } + } + } +#if DEBUG_RDP_PIXEL + s_debug_drawing = (frame_num == 1392); +#endif + frame_num++; +#endif + + const uint32_t aa_control = (vi_control >> 8) & 3; + float v0 = 0.0f; + if (aa_control < 3) // Resample pixels + { + for (int32_t j = 0; j < vdiff; j++, v0 += vcoeff) + { + uint32_t *const d = &bitmap.pix(j); + + float u0 = (float)0.0f; + + int iv0 = (int)v0; + int pix_v0_line = iv0 * vi_width; + + int iv1 = (iv0 >= (vres - 1) ? iv0 : (iv0 + 1)); + int pix_v1_line = iv1 * vi_width; + + for (int32_t i = 0; i < hdiff; i++) + { + int iu0 = (int)u0; + int iu1 = (iu0 >= (hres - 1) ? 
iu0 : (iu0 + 1)); + uint16_t pix00 = frame_buffer[(pix_v0_line + iu0) ^ WORD_ADDR_XOR]; + uint16_t pix10 = frame_buffer[(pix_v0_line + iu1) ^ WORD_ADDR_XOR]; + uint16_t pix01 = frame_buffer[(pix_v1_line + iu0) ^ WORD_ADDR_XOR]; + uint16_t pix11 = frame_buffer[(pix_v1_line + iu1) ^ WORD_ADDR_XOR]; + + const uint8_t r00 = ((pix00 >> 8) & 0xf8) | (pix00 >> 13); + const uint8_t g00 = ((pix00 >> 3) & 0xf8) | ((pix00 >> 8) & 0x07); + const uint8_t b00 = ((pix00 << 2) & 0xf8) | ((pix00 >> 3) & 0x07); + + const uint8_t r10 = ((pix10 >> 8) & 0xf8) | (pix10 >> 13); + const uint8_t g10 = ((pix10 >> 3) & 0xf8) | ((pix10 >> 8) & 0x07); + const uint8_t b10 = ((pix10 << 2) & 0xf8) | ((pix10 >> 3) & 0x07); + + const uint8_t r01 = ((pix01 >> 8) & 0xf8) | (pix01 >> 13); + const uint8_t g01 = ((pix01 >> 3) & 0xf8) | ((pix01 >> 8) & 0x07); + const uint8_t b01 = ((pix01 << 2) & 0xf8) | ((pix01 >> 3) & 0x07); + + const uint8_t r11 = ((pix11 >> 8) & 0xf8) | (pix11 >> 13); + const uint8_t g11 = ((pix11 >> 3) & 0xf8) | ((pix11 >> 8) & 0x07); + const uint8_t b11 = ((pix11 << 2) & 0xf8) | ((pix11 >> 3) & 0x07); + + const float ut = u0 - (int)u0; + const float vt = v0 - (int)v0; + + float ur0 = (1.0f - ut) * r00 + ut * r10; + float ug0 = (1.0f - ut) * g00 + ut * g10; + float ub0 = (1.0f - ut) * b00 + ut * b10; + + float ur1 = (1.0f - ut) * r01 + ut * r11; + float ug1 = (1.0f - ut) * g01 + ut * g11; + float ub1 = (1.0f - ut) * b01 + ut * b11; + + float r = (1.0f - vt) * ur0 + vt * ur1; + float g = (1.0f - vt) * ug0 + vt * ug1; + float b = (1.0f - vt) * ub0 + vt * ub1; + + uint8_t r8 = std::clamp((uint8_t)r, (uint8_t)0, (uint8_t)255); + uint8_t g8 = std::clamp((uint8_t)g, (uint8_t)0, (uint8_t)255); + uint8_t b8 = std::clamp((uint8_t)b, (uint8_t)0, (uint8_t)255); + + d[iu0] = (r8 << 16) | (g8 << 8) | b8; + + u0 += hcoeff; + } + } + } + else // Replicate pixels + { + for (int32_t j = 0; j < vdiff; j++, v0 += vcoeff) + { + uint32_t *const d = &bitmap.pix(j); + + int iv0 = (int)v0; + int 
pix_v0_line = iv0 * vi_width; + + for (int32_t i = 0; i < hdiff; i++) + { + int u0 = (int)(i * hcoeff); + uint16_t pix = frame_buffer[(pix_v0_line + u0) ^ WORD_ADDR_XOR]; + + const uint8_t r = ((pix >> 8) & 0xf8) | (pix >> 13); + const uint8_t g = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + const uint8_t b = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + d[u0] = (r << 16) | (g << 8) | b; + } + } + } + } +} + +void n64_periphs::video_update32(bitmap_rgb32 &bitmap) +{ + //int32_t gamma = (vi_control >> 3) & 1; + //int32_t gamma_dither = (vi_control >> 2) & 1; + //int32_t vibuffering = ((n64->vi_control & 2) && fsaa && divot); + + uint32_t* frame_buffer32 = (uint32_t*)&m_rdram[(vi_origin & 0xffffff) >> 2]; + + int32_t hend = vi_hstart & 0x3ff; + int32_t hstart = (vi_hstart >> 16) & 0x3ff; + int32_t hdiff = hend - hstart; + const float hcoeff = ((float)(vi_xscale & 0xfff) / (1 << 10)); + uint32_t hres = ((float)hdiff * hcoeff); + + int32_t vend = (vi_vstart & 0x3ff) >> 1; + int32_t vstart = ((vi_vstart >> 16) & 0x3ff) >> 1; + int32_t vdiff = vend - vstart; + const float vcoeff = ((float)(vi_yscale & 0xfff) / (1 << 10)); + const uint32_t vres = ((float)vdiff * vcoeff); + + if (vdiff <= 0 || hdiff <= 0) + { + return; + } + + //printf("hd,vd: %d,%d hc,vc: %f,%f hs,he: %d,%d vs,ve: %d,%d hr,vr: %d, %d viw: %d\n", hdiff, vdiff, hcoeff, vcoeff, hstart, hend, vstart, vend, hres, vres, vi_width); + + if (frame_buffer32) + { + const uint32_t aa_control = (vi_control >> 8) & 3; + float v0 = 0.0f; + if (aa_control < 3) // Resample pixels + { + for (int32_t j = 0; j < vres; j++, v0 += 1.0f) + { + uint32_t *const d = &bitmap.pix(j); + + float u0 = 0.0f; + + int iv0 = (int)v0; + int pix_v0_line = iv0 * vi_width; + + int iv1 = (iv0 >= (vres - 1) ? iv0 : (iv0 + 1)); + int pix_v1_line = iv1 * vi_width; + + for (int32_t i = 0; i < hdiff; i++) + { + int iu0 = (int)u0; + int iu1 = (iu0 >= (hres - 1) ? 
iu0 : (iu0 + 1)); + uint32_t pix00 = frame_buffer32[pix_v0_line + iu0]; + uint32_t pix10 = frame_buffer32[pix_v0_line + iu1]; + uint32_t pix01 = frame_buffer32[pix_v1_line + iu0]; + uint32_t pix11 = frame_buffer32[pix_v1_line + iu1]; + + const uint8_t r00 = (uint8_t)(pix00 >> 24); + const uint8_t g00 = (uint8_t)(pix00 >> 16); + const uint8_t b00 = (uint8_t)(pix00 >> 8); + + const uint8_t r10 = (uint8_t)(pix01 >> 24); + const uint8_t g10 = (uint8_t)(pix01 >> 16); + const uint8_t b10 = (uint8_t)(pix01 >> 8); + + const uint8_t r01 = (uint8_t)(pix10 >> 24); + const uint8_t g01 = (uint8_t)(pix10 >> 16); + const uint8_t b01 = (uint8_t)(pix10 >> 8); + + const uint8_t r11 = (uint8_t)(pix11 >> 24); + const uint8_t g11 = (uint8_t)(pix11 >> 16); + const uint8_t b11 = (uint8_t)(pix11 >> 8); + + const float ut = u0 - (int)u0; + const float vt = v0 - (int)v0; + + float ur0 = (1.0f - ut) * r00 + ut * r10; + float ug0 = (1.0f - ut) * g00 + ut * g10; + float ub0 = (1.0f - ut) * b00 + ut * b10; + + float ur1 = (1.0f - ut) * r01 + ut * r11; + float ug1 = (1.0f - ut) * g01 + ut * g11; + float ub1 = (1.0f - ut) * b01 + ut * b11; + + float r = (1.0f - vt) * ur0 + vt * ur1; + float g = (1.0f - vt) * ug0 + vt * ug1; + float b = (1.0f - vt) * ub0 + vt * ub1; + + uint8_t r8 = std::clamp((uint8_t)r, (uint8_t)0, (uint8_t)255); + uint8_t g8 = std::clamp((uint8_t)g, (uint8_t)0, (uint8_t)255); + uint8_t b8 = std::clamp((uint8_t)b, (uint8_t)0, (uint8_t)255); + + d[iu0] = (r8 << 16) | (g8 << 8) | b8; + + u0 += hcoeff; + } + } + } + else // Replicate pixels + { + for (int32_t j = 0; j < vdiff; j++, v0 += vcoeff) + { + uint32_t *const d = &bitmap.pix(j); + + int iv0 = (int)v0; + int pix_v0_line = iv0 * vi_width; + + for (int32_t i = 0; i < hdiff; i++) + { + int u0 = (int)(i * hcoeff); + d[u0] = (frame_buffer32[pix_v0_line + u0] >> 8); + } + } + } + } +} +#endif + +/*****************************************************************************/ + +void n64_rdp::tc_div_no_perspective(int32_t ss, 
int32_t st, int32_t sw, int32_t* sss, int32_t* sst) +{ + *sss = (SIGN16(ss)) & 0x1ffff; + *sst = (SIGN16(st)) & 0x1ffff; +} + +void n64_rdp::tc_div(int32_t ss, int32_t st, int32_t sw, int32_t* sss, int32_t* sst) +{ + int32_t w_carry = 0; + if ((sw & 0x8000) || !(sw & 0x7fff)) + { + w_carry = 1; + } + + sw &= 0x7fff; + + int32_t shift; + for (shift = 1; shift <= 14 && !((sw << shift) & 0x8000); shift++); + shift -= 1; + + int32_t normout = (sw << shift) & 0x3fff; + int32_t wnorm = (normout & 0xff) << 2; + normout >>= 8; + + int32_t temppoint = m_norm_point_rom[normout]; + int32_t tempslope = m_norm_slope_rom[normout]; + + int32_t tlu_rcp = ((-(tempslope * wnorm)) >> 10) + temppoint; + + int32_t sprod = SIGN16(ss) * tlu_rcp; + int32_t tprod = SIGN16(st) * tlu_rcp; + int32_t tempmask = ((1 << (shift + 1)) - 1) << (29 - shift); + int32_t shift_value = 13 - shift; + + int32_t outofbounds_s = sprod & tempmask; + int32_t outofbounds_t = tprod & tempmask; + if (shift == 0xe) + { + *sss = sprod << 1; + *sst = tprod << 1; + } + else + { + *sss = sprod = (sprod >> shift_value); + *sst = tprod = (tprod >> shift_value); + } + //compute clamp flags + int32_t under_s = 0; + int32_t under_t = 0; + int32_t over_s = 0; + int32_t over_t = 0; + + if (outofbounds_s != tempmask && outofbounds_s != 0) + { + if (sprod & (1 << 29)) + { + under_s = 1; + } + else + { + over_s = 1; + } + } + + if (outofbounds_t != tempmask && outofbounds_t != 0) + { + if (tprod & (1 << 29)) + { + under_t = 1; + } + else + { + over_t = 1; + } + } + + over_s |= w_carry; + over_t |= w_carry; + + *sss = (*sss & 0x1ffff) | (over_s << 18) | (under_s << 17); + *sst = (*sst & 0x1ffff) | (over_t << 18) | (under_t << 17); +} + +int32_t n64_rdp::color_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d) +{ + a = KURT_AKELEY_SIGN9(a); + b = KURT_AKELEY_SIGN9(b); + c = SIGN9(c); + d = KURT_AKELEY_SIGN9(d); + a = (((a - b) * c) + (d << 8) + 0x80); + a = SIGN17(a) >> 8; + a = s_special_9bit_clamptable[a & 0x1ff]; + 
return a; +} + +int32_t n64_rdp::alpha_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d) +{ + a = KURT_AKELEY_SIGN9(a); + b = KURT_AKELEY_SIGN9(b); + c = SIGN9(c); + d = KURT_AKELEY_SIGN9(d); + a = (((a - b) * c) + (d << 8) + 0x80) >> 8; + a = SIGN9(a); + a = s_special_9bit_clamptable[a & 0x1ff]; + return a; +} + +void n64_rdp::set_suba_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata) +{ + switch (code & 0xf) + { + case 0: *input = &userdata->m_combined_color; break; + case 1: *input = &userdata->m_texel0_color; break; + case 2: *input = &userdata->m_texel1_color; break; + case 3: *input = &userdata->m_prim_color; break; + case 4: *input = &userdata->m_shade_color; break; + case 5: *input = &userdata->m_env_color; break; + case 6: *input = &m_one; break; + case 7: *input = &userdata->m_noise_color; break; + case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15: + { + *input = &m_zero; break; + } + } +} + +void n64_rdp::set_subb_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata) +{ + switch (code & 0xf) + { + case 0: *input = &userdata->m_combined_color; break; + case 1: *input = &userdata->m_texel0_color; break; + case 2: *input = &userdata->m_texel1_color; break; + case 3: *input = &userdata->m_prim_color; break; + case 4: *input = &userdata->m_shade_color; break; + case 5: *input = &userdata->m_env_color; break; + case 6: fatalerror("SET_SUBB_RGB_INPUT: key_center\n"); + case 7: *input = &userdata->m_k4; break; + case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15: + { + *input = &m_zero; break; + } + } +} + +void n64_rdp::set_mul_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata) +{ + switch (code & 0x1f) + { + case 0: *input = &userdata->m_combined_color; break; + case 1: *input = &userdata->m_texel0_color; break; + case 2: *input = &userdata->m_texel1_color; break; + case 3: *input = &userdata->m_prim_color; break; + case 4: *input = &userdata->m_shade_color; break; + case 
5: *input = &userdata->m_env_color; break; + case 6: *input = &userdata->m_key_scale; break; + case 7: *input = &userdata->m_combined_alpha; break; + case 8: *input = &userdata->m_texel0_alpha; break; + case 9: *input = &userdata->m_texel1_alpha; break; + case 10: *input = &userdata->m_prim_alpha; break; + case 11: *input = &userdata->m_shade_alpha; break; + case 12: *input = &userdata->m_env_alpha; break; + case 13: *input = &userdata->m_lod_fraction; break; + case 14: *input = &userdata->m_prim_lod_fraction; break; + case 15: *input = &userdata->m_k5; break; + case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23: + case 24: case 25: case 26: case 27: case 28: case 29: case 30: case 31: + { + *input = &m_zero; break; + } + } +} + +void n64_rdp::set_add_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata) +{ + switch (code & 0x7) + { + case 0: *input = &userdata->m_combined_color; break; + case 1: *input = &userdata->m_texel0_color; break; + case 2: *input = &userdata->m_texel1_color; break; + case 3: *input = &userdata->m_prim_color; break; + case 4: *input = &userdata->m_shade_color; break; + case 5: *input = &userdata->m_env_color; break; + case 6: *input = &m_one; break; + case 7: *input = &m_zero; break; + } +} + +void n64_rdp::set_sub_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata) +{ + switch (code & 0x7) + { + case 0: *input = &userdata->m_combined_alpha; break; + case 1: *input = &userdata->m_texel0_alpha; break; + case 2: *input = &userdata->m_texel1_alpha; break; + case 3: *input = &userdata->m_prim_alpha; break; + case 4: *input = &userdata->m_shade_alpha; break; + case 5: *input = &userdata->m_env_alpha; break; + case 6: *input = &m_one; break; + case 7: *input = &m_zero; break; + } +} + +void n64_rdp::set_mul_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata) +{ + switch (code & 0x7) + { + case 0: *input = &userdata->m_lod_fraction; break; + case 1: *input = &userdata->m_texel0_alpha; 
break; + case 2: *input = &userdata->m_texel1_alpha; break; + case 3: *input = &userdata->m_prim_alpha; break; + case 4: *input = &userdata->m_shade_alpha; break; + case 5: *input = &userdata->m_env_alpha; break; + case 6: *input = &userdata->m_prim_lod_fraction; break; + case 7: *input = &m_zero; break; + } +} + +void n64_rdp::set_blender_input(int32_t cycle, int32_t which, color_t** input_rgb, color_t** input_a, int32_t a, int32_t b, rdp_span_aux* userdata) +{ + switch (a & 0x3) + { + case 0: + *input_rgb = cycle == 0 ? &userdata->m_pixel_color : &userdata->m_blended_pixel_color; + break; + + case 1: + *input_rgb = &userdata->m_memory_color; + break; + + case 2: + *input_rgb = &userdata->m_blend_color; + break; + + case 3: + *input_rgb = &userdata->m_fog_color; + break; + } + + if (which == 0) + { + switch (b & 0x3) + { + case 0: *input_a = &userdata->m_pixel_color; break; + case 1: *input_a = &userdata->m_fog_color; break; + case 2: *input_a = &userdata->m_shade_color; break; + case 3: *input_a = &m_zero; break; + } + } + else + { + switch (b & 0x3) + { + case 0: *input_a = &userdata->m_inv_pixel_color; break; + case 1: *input_a = &userdata->m_memory_color; break; + case 2: *input_a = &m_one; break; + case 3: *input_a = &m_zero; break; + } + } +} + +uint8_t const n64_rdp::s_bayer_matrix[16] = +{ /* Bayer matrix */ + 0, 4, 1, 5, + 6, 2, 7, 3, + 1, 5, 0, 4, + 7, 3, 6, 2 +}; + +uint8_t const n64_rdp::s_magic_matrix[16] = +{ /* Magic square matrix */ + 0, 4, 3, 7, + 6, 2, 5, 1, + 1, 5, 2, 6, + 7, 3, 4, 0 +}; + +z_decompress_entry_t const n64_rdp::m_z_dec_table[8] = +{ + { 6, 0x00000 }, + { 5, 0x20000 }, + { 4, 0x30000 }, + { 3, 0x38000 }, + { 2, 0x3c000 }, + { 1, 0x3e000 }, + { 0, 0x3f000 }, + { 0, 0x3f800 }, +}; + +/*****************************************************************************/ + +void n64_rdp::z_build_com_table(void) +{ + uint16_t altmem = 0; + for(int32_t z = 0; z < 0x40000; z++) + { + switch((z >> 11) & 0x7f) + { + case 0x00: + case 0x01: + case 
0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + altmem = (z >> 4) & 0x1ffc; + break; + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5a: + case 0x5b: + case 0x5c: + case 0x5d: + case 0x5e: + case 0x5f: + altmem = ((z >> 3) & 0x1ffc) | 0x2000; + break; + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6a: + case 0x6b: + case 0x6c: + case 0x6d: + case 0x6e: + case 0x6f: + altmem = ((z >> 2) & 0x1ffc) | 0x4000; + break; + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + altmem = ((z >> 1) & 0x1ffc) | 0x6000; + break; + case 0x78://uncompressed z = 0x3c000 + case 0x79: + case 0x7a: + case 0x7b: + altmem = (z & 0x1ffc) | 0x8000; + break; + case 0x7c://uncompressed z = 0x3e000 + case 0x7d: + altmem = ((z << 1) & 0x1ffc) | 0xa000; + break; + case 0x7e://uncompressed z = 0x3f000 + altmem = ((z << 2) 
& 0x1ffc) | 0xc000; + break; + case 0x7f://uncompressed z = 0x3f000 + altmem = ((z << 2) & 0x1ffc) | 0xe000; + break; + } + + m_z_com_table[z] = altmem; + + } +} + +void n64_rdp::precalc_cvmask_derivatives(void) +{ + const uint8_t yarray[16] = {0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; + const uint8_t xarray[16] = {0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}; + + for (int32_t i = 0; i < 0x10000; i++) + { + m_compressed_cvmasks[i] = (i & 1) | ((i & 4) >> 1) | ((i & 0x20) >> 3) | ((i & 0x80) >> 4) | + ((i & 0x100) >> 4) | ((i & 0x400) >> 5) | ((i & 0x2000) >> 7) | ((i & 0x8000) >> 8); + } + + for (int32_t i = 0; i < 0x100; i++) + { + uint16_t mask = decompress_cvmask_frombyte(i); + cvarray[i].cvg = cvarray[i].cvbit = 0; + cvarray[i].cvbit = (i >> 7) & 1; + for (int32_t k = 0; k < 8; k++) + { + cvarray[i].cvg += ((i >> k) & 1); + } + + uint16_t masky = 0; + for (int32_t k = 0; k < 4; k++) + { + masky |= ((mask & (0xf000 >> (k << 2))) > 0) << k; + } + uint8_t offy = yarray[masky]; + + uint16_t maskx = (mask & (0xf000 >> (offy << 2))) >> ((offy ^ 3) << 2); + uint8_t offx = xarray[maskx]; + + cvarray[i].xoff = offx; + cvarray[i].yoff = offy; + } +} + +uint16_t n64_rdp::decompress_cvmask_frombyte(uint8_t x) +{ + uint16_t y = (x & 1) | ((x & 2) << 1) | ((x & 4) << 3) | ((x & 8) << 4) | + ((x & 0x10) << 4) | ((x & 0x20) << 5) | ((x & 0x40) << 7) | ((x & 0x80) << 8); + return y; +} + +void n64_rdp::lookup_cvmask_derivatives(uint32_t mask, uint8_t* offx, uint8_t* offy, rdp_span_aux* userdata) +{ + const uint32_t index = m_compressed_cvmasks[mask]; + userdata->m_current_pix_cvg = cvarray[index].cvg; + userdata->m_current_cvg_bit = cvarray[index].cvbit; + *offx = cvarray[index].xoff; + *offy = cvarray[index].yoff; +} + +void n64_rdp::z_store(const rdp_poly_state &object, uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t z, uint32_t enc) +{ + uint16_t zval = m_z_com_table[z & 0x3ffff]|(enc >> 2); + if(zcurpixel <= MEM16_LIMIT) + { + ((uint16_t*)m_rdram)[zcurpixel ^ 
WORD_ADDR_XOR] = zval; + } + if(dzcurpixel <= MEM8_LIMIT) + { + m_hidden_bits[dzcurpixel ^ BYTE_ADDR_XOR] = enc & 3; + } +} + +int32_t n64_rdp::normalize_dzpix(int32_t sum) +{ + if (sum & 0xc000) + { + return 0x8000; + } + if (!(sum & 0xffff)) + { + return 1; + } + for(int32_t count = 0x2000; count > 0; count >>= 1) + { + if (sum & count) + { + return(count << 1); + } + } + return 0; +} + +uint32_t n64_rdp::z_decompress(uint32_t zcurpixel) +{ + return m_z_complete_dec_table[(RREADIDX16(zcurpixel) >> 2) & 0x3fff]; +} + +uint32_t n64_rdp::dz_decompress(uint32_t zcurpixel, uint32_t dzcurpixel) +{ + const uint16_t zval = RREADIDX16(zcurpixel); + const uint8_t dzval = (((dzcurpixel) <= 0x7fffff) ? (m_hidden_bits[(dzcurpixel) ^ BYTE_ADDR_XOR]) : 0); + const uint32_t dz_compressed = ((zval & 3) << 2) | (dzval & 3); + return (1 << dz_compressed); +} + +uint32_t n64_rdp::dz_compress(uint32_t value) +{ + int32_t j = 0; + for (; value > 1; j++, value >>= 1); + return j; +} + +void n64_rdp::get_dither_values(int32_t x, int32_t y, int32_t* cdith, int32_t* adith, const rdp_poly_state& object) +{ + const int32_t dithindex = ((y & 3) << 2) | (x & 3); + switch((object.m_other_modes.rgb_dither_sel << 2) | object.m_other_modes.alpha_dither_sel) + { + case 0: + *adith = *cdith = s_magic_matrix[dithindex]; + break; + case 1: + *cdith = s_magic_matrix[dithindex]; + *adith = (~(*cdith)) & 7; + break; + case 2: + *cdith = s_magic_matrix[dithindex]; + *adith = machine().rand() & 7; + break; + case 3: + *cdith = s_magic_matrix[dithindex]; + *adith = 0; + break; + case 4: + *adith = *cdith = s_bayer_matrix[dithindex]; + break; + case 5: + *cdith = s_bayer_matrix[dithindex]; + *adith = (~(*cdith)) & 7; + break; + case 6: + *cdith = s_bayer_matrix[dithindex]; + *adith = machine().rand() & 7; + break; + case 7: + *cdith = s_bayer_matrix[dithindex]; + *adith = 0; + break; + case 8: + *cdith = machine().rand() & 7; + *adith = s_magic_matrix[dithindex]; + break; + case 9: + *cdith = 
machine().rand() & 7; + *adith = (~s_magic_matrix[dithindex]) & 7; + break; + case 10: + *cdith = machine().rand() & 7; + *adith = (*cdith + 17) & 7; + break; + case 11: + *cdith = machine().rand() & 7; + *adith = 0; + break; + case 12: + *cdith = 0; + *adith = s_bayer_matrix[dithindex]; + break; + case 13: + *cdith = 0; + *adith = (~s_bayer_matrix[dithindex]) & 7; + break; + case 14: + *cdith = 0; + *adith = machine().rand() & 7; + break; + case 15: + *adith = *cdith = 0; + break; + } +} + +int32_t CLAMP(int32_t in, int32_t min, int32_t max) +{ + if(in < min) return min; + if(in > max) return max; + return in; +} + +bool n64_rdp::z_compare(uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t sz, uint16_t dzpix, rdp_span_aux* userdata, const rdp_poly_state &object) +{ + bool force_coplanar = false; + sz &= 0x3ffff; + + uint32_t oz; + uint32_t dzmem; + uint32_t zval; + int32_t rawdzmem; + + if (object.m_other_modes.z_compare_en) + { + oz = z_decompress(zcurpixel); + dzmem = dz_decompress(zcurpixel, dzcurpixel); + zval = RREADIDX16(zcurpixel); + rawdzmem = ((zval & 3) << 2) | ((((dzcurpixel) <= 0x3fffff) ? (m_hidden_bits[(dzcurpixel) ^ BYTE_ADDR_XOR]) : 0) & 3); + } + else + { + oz = 0; + dzmem = 1 << 0xf; + zval = 0x3; + rawdzmem = 0xf; + } + + userdata->m_dzpix_enc = dz_compress(dzpix & 0xffff); + userdata->m_shift_a = CLAMP(userdata->m_dzpix_enc - rawdzmem, 0, 4); + userdata->m_shift_b = CLAMP(rawdzmem - userdata->m_dzpix_enc, 0, 4); + + int32_t precision_factor = (zval >> 13) & 0xf; + if (precision_factor < 3) + { + int32_t dzmemmodifier = 16 >> precision_factor; + if (dzmem == 0x8000) + { + force_coplanar = true; + } + dzmem <<= 1; + if (dzmem <= dzmemmodifier) + { + dzmem = dzmemmodifier; + } + if (!dzmem) + { + dzmem = 0xffff; + } + } + if (dzmem > 0x8000) + { + dzmem = 0xffff; + } + + uint32_t dznew = (dzmem > dzpix) ? 
dzmem : (uint32_t)dzpix; + uint32_t dznotshift = dznew; + dznew <<= 3; + + bool farther = (sz + dznew) >= oz; + bool infront = sz < oz; + + if (force_coplanar) + { + farther = true; + } + + bool overflow = ((userdata->m_current_mem_cvg + userdata->m_current_pix_cvg) & 8) > 0; + userdata->m_blend_enable = (object.m_other_modes.force_blend || (!overflow && object.m_other_modes.antialias_en && farther)) ? 1 : 0; + userdata->m_pre_wrap = overflow; + + int32_t cvgcoeff = 0; + uint32_t dzenc = 0; + + if (object.m_other_modes.z_mode == 1 && infront && farther && overflow) + { + dzenc = dz_compress(dznotshift & 0xffff); + cvgcoeff = ((oz >> dzenc) - (sz >> dzenc)) & 0xf; + userdata->m_current_pix_cvg = ((cvgcoeff * userdata->m_current_pix_cvg) >> 3) & 0xf; + } + + if (!object.m_other_modes.z_compare_en) + { + return true; + } + + int32_t diff = (int32_t)sz - (int32_t)dznew; + bool nearer = diff <= (int32_t)oz; + bool max = (oz == 0x3ffff); + if (force_coplanar) + { + nearer = true; + } + + switch(object.m_other_modes.z_mode) + { + case 0: + return (max || (overflow ? infront : nearer)); + case 1: + return (max || (overflow ? infront : nearer)); + case 2: + return (infront || max); + case 3: + return (farther && nearer && !max); + } + + return false; +} + +uint32_t n64_rdp::get_log2(uint32_t lod_clamp) +{ + if (lod_clamp < 2) + { + return 0; + } + else + { + for (int32_t i = 7; i > 0; i--) + { + if ((lod_clamp >> i) & 1) + { + return i; + } + } + } + + return 0; +} + +/*****************************************************************************/ + +uint64_t n64_rdp::read_data(uint32_t address) +{ + if (m_status & 0x1) // XBUS_DMEM_DMA enabled + { + return (uint64_t(m_dmem[(address & 0xfff) / 4]) << 32) | m_dmem[((address + 4) & 0xfff) / 4]; + } + else + { + return (uint64_t(m_rdram[((address & 0xffffff) / 4)]) << 32) | m_rdram[(((address + 4) & 0xffffff) / 4)]; + } +} + +char const *const n64_rdp::s_image_format[] = { "RGBA", "YUV", "CI", "IA", "I", "???", "???", "???" 
}; +char const *const n64_rdp::s_image_size[] = { "4-bit", "8-bit", "16-bit", "32-bit" }; + +int32_t const n64_rdp::s_rdp_command_length[64] = +{ + 8, // 0x00, No Op + 8, // 0x01, ??? + 8, // 0x02, ??? + 8, // 0x03, ??? + 8, // 0x04, ??? + 8, // 0x05, ??? + 8, // 0x06, ??? + 8, // 0x07, ??? + 32, // 0x08, Non-Shaded Triangle + 32+16, // 0x09, Non-Shaded, Z-Buffered Triangle + 32+64, // 0x0a, Textured Triangle + 32+64+16, // 0x0b, Textured, Z-Buffered Triangle + 32+64, // 0x0c, Shaded Triangle + 32+64+16, // 0x0d, Shaded, Z-Buffered Triangle + 32+64+64, // 0x0e, Shaded+Textured Triangle + 32+64+64+16,// 0x0f, Shaded+Textured, Z-Buffered Triangle + 8, // 0x10, ??? + 8, // 0x11, ??? + 8, // 0x12, ??? + 8, // 0x13, ??? + 8, // 0x14, ??? + 8, // 0x15, ??? + 8, // 0x16, ??? + 8, // 0x17, ??? + 8, // 0x18, ??? + 8, // 0x19, ??? + 8, // 0x1a, ??? + 8, // 0x1b, ??? + 8, // 0x1c, ??? + 8, // 0x1d, ??? + 8, // 0x1e, ??? + 8, // 0x1f, ??? + 8, // 0x20, ??? + 8, // 0x21, ??? + 8, // 0x22, ??? + 8, // 0x23, ??? + 16, // 0x24, Texture_Rectangle + 16, // 0x25, Texture_Rectangle_Flip + 8, // 0x26, Sync_Load + 8, // 0x27, Sync_Pipe + 8, // 0x28, Sync_Tile + 8, // 0x29, Sync_Full + 8, // 0x2a, Set_Key_GB + 8, // 0x2b, Set_Key_R + 8, // 0x2c, Set_Convert + 8, // 0x2d, Set_Scissor + 8, // 0x2e, Set_Prim_Depth + 8, // 0x2f, Set_Other_Modes + 8, // 0x30, Load_TLUT + 8, // 0x31, ??? 
+ 8, // 0x32, Set_Tile_Size + 8, // 0x33, Load_Block + 8, // 0x34, Load_Tile + 8, // 0x35, Set_Tile + 8, // 0x36, Fill_Rectangle + 8, // 0x37, Set_Fill_Color + 8, // 0x38, Set_Fog_Color + 8, // 0x39, Set_Blend_Color + 8, // 0x3a, Set_Prim_Color + 8, // 0x3b, Set_Env_Color + 8, // 0x3c, Set_Combine + 8, // 0x3d, Set_Texture_Image + 8, // 0x3e, Set_Mask_Image + 8 // 0x3f, Set_Color_Image +}; + +void n64_rdp::disassemble(uint64_t *cmd_buf, char* buffer) +{ + char sl[32], tl[32], sh[32], th[32]; + char s[32], t[32], w[32]; + char dsdx[32], dtdx[32], dwdx[32]; + char dsdy[32], dtdy[32], dwdy[32]; + char dsde[32], dtde[32], dwde[32]; + char yl[32], yh[32], ym[32], xl[32], xh[32], xm[32]; + char dxldy[32], dxhdy[32], dxmdy[32]; + char rt[32], gt[32], bt[32], at[32]; + char drdx[32], dgdx[32], dbdx[32], dadx[32]; + char drdy[32], dgdy[32], dbdy[32], dady[32]; + char drde[32], dgde[32], dbde[32], dade[32]; + + const int32_t tile = (cmd_buf[0] >> 56) & 0x7; + sprintf(sl, "%4.2f", (float)((cmd_buf[0] >> 44) & 0xfff) / 4.0f); + sprintf(tl, "%4.2f", (float)((cmd_buf[0] >> 32) & 0xfff) / 4.0f); + sprintf(sh, "%4.2f", (float)((cmd_buf[0] >> 12) & 0xfff) / 4.0f); + sprintf(th, "%4.2f", (float)((cmd_buf[0] >> 0) & 0xfff) / 4.0f); + + const char* format = s_image_format[(cmd_buf[0] >> 53) & 0x7]; + const char* size = s_image_size[(cmd_buf[0] >> 51) & 0x3]; + + const uint32_t r = (cmd_buf[0] >> 24) & 0xff; + const uint32_t g = (cmd_buf[0] >> 16) & 0xff; + const uint32_t b = (cmd_buf[0] >> 8) & 0xff; + const uint32_t a = (cmd_buf[0] >> 0) & 0xff; + + const uint32_t command = (cmd_buf[0] >> 56) & 0x3f; + switch (command) + { + case 0x00: sprintf(buffer, "No Op"); break; + case 0x08: // Tri_NoShade + { + const int32_t lft = (cmd_buf[0] >> 55) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", 
(float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", (float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(buffer, "Tri_NoShade %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + break; + } + case 0x09: // Tri_NoShadeZ + { + const int32_t lft = (cmd_buf[0] >> 55) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", (float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", (float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(buffer, "Tri_NoShadeZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + break; + } + case 0x0a: // Tri_Tex + { + const int32_t lft = (cmd_buf[0] >> 55) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", (float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", 
(float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(s, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(t, "%4.4f", (float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(w, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd_buf[5] >> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwde, "%4.4f", (float)int32_t( (cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdy, "%4.4f", (float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + + buffer+=sprintf(buffer, "Tri_Tex %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " S: %s, T: %s, W: %s\n", s, t, w); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDY: %s, 
DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy); + break; + } + case 0x0b: // Tri_TexZ + { + const int32_t lft = (cmd_buf[0] >> 55) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", (float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", (float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(s, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(t, "%4.4f", (float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(w, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd_buf[5] >> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwde, "%4.4f", (float)int32_t( (cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdy, "%4.4f", 
(float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + + buffer+=sprintf(buffer, "Tri_TexZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " S: %s, T: %s, W: %s\n", s, t, w); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy); + break; + } + case 0x0c: // Tri_Shade + { + const int32_t lft = (cmd_buf[0] >> 23) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", (float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", (float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(rt, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(gt, "%4.4f", (float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(bt, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(at, "%4.4f", (float)int32_t( ((cmd_buf[4] & 0x0000ffff) << 16) | ( cmd_buf[ 6] & 0xffff)) / 65536.0f); + sprintf(drdx, "%4.4f", (float)int32_t( ((cmd_buf[5] 
>> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dadx, "%4.4f", (float)int32_t( ((cmd_buf[5] & 0x0000ffff) << 16) | ( cmd_buf[ 7] & 0xffff)) / 65536.0f); + sprintf(drde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbde, "%4.4f", (float)int32_t( (cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 0xffff)) / 65536.0f); + sprintf(dade, "%4.4f", (float)int32_t( ((cmd_buf[8] & 0x0000ffff) << 16) | ( cmd_buf[10] & 0xffff)) / 65536.0f); + sprintf(drdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdy, "%4.4f", (float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + sprintf(dady, "%4.4f", (float)int32_t( ((cmd_buf[9] & 0x0000ffff) << 16) | ( cmd_buf[11] & 0xffff)) / 65536.0f); + + buffer+=sprintf(buffer, "Tri_Shade %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady); + 
break; + } + case 0x0d: // Tri_ShadeZ + { + const int32_t lft = (cmd_buf[0] >> 23) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", (float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", (float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(rt, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(gt, "%4.4f", (float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(bt, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(at, "%4.4f", (float)int32_t( ((cmd_buf[4] & 0x0000ffff) << 16) | ( cmd_buf[ 6] & 0xffff)) / 65536.0f); + sprintf(drdx, "%4.4f", (float)int32_t( ((cmd_buf[5] >> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dadx, "%4.4f", (float)int32_t( ((cmd_buf[5] & 0x0000ffff) << 16) | ( cmd_buf[ 7] & 0xffff)) / 65536.0f); + sprintf(drde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbde, "%4.4f", (float)int32_t( (cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 
0xffff)) / 65536.0f); + sprintf(dade, "%4.4f", (float)int32_t( ((cmd_buf[8] & 0x0000ffff) << 16) | ( cmd_buf[10] & 0xffff)) / 65536.0f); + sprintf(drdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdy, "%4.4f", (float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + sprintf(dady, "%4.4f", (float)int32_t( ((cmd_buf[9] & 0x0000ffff) << 16) | ( cmd_buf[11] & 0xffff)) / 65536.0f); + + buffer+=sprintf(buffer, "Tri_ShadeZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady); + break; + } + case 0x0e: // Tri_TexShade + { + const int32_t lft = (cmd_buf[0] >> 23) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", (float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", (float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(rt, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 
0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(gt, "%4.4f", (float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(bt, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(at, "%4.4f", (float)int32_t( ((cmd_buf[4] & 0x0000ffff) << 16) | ( cmd_buf[ 6] & 0xffff)) / 65536.0f); + sprintf(drdx, "%4.4f", (float)int32_t( ((cmd_buf[5] >> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dadx, "%4.4f", (float)int32_t( ((cmd_buf[5] & 0x0000ffff) << 16) | ( cmd_buf[ 7] & 0xffff)) / 65536.0f); + sprintf(drde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbde, "%4.4f", (float)int32_t( (cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 0xffff)) / 65536.0f); + sprintf(dade, "%4.4f", (float)int32_t( ((cmd_buf[8] & 0x0000ffff) << 16) | ( cmd_buf[10] & 0xffff)) / 65536.0f); + sprintf(drdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdy, "%4.4f", (float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + sprintf(dady, "%4.4f", (float)int32_t( ((cmd_buf[9] & 0x0000ffff) << 16) | ( cmd_buf[11] & 0xffff)) / 65536.0f); + + sprintf(s, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(t, "%4.4f", 
(float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(w, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd_buf[5] >> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwde, "%4.4f", (float)int32_t( (cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdy, "%4.4f", (float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + + buffer+=sprintf(buffer, "Tri_TexShade %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady); + + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " S: %s, T: %s, 
W: %s\n", s, t, w); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy); + break; + } + case 0x0f: // Tri_TexShadeZ + { + const int32_t lft = (cmd_buf[0] >> 23) & 0x1; + + sprintf(yl, "%4.4f", (float)((cmd_buf[0] >> 32) & 0x1fff) / 4.0f); + sprintf(ym, "%4.4f", (float)((cmd_buf[0] >> 16) & 0x1fff) / 4.0f); + sprintf(yh, "%4.4f", (float)((cmd_buf[0] >> 0) & 0x1fff) / 4.0f); + sprintf(xl, "%4.4f", (float)int32_t(cmd_buf[1] >> 32) / 65536.0f); + sprintf(dxldy, "%4.4f", (float)int32_t(cmd_buf[1]) / 65536.0f); + sprintf(xh, "%4.4f", (float)int32_t(cmd_buf[2] >> 32) / 65536.0f); + sprintf(dxhdy, "%4.4f", (float)int32_t(cmd_buf[2]) / 65536.0f); + sprintf(xm, "%4.4f", (float)int32_t(cmd_buf[3] >> 32) / 65536.0f); + sprintf(dxmdy, "%4.4f", (float)int32_t(cmd_buf[3]) / 65536.0f); + + sprintf(rt, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(gt, "%4.4f", (float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(bt, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(at, "%4.4f", (float)int32_t( ((cmd_buf[4] & 0x0000ffff) << 16) | ( cmd_buf[ 6] & 0xffff)) / 65536.0f); + sprintf(drdx, "%4.4f", (float)int32_t( ((cmd_buf[5] >> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dadx, "%4.4f", (float)int32_t( ((cmd_buf[5] & 0x0000ffff) << 16) | ( cmd_buf[ 7] & 
0xffff)) / 65536.0f); + sprintf(drde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbde, "%4.4f", (float)int32_t( (cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 0xffff)) / 65536.0f); + sprintf(dade, "%4.4f", (float)int32_t( ((cmd_buf[8] & 0x0000ffff) << 16) | ( cmd_buf[10] & 0xffff)) / 65536.0f); + sprintf(drdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dgdy, "%4.4f", (float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dbdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + sprintf(dady, "%4.4f", (float)int32_t( ((cmd_buf[9] & 0x0000ffff) << 16) | ( cmd_buf[11] & 0xffff)) / 65536.0f); + + sprintf(s, "%4.4f", (float)int32_t( ((cmd_buf[4] >> 32) & 0xffff0000) | ((cmd_buf[ 6] >> 48) & 0xffff)) / 65536.0f); + sprintf(t, "%4.4f", (float)int32_t((((cmd_buf[4] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 6] >> 32) & 0xffff)) / 65536.0f); + sprintf(w, "%4.4f", (float)int32_t( (cmd_buf[4] & 0xffff0000) | ((cmd_buf[ 6] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd_buf[5] >> 32) & 0xffff0000) | ((cmd_buf[ 7] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdx, "%4.4f", (float)int32_t((((cmd_buf[5] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[ 7] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdx, "%4.4f", (float)int32_t( (cmd_buf[5] & 0xffff0000) | ((cmd_buf[ 7] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsde, "%4.4f", (float)int32_t( ((cmd_buf[8] >> 32) & 0xffff0000) | ((cmd_buf[10] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtde, "%4.4f", (float)int32_t((((cmd_buf[8] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[10] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwde, "%4.4f", (float)int32_t( 
(cmd_buf[8] & 0xffff0000) | ((cmd_buf[10] >> 16) & 0xffff)) / 65536.0f); + sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd_buf[9] >> 32) & 0xffff0000) | ((cmd_buf[11] >> 48) & 0xffff)) / 65536.0f); + sprintf(dtdy, "%4.4f", (float)int32_t((((cmd_buf[9] >> 32) & 0x0000ffff) << 16) | ((cmd_buf[11] >> 32) & 0xffff)) / 65536.0f); + sprintf(dwdy, "%4.4f", (float)int32_t( (cmd_buf[9] & 0xffff0000) | ((cmd_buf[11] >> 16) & 0xffff)) / 65536.0f); + + buffer+=sprintf(buffer, "Tri_TexShadeZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady); + + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " S: %s, T: %s, W: %s\n", s, t, w); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde); + buffer+=sprintf(buffer, " "); + buffer+=sprintf(buffer, " DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy); + break; + } + case 0x24: + case 0x25: + { + sprintf(s, "%4.4f", (float)int16_t((cmd_buf[1] >> 48) & 0xffff) / 32.0f); + sprintf(t, "%4.4f", (float)int16_t((cmd_buf[1] >> 32) & 0xffff) / 32.0f); + sprintf(dsdx, "%4.4f", (float)int16_t((cmd_buf[1] >> 16) & 0xffff) / 1024.0f); + sprintf(dtdy, "%4.4f", (float)int16_t((cmd_buf[1] >> 0) & 0xffff) / 1024.0f); + + if (command == 0x24) + sprintf(buffer, "Texture_Rectangle %d, %s, %s, %s, %s, %s, %s, %s, %s", tile, sh, th, sl, tl, s, t, dsdx, dtdy); + else + 
sprintf(buffer, "Texture_Rectangle_Flip %d, %s, %s, %s, %s, %s, %s, %s, %s", tile, sh, th, sl, tl, s, t, dsdx, dtdy); + + break; + } + case 0x26: sprintf(buffer, "Sync_Load"); break; + case 0x27: sprintf(buffer, "Sync_Pipe"); break; + case 0x28: sprintf(buffer, "Sync_Tile"); break; + case 0x29: sprintf(buffer, "Sync_Full"); break; + case 0x2d: sprintf(buffer, "Set_Scissor %s, %s, %s, %s", sl, tl, sh, th); break; + case 0x2e: sprintf(buffer, "Set_Prim_Depth %04X, %04X", uint32_t(cmd_buf[0] >> 16) & 0xffff, (uint32_t)cmd_buf[0] & 0xffff); break; + case 0x2f: sprintf(buffer, "Set_Other_Modes %08X %08X", uint32_t(cmd_buf[0] >> 32), (uint32_t)cmd_buf[0]); break; + case 0x30: sprintf(buffer, "Load_TLUT %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break; + case 0x32: sprintf(buffer, "Set_Tile_Size %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break; + case 0x33: sprintf(buffer, "Load_Block %d, %03X, %03X, %03X, %03X", tile, uint32_t(cmd_buf[0] >> 44) & 0xfff, uint32_t(cmd_buf[0] >> 32) & 0xfff, uint32_t(cmd_buf[0] >> 12) & 0xfff, uint32_t(cmd_buf[0]) & 0xfff); break; + case 0x34: sprintf(buffer, "Load_Tile %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break; + case 0x35: sprintf(buffer, "Set_Tile %d, %s, %s, %d, %04X", tile, format, size, (uint32_t(cmd_buf[0] >> 41) & 0x1ff) * 8, (uint32_t(cmd_buf[0] >> 32) & 0x1ff) * 8); break; + case 0x36: sprintf(buffer, "Fill_Rectangle %s, %s, %s, %s", sh, th, sl, tl); break; + case 0x37: sprintf(buffer, "Set_Fill_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break; + case 0x38: sprintf(buffer, "Set_Fog_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break; + case 0x39: sprintf(buffer, "Set_Blend_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break; + case 0x3a: sprintf(buffer, "Set_Prim_Color %d, %d, R: %d, G: %d, B: %d, A: %d", uint32_t(cmd_buf[0] >> 40) & 0x1f, uint32_t(cmd_buf[0] >> 32) & 0xff, r, g, b, a); break; + case 0x3b: sprintf(buffer, "Set_Env_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break; + case 0x3c: 
sprintf(buffer, "Set_Combine %08X %08X", uint32_t(cmd_buf[0] >> 32), (uint32_t)cmd_buf[0]); break; + case 0x3d: sprintf(buffer, "Set_Texture_Image %s, %s, %d, %08X", format, size, (uint32_t(cmd_buf[0] >> 32) & 0x1ff) + 1, (uint32_t)cmd_buf[0]); break; + case 0x3e: sprintf(buffer, "Set_Mask_Image %08X", (uint32_t)cmd_buf[0]); break; + case 0x3f: sprintf(buffer, "Set_Color_Image %s, %s, %d, %08X", format, size, (uint32_t(cmd_buf[0] >> 32) & 0x1ff) + 1, (uint32_t)cmd_buf[0]); break; + default: sprintf(buffer, "Unknown (%08X %08X)", uint32_t(cmd_buf[0] >> 32), (uint32_t)cmd_buf[0]); break; + } +} + +/*****************************************************************************/ + +static uint32_t rightcvghex(uint32_t x, uint32_t fmask) +{ + uint32_t stickybit = ((x >> 1) & 0x1fff) > 0; + uint32_t covered = ((x >> 14) & 3) + stickybit; + covered = (0xf0 >> covered) & 0xf; + return (covered & fmask); +} + +static uint32_t leftcvghex(uint32_t x, uint32_t fmask) +{ + uint32_t stickybit = ((x >> 1) & 0x1fff) > 0; + uint32_t covered = ((x >> 14) & 3) + stickybit; + covered = 0xf >> covered; + return (covered & fmask); +} + +static int32_t CLIP(int32_t value,int32_t min,int32_t max) +{ + if (value < min) + { + return min; + } + else if (value > max) + { + return max; + } + else + { + return value; + } +} + +void n64_rdp::compute_cvg_noflip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base) +{ + int32_t purgestart = 0xfff; + int32_t purgeend = 0; + const bool writablescanline = !(scanline & ~0x3ff); + const int32_t scanlinespx = scanline << 2; + + if (!writablescanline) return; + + for(int32_t i = 0; i < 4; i++) + { + if (minorxint[i] < purgestart) + { + purgestart = minorxint[i]; + } + if (majorxint[i] > purgeend) + { + purgeend = majorxint[i]; + } + } + + purgestart = CLIP(purgestart, 0, 1023); + purgeend = CLIP(purgeend, 0, 1023); + int32_t length = purgeend - purgestart; + + if 
(length < 0) return; + + rdp_span_aux* userdata = (rdp_span_aux*)spans[scanline - base].userdata; + memset(&userdata->m_cvg[purgestart], 0, (length + 1) << 1); + + for(int32_t i = 0; i < 4; i++) + { + int32_t minorcur = minorx[i]; + int32_t majorcur = majorx[i]; + int32_t minorcurint = minorxint[i]; + int32_t majorcurint = majorxint[i]; + length = majorcurint - minorcurint; + + int32_t fmask = (i & 1) ? 5 : 0xa; + int32_t maskshift = (i ^ 3) << 2; + int32_t fmaskshifted = fmask << maskshift; + int32_t fleft = CLIP(minorcurint + 1, 0, 647); + int32_t fright = CLIP(majorcurint - 1, 0, 647); + bool valid_y = ((scanlinespx + i) >= yh && (scanlinespx + i) < yl); + if (valid_y && length >= 0) + { + if (minorcurint != majorcurint) + { + if (!(minorcurint & ~0x3ff)) + { + userdata->m_cvg[minorcurint] |= (leftcvghex(minorcur, fmask) << maskshift); + } + if (!(majorcurint & ~0x3ff)) + { + userdata->m_cvg[majorcurint] |= (rightcvghex(majorcur, fmask) << maskshift); + } + } + else + { + if (!(majorcurint & ~0x3ff)) + { + int32_t samecvg = leftcvghex(minorcur, fmask) & rightcvghex(majorcur, fmask); + userdata->m_cvg[majorcurint] |= (samecvg << maskshift); + } + } + for (; fleft <= fright; fleft++) + { + userdata->m_cvg[fleft] |= fmaskshifted; + } + } + } +} + +void n64_rdp::compute_cvg_flip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base) +{ + int32_t purgestart = 0xfff; + int32_t purgeend = 0; + const bool writablescanline = !(scanline & ~0x3ff); + const int32_t scanlinespx = scanline << 2; + + if(!writablescanline) return; + + for(int32_t i = 0; i < 4; i++) + { + if (majorxint[i] < purgestart) + { + purgestart = majorxint[i]; + } + if (minorxint[i] > purgeend) + { + purgeend = minorxint[i]; + } + } + + purgestart = CLIP(purgestart, 0, 1023); + purgeend = CLIP(purgeend, 0, 1023); + + int32_t length = purgeend - purgestart; + + if (length < 0) return; + + rdp_span_aux* userdata = 
(rdp_span_aux*)spans[scanline - base].userdata; + memset(&userdata->m_cvg[purgestart], 0, (length + 1) << 1); + + for(int32_t i = 0; i < 4; i++) + { + int32_t minorcur = minorx[i]; + int32_t majorcur = majorx[i]; + int32_t minorcurint = minorxint[i]; + int32_t majorcurint = majorxint[i]; + length = minorcurint - majorcurint; + + int32_t fmask = (i & 1) ? 5 : 0xa; + int32_t maskshift = (i ^ 3) << 2; + int32_t fmaskshifted = fmask << maskshift; + int32_t fleft = CLIP(majorcurint + 1, 0, 647); + int32_t fright = CLIP(minorcurint - 1, 0, 647); + bool valid_y = ((scanlinespx + i) >= yh && (scanlinespx + i) < yl); + if (valid_y && length >= 0) + { + if (minorcurint != majorcurint) + { + if (!(minorcurint & ~0x3ff)) + { + userdata->m_cvg[minorcurint] |= (rightcvghex(minorcur, fmask) << maskshift); + } + if (!(majorcurint & ~0x3ff)) + { + userdata->m_cvg[majorcurint] |= (leftcvghex(majorcur, fmask) << maskshift); + } + } + else + { + if (!(majorcurint & ~0x3ff)) + { + int32_t samecvg = rightcvghex(minorcur, fmask) & leftcvghex(majorcur, fmask); + userdata->m_cvg[majorcurint] |= (samecvg << maskshift); + } + } + for (; fleft <= fright; fleft++) + { + userdata->m_cvg[fleft] |= fmaskshifted; + } + } + } +} + +#define SIGN(x, numb) (((x) & ((1 << numb) - 1)) | -((x) & (1 << (numb - 1)))) + +void n64_rdp::draw_triangle(uint64_t *cmd_buf, bool shade, bool texture, bool zbuffer, bool rect) +{ + const uint64_t* cmd_data = rect ? 
m_temp_rect_data : cmd_buf; + const uint64_t w1 = cmd_data[0]; + + int32_t flip = int32_t(w1 >> 55) & 1; + m_misc_state.m_max_level = uint32_t(w1 >> 51) & 7; + int32_t tilenum = int32_t(w1 >> 48) & 0x7; + + int32_t dsdiff = 0, dtdiff = 0, dwdiff = 0, drdiff = 0, dgdiff = 0, dbdiff = 0, dadiff = 0, dzdiff = 0; + int32_t dsdeh = 0, dtdeh = 0, dwdeh = 0, drdeh = 0, dgdeh = 0, dbdeh = 0, dadeh = 0, dzdeh = 0; + int32_t dsdxh = 0, dtdxh = 0, dwdxh = 0, drdxh = 0, dgdxh = 0, dbdxh = 0, dadxh = 0, dzdxh = 0; + int32_t dsdyh = 0, dtdyh = 0, dwdyh = 0, drdyh = 0, dgdyh = 0, dbdyh = 0, dadyh = 0, dzdyh = 0; + + int32_t maxxmx = 0; // maxxmx / minxhx very opaque names, consider re-naming + int32_t minxmx = 0; + int32_t maxxhx = 0; + int32_t minxhx = 0; + + int32_t shade_base = 4; + int32_t texture_base = 4; + int32_t zbuffer_base = 4; + if(shade) + { + texture_base += 8; + zbuffer_base += 8; + } + if(texture) + { + zbuffer_base += 8; + } + + uint64_t w2 = cmd_data[1]; + uint64_t w3 = cmd_data[2]; + uint64_t w4 = cmd_data[3]; + + int32_t yl = int32_t(w1 >> 32) & 0x3fff; + int32_t ym = int32_t(w1 >> 16) & 0x3fff; + int32_t yh = int32_t(w1 >> 0) & 0x3fff; + int32_t xl = (int32_t)(w2 >> 32) & 0x3fffffff; + int32_t xh = (int32_t)(w3 >> 32) & 0x3fffffff; + int32_t xm = (int32_t)(w4 >> 32) & 0x3fffffff; + // Inverse slopes in 16.16 format + int32_t dxldy = (int32_t)w2; + int32_t dxhdy = (int32_t)w3; + int32_t dxmdy = (int32_t)w4; + + if (yl & 0x2000) yl |= 0xffffc000; + if (ym & 0x2000) ym |= 0xffffc000; + if (yh & 0x2000) yh |= 0xffffc000; + + if (xl & 0x20000000) xl |= 0xc0000000; + if (xm & 0x20000000) xm |= 0xc0000000; + if (xh & 0x20000000) xh |= 0xc0000000; + + int32_t r = int32_t(((cmd_data[shade_base] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 2] >> 48) & 0x0000ffff)); + int32_t g = int32_t(((cmd_data[shade_base] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 2] >> 32) & 0x0000ffff)); + int32_t b = int32_t( (cmd_data[shade_base] & 0xffff0000) | ((cmd_data[shade_base + 2] 
>> 16) & 0x0000ffff)); + int32_t a = int32_t(((cmd_data[shade_base] << 16) & 0xffff0000) | (cmd_data[shade_base + 2] & 0x0000ffff)); + const int32_t drdx = int32_t(((cmd_data[shade_base + 1] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 3] >> 48) & 0x0000ffff)); + const int32_t dgdx = int32_t(((cmd_data[shade_base + 1] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 3] >> 32) & 0x0000ffff)); + const int32_t dbdx = int32_t( (cmd_data[shade_base + 1] & 0xffff0000) | ((cmd_data[shade_base + 3] >> 16) & 0x0000ffff)); + const int32_t dadx = int32_t(((cmd_data[shade_base + 1] << 16) & 0xffff0000) | (cmd_data[shade_base + 3] & 0x0000ffff)); + const int32_t drde = int32_t(((cmd_data[shade_base + 4] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 6] >> 48) & 0x0000ffff)); + const int32_t dgde = int32_t(((cmd_data[shade_base + 4] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 6] >> 32) & 0x0000ffff)); + const int32_t dbde = int32_t( (cmd_data[shade_base + 4] & 0xffff0000) | ((cmd_data[shade_base + 6] >> 16) & 0x0000ffff)); + const int32_t dade = int32_t(((cmd_data[shade_base + 4] << 16) & 0xffff0000) | (cmd_data[shade_base + 6] & 0x0000ffff)); + const int32_t drdy = int32_t(((cmd_data[shade_base + 5] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 7] >> 48) & 0x0000ffff)); + const int32_t dgdy = int32_t(((cmd_data[shade_base + 5] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 7] >> 32) & 0x0000ffff)); + const int32_t dbdy = int32_t( (cmd_data[shade_base + 5] & 0xffff0000) | ((cmd_data[shade_base + 7] >> 16) & 0x0000ffff)); + const int32_t dady = int32_t(((cmd_data[shade_base + 5] << 16) & 0xffff0000) | (cmd_data[shade_base + 7] & 0x0000ffff)); + + int32_t s = int32_t(((cmd_data[texture_base] >> 32) & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 48) & 0x0000ffff)); + int32_t t = int32_t(((cmd_data[texture_base] >> 16) & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 32) & 0x0000ffff)); + int32_t w = int32_t( (cmd_data[texture_base] & 0xffff0000) | ((cmd_data[texture_base+ 2 ] 
>> 16) & 0x0000ffff)); + const int32_t dsdx = int32_t(((cmd_data[texture_base + 1] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 3] >> 48) & 0x0000ffff)); + const int32_t dtdx = int32_t(((cmd_data[texture_base + 1] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 3] >> 32) & 0x0000ffff)); + const int32_t dwdx = int32_t( (cmd_data[texture_base + 1] & 0xffff0000) | ((cmd_data[texture_base + 3] >> 16) & 0x0000ffff)); + const int32_t dsde = int32_t(((cmd_data[texture_base + 4] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 6] >> 48) & 0x0000ffff)); + const int32_t dtde = int32_t(((cmd_data[texture_base + 4] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 6] >> 32) & 0x0000ffff)); + const int32_t dwde = int32_t( (cmd_data[texture_base + 4] & 0xffff0000) | ((cmd_data[texture_base + 6] >> 16) & 0x0000ffff)); + const int32_t dsdy = int32_t(((cmd_data[texture_base + 5] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 7] >> 48) & 0x0000ffff)); + const int32_t dtdy = int32_t(((cmd_data[texture_base + 5] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 7] >> 32) & 0x0000ffff)); + const int32_t dwdy = int32_t( (cmd_data[texture_base + 5] & 0xffff0000) | ((cmd_data[texture_base + 7] >> 16) & 0x0000ffff)); + + int32_t z = int32_t(cmd_data[zbuffer_base] >> 32); + const int32_t dzdx = int32_t(cmd_data[zbuffer_base]); + const int32_t dzde = int32_t(cmd_data[zbuffer_base+1] >> 32); + const int32_t dzdy = int32_t(cmd_data[zbuffer_base+1]); + + const int32_t dzdy_dz = (dzdy >> 16) & 0xffff; + const int32_t dzdx_dz = (dzdx >> 16) & 0xffff; + + extent_t spans[2048]; +#ifdef MAME_DEBUG + memset(spans, 0xcc, sizeof(spans)); +#endif + + m_span_base.m_span_drdy = drdy; + m_span_base.m_span_dgdy = dgdy; + m_span_base.m_span_dbdy = dbdy; + m_span_base.m_span_dady = dady; + m_span_base.m_span_dzdy = m_other_modes.z_source_sel ? 0 : dzdy; + + uint32_t temp_dzpix = ((dzdy_dz & 0x8000) ? ((~dzdy_dz) & 0x7fff) : dzdy_dz) + ((dzdx_dz & 0x8000) ? 
((~dzdx_dz) & 0x7fff) : dzdx_dz); + m_span_base.m_span_dr = drdx & ~0x1f; + m_span_base.m_span_dg = dgdx & ~0x1f; + m_span_base.m_span_db = dbdx & ~0x1f; + m_span_base.m_span_da = dadx & ~0x1f; + m_span_base.m_span_ds = dsdx; + m_span_base.m_span_dt = dtdx; + m_span_base.m_span_dw = dwdx; + m_span_base.m_span_dz = m_other_modes.z_source_sel ? 0 : dzdx; + m_span_base.m_span_dymax = 0; + m_span_base.m_span_dzpix = m_dzpix_normalize[temp_dzpix & 0xffff]; + + int32_t xleft_inc = (dxmdy >> 2) & ~1; + int32_t xright_inc = (dxhdy >> 2) & ~1; + + int32_t xright = xh & ~1; + int32_t xleft = xm & ~1; + + const int32_t sign_dxhdy = (dxhdy & 0x80000000) ? 1 : 0; + const int32_t do_offset = !(sign_dxhdy ^ (flip)); + + if (do_offset) + { + dsdeh = dsde >> 9; dsdyh = dsdy >> 9; + dtdeh = dtde >> 9; dtdyh = dtdy >> 9; + dwdeh = dwde >> 9; dwdyh = dwdy >> 9; + drdeh = drde >> 9; drdyh = drdy >> 9; + dgdeh = dgde >> 9; dgdyh = dgdy >> 9; + dbdeh = dbde >> 9; dbdyh = dbdy >> 9; + dadeh = dade >> 9; dadyh = dady >> 9; + dzdeh = dzde >> 9; dzdyh = dzdy >> 9; + + dsdiff = (dsdeh << 8) + (dsdeh << 7) - (dsdyh << 8) - (dsdyh << 7); + dtdiff = (dtdeh << 8) + (dtdeh << 7) - (dtdyh << 8) - (dtdyh << 7); + dwdiff = (dwdeh << 8) + (dwdeh << 7) - (dwdyh << 8) - (dwdyh << 7); + drdiff = (drdeh << 8) + (drdeh << 7) - (drdyh << 8) - (drdyh << 7); + dgdiff = (dgdeh << 8) + (dgdeh << 7) - (dgdyh << 8) - (dgdyh << 7); + dbdiff = (dbdeh << 8) + (dbdeh << 7) - (dbdyh << 8) - (dbdyh << 7); + dadiff = (dadeh << 8) + (dadeh << 7) - (dadyh << 8) - (dadyh << 7); + dzdiff = (dzdeh << 8) + (dzdeh << 7) - (dzdyh << 8) - (dzdyh << 7); + } + else + { + dsdiff = dtdiff = dwdiff = drdiff = dgdiff = dbdiff = dadiff = dzdiff = 0; + } + + dsdxh = dsdx >> 8; + dtdxh = dtdx >> 8; + dwdxh = dwdx >> 8; + drdxh = drdx >> 8; + dgdxh = dgdx >> 8; + dbdxh = dbdx >> 8; + dadxh = dadx >> 8; + dzdxh = dzdx >> 8; + + const int32_t ycur = yh & ~3; + const int32_t ylfar = yl | 3; + const int32_t ldflag = (sign_dxhdy ^ flip) ? 
0 : 3; + int32_t majorx[4]; + int32_t minorx[4]; + int32_t majorxint[4]; + int32_t minorxint[4]; + + int32_t xfrac = ((xright >> 8) & 0xff); + + const int32_t clipy1 = m_scissor.m_yh; + const int32_t clipy2 = m_scissor.m_yl; + + // Trivial reject + if((ycur >> 2) >= clipy2 && (ylfar >> 2) >= clipy2) + { + return; + } + if((ycur >> 2) < clipy1 && (ylfar >> 2) < clipy1) + { + return; + } + + bool new_object = true; + rdp_poly_state* object = nullptr; + bool valid = false; + + int32_t* minx = flip ? &minxhx : &minxmx; + int32_t* maxx = flip ? &maxxmx : &maxxhx; + int32_t* startx = flip ? maxx : minx; + int32_t* endx = flip ? minx : maxx; + + for (int32_t k = ycur; k <= ylfar; k++) + { + if (k == ym) + { + xleft = xl & ~1; + xleft_inc = (dxldy >> 2) & ~1; + } + + const int32_t xstart = xleft >> 16; + const int32_t xend = xright >> 16; + const int32_t j = k >> 2; + const int32_t spanidx = (k - ycur) >> 2; + const int32_t spix = k & 3; + bool valid_y = !(k < yh || k >= yl); + + if (spanidx >= 0 && spanidx < 2048) + { + majorxint[spix] = xend; + minorxint[spix] = xstart; + majorx[spix] = xright; + minorx[spix] = xleft; + + if (spix == 0) + { + *maxx = 0; + *minx = 0xfff; + } + + if (valid_y) + { + if (flip) + { + *maxx = std::max(xstart, *maxx); + *minx = std::min(xend, *minx); + } + else + { + *minx = std::min(xstart, *minx); + *maxx = std::max(xend, *maxx); + } + } + + if (spix == 0) + { + if(new_object) + { + object = &object_data().next(); + memcpy(object->m_tmem, m_tmem.get(), 0x1000); + new_object = false; + } + + spans[spanidx].userdata = (void*)((uint8_t*)m_aux_buf.get() + m_aux_buf_ptr); + valid = true; + m_aux_buf_ptr += sizeof(rdp_span_aux); + + if(m_aux_buf_ptr >= EXTENT_AUX_COUNT) + { + fatalerror("n64_rdp::draw_triangle: span aux buffer overflow\n"); + } + + rdp_span_aux* userdata = (rdp_span_aux*)spans[spanidx].userdata; + memcpy(&userdata->m_combine, &m_combine, sizeof(combine_modes_t)); + userdata->m_tmem = object->m_tmem; + + userdata->m_blend_color = 
m_blend_color; + userdata->m_prim_color = m_prim_color; + userdata->m_env_color = m_env_color; + userdata->m_fog_color = m_fog_color; + userdata->m_prim_alpha = m_prim_alpha; + userdata->m_env_alpha = m_env_alpha; + userdata->m_key_scale = m_key_scale; + userdata->m_lod_fraction = m_lod_fraction; + userdata->m_prim_lod_fraction = m_prim_lod_fraction; + userdata->m_k4 = m_k4; + userdata->m_k5 = m_k5; + + // Setup blender data for this scanline + set_blender_input(0, 0, &userdata->m_color_inputs.blender1a_rgb[0], &userdata->m_color_inputs.blender1b_a[0], m_other_modes.blend_m1a_0, m_other_modes.blend_m1b_0, userdata); + set_blender_input(0, 1, &userdata->m_color_inputs.blender2a_rgb[0], &userdata->m_color_inputs.blender2b_a[0], m_other_modes.blend_m2a_0, m_other_modes.blend_m2b_0, userdata); + set_blender_input(1, 0, &userdata->m_color_inputs.blender1a_rgb[1], &userdata->m_color_inputs.blender1b_a[1], m_other_modes.blend_m1a_1, m_other_modes.blend_m1b_1, userdata); + set_blender_input(1, 1, &userdata->m_color_inputs.blender2a_rgb[1], &userdata->m_color_inputs.blender2b_a[1], m_other_modes.blend_m2a_1, m_other_modes.blend_m2b_1, userdata); + + // Setup color combiner data for this scanline + set_suba_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_a[0], m_combine.sub_a_rgb0, userdata); + set_subb_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_b[0], m_combine.sub_b_rgb0, userdata); + set_mul_input_rgb(&userdata->m_color_inputs.combiner_rgbmul[0], m_combine.mul_rgb0, userdata); + set_add_input_rgb(&userdata->m_color_inputs.combiner_rgbadd[0], m_combine.add_rgb0, userdata); + set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_a[0], m_combine.sub_a_a0, userdata); + set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_b[0], m_combine.sub_b_a0, userdata); + set_mul_input_alpha(&userdata->m_color_inputs.combiner_alphamul[0], m_combine.mul_a0, userdata); + set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphaadd[0], m_combine.add_a0, 
userdata); + + set_suba_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_a[1], m_combine.sub_a_rgb1, userdata); + set_subb_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_b[1], m_combine.sub_b_rgb1, userdata); + set_mul_input_rgb(&userdata->m_color_inputs.combiner_rgbmul[1], m_combine.mul_rgb1, userdata); + set_add_input_rgb(&userdata->m_color_inputs.combiner_rgbadd[1], m_combine.add_rgb1, userdata); + set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_a[1], m_combine.sub_a_a1, userdata); + set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_b[1], m_combine.sub_b_a1, userdata); + set_mul_input_alpha(&userdata->m_color_inputs.combiner_alphamul[1], m_combine.mul_a1, userdata); + set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphaadd[1], m_combine.add_a1, userdata); + } + + if (spix == 3) + { + spans[spanidx].startx = *startx; + spans[spanidx].stopx = *endx; + ((this)->*(m_compute_cvg[flip]))(spans, majorx, minorx, majorxint, minorxint, j, yh, yl, ycur >> 2); + } + + if (spix == ldflag) + { + ((rdp_span_aux*)spans[spanidx].userdata)->m_unscissored_rx = xend; + xfrac = ((xright >> 8) & 0xff); + spans[spanidx].param[SPAN_R].start = ((r >> 9) << 9) + drdiff - (xfrac * drdxh); + spans[spanidx].param[SPAN_G].start = ((g >> 9) << 9) + dgdiff - (xfrac * dgdxh); + spans[spanidx].param[SPAN_B].start = ((b >> 9) << 9) + dbdiff - (xfrac * dbdxh); + spans[spanidx].param[SPAN_A].start = ((a >> 9) << 9) + dadiff - (xfrac * dadxh); + spans[spanidx].param[SPAN_S].start = (((s >> 9) << 9) + dsdiff - (xfrac * dsdxh)) & ~0x1f; + spans[spanidx].param[SPAN_T].start = (((t >> 9) << 9) + dtdiff - (xfrac * dtdxh)) & ~0x1f; + spans[spanidx].param[SPAN_W].start = (((w >> 9) << 9) + dwdiff - (xfrac * dwdxh)) & ~0x1f; + spans[spanidx].param[SPAN_Z].start = ((z >> 9) << 9) + dzdiff - (xfrac * dzdxh); + } + } + + if (spix == 3) + { + r += drde; + g += dgde; + b += dbde; + a += dade; + s += dsde; + t += dtde; + w += dwde; + z += dzde; + } + xleft += 
xleft_inc; + xright += xright_inc; + } + + if(!new_object && valid) + { + render_spans(yh >> 2, yl >> 2, tilenum, flip ? true : false, spans, rect, object); + } + m_aux_buf_ptr = 0; // Spans can be reused once render completes + //wait("draw_triangle"); +} + +/*****************************************************************************/ + +//////////////////////// +// RDP COMMANDS +//////////////////////// + +void n64_rdp::triangle(uint64_t *cmd_buf, bool shade, bool texture, bool zbuffer) +{ + draw_triangle(cmd_buf, shade, texture, zbuffer, false); + m_pipe_clean = false; +} + +void n64_rdp::cmd_tex_rect(uint64_t *cmd_buf) +{ + const uint64_t w1 = cmd_buf[0]; + const uint64_t w2 = cmd_buf[1]; + + const uint64_t tilenum = (w1 >> 24) & 0x7; + const uint64_t xh = (w1 >> 12) & 0xfff; + const uint64_t xl = (w1 >> 44) & 0xfff; + const uint64_t yh = (w1 >> 0) & 0xfff; + uint64_t yl = (w1 >> 32) & 0xfff; + + const uint64_t s = (w2 >> 48) & 0xffff; + const uint64_t t = (w2 >> 32) & 0xffff; + const uint64_t dsdx = SIGN16((w2 >> 16) & 0xffff); + const uint64_t dtdy = SIGN16((w2 >> 0) & 0xffff); + + if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY) + { + yl |= 3; + } + + const uint64_t xlint = (xl >> 2) & 0x3ff; + const uint64_t xhint = (xh >> 2) & 0x3ff; + + uint64_t* ewdata = m_temp_rect_data; + ewdata[0] = ((uint64_t)0x24 << 56) | ((0x80L | tilenum) << 48) | (yl << 32) | (yl << 16) | yh; // command, flipped, tile, yl + ewdata[1] = (xlint << 48) | ((xl & 3) << 46); // xl, xl frac, dxldy (0), dxldy frac (0) + ewdata[2] = (xhint << 48) | ((xh & 3) << 46); // xh, xh frac, dxhdy (0), dxhdy frac (0) + ewdata[3] = (xlint << 48) | ((xl & 3) << 46); // xm, xm frac, dxmdy (0), dxmdy frac (0) + memset(&ewdata[4], 0, 8 * sizeof(uint64_t)); // shade + ewdata[12] = (s << 48) | (t << 32); // s, t, w (0) + ewdata[13] = (dsdx >> 5) << 48; // dsdx, dtdx, dwdx (0) + ewdata[14] = 0; // s frac (0), t frac (0), w frac (0) + ewdata[15] = (dsdx & 
0x1f) << 59; // dsdx frac, dtdx frac, dwdx frac (0) + ewdata[16] = ((dtdy >> 5) & 0xffff) << 32; // dsde, dtde, dwde (0) + ewdata[17] = ((dtdy >> 5) & 0xffff) << 32; // dsdy, dtdy, dwdy (0) + ewdata[18] = ((dtdy & 0x1f) << 11) << 32; // dsde frac, dtde frac, dwde frac (0) + ewdata[19] = ((dtdy & 0x1f) << 11) << 32; // dsdy frac, dtdy frac, dwdy frac (0) + // ewdata[40-43] = 0; // depth + + draw_triangle(cmd_buf, true, true, false, true); +} + +void n64_rdp::cmd_tex_rect_flip(uint64_t *cmd_buf) +{ + const uint64_t w1 = cmd_buf[0]; + const uint64_t w2 = cmd_buf[1]; + + const uint64_t tilenum = (w1 >> 24) & 0x7; + const uint64_t xh = (w1 >> 12) & 0xfff; + const uint64_t xl = (w1 >> 44) & 0xfff; + const uint64_t yh = (w1 >> 0) & 0xfff; + uint64_t yl = (w1 >> 32) & 0xfff; + + const uint64_t s = (w2 >> 48) & 0xffff; + const uint64_t t = (w2 >> 32) & 0xffff; + const uint64_t dsdx = SIGN16((w2 >> 16) & 0xffff); + const uint64_t dtdy = SIGN16((w2 >> 0) & 0xffff); + + if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY) + { + yl |= 3; + } + + const uint64_t xlint = (xl >> 2) & 0x3ff; + const uint64_t xhint = (xh >> 2) & 0x3ff; + + uint64_t* ewdata = m_temp_rect_data; + ewdata[0] = ((uint64_t)0x25 << 56) | ((0x80L | tilenum) << 48) | (yl << 32) | (yl << 16) | yh; // command, flipped, tile, yl + ewdata[1] = (xlint << 48) | ((xl & 3) << 46); // xl, xl frac, dxldy (0), dxldy frac (0) + ewdata[2] = (xhint << 48) | ((xh & 3) << 46); // xh, xh frac, dxhdy (0), dxhdy frac (0) + ewdata[3] = (xlint << 48) | ((xl & 3) << 46); // xm, xm frac, dxmdy (0), dxmdy frac (0) + memset(&ewdata[4], 0, 8 * sizeof(uint64_t)); // shade + ewdata[12] = (s << 48) | (t << 32); // s, t, w (0) + ewdata[13] = ((dtdy >> 5) & 0xffff) << 32; // dsdx, dtdx, dwdx (0) + ewdata[14] = 0; // s frac (0), t frac (0), w frac (0) + ewdata[15] = ((dtdy & 0x1f) << 43); // dsdx frac, dtdx frac, dwdx frac (0) + ewdata[16] = (dsdx >> 5) << 48; // dsde, dtde, dwde (0) + ewdata[17] = 
(dsdx >> 5) << 48; // dsdy, dtdy, dwdy (0) + ewdata[18] = (dsdx & 0x1f) << 59; // dsde frac, dtde frac, dwde frac (0) + ewdata[19] = (dsdx & 0x1f) << 59; // dsdy frac, dtdy frac, dwdy frac (0) + + draw_triangle(cmd_buf, true, true, false, true); +} + +void n64_rdp::cmd_sync_load(uint64_t *cmd_buf) +{ + //wait("SyncLoad"); +} + +void n64_rdp::cmd_sync_pipe(uint64_t *cmd_buf) +{ + //wait("SyncPipe"); +} + +void n64_rdp::cmd_sync_tile(uint64_t *cmd_buf) +{ + //wait("SyncTile"); +} + +void n64_rdp::cmd_sync_full(uint64_t *cmd_buf) +{ + //wait("SyncFull"); + m_n64_periphs->dp_full_sync(); +} + +void n64_rdp::cmd_set_key_gb(uint64_t *cmd_buf) +{ + m_key_scale.set_b(uint32_t(cmd_buf[0] >> 0) & 0xff); + m_key_scale.set_g(uint32_t(cmd_buf[0] >> 16) & 0xff); +} + +void n64_rdp::cmd_set_key_r(uint64_t *cmd_buf) +{ + m_key_scale.set_r(uint32_t(cmd_buf[0] & 0xff)); +} + +void n64_rdp::cmd_set_fill_color32(uint64_t *cmd_buf) +{ + //wait("SetFillColor"); + m_fill_color = (uint32_t)cmd_buf[0]; +} + +void n64_rdp::cmd_set_convert(uint64_t *cmd_buf) +{ + const uint64_t w1 = cmd_buf[0]; + + if(!m_pipe_clean) { m_pipe_clean = true; wait("SetConvert"); } + int32_t k0 = int32_t(w1 >> 45) & 0x1ff; + int32_t k1 = int32_t(w1 >> 36) & 0x1ff; + int32_t k2 = int32_t(w1 >> 27) & 0x1ff; + int32_t k3 = int32_t(w1 >> 18) & 0x1ff; + int32_t k4 = int32_t(w1 >> 9) & 0x1ff; + int32_t k5 = int32_t(w1 >> 0) & 0x1ff; + + k0 = (SIGN9(k0) << 1) + 1; + k1 = (SIGN9(k1) << 1) + 1; + k2 = (SIGN9(k2) << 1) + 1; + k3 = (SIGN9(k3) << 1) + 1; + + set_yuv_factors(rgbaint_t(0, k0, k2, 0), rgbaint_t(0, 0, k1, k3), rgbaint_t(k4, k4, k4, k4), rgbaint_t(k5, k5, k5, k5)); +} + +void n64_rdp::cmd_set_scissor(uint64_t *cmd_buf) +{ + const uint64_t w1 = cmd_buf[0]; + + m_scissor.m_xh = ((w1 >> 44) & 0xfff) >> 2; + m_scissor.m_yh = ((w1 >> 32) & 0xfff) >> 2; + m_scissor.m_xl = ((w1 >> 12) & 0xfff) >> 2; + m_scissor.m_yl = ((w1 >> 0) & 0xfff) >> 2; + + // TODO: handle f & o? 
+} + +void n64_rdp::cmd_set_prim_depth(uint64_t *cmd_buf) +{ + const uint64_t w1 = cmd_buf[0]; + m_misc_state.m_primitive_z = (uint16_t)(w1 >> 16) & 0x7fff; + m_misc_state.m_primitive_dz = (uint16_t)(w1 >> 32); +} + +void n64_rdp::cmd_set_other_modes(uint64_t *cmd_buf) +{ + const uint64_t w1 = cmd_buf[0]; + //wait("SetOtherModes"); + m_other_modes.cycle_type = (w1 >> 52) & 0x3; // 01 + m_other_modes.persp_tex_en = (w1 >> 51) & 1; // 1 + m_other_modes.detail_tex_en = (w1 >> 50) & 1; // 0 + m_other_modes.sharpen_tex_en = (w1 >> 49) & 1; // 0 + m_other_modes.tex_lod_en = (w1 >> 48) & 1; // 0 + m_other_modes.en_tlut = (w1 >> 47) & 1; // 0 + m_other_modes.tlut_type = (w1 >> 46) & 1; // 0 + m_other_modes.sample_type = (w1 >> 45) & 1; // 1 + m_other_modes.mid_texel = (w1 >> 44) & 1; // 0 + m_other_modes.bi_lerp0 = (w1 >> 43) & 1; // 1 + m_other_modes.bi_lerp1 = (w1 >> 42) & 1; // 1 + m_other_modes.convert_one = (w1 >> 41) & 1; // 0 + m_other_modes.key_en = (w1 >> 40) & 1; // 0 + m_other_modes.rgb_dither_sel = (w1 >> 38) & 0x3; // 00 + m_other_modes.alpha_dither_sel = (w1 >> 36) & 0x3; // 01 + m_other_modes.blend_m1a_0 = (w1 >> 30) & 0x3; // 11 + m_other_modes.blend_m1a_1 = (w1 >> 28) & 0x3; // 00 + m_other_modes.blend_m1b_0 = (w1 >> 26) & 0x3; // 10 + m_other_modes.blend_m1b_1 = (w1 >> 24) & 0x3; // 00 + m_other_modes.blend_m2a_0 = (w1 >> 22) & 0x3; // 00 + m_other_modes.blend_m2a_1 = (w1 >> 20) & 0x3; // 01 + m_other_modes.blend_m2b_0 = (w1 >> 18) & 0x3; // 00 + m_other_modes.blend_m2b_1 = (w1 >> 16) & 0x3; // 01 + m_other_modes.force_blend = (w1 >> 14) & 1; // 0 + m_other_modes.blend_shift = m_other_modes.force_blend ? 
5 : 2; + m_other_modes.alpha_cvg_select = (w1 >> 13) & 1; // 1 + m_other_modes.cvg_times_alpha = (w1 >> 12) & 1; // 0 + m_other_modes.z_mode = (w1 >> 10) & 0x3; // 00 + m_other_modes.cvg_dest = (w1 >> 8) & 0x3; // 00 + m_other_modes.color_on_cvg = (w1 >> 7) & 1; // 0 + m_other_modes.image_read_en = (w1 >> 6) & 1; // 1 + m_other_modes.z_update_en = (w1 >> 5) & 1; // 1 + m_other_modes.z_compare_en = (w1 >> 4) & 1; // 1 + m_other_modes.antialias_en = (w1 >> 3) & 1; // 1 + m_other_modes.z_source_sel = (w1 >> 2) & 1; // 0 + m_other_modes.dither_alpha_en = (w1 >> 1) & 1; // 0 + m_other_modes.alpha_compare_en = (w1 >> 0) & 1; // 0 + m_other_modes.alpha_dither_mode = (m_other_modes.alpha_compare_en << 1) | m_other_modes.dither_alpha_en; +} + +void n64_rdp::cmd_load_tlut(uint64_t *cmd_buf) +{ + //wait("LoadTLUT"); + n64_tile_t* tile = m_tiles; + const uint64_t w1 = cmd_buf[0]; + + const int32_t tilenum = (w1 >> 24) & 0x7; + const int32_t sl = tile[tilenum].sl = int32_t(w1 >> 44) & 0xfff; + const int32_t tl = tile[tilenum].tl = int32_t(w1 >> 32) & 0xfff; + const int32_t sh = tile[tilenum].sh = int32_t(w1 >> 12) & 0xfff; + const int32_t th = tile[tilenum].th = int32_t(w1 >> 0) & 0xfff; + + if (tl != th) + { + fatalerror("Load tlut: tl=%d, th=%d\n",tl,th); + } + + m_capture.data_begin(); + + const int32_t count = ((sh >> 2) - (sl >> 2) + 1) << 2; + + switch (m_misc_state.m_ti_size) + { + case PIXEL_SIZE_16BIT: + { + if (tile[tilenum].tmem < 256) + { + fatalerror("rdp_load_tlut: loading tlut into low half at %d qwords\n",tile[tilenum].tmem); + } + int32_t srcstart = (m_misc_state.m_ti_address + (tl >> 2) * (m_misc_state.m_ti_width << 1) + (sl >> 1)) >> 1; + int32_t dststart = tile[tilenum].tmem << 2; + uint16_t* dst = get_tmem16(); + + for (int32_t i = 0; i < count; i += 4) + { + if (dststart < 2048) + { + dst[dststart] = U_RREADIDX16(srcstart); + m_capture.data_block()->put16(dst[dststart]); + dst[dststart + 1] = dst[dststart]; + dst[dststart + 2] = dst[dststart]; + 
dst[dststart + 3] = dst[dststart];
                    dststart += 4;
                    srcstart += 1;
                }
            }
            break;
        }
        default: fatalerror("RDP: load_tlut: size = %d\n", m_misc_state.m_ti_size);
    }

    m_capture.data_end();

    // Cache the packed S/T extents for SIMD consumption by the texture pipe.
    m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
    m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
}

// SET_TILE_SIZE (0x32): latch the 10.2 fixed-point S/T low/high extents of the
// tile descriptor selected by bits 26..24 of the command word, and refresh the
// packed sth/stl copies used by the sampler.
void n64_rdp::cmd_set_tile_size(uint64_t *cmd_buf)
{
    //wait("SetTileSize");
    const uint64_t w1 = cmd_buf[0];
    const int32_t tilenum = int32_t(w1 >> 24) & 0x7;

    m_tiles[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
    m_tiles[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
    m_tiles[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
    m_tiles[tilenum].th = int32_t(w1 >> 0) & 0xfff;

    m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
    m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
}

// LOAD_BLOCK (0x33): bulk-copy texels from the texture image in RDRAM into
// TMEM as one contiguous run.  The th field of the command doubles as DXT,
// the 1.11 fixed-point per-64-bit-word T increment; when non-zero, every time
// the accumulated T coordinate crosses a line boundary the word-swap pattern
// toggles (odd lines of TMEM are word-swapped on real hardware).
// All loaded data is also mirrored into m_capture for RDP stream capture.
void n64_rdp::cmd_load_block(uint64_t *cmd_buf)
{
    //wait("LoadBlock");
    n64_tile_t* tile = m_tiles;
    const uint64_t w1 = cmd_buf[0];

    const uint8_t tilenum = uint8_t(w1 >> 24) & 0x7;
    uint16_t* tc = get_tmem16();

    int32_t sl, tl, sh, dxt;
    tile[tilenum].sl = sl = int32_t((w1 >> 44) & 0xfff);
    tile[tilenum].tl = tl = int32_t((w1 >> 32) & 0xfff);
    tile[tilenum].sh = sh = int32_t((w1 >> 12) & 0xfff);
    tile[tilenum].th = dxt = int32_t((w1 >> 0) & 0xfff);   // th is reused as DXT for LOAD_BLOCK

    /*uint16_t tl_masked = tl & 0x3ff;

    int32_t load_edge_walker_data[10] = {
        ((cmd_buf[0] >> 32) & 0xff000000) | (0x10 << 19) | (tilenum << 16) | ((tl_masked << 2) | 3),
        (((tl_masked << 2) | 3) << 16) | (tl_masked << 2),
        sh << 16,
        sl << 16,
        sh << 16,
        ((sl << 3) << 16) | (tl << 3),
        (dxt & 0xff) << 8,
        ((0x80 >> wstate->ti_size) << 16) | (dxt >> 8),
        0x20,
        0x20
    };

    do_load_edge_walker(load_edge_walker_data);*/

    // Texel count -> 64-bit TMEM words, rounded up.
    int32_t width = (sh - sl) + 1;

    width = (width << m_misc_state.m_ti_size) >> 1;
    if (width & 7)
    {
        width = (width & ~7) + 8;
    }
    width >>= 3;

    const int32_t tb = tile[tilenum].tmem << 2;   // TMEM base, in 16-bit words

    // Source row pitch and S offset, in 16-bit words.
    const int32_t tiwinwords = (m_misc_state.m_ti_width << m_misc_state.m_ti_size) >> 2;
    const int32_t slinwords = (sl << m_misc_state.m_ti_size) >> 2;

    const uint32_t src = (m_misc_state.m_ti_address >> 1) + (tl * tiwinwords) + slinwords;

    m_capture.data_begin();

    if (dxt != 0)
    {
        // T advances by dxt per 64-bit word; bit 11 of the accumulator selects
        // the odd-line word-swap.  On each toggle the copy skips ahead by the
        // tile's line pitch.
        int32_t j = 0;
        int32_t t = 0;
        int32_t oldt = 0;

        if (tile[tilenum].size != PIXEL_SIZE_32BIT && tile[tilenum].format != FORMAT_YUV)
        {
            for (int32_t i = 0; i < width; i ++)
            {
                oldt = t;
                t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
                if (t != oldt)
                {
                    i += tile[tilenum].line;
                }

                int32_t ptr = tb + (i << 2);
                int32_t srcptr = src + (i << 2);

                tc[(ptr ^ t) & 0x7ff] = U_RREADIDX16(srcptr);
                tc[((ptr + 1) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 1);
                tc[((ptr + 2) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 2);
                tc[((ptr + 3) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 3);

                m_capture.data_block()->put16(U_RREADIDX16(srcptr));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));

                j += dxt;
            }
        }
        else if (tile[tilenum].format == FORMAT_YUV)
        {
            // YUV: Y bytes go to the low TMEM half, UV bytes to the high half (|0x400).
            for (int32_t i = 0; i < width; i ++)
            {
                oldt = t;
                t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
                if (t != oldt)
                {
                    i += tile[tilenum].line;
                }

                int32_t ptr = ((tb + (i << 1)) ^ t) & 0x3ff;
                int32_t srcptr = src + (i << 2);

                int32_t first = U_RREADIDX16(srcptr);
                int32_t sec = U_RREADIDX16(srcptr + 1);
                tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
                tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

                ptr = ((tb + (i << 1) + 1) ^ t) & 0x3ff;
                first = U_RREADIDX16(srcptr + 2);
                sec = U_RREADIDX16(srcptr + 3);
                tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
                tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

                m_capture.data_block()->put16(U_RREADIDX16(srcptr));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
                j += dxt;
            }
        }
        else
        {
            // 32-bit RGBA: R/G halves to low TMEM, B/A halves to high TMEM (|0x400).
            for (int32_t i = 0; i < width; i ++)
            {
                oldt = t;
                t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
                if (t != oldt)
                    i += tile[tilenum].line;

                int32_t ptr = ((tb + (i << 1)) ^ t) & 0x3ff;
                int32_t srcptr = src + (i << 2);
                tc[ptr] = U_RREADIDX16(srcptr);
                tc[ptr | 0x400] = U_RREADIDX16(srcptr + 1);

                ptr = ((tb + (i << 1) + 1) ^ t) & 0x3ff;
                tc[ptr] = U_RREADIDX16(srcptr + 2);
                tc[ptr | 0x400] = U_RREADIDX16(srcptr + 3);

                m_capture.data_block()->put16(U_RREADIDX16(srcptr));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));

                j += dxt;
            }
        }
        // Derive the loaded T extent from the accumulated DXT steps.
        tile[tilenum].th = tl + (j >> 11);
    }
    else
    {
        // dxt == 0: single-line load, fixed even-line word ordering.
        if (tile[tilenum].size != PIXEL_SIZE_32BIT && tile[tilenum].format != FORMAT_YUV)
        {
            for (int32_t i = 0; i < width; i ++)
            {
                int32_t ptr = tb + (i << 2);
                int32_t srcptr = src + (i << 2);
                tc[(ptr ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr);
                tc[((ptr + 1) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 1);
                tc[((ptr + 2) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 2);
                tc[((ptr + 3) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 3);

                m_capture.data_block()->put16(U_RREADIDX16(srcptr));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
            }
        }
        else if (tile[tilenum].format == FORMAT_YUV)
        {
            for (int32_t i = 0; i < width; i ++)
            {
                int32_t ptr = ((tb + (i << 1)) ^ WORD_ADDR_XOR) & 0x3ff;
                int32_t srcptr = src + (i << 2);
                int32_t first = U_RREADIDX16(srcptr);
                int32_t sec = U_RREADIDX16(srcptr + 1);
                tc[ptr] = ((first >> 8) << 8) | (sec >> 8);//UV pair
                tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

                ptr = ((tb + (i << 1) + 1) ^ WORD_ADDR_XOR) & 0x3ff;
                first = U_RREADIDX16(srcptr + 2);
                sec = U_RREADIDX16(srcptr + 3);
                tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
                tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

                m_capture.data_block()->put16(U_RREADIDX16(srcptr));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
            }
        }
        else
        {
            for (int32_t i = 0; i < width; i ++)
            {
                int32_t ptr = ((tb + (i << 1)) ^ WORD_ADDR_XOR) & 0x3ff;
                int32_t srcptr = src + (i << 2);
                tc[ptr] = U_RREADIDX16(srcptr);
                tc[ptr | 0x400] = U_RREADIDX16(srcptr + 1);

                ptr = ((tb + (i << 1) + 1) ^ WORD_ADDR_XOR) & 0x3ff;
                tc[ptr] = U_RREADIDX16(srcptr + 2);
                tc[ptr | 0x400] = U_RREADIDX16(srcptr + 3);

                m_capture.data_block()->put16(U_RREADIDX16(srcptr));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
                m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
            }
        }
        tile[tilenum].th = tl;
    }

    m_capture.data_end();

    m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
    m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
}

// LOAD_TILE (0x34): copy a rectangular region of the texture image into TMEM,
// honoring the tile's line pitch and the per-row odd/even byte/word swap.
// 8/16/32-bit and YUV layouts each have a dedicated path; 32-bit splits the
// high/low halves across the two TMEM banks (|0x400), YUV splits Y from UV.
void n64_rdp::cmd_load_tile(uint64_t *cmd_buf)
{
    //wait("LoadTile");
    n64_tile_t* tile = m_tiles;
    const uint64_t w1 = cmd_buf[0];
    const int32_t tilenum = int32_t(w1 >> 24) & 0x7;

    tile[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
    tile[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
    tile[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
    tile[tilenum].th = int32_t(w1 >> 0) & 0xfff;

    // Drop the 2-bit fraction of the 10.2 coordinates.
    const int32_t sl = tile[tilenum].sl >> 2;
    const int32_t tl = tile[tilenum].tl >> 2;
    const int32_t sh = tile[tilenum].sh >> 2;
    const int32_t th = tile[tilenum].th >> 2;

    const int32_t width = (sh - sl) + 1;
    const int32_t height = (th - tl) + 1;
/*
    int32_t topad;
    if (m_misc_state.m_ti_size < 3)
    {
        topad = (width * m_misc_state.m_ti_size) & 0x7;
    }
    else
    {
        topad = (width << 2) & 0x7;
    }
    topad = 0; // ????
*/

    m_capture.data_begin();

    switch (m_misc_state.m_ti_size)
    {
        case PIXEL_SIZE_8BIT:
        {
            const uint32_t src = m_misc_state.m_ti_address;
            const int32_t tb = tile[tilenum].tmem << 3;
            uint8_t* tc = get_tmem8();

            for (int32_t j = 0; j < height; j++)
            {
                const int32_t tline = tb + ((tile[tilenum].line << 3) * j);
                const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
                // Odd rows of TMEM use the dword-swapped byte ordering.
                const int32_t xorval8 = ((j & 1) ? BYTE_XOR_DWORD_SWAP : BYTE_ADDR_XOR);

                for (int32_t i = 0; i < width; i++)
                {
                    const uint8_t data = U_RREADADDR8(src + s + i);
                    m_capture.data_block()->put8(data);
                    tc[((tline + i) ^ xorval8) & 0xfff] = data;
                }
            }
            break;
        }
        case PIXEL_SIZE_16BIT:
        {
            const uint32_t src = m_misc_state.m_ti_address >> 1;
            uint16_t* tc = get_tmem16();

            if (tile[tilenum].format != FORMAT_YUV)
            {
                for (int32_t j = 0; j < height; j++)
                {
                    const int32_t tb = tile[tilenum].tmem << 2;
                    const int32_t tline = tb + ((tile[tilenum].line << 2) * j);
                    const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
                    const int32_t xorval16 = (j & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;

                    for (int32_t i = 0; i < width; i++)
                    {
                        const uint32_t taddr = (tline + i) ^ xorval16;
                        const uint16_t data = U_RREADIDX16(src + s + i);
                        m_capture.data_block()->put16(data);
                        tc[taddr & 0x7ff] = data;
                    }
                }
            }
            else
            {
                // 16-bit YUV: Y byte to low TMEM bank, UV byte to the high bank (|0x800 in byte terms).
                for (int32_t j = 0; j < height; j++)
                {
                    const int32_t tb = tile[tilenum].tmem << 3;
                    const int32_t tline = tb + ((tile[tilenum].line << 3) * j);
                    const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
                    const int32_t xorval8 = (j & 1) ? BYTE_XOR_DWORD_SWAP : BYTE_ADDR_XOR;

                    for (int32_t i = 0; i < width; i++)
                    {
                        uint32_t taddr = ((tline + i) ^ xorval8) & 0x7ff;
                        uint16_t yuvword = U_RREADIDX16(src + s + i);
                        m_capture.data_block()->put16(yuvword);
                        get_tmem8()[taddr] = yuvword >> 8;
                        get_tmem8()[taddr | 0x800] = yuvword & 0xff;
                    }
                }
            }
            break;
        }
        case PIXEL_SIZE_32BIT:
        {
            const uint32_t src = m_misc_state.m_ti_address >> 2;
            const int32_t tb = (tile[tilenum].tmem << 2);
            uint16_t* tc16 = get_tmem16();

            for (int32_t j = 0; j < height; j++)
            {
                const int32_t tline = tb + ((tile[tilenum].line << 2) * j);

                const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
                const int32_t xorval32cur = (j & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
                for (int32_t i = 0; i < width; i++)
                {
                    uint32_t c = U_RREADIDX32(src + s + i);
                    m_capture.data_block()->put32(c);
                    uint32_t ptr = ((tline + i) ^ xorval32cur) & 0x3ff;
                    tc16[ptr] = c >> 16;           // RG half -> low bank
                    tc16[ptr | 0x400] = c & 0xffff; // BA half -> high bank
                }
            }
            break;
        }

        default: fatalerror("RDP: load_tile: size = %d\n", m_misc_state.m_ti_size);
    }

    m_capture.data_end();

    m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
    m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
}

// SET_TILE (0x35): unpack a full tile descriptor from the command word and
// precompute the derived per-tile SIMD constants (shifts, masks, clamp flags)
// consumed by the texture sampling pipeline.
void n64_rdp::cmd_set_tile(uint64_t *cmd_buf)
{
    //wait("SetTile");
    const uint64_t w1 = cmd_buf[0];
    const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
    n64_tile_t* tex_tile = &m_tiles[tilenum];

    tex_tile->format = int32_t(w1 >> 53) & 0x7;
    tex_tile->size = int32_t(w1 >> 51) & 0x3;
    tex_tile->line = int32_t(w1 >> 41) & 0x1ff;
    tex_tile->tmem = int32_t(w1 >> 32) & 0x1ff;
    tex_tile->palette = int32_t(w1 >> 20) & 0xf;
    tex_tile->ct = int32_t(w1 >> 19) & 0x1;
    tex_tile->mt = int32_t(w1 >> 18) & 0x1;
    tex_tile->mask_t = int32_t(w1 >> 14) & 0xf;
    tex_tile->shift_t = int32_t(w1 >> 10) & 0xf;
    tex_tile->cs = int32_t(w1 >> 9) & 0x1;
    tex_tile->ms = int32_t(w1 >> 8) & 0x1;
    tex_tile->mask_s = int32_t(w1 >> 4) & 0xf;
    tex_tile->shift_s = int32_t(w1 >> 0) & 0xf;

    // Shift values 11..15 encode a left shift of (16 - shift); 0..10 a right shift.
    tex_tile->lshift_s = (tex_tile->shift_s >= 11) ? (16 - tex_tile->shift_s) : 0;
    tex_tile->rshift_s = (tex_tile->shift_s < 11) ? tex_tile->shift_s : 0;
    tex_tile->lshift_t = (tex_tile->shift_t >= 11) ? (16 - tex_tile->shift_t) : 0;
    tex_tile->rshift_t = (tex_tile->shift_t < 11) ? tex_tile->shift_t : 0;
    tex_tile->wrapped_mask_s = (tex_tile->mask_s > 10 ? 10 : tex_tile->mask_s);
    tex_tile->wrapped_mask_t = (tex_tile->mask_t > 10 ?
10 : tex_tile->mask_t);
    tex_tile->wrapped_mask = rgbaint_t(tex_tile->wrapped_mask_s, tex_tile->wrapped_mask_s, tex_tile->wrapped_mask_t, tex_tile->wrapped_mask_t);
    // Clamping is forced whenever the corresponding mask is zero.
    tex_tile->clamp_s = tex_tile->cs || !tex_tile->mask_s;
    tex_tile->clamp_t = tex_tile->ct || !tex_tile->mask_t;
    // All-ones / all-zeroes lane masks for branch-free mirror/mask/clamp selects.
    tex_tile->mm = rgbaint_t(tex_tile->ms ? ~0 : 0, tex_tile->ms ? ~0 : 0, tex_tile->mt ? ~0 : 0, tex_tile->mt ? ~0 : 0);
    tex_tile->invmm = rgbaint_t(tex_tile->ms ? 0 : ~0, tex_tile->ms ? 0 : ~0, tex_tile->mt ? 0 : ~0, tex_tile->mt ? 0 : ~0);
    tex_tile->mask = rgbaint_t(tex_tile->mask_s ? ~0 : 0, tex_tile->mask_s ? ~0 : 0, tex_tile->mask_t ? ~0 : 0, tex_tile->mask_t ? ~0 : 0);
    tex_tile->invmask = rgbaint_t(tex_tile->mask_s ? 0 : ~0, tex_tile->mask_s ? 0 : ~0, tex_tile->mask_t ? 0 : ~0, tex_tile->mask_t ? 0 : ~0);
    tex_tile->lshift = rgbaint_t(tex_tile->lshift_s, tex_tile->lshift_s, tex_tile->lshift_t, tex_tile->lshift_t);
    tex_tile->rshift = rgbaint_t(tex_tile->rshift_s, tex_tile->rshift_s, tex_tile->rshift_t, tex_tile->rshift_t);
    tex_tile->clamp_st = rgbaint_t(tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0);

    // Format fix-ups for combinations that real games set but that the
    // sampler treats as another format.
    if (tex_tile->format == FORMAT_I && tex_tile->size > PIXEL_SIZE_8BIT)
    {
        tex_tile->format = FORMAT_RGBA; // Used by Supercross 2000 (in-game)
    }
    if (tex_tile->format == FORMAT_CI && tex_tile->size > PIXEL_SIZE_8BIT)
    {
        tex_tile->format = FORMAT_RGBA; // Used by Clay Fighter - Sculptor's Cut
    }

    if (tex_tile->format == FORMAT_RGBA && tex_tile->size < PIXEL_SIZE_16BIT)
    {
        tex_tile->format = FORMAT_CI; // Used by Exterem-G2, Madden Football 64, and Rat Attack
    }

    //m_pending_mode_block = true;
}

// FILL_RECTANGLE (0x36): build edge-walker data for an axis-aligned rectangle
// and rasterize it via the shared triangle path (rect=true).
// NOTE(review): the edge-walker words are written into m_temp_rect_data but
// draw_triangle() is handed cmd_buf — presumably draw_triangle reads
// m_temp_rect_data itself when the rect flag is set; confirm against
// draw_triangle's definition.
void n64_rdp::cmd_fill_rect(uint64_t *cmd_buf)
{
    const uint64_t w1 = cmd_buf[0];
    //if(m_pending_mode_block) { wait("Block on pending mode-change"); m_pending_mode_block = false; }
    const uint64_t xh = (w1 >> 12) & 0xfff;
    const uint64_t xl = (w1 >> 44) & 0xfff;
    const uint64_t yh = (w1 >> 0) & 0xfff;
    uint64_t yl = (w1 >> 32) & 0xfff;

    // In FILL/COPY cycle types the rectangle covers whole scanline quads.
    if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY)
    {
        yl |= 3;
    }

    const uint64_t xlint = (xl >> 2) & 0x3ff;
    const uint64_t xhint = (xh >> 2) & 0x3ff;

    uint64_t* ewdata = m_temp_rect_data;
    ewdata[0] = ((uint64_t)0x3680 << 48) | (yl << 32) | (yl << 16) | yh; // command, flipped, tile, yl, ym, yh
    ewdata[1] = (xlint << 48) | ((xl & 3) << 46); // xl, xl frac, dxldy (0), dxldy frac (0)
    ewdata[2] = (xhint << 48) | ((xh & 3) << 46); // xh, xh frac, dxhdy (0), dxhdy frac (0)
    ewdata[3] = (xlint << 48) | ((xl & 3) << 46); // xm, xm frac, dxmdy (0), dxmdy frac (0)
    memset(&ewdata[4], 0, 18 * sizeof(uint64_t));//shade, texture, depth

    draw_triangle(cmd_buf, false, false, false, true);
}

// SET_FOG_COLOR (0x38): color_t::set takes (a, r, g, b); the RGBA32 command
// word is therefore unpacked alpha-first.
void n64_rdp::cmd_set_fog_color(uint64_t *cmd_buf)
{
    const uint64_t w1 = cmd_buf[0];
    m_fog_color.set(uint8_t(w1), uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
}

// SET_BLEND_COLOR (0x39): same (a, r, g, b) unpacking as fog color.
void n64_rdp::cmd_set_blend_color(uint64_t *cmd_buf)
{
    const uint64_t w1 = cmd_buf[0];
    m_blend_color.set(uint8_t(w1), uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
}

// SET_PRIM_COLOR (0x3a): latches min LOD level, primitive LOD fraction
// (replicated across all four lanes), the primitive color and a broadcast
// copy of its alpha.
void n64_rdp::cmd_set_prim_color(uint64_t *cmd_buf)
{
    const uint64_t w1 = cmd_buf[0];
    m_misc_state.m_min_level = uint32_t(w1 >> 40) & 0x1f;
    const uint8_t prim_lod_fraction(w1 >> 32);
    m_prim_lod_fraction.set(prim_lod_fraction, prim_lod_fraction, prim_lod_fraction, prim_lod_fraction);

    const uint8_t alpha(w1);
    m_prim_color.set(alpha, uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
    m_prim_alpha.set(alpha, alpha, alpha, alpha);
}

// SET_ENV_COLOR (0x3b): environment color plus broadcast alpha.
void n64_rdp::cmd_set_env_color(uint64_t *cmd_buf)
{
    const uint64_t w1 = cmd_buf[0];
    const uint8_t alpha(w1);
    m_env_color.set(alpha, uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
    m_env_alpha.set(alpha, alpha, alpha, alpha);
}

// SET_COMBINE (0x3c): unpack the color-combiner mux selects for both cycles
// (sub A/B, multiply, add — RGB and alpha independently).
void n64_rdp::cmd_set_combine(uint64_t *cmd_buf)
{
    const uint64_t w1 = cmd_buf[0];
    m_combine.sub_a_rgb0 = uint32_t(w1 >> 52) & 0xf;
    m_combine.mul_rgb0 = uint32_t(w1 >> 47) & 0x1f;
    m_combine.sub_a_a0 = uint32_t(w1 >> 44) & 0x7;
    m_combine.mul_a0 = uint32_t(w1 >> 41) & 0x7;
    m_combine.sub_a_rgb1 = uint32_t(w1 >> 37) & 0xf;
    m_combine.mul_rgb1 = uint32_t(w1 >> 32) & 0x1f;

    m_combine.sub_b_rgb0 = uint32_t(w1 >> 28) & 0xf;
    m_combine.sub_b_rgb1 = uint32_t(w1 >> 24) & 0xf;
    m_combine.sub_a_a1 = uint32_t(w1 >> 21) & 0x7;
    m_combine.mul_a1 = uint32_t(w1 >> 18) & 0x7;
    m_combine.add_rgb0 = uint32_t(w1 >> 15) & 0x7;
    m_combine.sub_b_a0 = uint32_t(w1 >> 12) & 0x7;
    m_combine.add_a0 = uint32_t(w1 >> 9) & 0x7;
    m_combine.add_rgb1 = uint32_t(w1 >> 6) & 0x7;
    m_combine.sub_b_a1 = uint32_t(w1 >> 3) & 0x7;
    m_combine.add_a1 = uint32_t(w1 >> 0) & 0x7;

    /*static const char *s_suba_rgb[16] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "ONE", "NOISE", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" };
    static const char *s_subb_rgb[16] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "KEYC", "K4", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" };
    static const char *s_mul_rgb[32] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "KEYS", "CombinedA", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "LODF", "PLODF", "K5",
                                         "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" };
    static const char *s_add_rgb[8] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "ONE", "ZERO" };
    static const char *s_sub_a[16] = { "CombinedA", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "ONE", "ZERO" };
    static const char *s_mul_a[16] = { "LODF", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "PLODF", "ZERO" };
    printf("Cycle 0, Color: (%s - %s) * %s + %s\n", s_suba_rgb[m_combine.sub_a_rgb0], s_subb_rgb[m_combine.sub_b_rgb0], s_mul_rgb[m_combine.mul_rgb0], s_add_rgb[m_combine.add_rgb0]);
    printf("Cycle 0, Alpha: (%s - %s) * %s + %s\n", s_sub_a[m_combine.sub_a_a0], s_sub_a[m_combine.sub_b_a0], s_mul_a[m_combine.mul_a0], s_add_rgb[m_combine.add_a0]);
    printf("Cycle 1, Color: (%s - %s) * %s + %s\n", s_suba_rgb[m_combine.sub_a_rgb1], s_subb_rgb[m_combine.sub_b_rgb1], s_mul_rgb[m_combine.mul_rgb1], s_add_rgb[m_combine.add_rgb1]);
    printf("Cycle 1, Alpha: (%s - %s) * %s + %s\n\n", s_sub_a[m_combine.sub_a_a1], s_sub_a[m_combine.sub_b_a1], s_mul_a[m_combine.mul_a1], s_add_rgb[m_combine.add_a1]);*/
}

// SET_TEXTURE_IMAGE (0x3d): latch format/size/width/address of the texture
// image that LOAD_TILE / LOAD_BLOCK / LOAD_TLUT read from.
void n64_rdp::cmd_set_texture_image(uint64_t *cmd_buf)
{
    const uint64_t w1 = cmd_buf[0];
    m_misc_state.m_ti_format = uint32_t(w1 >> 53) & 0x7;
    m_misc_state.m_ti_size = uint32_t(w1 >> 51) & 0x3;
    m_misc_state.m_ti_width = (uint32_t(w1 >> 32) & 0x3ff) + 1;
    m_misc_state.m_ti_address = uint32_t(w1) & 0x01ffffff;
}

// SET_MASK_IMAGE (0x3e): latch the Z-buffer base address.
void n64_rdp::cmd_set_mask_image(uint64_t *cmd_buf)
{
    //wait("SetMaskImage");
    const uint64_t w1 = cmd_buf[0];
    m_misc_state.m_zb_address = uint32_t(w1) & 0x01ffffff;
}

// SET_COLOR_IMAGE (0x3f): latch format/size/width/address of the framebuffer.
void n64_rdp::cmd_set_color_image(uint64_t *cmd_buf)
{
    //wait("SetColorImage");
    const uint64_t w1 = cmd_buf[0];
    m_misc_state.m_fb_format = uint32_t(w1 >> 53) & 0x7;
    m_misc_state.m_fb_size = uint32_t(w1 >> 51) & 0x3;
    m_misc_state.m_fb_width = (uint32_t(w1 >> 32) & 0x3ff) + 1;
    m_misc_state.m_fb_address = uint32_t(w1) & 0x01ffffff;
}

/*****************************************************************************/

// NO_OP (0x00).
void n64_rdp::cmd_noop(uint64_t *cmd_buf)
{
    // Do nothing
}


// Drain the RDP command FIFO between m_current and m_end: gather each
// command's 64-bit words into a local buffer (commands are variable length,
// per s_rdp_command_length), mirror them into the capture stream, optionally
// disassemble for logging, then dispatch on the 6-bit opcode.
void n64_rdp::process_command_list()
{
    int32_t length = m_end - m_current;

    if (length <= 0)
    {
        m_current = m_end;
        return;
    }

    //printf("length: %08x\n", (uint32_t)length); fflush(stdout);

    set_status(get_status() &~ DP_STATUS_FREEZE);

    // Largest command (shaded, textured, z-buffered triangle) is 176 bytes -> 22 words;
    // buffer is sized generously.
    uint64_t curr_cmd_buf[176];

    while (m_current < m_end)
    {
        uint32_t start = m_current;
        uint32_t buf_index = 0;
        curr_cmd_buf[buf_index++] = read_data(m_current & 0x1fffffff);
        uint8_t cmd = (curr_cmd_buf[0] >> 56) & 0x3f;

        if ((m_end - m_current) < s_rdp_command_length[cmd])
        {
            // Not enough data, continue waiting.
            break;
        }
        m_current += 8;

        // Fetch the remaining words of this command.
        while ((buf_index << 3) < s_rdp_command_length[cmd])
        {
            curr_cmd_buf[buf_index++] = read_data(m_current & 0x1fffffff);
            m_current += 8;
        }

        m_capture.command(&curr_cmd_buf[0], s_rdp_command_length[cmd] / 8);

        if (LOG_RDP_EXECUTION)
        {
            char string[4000];
            disassemble(curr_cmd_buf, string);

            fprintf(rdp_exec, "%08X: %08X%08X %s\n", start, (uint32_t)(curr_cmd_buf[0] >> 32), (uint32_t)curr_cmd_buf[0], string);
            fflush(rdp_exec);
        }

        // execute the command
        switch(cmd)
        {
            case 0x00: cmd_noop(curr_cmd_buf); break;

            // 0x08-0x0f: triangle variants (shade, texture, z-buffer flags).
            case 0x08: triangle(curr_cmd_buf, false, false, false); break;
            case 0x09: triangle(curr_cmd_buf, false, false, true); break;
            case 0x0a: triangle(curr_cmd_buf, false, true, false); break;
            case 0x0b: triangle(curr_cmd_buf, false, true, true); break;
            case 0x0c: triangle(curr_cmd_buf, true, false, false); break;
            case 0x0d: triangle(curr_cmd_buf, true, false, true); break;
            case 0x0e: triangle(curr_cmd_buf, true, true, false); break;
            case 0x0f: triangle(curr_cmd_buf, true, true, true);
break;

            case 0x24: cmd_tex_rect(curr_cmd_buf); break;
            case 0x25: cmd_tex_rect_flip(curr_cmd_buf); break;

            case 0x26: cmd_sync_load(curr_cmd_buf); break;
            case 0x27: cmd_sync_pipe(curr_cmd_buf); break;
            case 0x28: cmd_sync_tile(curr_cmd_buf); break;
            case 0x29: cmd_sync_full(curr_cmd_buf); break;

            case 0x2a: cmd_set_key_gb(curr_cmd_buf); break;
            case 0x2b: cmd_set_key_r(curr_cmd_buf); break;

            case 0x2c: cmd_set_convert(curr_cmd_buf); break;
            case 0x3c: cmd_set_combine(curr_cmd_buf); break;
            case 0x2d: cmd_set_scissor(curr_cmd_buf); break;
            case 0x2e: cmd_set_prim_depth(curr_cmd_buf); break;
            case 0x2f: cmd_set_other_modes(curr_cmd_buf);break;

            case 0x30: cmd_load_tlut(curr_cmd_buf); break;
            case 0x33: cmd_load_block(curr_cmd_buf); break;
            case 0x34: cmd_load_tile(curr_cmd_buf); break;

            case 0x32: cmd_set_tile_size(curr_cmd_buf); break;
            case 0x35: cmd_set_tile(curr_cmd_buf); break;

            case 0x36: cmd_fill_rect(curr_cmd_buf); break;

            case 0x37: cmd_set_fill_color32(curr_cmd_buf); break;
            case 0x38: cmd_set_fog_color(curr_cmd_buf); break;
            case 0x39: cmd_set_blend_color(curr_cmd_buf);break;
            case 0x3a: cmd_set_prim_color(curr_cmd_buf); break;
            case 0x3b: cmd_set_env_color(curr_cmd_buf); break;

            case 0x3d: cmd_set_texture_image(curr_cmd_buf); break;
            case 0x3e: cmd_set_mask_image(curr_cmd_buf); break;
            case 0x3f: cmd_set_color_image(curr_cmd_buf); break;
        }
    };
}

/*****************************************************************************/

// Constructor: wire up RDRAM/DMEM pointers, reset command-FIFO and status
// state, and build the lookup tables the per-pixel pipeline depends on
// (Z compression/decompression, coverage-mask derivatives, 9-bit clamp,
// 5->8-bit color replication, dz normalization) plus the per-pixel-size
// member-function dispatch tables.
n64_rdp::n64_rdp(n64_state &state, uint32_t* rdram, uint32_t* dmem) : poly_manager(state.machine())
{
    ignore = false;
    dolog = false;

    m_rdram = rdram;
    m_dmem = dmem;

    m_aux_buf_ptr = 0;
    m_aux_buf = nullptr;
    m_pipe_clean = true;

    m_pending_mode_block = false;

    m_start = 0;
    m_end = 0;
    m_current = 0;
    m_status = 0x88;

    m_one.set(0xff, 0xff, 0xff, 0xff);
    m_zero.set(0, 0, 0, 0);

    m_tmem = nullptr;

    m_machine = nullptr;
    m_n64_periphs = nullptr;

    //memset(m_hidden_bits, 3, 8388608);

    m_prim_lod_fraction.set(0, 0, 0, 0);
    z_build_com_table();

    // NOTE(review): sized by sizeof(uint32_t) while cmd_fill_rect treats
    // m_temp_rect_data as uint64_t[] — verify against the declaration that
    // this clear covers the intended extent.
    memset(m_temp_rect_data, 0, sizeof(uint32_t) * 0x1000);

    // Decompress all 14-bit Z codes (3-bit exponent, 11-bit mantissa).
    for (int32_t i = 0; i < 0x4000; i++)
    {
        uint32_t exponent = (i >> 11) & 7;
        uint32_t mantissa = i & 0x7ff;
        m_z_complete_dec_table[i] = ((mantissa << m_z_dec_table[exponent].shift) + m_z_dec_table[exponent].add) & 0x3fffff;
    }

    precalc_cvmask_derivatives();

    // 9-bit signed -> 8-bit clamp table (bits 8:7 select pass/saturate/zero).
    for(int32_t i = 0; i < 0x200; i++)
    {
        switch((i >> 7) & 3)
        {
            case 0:
            case 1:
                s_special_9bit_clamptable[i] = i & 0xff;
                break;
            case 2:
                s_special_9bit_clamptable[i] = 0xff;
                break;
            case 3:
                s_special_9bit_clamptable[i] = 0;
                break;
        }
    }

    // 5-bit -> 8-bit channel replication.
    for(int32_t i = 0; i < 32; i++)
    {
        m_replicated_rgba[i] = (i << 3) | ((i >> 2) & 7);
    }

    for(int32_t i = 0; i < 0x10000; i++)
    {
        m_dzpix_normalize[i] = (uint16_t)normalize_dzpix(i & 0xffff);
    }

    m_compute_cvg[0] = &n64_rdp::compute_cvg_noflip;
    m_compute_cvg[1] = &n64_rdp::compute_cvg_flip;

    // Pixel I/O handlers indexed by framebuffer pixel size (4/8/16/32-bit).
    m_write_pixel[0] = &n64_rdp::write_pixel4;
    m_write_pixel[1] = &n64_rdp::write_pixel8;
    m_write_pixel[2] = &n64_rdp::write_pixel16;
    m_write_pixel[3] = &n64_rdp::write_pixel32;

    m_read_pixel[0] = &n64_rdp::read_pixel4;
    m_read_pixel[1] = &n64_rdp::read_pixel8;
    m_read_pixel[2] = &n64_rdp::read_pixel16;
    m_read_pixel[3] = &n64_rdp::read_pixel32;

    m_copy_pixel[0] = &n64_rdp::copy_pixel4;
    m_copy_pixel[1] = &n64_rdp::copy_pixel8;
    m_copy_pixel[2] = &n64_rdp::copy_pixel16;
    m_copy_pixel[3] = &n64_rdp::copy_pixel32;

    m_fill_pixel[0] = &n64_rdp::fill_pixel4;
    m_fill_pixel[1] = &n64_rdp::fill_pixel8;
    m_fill_pixel[2] = &n64_rdp::fill_pixel16;
    m_fill_pixel[3] = &n64_rdp::fill_pixel32;
}

// Clip the span range [start,end] against the scissor, snapshot all mutable
// RDP state into the per-primitive rdp_poly_state object (the render threads
// must not see later state changes), then hand the spans to poly_manager with
// the span renderer matching the current cycle type.  Blocks until rendering
// completes.
void n64_rdp::render_spans(int32_t start, int32_t end, int32_t tilenum, bool flip, extent_t* spans, bool rect, rdp_poly_state* object)
{
    const int32_t clipy1 = m_scissor.m_yh;
    const int32_t clipy2 = m_scissor.m_yl;
    const rectangle clip(m_scissor.m_xh, m_scissor.m_xl, m_scissor.m_yh, m_scissor.m_yl);

    int32_t offset = 0;

    if (clipy2 <= 0)
    {
        return;
    }

    // Clamp the vertical range to the scissor, remembering how many leading
    // spans were clipped away so the extent array can be offset to match.
    if (start < clipy1)
    {
        offset = clipy1 - start;
        start = clipy1;
    }
    if (start >= clipy2)
    {
        offset = start - (clipy2 - 1);
        start = clipy2 - 1;
    }
    if (end < clipy1)
    {
        end = clipy1;
    }
    if (end >= clipy2)
    {
        end = clipy2 - 1;
    }

    // Snapshot current state for the worker threads.
    object->m_rdp = this;
    memcpy(&object->m_misc_state, &m_misc_state, sizeof(misc_state_t));
    memcpy(&object->m_other_modes, &m_other_modes, sizeof(other_modes_t));
    memcpy(&object->m_span_base, &m_span_base, sizeof(span_base_t));
    memcpy(&object->m_scissor, &m_scissor, sizeof(rectangle_t));
    memcpy(&object->m_tiles, &m_tiles, 8 * sizeof(n64_tile_t));
    object->tilenum = tilenum;
    object->flip = flip;
    object->m_fill_color = m_fill_color;
    object->rect = rect;

    switch(m_other_modes.cycle_type)
    {
        case CYCLE_TYPE_1:
            render_extents<8>(clip, render_delegate(&n64_rdp::span_draw_1cycle, this), start, (end - start) + 1, spans + offset);
            break;

        case CYCLE_TYPE_2:
            render_extents<8>(clip, render_delegate(&n64_rdp::span_draw_2cycle, this), start, (end - start) + 1, spans + offset);
            break;

        case CYCLE_TYPE_COPY:
            render_extents<8>(clip, render_delegate(&n64_rdp::span_draw_copy, this), start, (end - start) + 1, spans + offset);
            break;

        case CYCLE_TYPE_FILL:
            render_extents<8>(clip, render_delegate(&n64_rdp::span_draw_fill, this), start, (end - start) + 1, spans + offset);
            break;
    }
    wait("render spans");
}

// Clamp interpolated shade RGBA into the span-aux shade color/alpha and
// saturate Z: bits 18:17 of the 19-bit value select pass-through (mask to
// 18 bits) or saturation to the maximum.
void n64_rdp::rgbaz_clip(int32_t sr, int32_t sg, int32_t sb, int32_t sa, int32_t* sz, rdp_span_aux* userdata)
{
    userdata->m_shade_color.set(sa, sr, sg, sb);
    userdata->m_shade_color.clamp_and_clear(0xfffffe00);
    uint32_t a = userdata->m_shade_color.get_a();
    userdata->m_shade_alpha.set(a, a, a, a);

    int32_t zanded = (*sz) & 0x60000;

    zanded >>= 17;
    switch(zanded)
    {
        case 0: *sz &= 0x3ffff; break;
        case 1: *sz &= 0x3ffff; break;
        case 2: *sz = 0x3ffff; break;
        case 3: *sz = 0x3ffff; break;
    }
}

// Center the interpolated RGBA/Z attributes on the pixel: for fully covered
// pixels (coverage == 8) just rescale; otherwise nudge each attribute by the
// signed per-pixel deltas scaled by the sub-pixel offsets offx/offy.
void n64_rdp::rgbaz_correct_triangle(int32_t offx, int32_t offy, int32_t* r, int32_t* g, int32_t* b, int32_t* a, int32_t* z, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    if (userdata->m_current_pix_cvg == 8)
    {
        *r >>= 2;
        *g >>= 2;
        *b >>= 2;
        *a >>= 2;
        *z = (*z >> 3) & 0x7ffff;
    }
    else
    {
        int32_t summand_xr = offx * SIGN13(object.m_span_base.m_span_dr >> 14);
        int32_t summand_yr = offy * SIGN13(object.m_span_base.m_span_drdy >> 14);
        int32_t summand_xb = offx * SIGN13(object.m_span_base.m_span_db >> 14);
        int32_t summand_yb = offy * SIGN13(object.m_span_base.m_span_dbdy >> 14);
        int32_t summand_xg = offx * SIGN13(object.m_span_base.m_span_dg >> 14);
        int32_t summand_yg = offy * SIGN13(object.m_span_base.m_span_dgdy >> 14);
        int32_t summand_xa = offx * SIGN13(object.m_span_base.m_span_da >> 14);
        int32_t summand_ya = offy * SIGN13(object.m_span_base.m_span_dady >> 14);

        int32_t summand_xz = offx * SIGN22(object.m_span_base.m_span_dz >> 10);
        int32_t summand_yz = offy * SIGN22(object.m_span_base.m_span_dzdy >> 10);

        *r = ((*r << 2) + summand_xr + summand_yr) >> 4;
        *g = ((*g << 2) + summand_xg + summand_yg) >> 4;
        *b = ((*b << 2) + summand_xb + summand_yb) >> 4;
        *a = ((*a << 2) + summand_xa + summand_ya) >> 4;
        *z = (((*z << 2) + summand_xz + summand_yz) >> 5) & 0x7ffff;
    }
}

// 4-bit framebuffer write: unimplemented (debug tracing only).
void n64_rdp::write_pixel4(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    // Not yet implemented
#if DEBUG_RDP_PIXEL
    if (s_debug_drawing)
    {
        uint32_t y = curpixel / object.m_misc_state.m_fb_width;
        uint32_t x = curpixel % object.m_misc_state.m_fb_width;
        if (x == 157 && y == 89)
        {
            printf("Writing 4-bit final color: %08x\n", (uint32_t)color.to_rgba());
        }
    }
#endif
}

// 8-bit framebuffer write: pack R into the high 5 bits and the top of G into
// the low 3; zero values are skipped.
void n64_rdp::write_pixel8(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    const uint8_t c = (color.get_r() &
0xf8) | ((color.get_g() & 0xf8) >> 5);
    if (c != 0)
        RWRITEADDR8(object.m_misc_state.m_fb_address + curpixel, c);

#if DEBUG_RDP_PIXEL
    if (s_debug_drawing)
    {
        uint32_t y = curpixel / object.m_misc_state.m_fb_width;
        uint32_t x = curpixel % object.m_misc_state.m_fb_width;
        if (x == 157 && y == 89)
        {
            printf("Writing 8-bit final color: %08x\n", (uint32_t)color.to_rgba());
        }
    }
#endif
}

// 16-bit (RGBA5551) framebuffer write.  color_on_cvg keeps the existing
// color on non-wrapped pixels; otherwise the color is packed 5:5:5.  The
// cvg_dest mode selects how pixel/memory coverage combine: 0 = clamp (blend)
// or wrap (no blend), 1 = wrap, 2 = force full, 3 = preserve memory coverage.
// Coverage is split between the pixel's low bit and the hidden-bits RAM.
void n64_rdp::write_pixel16(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    const uint32_t fb = (object.m_misc_state.m_fb_address >> 1) + curpixel;

    uint16_t finalcolor;
    if (object.m_other_modes.color_on_cvg && !userdata->m_pre_wrap)
    {
        finalcolor = RREADIDX16(fb) & 0xfffe;
    }
    else
    {
        color.shr_imm(3);
        finalcolor = (color.get_r() << 11) | (color.get_g() << 6) | (color.get_b() << 1);
    }

#if DEBUG_RDP_PIXEL
    if (s_debug_drawing)
    {
        uint32_t y = curpixel / object.m_misc_state.m_fb_width;
        uint32_t x = curpixel % object.m_misc_state.m_fb_width;
        if (x == 157 && y == 89)
        {
            printf("Writing 16-bit final color: %04x\n", finalcolor);
        }
    }
#endif

    switch (object.m_other_modes.cvg_dest)
    {
        case 0:
            if (userdata->m_blend_enable)
            {
                uint32_t finalcvg = userdata->m_current_pix_cvg + userdata->m_current_mem_cvg;
                if (finalcvg & 8)
                {
                    finalcvg = 7;   // clamp coverage overflow
                }
                RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
                HWRITEADDR8(fb, finalcvg & 3);
            }
            else
            {
                const uint32_t finalcvg = (userdata->m_current_pix_cvg - 1) & 7;
                RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
                HWRITEADDR8(fb, finalcvg & 3);
            }
            break;
        case 1:
        {
            const uint32_t finalcvg = (userdata->m_current_pix_cvg + userdata->m_current_mem_cvg) & 7;
            RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
            HWRITEADDR8(fb, finalcvg & 3);
            break;
        }
        case 2:
            RWRITEIDX16(fb, finalcolor | 1);
            HWRITEADDR8(fb, 3);
            break;
        case 3:
            RWRITEIDX16(fb, finalcolor | (userdata->m_current_mem_cvg >> 2));
            HWRITEADDR8(fb, userdata->m_current_mem_cvg & 3);
            break;
    }
}

// 32-bit (RGBA8888) framebuffer write: coverage occupies bits 7:5 of the
// alpha byte; cvg_dest modes mirror the 16-bit path.
void n64_rdp::write_pixel32(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    const uint32_t fb = (object.m_misc_state.m_fb_address >> 2) + curpixel;

    uint32_t finalcolor;
    if (object.m_other_modes.color_on_cvg && !userdata->m_pre_wrap)
    {
        finalcolor = RREADIDX32(fb) & 0xffffff00;
    }
    else
    {
        finalcolor = (color.get_r() << 24) | (color.get_g() << 16) | (color.get_b() << 8);
    }

#if DEBUG_RDP_PIXEL
    if (s_debug_drawing)
    {
        uint32_t y = curpixel / object.m_misc_state.m_fb_width;
        uint32_t x = curpixel % object.m_misc_state.m_fb_width;
        if (x == 157 && y == 89)
        {
            printf("Writing 32-bit final color: %08x\n", finalcolor);
        }
    }
#endif

    switch (object.m_other_modes.cvg_dest)
    {
        case 0:
            if (userdata->m_blend_enable)
            {
                uint32_t finalcvg = userdata->m_current_pix_cvg + userdata->m_current_mem_cvg;
                if (finalcvg & 8)
                {
                    finalcvg = 7;
                }

                RWRITEIDX32(fb, finalcolor | (finalcvg << 5));
            }
            else
            {
                RWRITEIDX32(fb, finalcolor | (((userdata->m_current_pix_cvg - 1) & 7) << 5));
            }
            break;
        case 1:
            RWRITEIDX32(fb, finalcolor | (((userdata->m_current_pix_cvg + userdata->m_current_mem_cvg) & 7) << 5));
            break;
        case 2:
            RWRITEIDX32(fb, finalcolor | 0xE0);
            break;
        case 3:
            RWRITEIDX32(fb, finalcolor | (userdata->m_current_mem_cvg << 5));
            break;
    }
}

// 4-bit framebuffer read: unimplemented; report black with full coverage.
void n64_rdp::read_pixel4(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    userdata->m_memory_color.set(0, 0, 0, 0);
    userdata->m_current_mem_cvg = 7;
}

// 8-bit framebuffer read: expand the 5-bit R and 3-bit G fields to 8 bits,
// opaque alpha, full coverage.
void n64_rdp::read_pixel8(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    const uint8_t fbyte = RREADADDR8(object.m_misc_state.m_fb_address + curpixel);
    const uint8_t r8 = (fbyte & 0xf8) | (fbyte >> 5);
    uint8_t g8 = (fbyte & 0x07);
    g8 |= g8 << 3;
    g8 |= g8 << 6;
    userdata->m_memory_color.set(0, r8, g8, 0);
    userdata->m_memory_color.set_a(0xff);
    userdata->m_current_mem_cvg = 7;
}

// 16-bit framebuffer read: unpack RGBA5551 into the memory color.  With
// image_read_en set, memory coverage is rebuilt from the pixel's low bit plus
// the hidden bits.
void n64_rdp::read_pixel16(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    const uint16_t fword = RREADIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel);

    userdata->m_memory_color.set(0, GETHICOL(fword), GETMEDCOL(fword), GETLOWCOL(fword));
    if (object.m_other_modes.image_read_en)
    {
        uint8_t hbyte = HREADADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel);
        // NOTE(review): alpha is derived from m_current_mem_cvg BEFORE it is
        // recomputed below, i.e. from the previous pixel's coverage — confirm
        // this ordering against upstream MAME before changing it.
        userdata->m_memory_color.set_a(userdata->m_current_mem_cvg << 5);
        userdata->m_current_mem_cvg = ((fword & 1) << 2) | (hbyte & 3);
    }
    else
    {
        userdata->m_memory_color.set_a(0xff);
        userdata->m_current_mem_cvg = 7;
    }
}

// 32-bit framebuffer read: RGBA8888; coverage comes from bits 7:5 of alpha
// when image_read_en is set.
void n64_rdp::read_pixel32(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object)
{
    const uint32_t mem = RREADIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel);
    userdata->m_memory_color.set(0, (mem >> 24) & 0xff, (mem >> 16) & 0xff, (mem >> 8) & 0xff);
    if (object.m_other_modes.image_read_en)
    {
        userdata->m_memory_color.set_a(mem & 0xff);
        userdata->m_current_mem_cvg = (mem >> 5) & 7;
    }
    else
    {
        userdata->m_memory_color.set_a(0xff);
        userdata->m_current_mem_cvg = 7;
    }
}

// COPY-mode pixel writers: raw texel-to-framebuffer transfer, coverage
// derived from alpha (non-zero alpha = full coverage).

void n64_rdp::copy_pixel4(uint32_t curpixel, color_t& color, const rdp_poly_state &object)
{
    // Not yet implemented
}

void n64_rdp::copy_pixel8(uint32_t curpixel, color_t& color, const rdp_poly_state &object)
{
    const uint8_t c = (color.get_r() & 0xf8) | ((color.get_g() & 0xf8) >> 5);
    if (c != 0)
        RWRITEADDR8(object.m_misc_state.m_fb_address + curpixel, c);
}

void n64_rdp::copy_pixel16(uint32_t curpixel, color_t& color, const rdp_poly_state &object)
{
    const uint32_t current_pix_cvg = color.get_a() ? 7 : 0;
    const uint8_t r = color.get_r(); // Vectorize me
    const uint8_t g = color.get_g();
    const uint8_t b = color.get_b();
    RWRITEIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel, ((r >> 3) << 11) | ((g >> 3) << 6) | ((b >> 3) << 1) | ((current_pix_cvg >> 2) & 1));
    HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel, current_pix_cvg & 3);
}

void n64_rdp::copy_pixel32(uint32_t curpixel, color_t& color, const rdp_poly_state &object)
{
    const uint32_t current_pix_cvg = color.get_a() ? 7 : 0;
    const uint8_t r = color.get_r(); // Vectorize me
    const uint8_t g = color.get_g();
    const uint8_t b = color.get_b();
    RWRITEIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel, (r << 24) | (g << 16) | (b << 8) | (current_pix_cvg << 5));
}

// FILL-mode pixel writers: replicate the 32-bit fill color register across
// the framebuffer; which half/byte of the register lands on a pixel depends
// on its alignment.

void n64_rdp::fill_pixel4(uint32_t curpixel, const rdp_poly_state &object)
{
    // Not yet implemented
}

void n64_rdp::fill_pixel8(uint32_t curpixel, const rdp_poly_state &object)
{
    const uint8_t byte_shift = ((curpixel & 3) ^ BYTE_ADDR_XOR) << 3;
    RWRITEADDR8(object.m_misc_state.m_fb_address + curpixel, (uint8_t)(object.m_fill_color >> byte_shift));
}

void n64_rdp::fill_pixel16(uint32_t curpixel, const rdp_poly_state &object)
{
    uint16_t val;
    if (curpixel & 1)
    {
        val = object.m_fill_color & 0xffff;
    }
    else
    {
        val = (object.m_fill_color >> 16) & 0xffff;
    }
    RWRITEIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel, val);
    // Hidden bits mirror the pixel's coverage LSB.
    HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel, ((val & 1) << 1) | (val & 1));
}

void n64_rdp::fill_pixel32(uint32_t curpixel, const rdp_poly_state &object)
{
    RWRITEIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel, object.m_fill_color);
    HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + (curpixel << 1), (object.m_fill_color & 0x10000) ? 3 : 0);
    HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + (curpixel << 1) + 1, (object.m_fill_color & 0x1) ?
3 : 0); +} + +void n64_rdp::span_draw_1cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid) +{ + assert(object.m_misc_state.m_fb_size < 4); + + const int32_t clipx1 = object.m_scissor.m_xh; + const int32_t clipx2 = object.m_scissor.m_xl; + const int32_t tilenum = object.tilenum; + const bool flip = object.flip; + + span_param_t r; r.w = extent.param[SPAN_R].start; + span_param_t g; g.w = extent.param[SPAN_G].start; + span_param_t b; b.w = extent.param[SPAN_B].start; + span_param_t a; a.w = extent.param[SPAN_A].start; + span_param_t z; z.w = extent.param[SPAN_Z].start; + span_param_t s; s.w = extent.param[SPAN_S].start; + span_param_t t; t.w = extent.param[SPAN_T].start; + span_param_t w; w.w = extent.param[SPAN_W].start; + + const uint32_t zb = object.m_misc_state.m_zb_address >> 1; + const uint32_t zhb = object.m_misc_state.m_zb_address; + +#ifdef PTR64 + assert(extent.userdata != (const void *)0xcccccccccccccccc); +#else + assert(extent.userdata != (const void *)0xcccccccc); +#endif + rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata; + + m_tex_pipe.calculate_clamp_diffs(tilenum, userdata, object); + + const bool partialreject = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_inv_pixel_color && userdata->m_color_inputs.blender1b_a[0] == &userdata->m_pixel_color); + const int32_t sel0 = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_memory_color) ? 
1 : 0; + + int32_t drinc, dginc, dbinc, dainc; + int32_t dzinc, dzpix; + int32_t dsinc, dtinc, dwinc; + int32_t xinc; + + if (!flip) + { + drinc = -object.m_span_base.m_span_dr; + dginc = -object.m_span_base.m_span_dg; + dbinc = -object.m_span_base.m_span_db; + dainc = -object.m_span_base.m_span_da; + dzinc = -object.m_span_base.m_span_dz; + dsinc = -object.m_span_base.m_span_ds; + dtinc = -object.m_span_base.m_span_dt; + dwinc = -object.m_span_base.m_span_dw; + xinc = -1; + } + else + { + drinc = object.m_span_base.m_span_dr; + dginc = object.m_span_base.m_span_dg; + dbinc = object.m_span_base.m_span_db; + dainc = object.m_span_base.m_span_da; + dzinc = object.m_span_base.m_span_dz; + dsinc = object.m_span_base.m_span_ds; + dtinc = object.m_span_base.m_span_dt; + dwinc = object.m_span_base.m_span_dw; + xinc = 1; + } + + const int32_t fb_index = object.m_misc_state.m_fb_width * scanline; + + const int32_t xstart = extent.startx; + const int32_t xend = userdata->m_unscissored_rx; + const int32_t xend_scissored = extent.stopx; + + int32_t x = xend; + + const int32_t length = flip ? (xstart - xend) : (xend - xstart); + + if(object.m_other_modes.z_source_sel) + { + z.w = (uint32_t)object.m_misc_state.m_primitive_z << 16; + dzpix = object.m_misc_state.m_primitive_dz; + dzinc = 0; + } + else + { + dzpix = object.m_span_base.m_span_dzpix; + } + + if (object.m_misc_state.m_fb_size > 4) + fatalerror("unsupported m_fb_size %d\n", object.m_misc_state.m_fb_size); + + const int32_t blend_index = (object.m_other_modes.alpha_cvg_select ? 2 : 0) | ((object.m_other_modes.rgb_dither_sel < 3) ? 
1 : 0); + const int32_t cycle0 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp0 & 1); + + int32_t sss = 0; + int32_t sst = 0; + + if (object.m_other_modes.persp_tex_en) + { + tc_div(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst); + } + else + { + tc_div_no_perspective(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst); + } + + userdata->m_start_span = true; + for (int32_t j = 0; j <= length; j++) + { + int32_t sr = r.w >> 14; + int32_t sg = g.w >> 14; + int32_t sb = b.w >> 14; + int32_t sa = a.w >> 14; + int32_t sz = (z.w >> 10) & 0x3fffff; + const bool valid_x = (flip) ? (x >= xend_scissored) : (x <= xend_scissored); + + if (x >= clipx1 && x < clipx2 && valid_x) + { + uint8_t offx, offy; + lookup_cvmask_derivatives(userdata->m_cvg[x], &offx, &offy, userdata); + + m_tex_pipe.lod_1cycle(&sss, &sst, s.w, t.w, w.w, dsinc, dtinc, dwinc, userdata, object); + + rgbaz_correct_triangle(offx, offy, &sr, &sg, &sb, &sa, &sz, userdata, object); + rgbaz_clip(sr, sg, sb, sa, &sz, userdata); + + ((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, &userdata->m_texel0_color, sss, sst, tilenum, 0, userdata, object/*, false*/); + uint32_t t0a = userdata->m_texel0_color.get_a(); + userdata->m_texel0_alpha.set(t0a, t0a, t0a, t0a); + userdata->m_texel1_color = userdata->m_texel0_color; + userdata->m_texel1_alpha = userdata->m_texel0_alpha; + + const uint8_t noise = machine().rand() << 3; // Not accurate + userdata->m_noise_color.set(0, noise, noise, noise); + + rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_rgbsub_a[1]); + rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_rgbsub_b[1]); + rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_rgbmul[1]); + rgbaint_t rgbadd(*userdata->m_color_inputs.combiner_rgbadd[1]); + + rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[1]); + rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[1]); + rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[1]); + 
rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[1]); + + rgbsub_a.sign_extend(0x180, 0xfffffe00); + rgbsub_b.sign_extend(0x180, 0xfffffe00); + rgbadd.sign_extend(0x180, 0xfffffe00); + + rgbadd.shl_imm(8); + rgbsub_a.sub(rgbsub_b); + rgbsub_a.mul(rgbmul); + rgbsub_a.add(rgbadd); + rgbsub_a.add_imm(0x0080); + rgbsub_a.sra_imm(8); + rgbsub_a.clamp_and_clear(0xfffffe00); + + userdata->m_pixel_color = rgbsub_a; + + //Alpha coverage combiner + userdata->m_pixel_color.set_a(get_alpha_cvg(userdata->m_pixel_color.get_a(), userdata, object)); + + const uint32_t curpixel = fb_index + x; + const uint32_t zbcur = zb + curpixel; + const uint32_t zhbcur = zhb + curpixel; + + ((this)->*(m_read_pixel[object.m_misc_state.m_fb_size]))(curpixel, userdata, object); + +#if DEBUG_RDP_PIXEL + if (s_debug_drawing) + { + //uint32_t x = curpixel % m_n64_periphs->vi_width; + //uint32_t y = curpixel / m_n64_periphs->vi_width; + //printf("%d, %d ", x, scanline); + if (x == 157 && scanline == 89) + { + if (true)//finalcolor == 0) + { + static const char *s_fb_format[4] = { "I", "IA", "CI", "RGBA" }; + static const char *s_blend1a_c0[4] = { "PIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend1b_c0[4] = { "PIXA", "FOGA", "SHADEA", "ZERO" }; + static const char *s_blend2a_c0[4] = { "PIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend2b_c0[4] = { "INVPIXA", "MEMA", "ONE", "ZERO" }; + static const char *s_blend1a_c1[4] = { "BPIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend1b_c1[4] = { "PIXA", "FOGA", "SHADEA", "ZERO" }; + static const char *s_blend2a_c1[4] = { "BPIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend2b_c1[4] = { "INVPIXA", "MEMA", "ONE", "ZERO" }; + static const char *s_suba_rgb[16] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "ONE", "NOISE", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" }; + static const char *s_subb_rgb[16] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", 
"KEYC", "K4", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" }; + static const char *s_mul_rgb[32] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "KEYS", "CombinedA", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "LODF", "PLODF", "K5", + "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" }; + static const char *s_add_rgb[8] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "ONE", "ZERO" }; + static const char *s_sub_a[16] = { "CombinedA", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "ONE", "ZERO" }; + static const char *s_mul_a[16] = { "LODF", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "PLODF", "ZERO" }; + + printf("Write to %08x: %d, %d\n", curpixel, x, scanline); + printf("m_fb_size: %d\n", 4 << object.m_misc_state.m_fb_size); + printf("m_fb_format: %s\n", s_fb_format[object.m_misc_state.m_fb_format]); + printf("blend enable: %d\n", userdata->m_blend_enable); + printf("other modes:\n"); + printf(" cycle_type: %d\n", object.m_other_modes.cycle_type); + printf(" persp_tex_en: %d\n", object.m_other_modes.persp_tex_en); + printf(" detail_tex_en: %d\n", object.m_other_modes.detail_tex_en); + printf(" sharpen_tex_en: %d\n", object.m_other_modes.sharpen_tex_en); + printf(" tex_lod_en: %d\n", object.m_other_modes.tex_lod_en); + printf(" en_tlut: %d\n", object.m_other_modes.en_tlut); + printf(" tlut_type: %d\n", object.m_other_modes.tlut_type); + printf(" sample_type: %d\n", object.m_other_modes.sample_type); + printf(" mid_texel: %d\n", object.m_other_modes.mid_texel); + printf(" bi_lerp0: %d\n", object.m_other_modes.bi_lerp0); + printf(" bi_lerp1: %d\n", object.m_other_modes.bi_lerp1); + printf(" convert_one: %d\n", object.m_other_modes.convert_one); + printf(" key_en: %d\n", object.m_other_modes.key_en); + printf(" rgb_dither_sel: %d\n", object.m_other_modes.rgb_dither_sel); + printf(" alpha_dither_sel: %d\n", object.m_other_modes.alpha_dither_sel); + 
printf(" blend_m1a_0 (A Cycle 0, 1): %s\n", s_blend1a_c0[object.m_other_modes.blend_m1a_0]); + printf(" blend_m1a_1 (A Cycle 1, 1): %s\n", s_blend1a_c1[object.m_other_modes.blend_m1a_1]); + printf(" blend_m1b_0 (B Cycle 0, 1): %s\n", s_blend1b_c0[object.m_other_modes.blend_m1b_0]); + printf(" blend_m1b_1 (B Cycle 1, 1): %s\n", s_blend1b_c1[object.m_other_modes.blend_m1b_1]); + printf(" blend_m2a_0 (A Cycle 0, 2): %s\n", s_blend2a_c0[object.m_other_modes.blend_m2a_0]); + printf(" blend_m2a_1 (A Cycle 1, 2): %s\n", s_blend2a_c1[object.m_other_modes.blend_m2a_1]); + printf(" blend_m2b_0 (B Cycle 0, 2): %s\n", s_blend2b_c0[object.m_other_modes.blend_m2b_0]); + printf(" blend_m2b_1 (B Cycle 1, 2): %s\n", s_blend2b_c1[object.m_other_modes.blend_m2b_1]); + printf(" tex_edge: %d\n", object.m_other_modes.tex_edge); + printf(" force_blend: %d\n", object.m_other_modes.force_blend); + printf(" blend_shift: %d\n", object.m_other_modes.blend_shift); + printf(" alpha_cvg_select: %d\n", object.m_other_modes.alpha_cvg_select); + printf(" cvg_times_alpha: %d\n", object.m_other_modes.cvg_times_alpha); + printf(" z_mode: %d\n", object.m_other_modes.z_mode); + printf(" cvg_dest: %d\n", object.m_other_modes.cvg_dest); + printf(" color_on_cvg: %d\n", object.m_other_modes.color_on_cvg); + printf(" image_read_en: %d\n", object.m_other_modes.image_read_en); + printf(" z_update_en: %d\n", object.m_other_modes.z_update_en); + printf(" z_compare_en: %d\n", object.m_other_modes.z_compare_en); + printf(" antialias_en: %d\n", object.m_other_modes.antialias_en); + printf(" z_source_sel: %d\n", object.m_other_modes.z_source_sel); + printf(" dither_alpha_en: %d\n", object.m_other_modes.dither_alpha_en); + printf(" alpha_compare_en: %d\n", object.m_other_modes.alpha_compare_en); + printf(" alpha_dither_mode: %d\n", object.m_other_modes.alpha_dither_mode); + printf("combine:\n"); + printf(" RGB sub A, cycle 0: %s\n", s_suba_rgb[m_combine.sub_a_rgb0]); + printf(" RGB sub B, cycle 0: %s\n", 
s_subb_rgb[m_combine.sub_b_rgb0]); + printf(" RGB mul, cycle 0: %s\n", s_mul_rgb[m_combine.mul_rgb0]); + printf(" RGB add, cycle 0: %s\n", s_add_rgb[m_combine.add_rgb0]); + printf(" Alpha sub A, cycle 0: %s\n", s_sub_a[m_combine.sub_a_a0]); + printf(" Alpha sub B, cycle 0: %s\n", s_sub_a[m_combine.sub_b_a0]); + printf(" Alpha mul, cycle 0: %s\n", s_mul_a[m_combine.mul_a0]); + printf(" Alpha add, cycle 0: %s\n\n", s_add_rgb[m_combine.add_a0]); + printf(" RGB sub A, cycle 1: %s\n", s_suba_rgb[m_combine.sub_a_rgb1]); + printf(" RGB sub B, cycle 1: %s\n", s_subb_rgb[m_combine.sub_b_rgb1]); + printf(" RGB mul, cycle 1: %s\n", s_mul_rgb[m_combine.mul_rgb1]); + printf(" RGB add, cycle 1: %s\n", s_add_rgb[m_combine.add_rgb1]); + printf(" Alpha sub A, cycle 1: %s\n", s_sub_a[m_combine.sub_a_a1]); + printf(" Alpha sub B, cycle 1: %s\n", s_sub_a[m_combine.sub_b_a1]); + printf(" Alpha mul, cycle 1: %s\n", s_mul_a[m_combine.mul_a1]); + printf(" Alpha add, cycle 1: %s\n\n", s_add_rgb[m_combine.add_a1]); + printf("Texel 0: %08x\n", (uint32_t)userdata->m_texel0_color.to_rgba()); + printf("Texel 1: %08x\n", (uint32_t)userdata->m_texel1_color.to_rgba()); + printf("Env: %08x\n", (uint32_t)userdata->m_env_color.to_rgba()); + printf("Prim: %08x\n", (uint32_t)userdata->m_prim_color.to_rgba()); + printf("Mem: %08x\n", (uint32_t)userdata->m_memory_color.to_rgba()); + printf("Shade: %08x\n", (uint32_t)userdata->m_shade_color.to_rgba()); + printf("sargb: %08x, %08x, %08x, %08x\n", (uint32_t)sa, (uint32_t)sr, (uint32_t)sg, (uint32_t)sb); + + printf("Blend index: %d\n", (userdata->m_blend_enable << 2) | blend_index); + int32_t cdith = 0; + int32_t adith = 0; + get_dither_values(scanline, j, &cdith, &adith, object); + color_t reblended_pixel; + ((&m_blender)->*(m_blender.blend1[(userdata->m_blend_enable << 2) | blend_index]))(reblended_pixel, cdith, adith, partialreject, sel0, userdata, object/*, true*/); + + //((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, 
&userdata->m_texel0_color, sss, sst, tilenum, 0, userdata, object/*, true*/); + } + } + } +#endif + + if (z_compare(zbcur, zhbcur, sz, dzpix, userdata, object)) + { + int32_t cdith = 0; + int32_t adith = 0; + get_dither_values(scanline, j, &cdith, &adith, object); + + color_t blended_pixel; + bool rendered = ((&m_blender)->*(m_blender.blend1[(userdata->m_blend_enable << 2) | blend_index]))(blended_pixel, cdith, adith, partialreject, sel0, userdata, object/*, false*/); + + if (rendered) + { +#if DEBUG_RDP_PIXEL + if (x == 157 && scanline == 89 && s_debug_drawing) + { + printf("WRITE1: %08x\n", (uint32_t)blended_pixel.to_rgba()); + } +#endif + ((this)->*(m_write_pixel[object.m_misc_state.m_fb_size]))(curpixel, blended_pixel, userdata, object); + if (object.m_other_modes.z_update_en) + { + z_store(object, zbcur, zhbcur, sz, userdata->m_dzpix_enc); + } + } + } + + sss = userdata->m_precomp_s; + sst = userdata->m_precomp_t; + } + + r.w += drinc; + g.w += dginc; + b.w += dbinc; + a.w += dainc; + s.w += dsinc; + t.w += dtinc; + w.w += dwinc; + z.w += dzinc; + + x += xinc; + } +} + +void n64_rdp::span_draw_2cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid) +{ + assert(object.m_misc_state.m_fb_size < 4); + + const int32_t clipx1 = object.m_scissor.m_xh; + const int32_t clipx2 = object.m_scissor.m_xl; + const int32_t tilenum = object.tilenum; + const bool flip = object.flip; + + span_param_t r; r.w = extent.param[SPAN_R].start; + span_param_t g; g.w = extent.param[SPAN_G].start; + span_param_t b; b.w = extent.param[SPAN_B].start; + span_param_t a; a.w = extent.param[SPAN_A].start; + span_param_t z; z.w = extent.param[SPAN_Z].start; + span_param_t s; s.w = extent.param[SPAN_S].start; + span_param_t t; t.w = extent.param[SPAN_T].start; + span_param_t w; w.w = extent.param[SPAN_W].start; + + const uint32_t zb = object.m_misc_state.m_zb_address >> 1; + const uint32_t zhb = object.m_misc_state.m_zb_address; + + int32_t tile2 = 
(tilenum + 1) & 7; + int32_t tile1 = tilenum; + const uint32_t prim_tile = tilenum; + + int32_t newtile1 = tile1; + int32_t news = 0; + int32_t newt = 0; + +#ifdef PTR64 + assert(extent.userdata != (const void *)0xcccccccccccccccc); +#else + assert(extent.userdata != (const void *)0xcccccccc); +#endif + rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata; + + m_tex_pipe.calculate_clamp_diffs(tile1, userdata, object); + + bool partialreject = (userdata->m_color_inputs.blender2b_a[1] == &userdata->m_inv_pixel_color && userdata->m_color_inputs.blender1b_a[1] == &userdata->m_pixel_color); + int32_t sel0 = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_memory_color) ? 1 : 0; + int32_t sel1 = (userdata->m_color_inputs.blender2b_a[1] == &userdata->m_memory_color) ? 1 : 0; + + int32_t drinc, dginc, dbinc, dainc; + int32_t dzinc, dzpix; + int32_t dsinc, dtinc, dwinc; + int32_t xinc; + + if (!flip) + { + drinc = -object.m_span_base.m_span_dr; + dginc = -object.m_span_base.m_span_dg; + dbinc = -object.m_span_base.m_span_db; + dainc = -object.m_span_base.m_span_da; + dzinc = -object.m_span_base.m_span_dz; + dsinc = -object.m_span_base.m_span_ds; + dtinc = -object.m_span_base.m_span_dt; + dwinc = -object.m_span_base.m_span_dw; + xinc = -1; + } + else + { + drinc = object.m_span_base.m_span_dr; + dginc = object.m_span_base.m_span_dg; + dbinc = object.m_span_base.m_span_db; + dainc = object.m_span_base.m_span_da; + dzinc = object.m_span_base.m_span_dz; + dsinc = object.m_span_base.m_span_ds; + dtinc = object.m_span_base.m_span_dt; + dwinc = object.m_span_base.m_span_dw; + xinc = 1; + } + + const int32_t fb_index = object.m_misc_state.m_fb_width * scanline; + + int32_t cdith = 0; + int32_t adith = 0; + + const int32_t xstart = extent.startx; + const int32_t xend = userdata->m_unscissored_rx; + const int32_t xend_scissored = extent.stopx; + + int32_t x = xend; + + const int32_t length = flip ? 
(xstart - xend) : (xend - xstart); + + if(object.m_other_modes.z_source_sel) + { + z.w = (uint32_t)object.m_misc_state.m_primitive_z << 16; + dzpix = object.m_misc_state.m_primitive_dz; + dzinc = 0; + } + else + { + dzpix = object.m_span_base.m_span_dzpix; + } + + if (object.m_misc_state.m_fb_size > 4) + fatalerror("unsupported m_fb_size %d\n", object.m_misc_state.m_fb_size); + + const int32_t blend_index = (object.m_other_modes.alpha_cvg_select ? 2 : 0) | ((object.m_other_modes.rgb_dither_sel < 3) ? 1 : 0); + const int32_t cycle0 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp0 & 1); + const int32_t cycle1 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp1 & 1); + + int32_t sss = 0; + int32_t sst = 0; + + if (object.m_other_modes.persp_tex_en) + { + tc_div(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst); + } + else + { + tc_div_no_perspective(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst); + } + + userdata->m_start_span = true; + for (int32_t j = 0; j <= length; j++) + { + int32_t sr = r.w >> 14; + int32_t sg = g.w >> 14; + int32_t sb = b.w >> 14; + int32_t sa = a.w >> 14; + int32_t sz = (z.w >> 10) & 0x3fffff; + + const bool valid_x = (flip) ? 
(x >= xend_scissored) : (x <= xend_scissored); + + if (x >= clipx1 && x < clipx2 && valid_x) + { + const uint32_t compidx = m_compressed_cvmasks[userdata->m_cvg[x]]; + userdata->m_current_pix_cvg = cvarray[compidx].cvg; + userdata->m_current_cvg_bit = cvarray[compidx].cvbit; + const uint8_t offx = cvarray[compidx].xoff; + const uint8_t offy = cvarray[compidx].yoff; + //lookup_cvmask_derivatives(userdata->m_cvg[x], &offx, &offy, userdata); + + m_tex_pipe.lod_2cycle(&sss, &sst, s.w, t.w, w.w, dsinc, dtinc, dwinc, prim_tile, &tile1, &tile2, userdata, object); + + news = userdata->m_precomp_s; + newt = userdata->m_precomp_t; + m_tex_pipe.lod_2cycle_limited(&news, &newt, s.w + dsinc, t.w + dtinc, w.w + dwinc, dsinc, dtinc, dwinc, prim_tile, &newtile1, object); + + rgbaz_correct_triangle(offx, offy, &sr, &sg, &sb, &sa, &sz, userdata, object); + rgbaz_clip(sr, sg, sb, sa, &sz, userdata); + + ((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, &userdata->m_texel0_color, sss, sst, tile1, 0, userdata, object/*, false*/); + ((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle1]))(&userdata->m_texel1_color, &userdata->m_texel0_color, sss, sst, tile2, 1, userdata, object/*, false*/); + + uint32_t t0a = userdata->m_texel0_color.get_a(); + uint32_t t1a = userdata->m_texel1_color.get_a(); + uint32_t tna = userdata->m_next_texel_color.get_a(); + userdata->m_texel0_alpha.set(t0a, t0a, t0a, t0a); + userdata->m_texel1_alpha.set(t1a, t1a, t1a, t1a); + userdata->m_next_texel_alpha.set(tna, tna, tna, tna); + + const uint8_t noise = machine().rand() << 3; // Not accurate + userdata->m_noise_color.set(0, noise, noise, noise); + + rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_rgbsub_a[0]); + rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_rgbsub_b[0]); + rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_rgbmul[0]); + rgbaint_t rgbadd(*userdata->m_color_inputs.combiner_rgbadd[0]); + + rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[0]); + 
rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[0]); + rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[0]); + rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[0]); + + rgbsub_a.sign_extend(0x180, 0xfffffe00); + rgbsub_b.sign_extend(0x180, 0xfffffe00); + rgbadd.sign_extend(0x180, 0xfffffe00); + + rgbadd.shl_imm(8); + rgbsub_a.sub(rgbsub_b); + rgbsub_a.mul(rgbmul); + + rgbsub_a.add(rgbadd); + rgbsub_a.add_imm(0x0080); + rgbsub_a.sra_imm(8); + rgbsub_a.clamp_and_clear(0xfffffe00); + + userdata->m_combined_color.set(rgbsub_a); + + rgbaint_t temp_color(userdata->m_texel0_color); + userdata->m_texel0_color = userdata->m_texel1_color; + userdata->m_texel1_color = temp_color; + + uint32_t ca = userdata->m_combined_color.get_a(); + userdata->m_combined_alpha.set(ca, ca, ca, ca); + userdata->m_texel0_alpha.set(userdata->m_texel1_alpha); + userdata->m_texel1_alpha.set(userdata->m_next_texel_alpha); + + rgbsub_a.set(*userdata->m_color_inputs.combiner_rgbsub_a[1]); + rgbsub_b.set(*userdata->m_color_inputs.combiner_rgbsub_b[1]); + rgbmul.set(*userdata->m_color_inputs.combiner_rgbmul[1]); + rgbadd.set(*userdata->m_color_inputs.combiner_rgbadd[1]); + + rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[1]); + rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[1]); + rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[1]); + rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[1]); + + rgbsub_a.sign_extend(0x180, 0xfffffe00); + rgbsub_b.sign_extend(0x180, 0xfffffe00); + rgbadd.sign_extend(0x180, 0xfffffe00); + + rgbadd.shl_imm(8); + rgbsub_a.sub(rgbsub_b); + rgbsub_a.mul(rgbmul); + rgbsub_a.add(rgbadd); + rgbsub_a.add_imm(0x0080); + rgbsub_a.sra_imm(8); + rgbsub_a.clamp_and_clear(0xfffffe00); + + userdata->m_pixel_color.set(rgbsub_a); + + //Alpha coverage combiner + userdata->m_pixel_color.set_a(get_alpha_cvg(userdata->m_pixel_color.get_a(), userdata, object)); + + const uint32_t 
curpixel = fb_index + x; + const uint32_t zbcur = zb + curpixel; + const uint32_t zhbcur = zhb + curpixel; + + ((this)->*(m_read_pixel[object.m_misc_state.m_fb_size]))(curpixel, userdata, object); + +#if DEBUG_RDP_PIXEL + if (s_debug_drawing) + { + //uint32_t x = curpixel % m_n64_periphs->vi_width; + //uint32_t y = curpixel / m_n64_periphs->vi_width; + //printf("%d, %d ", x, scanline); + if (x == 157 && scanline == 89) + { + if (true)//finalcolor == 0) + { + static const char *s_fb_format[4] = { "I", "IA", "CI", "RGBA" }; + static const char *s_blend1a_c0[4] = { "PIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend1b_c0[4] = { "PIXA", "FOGA", "SHADEA", "ZERO" }; + static const char *s_blend2a_c0[4] = { "PIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend2b_c0[4] = { "INVPIXA", "MEMA", "ONE", "ZERO" }; + static const char *s_blend1a_c1[4] = { "BPIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend1b_c1[4] = { "PIXA", "FOGA", "SHADEA", "ZERO" }; + static const char *s_blend2a_c1[4] = { "BPIXC", "MEMC", "BLENDC", "FOGC" }; + static const char *s_blend2b_c1[4] = { "INVPIXA", "MEMA", "ONE", "ZERO" }; + static const char *s_suba_rgb[16] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "ONE", "NOISE", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" }; + static const char *s_subb_rgb[16] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "KEYC", "K4", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" }; + static const char *s_mul_rgb[32] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "KEYS", "CombinedA", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "LODF", "PLODF", "K5", + "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO", "ZERO" }; + static const char *s_add_rgb[8] = { "Combined", "TEX0C", "TEX1C", "PRIMC", "SHADEC", "ENVC", "ONE", "ZERO" }; + static const char *s_sub_a[16] = { "CombinedA", "TEX0A", "TEX1A", 
"PRIMA", "SHADEA", "ENVA", "ONE", "ZERO" }; + static const char *s_mul_a[16] = { "LODF", "TEX0A", "TEX1A", "PRIMA", "SHADEA", "ENVA", "PLODF", "ZERO" }; + + printf("Write to %08x: %d, %d\n", curpixel, x, scanline); + printf("m_fb_size: %d\n", 4 << object.m_misc_state.m_fb_size); + printf("m_fb_format: %s\n", s_fb_format[object.m_misc_state.m_fb_format]); + printf("blend enable: %d\n", userdata->m_blend_enable); + printf("other modes:\n"); + printf(" cycle_type: %d\n", object.m_other_modes.cycle_type); + printf(" persp_tex_en: %d\n", object.m_other_modes.persp_tex_en); + printf(" detail_tex_en: %d\n", object.m_other_modes.detail_tex_en); + printf(" sharpen_tex_en: %d\n", object.m_other_modes.sharpen_tex_en); + printf(" tex_lod_en: %d\n", object.m_other_modes.tex_lod_en); + printf(" en_tlut: %d\n", object.m_other_modes.en_tlut); + printf(" tlut_type: %d\n", object.m_other_modes.tlut_type); + printf(" sample_type: %d\n", object.m_other_modes.sample_type); + printf(" mid_texel: %d\n", object.m_other_modes.mid_texel); + printf(" bi_lerp0: %d\n", object.m_other_modes.bi_lerp0); + printf(" bi_lerp1: %d\n", object.m_other_modes.bi_lerp1); + printf(" convert_one: %d\n", object.m_other_modes.convert_one); + printf(" key_en: %d\n", object.m_other_modes.key_en); + printf(" rgb_dither_sel: %d\n", object.m_other_modes.rgb_dither_sel); + printf(" alpha_dither_sel: %d\n", object.m_other_modes.alpha_dither_sel); + printf(" blend_m1a_0 (A Cycle 0, 1): %s\n", s_blend1a_c0[object.m_other_modes.blend_m1a_0]); + printf(" blend_m1a_1 (A Cycle 1, 1): %s\n", s_blend1a_c1[object.m_other_modes.blend_m1a_1]); + printf(" blend_m1b_0 (B Cycle 0, 1): %s\n", s_blend1b_c0[object.m_other_modes.blend_m1b_0]); + printf(" blend_m1b_1 (B Cycle 1, 1): %s\n", s_blend1b_c1[object.m_other_modes.blend_m1b_1]); + printf(" blend_m2a_0 (A Cycle 0, 2): %s\n", s_blend2a_c0[object.m_other_modes.blend_m2a_0]); + printf(" blend_m2a_1 (A Cycle 1, 2): %s\n", s_blend2a_c1[object.m_other_modes.blend_m2a_1]); + printf(" 
blend_m2b_0 (B Cycle 0, 2): %s\n", s_blend2b_c0[object.m_other_modes.blend_m2b_0]); + printf(" blend_m2b_1 (B Cycle 1, 2): %s\n", s_blend2b_c1[object.m_other_modes.blend_m2b_1]); + printf(" tex_edge: %d\n", object.m_other_modes.tex_edge); + printf(" force_blend: %d\n", object.m_other_modes.force_blend); + printf(" blend_shift: %d\n", object.m_other_modes.blend_shift); + printf(" alpha_cvg_select: %d\n", object.m_other_modes.alpha_cvg_select); + printf(" cvg_times_alpha: %d\n", object.m_other_modes.cvg_times_alpha); + printf(" z_mode: %d\n", object.m_other_modes.z_mode); + printf(" cvg_dest: %d\n", object.m_other_modes.cvg_dest); + printf(" color_on_cvg: %d\n", object.m_other_modes.color_on_cvg); + printf(" image_read_en: %d\n", object.m_other_modes.image_read_en); + printf(" z_update_en: %d\n", object.m_other_modes.z_update_en); + printf(" z_compare_en: %d\n", object.m_other_modes.z_compare_en); + printf(" antialias_en: %d\n", object.m_other_modes.antialias_en); + printf(" z_source_sel: %d\n", object.m_other_modes.z_source_sel); + printf(" dither_alpha_en: %d\n", object.m_other_modes.dither_alpha_en); + printf(" alpha_compare_en: %d\n", object.m_other_modes.alpha_compare_en); + printf(" alpha_dither_mode: %d\n", object.m_other_modes.alpha_dither_mode); + printf("combine:\n"); + printf(" RGB sub A, cycle 0: %s\n", s_suba_rgb[m_combine.sub_a_rgb0]); + printf(" RGB sub B, cycle 0: %s\n", s_subb_rgb[m_combine.sub_b_rgb0]); + printf(" RGB mul, cycle 0: %s\n", s_mul_rgb[m_combine.mul_rgb0]); + printf(" RGB add, cycle 0: %s\n", s_add_rgb[m_combine.add_rgb0]); + printf(" Alpha sub A, cycle 0: %s\n", s_sub_a[m_combine.sub_a_a0]); + printf(" Alpha sub B, cycle 0: %s\n", s_sub_a[m_combine.sub_b_a0]); + printf(" Alpha mul, cycle 0: %s\n", s_mul_a[m_combine.mul_a0]); + printf(" Alpha add, cycle 0: %s\n\n", s_add_rgb[m_combine.add_a0]); + printf(" RGB sub A, cycle 1: %s\n", s_suba_rgb[m_combine.sub_a_rgb1]); + printf(" RGB sub B, cycle 1: %s\n", 
s_subb_rgb[m_combine.sub_b_rgb1]); + printf(" RGB mul, cycle 1: %s\n", s_mul_rgb[m_combine.mul_rgb1]); + printf(" RGB add, cycle 1: %s\n", s_add_rgb[m_combine.add_rgb1]); + printf(" Alpha sub A, cycle 1: %s\n", s_sub_a[m_combine.sub_a_a1]); + printf(" Alpha sub B, cycle 1: %s\n", s_sub_a[m_combine.sub_b_a1]); + printf(" Alpha mul, cycle 1: %s\n", s_mul_a[m_combine.mul_a1]); + printf(" Alpha add, cycle 1: %s\n\n", s_add_rgb[m_combine.add_a1]); + printf("Texel 0: %08x\n", (uint32_t)userdata->m_texel0_color.to_rgba()); + printf("Texel 1: %08x\n", (uint32_t)userdata->m_texel1_color.to_rgba()); + printf("Env: %08x\n", (uint32_t)userdata->m_env_color.to_rgba()); + printf("Prim: %08x\n", (uint32_t)userdata->m_prim_color.to_rgba()); + printf("Mem: %08x\n", (uint32_t)userdata->m_memory_color.to_rgba()); + printf("Shade: %08x\n", (uint32_t)userdata->m_shade_color.to_rgba()); + printf("sargb: %08x, %08x, %08x, %08x\n", (uint32_t)sa, (uint32_t)sr, (uint32_t)sg, (uint32_t)sb); + + printf("Blend index: %d\n", (userdata->m_blend_enable << 2) | blend_index); + int32_t cdith = 0; + int32_t adith = 0; + get_dither_values(scanline, j, &cdith, &adith, object); + color_t reblended_pixel; + ((&m_blender)->*(m_blender.blend2[(userdata->m_blend_enable << 2) | blend_index]))(reblended_pixel, cdith, adith, partialreject, sel0, sel1, userdata, object/*, true*/); + + //((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, &userdata->m_texel0_color, sss, sst, tilenum, 0, userdata, object/*, true*/); + } + } + } +#endif + + if(z_compare(zbcur, zhbcur, sz, dzpix, userdata, object)) + { + get_dither_values(scanline, j, &cdith, &adith, object); + + color_t blended_pixel; + bool rendered = ((&m_blender)->*(m_blender.blend2[(userdata->m_blend_enable << 2) | blend_index]))(blended_pixel, cdith, adith, partialreject, sel0, sel1, userdata, object/*, false*/); + + if (rendered) + { +#if DEBUG_RDP_PIXEL + if (x == 157 && scanline == 89 && s_debug_drawing) + { + printf("WRITE2: %08x\n", 
(uint32_t)blended_pixel.to_rgba()); + } +#endif + ((this)->*(m_write_pixel[object.m_misc_state.m_fb_size]))(curpixel, blended_pixel, userdata, object); + if (object.m_other_modes.z_update_en) + { + z_store(object, zbcur, zhbcur, sz, userdata->m_dzpix_enc); + } + } + } + sss = userdata->m_precomp_s; + sst = userdata->m_precomp_t; + } + + r.w += drinc; + g.w += dginc; + b.w += dbinc; + a.w += dainc; + s.w += dsinc; + t.w += dtinc; + w.w += dwinc; + z.w += dzinc; + + x += xinc; + } +} + +void n64_rdp::span_draw_copy(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid) +{ + const int32_t clipx1 = object.m_scissor.m_xh; + const int32_t clipx2 = object.m_scissor.m_xl; + const int32_t tilenum = object.tilenum; + const bool flip = object.flip; + + rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata; + const int32_t xstart = extent.startx; + const int32_t xend = userdata->m_unscissored_rx; + const int32_t xend_scissored = extent.stopx; + const int32_t xinc = flip ? 1 : -1; + const int32_t length = flip ? (xstart - xend) : (xend - xstart); + + span_param_t s; s.w = extent.param[SPAN_S].start; + span_param_t t; t.w = extent.param[SPAN_T].start; + + const int32_t ds = object.m_span_base.m_span_ds / 4; + const int32_t dt = object.m_span_base.m_span_dt / 4; + const int32_t dsinc = flip ? (ds) : -ds; + const int32_t dtinc = flip ? (dt) : -dt; + + const int32_t fb_index = object.m_misc_state.m_fb_width * scanline; + + int32_t x = xend; + + for (int32_t j = 0; j <= length; j++) + { + const bool valid_x = (flip) ? 
(x >= xend_scissored) : (x <= xend_scissored); + + if (x >= clipx1 && x < clipx2 && valid_x) + { + int32_t sss = s.h.h; + int32_t sst = t.h.h; + m_tex_pipe.copy(&userdata->m_texel0_color, sss, sst, tilenum, object, userdata); + + uint32_t curpixel = fb_index + x; + if (userdata->m_texel0_color.get_a() != 0 || !object.m_other_modes.alpha_compare_en || object.m_misc_state.m_fb_size == 1) + { + ((this)->*(m_copy_pixel[object.m_misc_state.m_fb_size]))(curpixel, userdata->m_texel0_color, object); + } + } + + s.w += dsinc; + t.w += dtinc; + x += xinc; + } +} + +void n64_rdp::span_draw_fill(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid) +{ + assert(object.m_misc_state.m_fb_size < 4); + + const bool flip = object.flip; + + const int32_t clipx1 = object.m_scissor.m_xh; + const int32_t clipx2 = object.m_scissor.m_xl; + + const int32_t xinc = flip ? 1 : -1; + + const int32_t fb_index = object.m_misc_state.m_fb_width * scanline; + + const int32_t xstart = extent.startx; + const int32_t xend_scissored = extent.stopx; + + int32_t x = xend_scissored; + + const int32_t length = flip ? 
(xstart - xend_scissored) : (xend_scissored - xstart); + + for (int32_t j = 0; j <= length; j++) + { + if (x >= clipx1 && x < clipx2) + { + ((this)->*(m_fill_pixel[object.m_misc_state.m_fb_size]))(fb_index + x, object); + } + + x += xinc; + } +} diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/n64.h b/waterbox/ares64/ares/thirdparty/mame/mame/video/n64.h new file mode 100644 index 0000000000..764219c964 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/n64.h @@ -0,0 +1,426 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +#ifndef _VIDEO_N64_H_ +#define _VIDEO_N64_H_ + +#include "video/poly.h" +#include "pin64.h" + +/*****************************************************************************/ + +#define PIXEL_SIZE_4BIT 0 +#define PIXEL_SIZE_8BIT 1 +#define PIXEL_SIZE_16BIT 2 +#define PIXEL_SIZE_32BIT 3 + +#define CYCLE_TYPE_1 0 +#define CYCLE_TYPE_2 1 +#define CYCLE_TYPE_COPY 2 +#define CYCLE_TYPE_FILL 3 + +#define SAMPLE_TYPE_1x1 0 +#define SAMPLE_TYPE_2x2 1 + +#define BYTE_ADDR_XOR BYTE4_XOR_BE(0) +#define WORD_ADDR_XOR (WORD_XOR_BE(0) >> 1) + +#define XOR_SWAP_BYTE_SHIFT 2 +#define XOR_SWAP_WORD_SHIFT 1 +#define XOR_SWAP_DWORD_SHIFT 0 + +#define XOR_SWAP_BYTE 4 +#define XOR_SWAP_WORD 2 +#define XOR_SWAP_DWORD 1 + +#define FORMAT_RGBA 0 +#define FORMAT_YUV 1 +#define FORMAT_CI 2 +#define FORMAT_IA 3 +#define FORMAT_I 4 + +#ifdef LSB_FIRST +#define BYTE_XOR_DWORD_SWAP 7 +#define WORD_XOR_DWORD_SWAP 3 +#else +#define BYTE_XOR_DWORD_SWAP 4 +#define WORD_XOR_DWORD_SWAP 2 +#endif +#define DWORD_XOR_DWORD_SWAP 1 + +#define GET_LOW_RGBA16_TMEM(x) (m_rdp->m_replicated_rgba[((x) >> 1) & 0x1f]) +#define GET_MED_RGBA16_TMEM(x) (m_rdp->m_replicated_rgba[((x) >> 6) & 0x1f]) +#define GET_HI_RGBA16_TMEM(x) (m_rdp->m_replicated_rgba[((x) >> 11) & 0x1f]) + +#define MEM8_LIMIT 0x7fffff +#define MEM16_LIMIT 0x3fffff +#define MEM32_LIMIT 0x1fffff + +#define RDP_RANGE_CHECK (0) + +#if RDP_RANGE_CHECK +#define CHECK8(in) 
if(rdp_range_check((in))) { printf("Check8: Address %08x out of range!\n", (in)); fflush(stdout); fatalerror("Address %08x out of range!\n", (in)); } +#define CHECK16(in) if(rdp_range_check((in) << 1)) { printf("Check16: Address %08x out of range!\n", (in) << 1); fflush(stdout); fatalerror("Address %08x out of range!\n", (in) << 1); } +#define CHECK32(in) if(rdp_range_check((in) << 2)) { printf("Check32: Address %08x out of range!\n", (in) << 2); fflush(stdout); fatalerror("Address %08x out of range!\n", (in) << 2); } +#else +#define CHECK8(in) { } +#define CHECK16(in) { } +#define CHECK32(in) { } +#endif + +#if RDP_RANGE_CHECK +#define RREADADDR8(in) ((rdp_range_check((in))) ? 0 : (((uint8_t*)m_rdram)[(in) ^ BYTE_ADDR_XOR])) +#define RREADIDX16(in) ((rdp_range_check((in) << 1)) ? 0 : (((uint16_t*)m_rdram)[(in) ^ WORD_ADDR_XOR])) +#define RREADIDX32(in) ((rdp_range_check((in) << 2)) ? 0 : m_rdram[(in)]) + +#define RWRITEADDR8(in, val) if(rdp_range_check((in))) { printf("Write8: Address %08x out of range!\n", (in)); fflush(stdout); fatalerror("Address %08x out of range!\n", (in)); } else { ((uint8_t*)m_rdram)[(in) ^ BYTE_ADDR_XOR] = val;} +#define RWRITEIDX16(in, val) if(rdp_range_check((in) << 1)) { printf("Write16: Address %08x out of range!\n", ((object.m_misc_state.m_fb_address >> 1) + curpixel) << 1); fflush(stdout); fatalerror("Address out of range\n"); } else { ((uint16_t*)m_rdram)[(in) ^ WORD_ADDR_XOR] = val;} +#define RWRITEIDX32(in, val) if(rdp_range_check((in) << 2)) { printf("Write32: Address %08x out of range!\n", (in) << 2); fflush(stdout); fatalerror("Address %08x out of range!\n", (in) << 2); } else { m_rdram[(in)] = val;} +#else +#define RREADADDR8(in) (((uint8_t*)m_rdram)[(in) ^ BYTE_ADDR_XOR]) +#define RREADIDX16(in) (((uint16_t*)m_rdram)[(in) ^ WORD_ADDR_XOR]) +#define RREADIDX32(in) (m_rdram[(in)]) + +#define RWRITEADDR8(in, val) ((uint8_t*)m_rdram)[(in) ^ BYTE_ADDR_XOR] = val; +#define RWRITEIDX16(in, val) ((uint16_t*)m_rdram)[(in) ^ 
WORD_ADDR_XOR] = val; +#define RWRITEIDX32(in, val) m_rdram[(in)] = val +#endif + +#define U_RREADADDR8(in) (((uint8_t*)m_rdram)[(in) ^ BYTE_ADDR_XOR]) +#define U_RREADIDX16(in) (((uint16_t*)m_rdram)[(in) ^ WORD_ADDR_XOR]) +#define U_RREADIDX32(in) (m_rdram[(in)]) + +#define GETLOWCOL(x) (((x) & 0x3e) << 2) +#define GETMEDCOL(x) (((x) & 0x7c0) >> 3) +#define GETHICOL(x) (((x) & 0xf800) >> 8) + +#define HREADADDR8(in) /*(((in) <= MEM8_LIMIT) ? */(m_hidden_bits[(in) ^ BYTE_ADDR_XOR])/* : 0)*/ +#define HWRITEADDR8(in, val) /*{if ((in) <= MEM8_LIMIT) */m_hidden_bits[(in) ^ BYTE_ADDR_XOR] = val;/*}*/ + +//sign-extension macros +#define SIGN22(x) (((x & 0x00200000) * 0x7ff) | (x & 0x1fffff)) +#define SIGN17(x) (((x & 0x00010000) * 0xffff) | (x & 0xffff)) +#define SIGN16(x) (((x & 0x00008000) * 0x1ffff) | (x & 0x7fff)) +#define SIGN13(x) (((x & 0x00001000) * 0xfffff) | (x & 0xfff)) +#define SIGN11(x) (((x & 0x00000400) * 0x3fffff) | (x & 0x3ff)) +#define SIGN9(x) (((x & 0x00000100) * 0xffffff) | (x & 0xff)) +#define SIGN8(x) (((x & 0x00000080) * 0x1ffffff) | (x & 0x7f)) + +#define KURT_AKELEY_SIGN9(x) ((((x) & 0x180) == 0x180) ? 
((x) | ~0x1ff) : ((x) & 0x1ff)) + +#define SPAN_R (0) +#define SPAN_G (1) +#define SPAN_B (2) +#define SPAN_A (3) +#define SPAN_S (4) +#define SPAN_T (5) +#define SPAN_W (6) +#define SPAN_Z (7) + +#define EXTENT_AUX_COUNT (sizeof(rdp_span_aux)*(480*192)) // Screen coverage *192, more or less + +/*****************************************************************************/ + +class n64_periphs; +class n64_rdp; + +#include "video/n64types.h" +#include "video/rdpblend.h" +#include "video/rdptpipe.h" + +class n64_state; + +class n64_rdp : public poly_manager +{ +public: + n64_rdp(n64_state &state, uint32_t* rdram, uint32_t* dmem); + + running_machine &machine() const { assert(m_machine != nullptr); return *m_machine; } + + void init_internal_state() + { + m_tmem = std::make_unique(0x1000); + memset(m_tmem.get(), 0, 0x1000); + +#if !defined(MAME_RDP) + uint8_t* normpoint = machine().root_device().memregion("normpoint")->base(); + uint8_t* normslope = machine().root_device().memregion("normslope")->base(); + + for(int32_t i = 0; i < 64; i++) + { + m_norm_point_rom[i] = (normpoint[(i << 1) + 1] << 8) | normpoint[i << 1]; + m_norm_slope_rom[i] = (normslope[(i << 1) + 1] << 8) | normslope[i << 1]; + } +#endif + + memset(m_tiles, 0, 8 * sizeof(n64_tile_t)); + memset(m_cmd_data, 0, sizeof(m_cmd_data)); + + for (int32_t i = 0; i < 8; i++) + { + m_tiles[i].num = i; + m_tiles[i].invmm = rgbaint_t(~0, ~0, ~0, ~0); + m_tiles[i].invmask = rgbaint_t(~0, ~0, ~0, ~0); + } + } + + void process_command_list(); + uint64_t read_data(uint32_t address); + void disassemble(uint64_t *cmd_buf, char* buffer); + + void set_machine(running_machine& machine) { m_machine = &machine; } + void set_n64_periphs(n64_periphs* periphs) { m_n64_periphs = periphs; } + + // CPU-visible registers + void set_start(uint32_t val) { m_start = val; } + uint32_t get_start() const { return m_start; } + + void set_end(uint32_t val) { m_end = val; } + uint32_t get_end() const { return m_end; } + + void 
set_current(uint32_t val) { m_current = val; } + uint32_t get_current() const { return m_current; } + + void set_status(uint32_t val) { m_status = val; } + uint32_t get_status() const { return m_status; } + + // Color Combiner + int32_t color_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d); + int32_t alpha_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d); + void set_suba_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata); + void set_subb_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata); + void set_mul_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata); + void set_add_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata); + void set_sub_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata); + void set_mul_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata); + + // Texture memory + uint8_t* get_tmem8() { return m_tmem.get(); } + uint16_t* get_tmem16() { return (uint16_t*)m_tmem.get(); } + + // YUV Factors + void set_yuv_factors(color_t k02, color_t k13, color_t k4, color_t k5) { m_k02 = k02; m_k13 = k13; m_k4 = k4; m_k5 = k5; } + color_t& get_k02() { return m_k02; } + color_t& get_k13() { return m_k13; } + + // Blender-related (move into RDP::Blender) + void set_blender_input(int32_t cycle, int32_t which, color_t** input_rgb, color_t** input_a, int32_t a, int32_t b, rdp_span_aux* userdata); + + // Span rasterization + void span_draw_1cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid); + void span_draw_2cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid); + void span_draw_copy(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid); + void span_draw_fill(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid); + + // Render-related (move into eventual drawing-related classes?) 
+ void tc_div(int32_t ss, int32_t st, int32_t sw, int32_t* sss, int32_t* sst); + void tc_div_no_perspective(int32_t ss, int32_t st, int32_t sw, int32_t* sss, int32_t* sst); + uint32_t get_log2(uint32_t lod_clamp); + void render_spans(int32_t start, int32_t end, int32_t tilenum, bool flip, extent_t* spans, bool rect, rdp_poly_state* object); + int32_t get_alpha_cvg(int32_t comb_alpha, rdp_span_aux* userdata, const rdp_poly_state &object); + + void z_store(const rdp_poly_state &object, uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t z, uint32_t enc); + uint32_t z_decompress(uint32_t zcurpixel); + uint32_t dz_decompress(uint32_t zcurpixel, uint32_t dzcurpixel); + uint32_t dz_compress(uint32_t value); + int32_t normalize_dzpix(int32_t sum); + bool z_compare(uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t sz, uint16_t dzpix, rdp_span_aux* userdata, const rdp_poly_state &object); + + // Commands + void cmd_noop(uint64_t *cmd_buf); + void cmd_tex_rect(uint64_t *cmd_buf); + void cmd_tex_rect_flip(uint64_t *cmd_buf); + void cmd_sync_load(uint64_t *cmd_buf); + void cmd_sync_pipe(uint64_t *cmd_buf); + void cmd_sync_tile(uint64_t *cmd_buf); + void cmd_sync_full(uint64_t *cmd_buf); + void cmd_set_key_gb(uint64_t *cmd_buf); + void cmd_set_key_r(uint64_t *cmd_buf); + void cmd_set_fill_color32(uint64_t *cmd_buf); + void cmd_set_convert(uint64_t *cmd_buf); + void cmd_set_scissor(uint64_t *cmd_buf); + void cmd_set_prim_depth(uint64_t *cmd_buf); + void cmd_set_other_modes(uint64_t *cmd_buf); + void cmd_load_tlut(uint64_t *cmd_buf); + void cmd_set_tile_size(uint64_t *cmd_buf); + void cmd_load_block(uint64_t *cmd_buf); + void cmd_load_tile(uint64_t *cmd_buf); + void cmd_fill_rect(uint64_t *cmd_buf); + void cmd_set_tile(uint64_t *cmd_buf); + void cmd_set_fog_color(uint64_t *cmd_buf); + void cmd_set_blend_color(uint64_t *cmd_buf); + void cmd_set_prim_color(uint64_t *cmd_buf); + void cmd_set_env_color(uint64_t *cmd_buf); + void cmd_set_combine(uint64_t *cmd_buf); + void 
cmd_set_texture_image(uint64_t *cmd_buf); + void cmd_set_mask_image(uint64_t *cmd_buf); + void cmd_set_color_image(uint64_t *cmd_buf); + + void rgbaz_clip(int32_t sr, int32_t sg, int32_t sb, int32_t sa, int32_t* sz, rdp_span_aux* userdata); + void rgbaz_correct_triangle(int32_t offx, int32_t offy, int32_t* r, int32_t* g, int32_t* b, int32_t* a, int32_t* z, rdp_span_aux* userdata, const rdp_poly_state &object); + + void triangle(uint64_t *cmd_buf, bool shade, bool texture, bool zbuffer); + + void get_dither_values(int32_t x, int32_t y, int32_t* cdith, int32_t* adith, const rdp_poly_state &object); + + uint16_t decompress_cvmask_frombyte(uint8_t x); + void lookup_cvmask_derivatives(uint32_t mask, uint8_t* offx, uint8_t* offy, rdp_span_aux* userdata); + + void mark_frame() { m_capture.mark_frame(*m_machine); } + + misc_state_t m_misc_state; + + // Color constants + color_t m_blend_color; /* constant blend color */ + color_t m_prim_color; /* flat primitive color */ + color_t m_prim_alpha; /* flat primitive alpha */ + color_t m_env_color; /* generic color constant ('environment') */ + color_t m_env_alpha; /* generic alpha constant ('environment') */ + color_t m_fog_color; /* generic color constant ('fog') */ + color_t m_key_scale; /* color-keying constant */ + color_t m_lod_fraction; /* Z-based LOD fraction for this poly */ + color_t m_prim_lod_fraction; /* fixed LOD fraction for this poly */ + + color_t m_one; + color_t m_zero; + + uint32_t m_fill_color; + + other_modes_t m_other_modes; + + n64_blender_t m_blender; + + n64_texture_pipe_t m_tex_pipe; + + uint8_t m_hidden_bits[0x800000]; + + uint8_t m_replicated_rgba[32]; + + uint16_t m_dzpix_normalize[0x10000]; + + rectangle_t m_scissor; + span_base_t m_span_base; + + void draw_triangle(uint64_t *cmd_buf, bool shade, bool texture, bool zbuffer, bool rect); + + std::unique_ptr m_aux_buf; + uint32_t m_aux_buf_ptr; + uint32_t m_aux_buf_index; + + bool rdp_range_check(uint32_t addr); + + n64_tile_t m_tiles[8]; + +private: + 
void compute_cvg_noflip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base); + void compute_cvg_flip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base); + + void write_pixel4(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object); + void write_pixel8(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object); + void write_pixel16(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object); + void write_pixel32(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object); + void read_pixel4(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object); + void read_pixel8(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object); + void read_pixel16(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object); + void read_pixel32(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object); + void copy_pixel4(uint32_t curpixel, color_t& color, const rdp_poly_state &object); + void copy_pixel8(uint32_t curpixel, color_t& color, const rdp_poly_state &object); + void copy_pixel16(uint32_t curpixel, color_t& color, const rdp_poly_state &object); + void copy_pixel32(uint32_t curpixel, color_t& color, const rdp_poly_state &object); + void fill_pixel4(uint32_t curpixel, const rdp_poly_state &object); + void fill_pixel8(uint32_t curpixel, const rdp_poly_state &object); + void fill_pixel16(uint32_t curpixel, const rdp_poly_state &object); + void fill_pixel32(uint32_t curpixel, const rdp_poly_state &object); + + void precalc_cvmask_derivatives(void); + void z_build_com_table(void); + + typedef void (n64_rdp::*compute_cvg_t) (extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t 
scanline, int32_t yh, int32_t yl, int32_t base); + compute_cvg_t m_compute_cvg[2]; + + typedef void (n64_rdp::*write_pixel_t) (uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object); + typedef void (n64_rdp::*read_pixel_t) (uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object); + typedef void (n64_rdp::*copy_pixel_t) (uint32_t curpixel, color_t& color, const rdp_poly_state &object); + typedef void (n64_rdp::*fill_pixel_t) (uint32_t curpixel, const rdp_poly_state &object); + + write_pixel_t m_write_pixel[4]; + read_pixel_t m_read_pixel[4]; + copy_pixel_t m_copy_pixel[4]; + fill_pixel_t m_fill_pixel[4]; + + running_machine* m_machine; + uint32_t* m_rdram; + uint32_t* m_dmem; + n64_periphs* m_n64_periphs; + + combine_modes_t m_combine; + bool m_pending_mode_block; + bool m_pipe_clean; + + cv_mask_derivative_t cvarray[(1 << 8)]; + + uint16_t m_z_com_table[0x40000]; //precalced table of compressed z values, 18b: 512 KB array! + uint32_t m_z_complete_dec_table[0x4000]; //the same for decompressed z values, 14b + uint8_t m_compressed_cvmasks[0x10000]; //16bit cvmask -> to byte + + uint64_t m_cmd_data[0x800]; + uint64_t m_temp_rect_data[0x800]; + + uint32_t m_start; + uint32_t m_end; + uint32_t m_current; + uint32_t m_status; + + std::unique_ptr m_tmem; + + // YUV factors + color_t m_k02; + color_t m_k13; + color_t m_k4; + color_t m_k5; + + // Texture perspective division +#if !defined(MAME_RDP) + int32_t m_norm_point_rom[64]; + int32_t m_norm_slope_rom[64]; +#else + int32_t m_norm_point_rom[64] = + { + 0x4000, 0x3f04, 0x3e10, 0x3d22, 0x3c3c, 0x3b5d, 0x3a83, 0x39b1, + 0x38e4, 0x381c, 0x375a, 0x369d, 0x35e5, 0x3532, 0x3483, 0x33d9, + 0x3333, 0x3291, 0x31f4, 0x3159, 0x30c3, 0x3030, 0x2fa1, 0x2f15, + 0x2e8c, 0x2e06, 0x2d83, 0x2d03, 0x2c86, 0x2c0b, 0x2b93, 0x2b1e, + 0x2aab, 0x2a3a, 0x29cc, 0x2960, 0x28f6, 0x288e, 0x2828, 0x27c4, + 0x2762, 0x2702, 0x26a4, 0x2648, 0x25ed, 0x2594, 0x253d, 0x24e7, + 0x2492, 0x243f, 0x23ee, 
0x239e, 0x234f, 0x2302, 0x22b6, 0x226c, + 0x2222, 0x21da, 0x2193, 0x214d, 0x2108, 0x20c5, 0x2082, 0x2041, + }; + int32_t m_norm_slope_rom[64] = + { + 0xfc, 0xf4, 0xee, 0xe6, 0xdf, 0xda, 0xd2, 0xcd, + 0xc8, 0xc2, 0xbd, 0xb8, 0xb3, 0xaf, 0xaa, 0xa6, + 0xa2, 0x9d, 0x9b, 0x96, 0x93, 0x8f, 0x8c, 0x89, + 0x86, 0x83, 0x80, 0x7d, 0x7b, 0x78, 0x75, 0x73, + 0x71, 0x6e, 0x6c, 0x6a, 0x68, 0x66, 0x64, 0x62, + 0x60, 0x5e, 0x5c, 0x5b, 0x59, 0x57, 0x56, 0x55, + 0x53, 0x51, 0x50, 0x4f, 0x4d, 0x4c, 0x4a, 0x4a, + 0x48, 0x47, 0x46, 0x45, 0x43, 0x43, 0x41, 0x41, + }; +#endif + + pin64_t m_capture; + + static uint32_t s_special_9bit_clamptable[512]; + static z_decompress_entry_t const m_z_dec_table[8]; + + static uint8_t const s_bayer_matrix[16]; + static uint8_t const s_magic_matrix[16]; + static int32_t const s_rdp_command_length[]; + static char const *const s_image_format[]; + static char const *const s_image_size[]; + +public: + bool ignore; + bool dolog; +}; + +#endif // _VIDEO_N64_H_ diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/n64types.h b/waterbox/ares64/ares/thirdparty/mame/mame/video/n64types.h new file mode 100644 index 0000000000..4fd845593f --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/n64types.h @@ -0,0 +1,340 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz + +#ifndef _VIDEO_N64TYPES_H_ +#define _VIDEO_N64TYPES_H_ + +#include "video/rgbutil.h" + +struct misc_state_t +{ + misc_state_t() + { + m_max_level = 0; + m_min_level = 0; + } + + int32_t m_fb_format; // Framebuffer pixel format index (0 - I, 1 - IA, 2 - CI, 3 - RGBA) + int32_t m_fb_size; // Framebuffer pixel size index (0 - 4bpp, 1 - 8bpp, 2 - 16bpp, 3 - 32bpp) + int32_t m_fb_width; // Framebuffer width, in pixels + int32_t m_fb_height; // Framebuffer height, in pixels + uint32_t m_fb_address; // Framebuffer source address offset (in bytes) from start of RDRAM + + uint32_t m_zb_address; // Z-buffer source address offset (in bytes) from start of RDRAM + + int32_t 
m_ti_format; // Format for Texture Interface (TI) transfers + int32_t m_ti_size; // Size (in bytes) of TI transfers + int32_t m_ti_width; // Width (in pixels) of TI transfers + uint32_t m_ti_address; // Destination address for TI transfers + + uint32_t m_max_level; // Maximum LOD level for texture filtering + uint32_t m_min_level; // Minimum LOD level for texture filtering + + uint16_t m_primitive_z; // Forced Z value for current primitive, if applicable + uint16_t m_primitive_dz; // Forced Delta-Z value for current primitive, if applicable +}; + +#if 0 +class color_t +{ + public: + color_t() + { + c = 0; + } + + color_t(uint32_t color) + { + set(color); + } + + color_t(uint8_t a, uint8_t r, uint8_t g, uint8_t b) + { + set(a, r, g, b); + } + + inline void set(color_t& other) + { + c = other.c; + } + + inline void set(uint32_t color) + { + i.a = (color >> 24) & 0xff; + i.r = (color >> 16) & 0xff; + i.g = (color >> 8) & 0xff; + i.b = color & 0xff; + } + + void set(uint8_t a, uint8_t r, uint8_t g, uint8_t b) + { + i.a = a; + i.r = r; + i.g = g; + i.b = b; + } + + inline void set_direct(uint32_t color) + { + c = color; + } + + uint32_t get() + { + return i.a << 24 | i.r << 16 | i.g << 8 | i.b; + } + + union + { + uint32_t c; +#ifdef LSB_FIRST + struct { uint8_t a, b, g, r; } i; +#else + struct { uint8_t r, g, b, a; } i; +#endif + }; +}; +#else +#define color_t rgbaint_t +#endif + +enum +{ + BIT_DEPTH_32 = 0, + BIT_DEPTH_16, + + BIT_DEPTH_COUNT +}; + +struct n64_tile_t +{ + int32_t format; // Image data format: RGBA, YUV, CI, IA, I + int32_t size; // Size of texel element: 4b, 8b, 16b, 32b + int32_t line; // Size of tile line in bytes + int32_t tmem; // Starting tmem address for this tile in bytes + int32_t palette; // Palette number for 4b CI texels + int32_t ct, mt, cs, ms; // Clamp / mirror enable bits for S / T direction + int32_t mask_t, shift_t, mask_s, shift_s; // Mask values / LOD shifts + int32_t lshift_s, rshift_s, lshift_t, rshift_t; + int32_t wrapped_mask_s, 
wrapped_mask_t; + bool clamp_s, clamp_t; + rgbaint_t mm, invmm; + rgbaint_t wrapped_mask; + rgbaint_t mask; + rgbaint_t invmask; + rgbaint_t lshift; + rgbaint_t rshift; + rgbaint_t sth; + rgbaint_t stl; + rgbaint_t clamp_st; + uint16_t sl, tl, sh, th; // 10.2 fixed-point, starting and ending texel row / column + int32_t num; +}; + +struct span_base_t +{ + int32_t m_span_dr; + int32_t m_span_dg; + int32_t m_span_db; + int32_t m_span_da; + int32_t m_span_ds; + int32_t m_span_dt; + int32_t m_span_dw; + int32_t m_span_dz; + int32_t m_span_dymax; + int32_t m_span_dzpix; + int32_t m_span_drdy; + int32_t m_span_dgdy; + int32_t m_span_dbdy; + int32_t m_span_dady; + int32_t m_span_dzdy; +}; + +struct combine_modes_t +{ + int32_t sub_a_rgb0; + int32_t sub_b_rgb0; + int32_t mul_rgb0; + int32_t add_rgb0; + int32_t sub_a_a0; + int32_t sub_b_a0; + int32_t mul_a0; + int32_t add_a0; + + int32_t sub_a_rgb1; + int32_t sub_b_rgb1; + int32_t mul_rgb1; + int32_t add_rgb1; + int32_t sub_a_a1; + int32_t sub_b_a1; + int32_t mul_a1; + int32_t add_a1; +}; + +struct color_inputs_t +{ + // combiner inputs + color_t* combiner_rgbsub_a[2]; + color_t* combiner_rgbsub_b[2]; + color_t* combiner_rgbmul[2]; + color_t* combiner_rgbadd[2]; + + color_t* combiner_alphasub_a[2]; + color_t* combiner_alphasub_b[2]; + color_t* combiner_alphamul[2]; + color_t* combiner_alphaadd[2]; + + // blender input + color_t* blender1a_rgb[2]; + color_t* blender1b_a[2]; + color_t* blender2a_rgb[2]; + color_t* blender2b_a[2]; +}; + +struct other_modes_t +{ + int32_t cycle_type; + bool persp_tex_en; + bool detail_tex_en; + bool sharpen_tex_en; + bool tex_lod_en; + bool en_tlut; + bool tlut_type; + bool sample_type; + bool mid_texel; + bool bi_lerp0; + bool bi_lerp1; + bool convert_one; + bool key_en; + int32_t rgb_dither_sel; + int32_t alpha_dither_sel; + int32_t blend_m1a_0; + int32_t blend_m1a_1; + int32_t blend_m1b_0; + int32_t blend_m1b_1; + int32_t blend_m2a_0; + int32_t blend_m2a_1; + int32_t blend_m2b_0; + int32_t 
blend_m2b_1; + int32_t tex_edge; + int32_t force_blend; + int32_t blend_shift; + bool alpha_cvg_select; + bool cvg_times_alpha; + int32_t z_mode; + int32_t cvg_dest; + bool color_on_cvg; + uint8_t image_read_en; + bool z_update_en; + bool z_compare_en; + bool antialias_en; + bool z_source_sel; + int32_t dither_alpha_en; + int32_t alpha_compare_en; + int32_t alpha_dither_mode; +}; + +struct rectangle_t +{ + uint16_t m_xl; // 10.2 fixed-point + uint16_t m_yl; // 10.2 fixed-point + uint16_t m_xh; // 10.2 fixed-point + uint16_t m_yh; // 10.2 fixed-point +}; + +struct rdp_poly_state +{ + n64_rdp* m_rdp; /* pointer back to the RDP state */ + + misc_state_t m_misc_state; /* miscellaneous rasterizer bits */ + other_modes_t m_other_modes; /* miscellaneous rasterizer bits (2) */ + span_base_t m_span_base; /* span initial values for triangle rasterization */ + rectangle_t m_scissor; /* screen-space scissor bounds */ + uint32_t m_fill_color; /* poly fill color */ + n64_tile_t m_tiles[8]; /* texture tile state */ + uint8_t m_tmem[0x1000]; /* texture cache */ + int32_t tilenum; /* texture tile index */ + bool flip; /* left-major / right-major flip */ + bool rect; /* primitive is rectangle (vs. 
triangle) */ +}; + +#define RDP_CVG_SPAN_MAX (1024) + +// This is enormous and horrible +struct rdp_span_aux +{ + uint32_t m_unscissored_rx; + uint16_t m_cvg[RDP_CVG_SPAN_MAX]; + color_t m_memory_color; + color_t m_pixel_color; + color_t m_inv_pixel_color; + color_t m_blended_pixel_color; + + color_t m_combined_color; + color_t m_combined_alpha; + color_t m_texel0_color; + color_t m_texel0_alpha; + color_t m_texel1_color; + color_t m_texel1_alpha; + color_t m_next_texel_color; + color_t m_next_texel_alpha; + color_t m_blend_color; /* constant blend color */ + color_t m_prim_color; /* flat primitive color */ + color_t m_prim_alpha; /* flat primitive alpha */ + color_t m_env_color; /* generic color constant ('environment') */ + color_t m_env_alpha; /* generic alpha constant ('environment') */ + color_t m_fog_color; /* generic color constant ('fog') */ + color_t m_shade_color; /* gouraud-shaded color */ + color_t m_shade_alpha; /* gouraud-shaded alpha */ + color_t m_key_scale; /* color-keying constant */ + color_t m_noise_color; /* noise */ + color_t m_lod_fraction; /* Z-based LOD fraction for this poly */ + color_t m_prim_lod_fraction; /* fixed LOD fraction for this poly */ + color_t m_k4; + color_t m_k5; + color_inputs_t m_color_inputs; + uint32_t m_current_pix_cvg; + uint32_t m_current_mem_cvg; + uint32_t m_current_cvg_bit; + int32_t m_shift_a; + int32_t m_shift_b; + int32_t m_precomp_s; + int32_t m_precomp_t; + int32_t m_blend_enable; + bool m_pre_wrap; + int32_t m_dzpix_enc; + uint8_t* m_tmem; /* pointer to texture cache for this polygon */ + bool m_start_span; + rgbaint_t m_clamp_diff[8]; + combine_modes_t m_combine; +}; + +struct z_decompress_entry_t +{ + uint32_t shift; + uint32_t add; +}; + +struct cv_mask_derivative_t +{ + uint8_t cvg; + uint8_t cvbit; + uint8_t xoff; + uint8_t yoff; +}; + +class span_param_t +{ + public: + union + { + uint32_t w; +#ifdef LSB_FIRST + struct { uint16_t l; int16_t h; } h; +#else + struct { int16_t h; uint16_t l; } h; +#endif + 
}; +}; + +#endif // _VIDEO_N64TYPES_H_ diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/pin64.cpp b/waterbox/ares64/ares/thirdparty/mame/mame/video/pin64.cpp new file mode 100644 index 0000000000..ba9b217d4f --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/pin64.cpp @@ -0,0 +1,511 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz + +#include "emu.h" +#include "pin64.h" + +#define CAP_NAME "pin64_%d.cap" + +// pin64_fileutil_t members + +void pin64_fileutil_t::write(FILE* file, uint32_t data) { + if (!file) + return; + + uint8_t temp(data >> 24); + fwrite(&temp, 1, 1, file); + + temp = (uint8_t)(data >> 16); + fwrite(&temp, 1, 1, file); + + temp = (uint8_t)(data >> 8); + fwrite(&temp, 1, 1, file); + + temp = (uint8_t)data; + fwrite(&temp, 1, 1, file); +} + +void pin64_fileutil_t::write(FILE* file, const uint8_t* data, uint32_t size) { + if (!file) + return; + + fwrite(data, 1, size, file); +} + + + +// pin64_data_t members + +void pin64_data_t::put8(uint8_t data) { + m_data.push_back(data); + m_offset++; +} + +void pin64_data_t::put16(uint16_t data) { + put8((uint8_t)(data >> 8)); + put8((uint8_t)data); +} + +void pin64_data_t::put32(uint32_t data) { + put16((uint16_t)(data >> 16)); + put16((uint16_t)data); +} + +void pin64_data_t::put64(uint64_t data) { + put32((uint32_t)(data >> 32)); + put32((uint32_t)data); +} + +uint8_t pin64_data_t::get8() { + if (m_offset >= m_data.size()) + fatalerror("PIN64: Call to pin64_data_t::get8() at end of block (requested offset %x, size %x)\n", m_offset, (uint32_t)m_data.size()); + + uint8_t ret = m_data[m_offset]; + m_offset++; + + return ret; +} + +uint16_t pin64_data_t::get16() { + uint16_t ret = (uint16_t)get8() << 8; + return ret | get8(); +} + +uint32_t pin64_data_t::get32() { + uint32_t ret = (uint32_t)get16() << 16; + return ret | get16(); +} + +uint64_t pin64_data_t::get64() { + uint64_t ret = (uint64_t)get32() << 32; + return ret | get32(); +} + +uint8_t 
pin64_data_t::get8(uint32_t offset, bool temp_access) { + update_offset(offset, temp_access); + + uint8_t ret = get8(); + m_offset = m_old_offset; + return ret; +} + +uint16_t pin64_data_t::get16(uint32_t offset, bool temp_access) { + update_offset(offset, temp_access); + + uint16_t ret = get16(); + m_offset = m_old_offset; + return ret; +} + +uint32_t pin64_data_t::get32(uint32_t offset, bool temp_access) { + update_offset(offset, temp_access); + + uint32_t ret = get32(); + m_offset = m_old_offset; + return ret; +} + +uint64_t pin64_data_t::get64(uint32_t offset, bool temp_access) { + update_offset(offset, temp_access); + + uint32_t ret = get64(); + m_offset = m_old_offset; + return ret; +} + +void pin64_data_t::reset() { + m_old_offset = 0; + m_offset = 0; +} + +void pin64_data_t::clear() { + reset(); + m_data.clear(); +} + +void pin64_data_t::update_offset(uint32_t offset, bool update_current) { + m_old_offset = (update_current ? offset : m_offset); + m_offset = offset; +} + + + +// pin64_printer_t members + +void pin64_printer_t::print_data(pin64_block_t* block) { + pin64_data_t* data = block->data(); + + printf(" CRC32: %08x\n", (uint32_t)block->crc32()); fflush(stdout); + printf(" Data Size: %08x\n", (uint32_t)data->size()); fflush(stdout); + printf(" Data: "); fflush(stdout); + + const uint32_t data_size = data->size(); + const uint32_t row_count = (data_size + 31) / 32; + const uint8_t* bytes = data->bytes(); + for (uint32_t row = 0; row < row_count; row++) { + const uint32_t row_index = row * 32; + const uint32_t data_remaining = data_size - row_index; + const uint32_t col_count = (data_remaining > 32 ? 
32 : data_remaining); + for (uint32_t col = 0; col < col_count; col++) { + printf("%02x ", bytes[row_index + col]); fflush(stdout); + } + + if (row == (row_count - 1)) { + printf("\n"); fflush(stdout); + } else { + printf("\n "); fflush(stdout); + } + } + + printf("\n"); fflush(stdout); +} + +void pin64_printer_t::print_command(int cmd_start, int cmd, std::unordered_map& blocks, std::vector& commands) { + pin64_block_t* block = blocks[commands[cmd]]; + pin64_data_t* data = block->data(); + + printf(" Command %d:\n", cmd - cmd_start); fflush(stdout); + const uint32_t cmd_size(data->get32()); + printf(" CRC32: %08x\n", (uint32_t)commands[cmd]); fflush(stdout); + printf(" Packet Data Size: %d words\n", cmd_size); fflush(stdout); + printf(" Packet Data: "); fflush(stdout); + + bool load_command = false; + for (int i = 0; i < cmd_size; i++) { + const uint64_t cmd_entry(data->get64()); + if (i == 0) { + const uint8_t top_byte = uint8_t(cmd_entry >> 56) & 0x3f; + if (top_byte == 0x30 || top_byte == 0x33 || top_byte == 0x34) + load_command = true; + } + printf("%08x%08x\n", uint32_t(cmd_entry >> 32), (uint32_t)cmd_entry); fflush(stdout); + + if (i < (cmd_size - 1)) { + printf(" "); fflush(stdout); + } + } + + printf(" Data Block Present: %s\n", load_command ? 
"Yes" : "No"); fflush(stdout); + + if (load_command) { + printf(" Data Block CRC32: %08x\n", data->get32()); fflush(stdout); + } + + data->reset(); +}; + + + +// pin64_block_t members + +void pin64_block_t::finalize() { +#if !defined(MAME_RDP) + if (m_data.size() > 0) + m_crc32 = util::crc32_creator::simple(m_data.bytes(), m_data.size()); + else +#endif + m_crc32 = ~0; + m_data.reset(); +} + +void pin64_block_t::clear() { + m_crc32 = 0; + m_data.clear(); +} + +void pin64_block_t::write(FILE* file) { + pin64_fileutil_t::write(file, m_crc32); + pin64_fileutil_t::write(file, m_data.size()); + if (m_data.size() > 0) + pin64_fileutil_t::write(file, m_data.bytes(), m_data.size()); +} + +uint32_t pin64_block_t::size() { + return sizeof(uint32_t) // data CRC32 + + sizeof(uint32_t) // data size + + m_data.size(); // data +} + + + +// pin64_t members + +const uint8_t pin64_t::CAP_ID[8] = { 'P', 'I', 'N', '6', '4', 'C', 'A', 'P' }; + +pin64_t::~pin64_t() { + if (m_capture_file) + finish(); + + clear(); +} + +void pin64_t::start(int frames) +{ + if (m_capture_index == ~0) + init_capture_index(); + + if (m_capture_file) + fatalerror("PIN64: Call to start() while already capturing\n"); + + char name_buf[256]; + sprintf(name_buf, CAP_NAME, m_capture_index); + m_capture_index++; + + m_capture_file = fopen(name_buf, "wb"); + + m_capture_frames = frames; + + m_frames.push_back(0); +} + +void pin64_t::finish() { + if (!m_capture_file) + return; + + finalize(); + print(); + + write(m_capture_file); + fclose(m_capture_file); + m_capture_file = nullptr; + + clear(); +} + +void pin64_t::finalize() { + finish_command(); + data_end(); +} + +void pin64_t::play(int index) { +} + +void pin64_t::mark_frame(running_machine& machine) { + if (m_capture_file) { + if (m_frames.size() == m_capture_frames && m_capture_frames > 0) { + printf("\n"); + finish(); +#if !defined(MAME_RDP) + machine.popmessage("Done recording."); +#endif + } else { + printf("%d ", (uint32_t)m_commands.size()); + 
m_frames.push_back((uint32_t)m_commands.size()); + } + } + +#if PIN64_ENABLE_CAPTURE + if (machine.input().code_pressed_once(KEYCODE_N) && !m_capture_file) { + start(1); + machine.popmessage("Capturing PIN64 snapshot to pin64_%d.cap", m_capture_index - 1); + } else if (machine.input().code_pressed_once(KEYCODE_M)) { + if (m_capture_file) { + finish(); + machine.popmessage("Done recording."); + } else { + start(); + machine.popmessage("Recording PIN64 movie to pin64_%d.cap", m_capture_index - 1); + } + } +#endif +} + +void pin64_t::command(uint64_t* cmd_data, uint32_t size) { + if (!capturing()) + return; + + finish_command(); + + m_current_command = new pin64_block_t(); + m_current_command->data()->put32(size); + + for (uint32_t i = 0 ; i < size; i++) + m_current_command->data()->put64(cmd_data[i]); +} + +void pin64_t::finish_command() { + if (!m_current_command) + return; + + m_current_command->finalize(); + if (m_blocks.find(m_current_command->crc32()) == m_blocks.end()) + m_blocks[m_current_command->crc32()] = m_current_command; + + m_commands.push_back(m_current_command->crc32()); +} + +void pin64_t::data_begin() { + if (!capturing()) + return; + + if (m_current_data) + data_end(); + + m_current_data = new pin64_block_t(); +} + +pin64_data_t* pin64_t::data_block() { + if (!capturing() || !m_current_data) + return &m_dummy_data; + + return m_current_data->data(); +} + +void pin64_t::data_end() { + if (!capturing() || !m_current_data) + return; + + m_current_data->finalize(); + m_current_command->data()->put32(m_current_data->crc32()); + finish_command(); + + if (m_blocks.find(m_current_data->crc32()) == m_blocks.end()) + m_blocks[m_current_data->crc32()] = m_current_data; + + m_current_data = nullptr; +} + +size_t pin64_t::size() { + return header_size() + block_directory_size() + cmdlist_directory_size() + cmdlist_size(); +} + +size_t pin64_t::header_size() { + return sizeof(uint8_t) * 8 // "PIN64CAP" + + sizeof(uint32_t) // total file size + + sizeof(uint32_t) 
// start of block directory data + + sizeof(uint32_t) // start of command-list directory data + + sizeof(uint32_t) // start of blocks + + sizeof(uint32_t); // start of commands +} + +size_t pin64_t::block_directory_size() { + return (m_blocks.size() + 1) * sizeof(uint32_t); +} + +size_t pin64_t::cmdlist_directory_size() { + return (m_frames.size() + 1) * sizeof(uint16_t); +} + +size_t pin64_t::blocks_size() { + size_t block_size = 0; + for (std::pair block_pair : m_blocks) + block_size += (block_pair.second)->size(); + + return block_size; +} + +size_t pin64_t::cmdlist_size() { + return (m_commands.size() + 1) * sizeof(uint32_t); +} + +void pin64_t::print() +{ + printf("Total Size: %9x bytes\n", (uint32_t)size()); fflush(stdout); + printf("Header Size: %9x bytes\n", (uint32_t)header_size()); fflush(stdout); + printf("Block Dir Size: %9x bytes\n", (uint32_t)block_directory_size()); fflush(stdout); + printf("Cmdlist Dir Size: %9x bytes\n", (uint32_t)cmdlist_directory_size()); fflush(stdout); + printf("Blocks Size: %9x bytes\n", (uint32_t)blocks_size()); fflush(stdout); + printf("Cmdlist Size: %9x bytes\n", (uint32_t)cmdlist_size()); fflush(stdout); + + printf("Command-List Count: %d\n", (uint32_t)m_frames.size()); fflush(stdout); + for (int i = 0; i < m_frames.size(); i++) { + printf(" List %d:\n", i); fflush(stdout); + + const int next_start = ((i == (m_frames.size() - 1)) ? 
m_commands.size() : m_frames[i+1]); + for (int cmd = m_frames[i]; cmd < next_start; cmd++) { + pin64_printer_t::print_command(m_frames[i], cmd, m_blocks, m_commands); + } + if (i == (m_frames.size() - 1)) { + printf("\n"); fflush(stdout); + } + } + + printf("\nData Block Count: %d\n", (uint32_t)m_blocks.size()); fflush(stdout); + int i = 0; + for (std::pair block_pair : m_blocks) { + printf(" Block %d:\n", i); fflush(stdout); + + pin64_printer_t::print_data((block_pair.second)); + if (i == (m_blocks.size() - 1)) { + printf("\n"); fflush(stdout); + } + i++; + } +} + +void pin64_t::write(FILE* file) { + const uint32_t size_total = size(); + const uint32_t size_header = header_size(); + const uint32_t size_block_dir = block_directory_size(); + const uint32_t size_cmdlist_dir = cmdlist_directory_size(); + const uint32_t size_blocks_dir = blocks_size(); + + pin64_fileutil_t::write(file, CAP_ID, 8); + pin64_fileutil_t::write(file, size_total); + pin64_fileutil_t::write(file, size_header); + pin64_fileutil_t::write(file, size_header + size_block_dir); + pin64_fileutil_t::write(file, size_header + size_block_dir + size_cmdlist_dir); + pin64_fileutil_t::write(file, size_header + size_block_dir + size_cmdlist_dir + size_blocks_dir); + + write_data_directory(file); + write_cmdlist_directory(file); + + for (std::pair block_pair : m_blocks) + (block_pair.second)->write(file); + + pin64_fileutil_t::write(file, m_commands.size()); + for (util::crc32_t crc : m_commands) + pin64_fileutil_t::write(file, crc); +} + +void pin64_t::write_data_directory(FILE* file) { + pin64_fileutil_t::write(file, m_blocks.size()); + size_t offset(header_size()); + for (std::pair block_pair : m_blocks) { + pin64_fileutil_t::write(file, offset); + offset += (block_pair.second)->size(); + } +} + +void pin64_t::write_cmdlist_directory(FILE* file) { + pin64_fileutil_t::write(file, m_frames.size()); + for (uint32_t frame : m_frames) + pin64_fileutil_t::write(file, frame); +} + +void pin64_t::clear() { + if 
(m_capture_file != nullptr) { + fclose(m_capture_file); + m_capture_file = nullptr; + } + + for (std::pair block_pair : m_blocks) + delete block_pair.second; + + m_blocks.clear(); + m_commands.clear(); + m_frames.clear(); + + m_current_data = nullptr; + m_current_command = nullptr; +} + +void pin64_t::init_capture_index() +{ + char name_buf[256]; + bool found = true; + + m_capture_index = 0; + + do { + sprintf(name_buf, CAP_NAME, m_capture_index); + + FILE* temp = fopen(name_buf, "rb"); + if (temp == nullptr) { + break; + } else { + fclose(temp); + m_capture_index++; + } + } while(found); +} diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/pin64.h b/waterbox/ares64/ares/thirdparty/mame/mame/video/pin64.h new file mode 100644 index 0000000000..31b836f7ed --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/pin64.h @@ -0,0 +1,180 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +#ifndef MAME_VIDEO_PIN64_H +#define MAME_VIDEO_PIN64_H + +#pragma once + +#include +#include +#include + + +#define PIN64_ENABLE_CAPTURE (0) + + +class pin64_fileutil_t { +public: + static void write(FILE* file, uint32_t data); + static void write(FILE* file, const uint8_t* data, uint32_t size); +}; + +class pin64_command_t { +public: + std::vector data; +}; + +class pin64_data_t { +public: + pin64_data_t() + : m_offset(0) + , m_old_offset(0) { } + + void reset(); + void clear(); + + // setters + virtual void put8(uint8_t data); + virtual void put16(uint16_t data); + virtual void put32(uint32_t data); + virtual void put64(uint64_t data); + + // getters + virtual uint8_t get8(); + virtual uint8_t get8(uint32_t offset, bool temp_access = false); + virtual uint16_t get16(); + virtual uint16_t get16(uint32_t offset, bool temp_access = false); + virtual uint32_t get32(); + virtual uint32_t get32(uint32_t offset, bool temp_access = false); + virtual uint64_t get64(); + virtual uint64_t get64(uint32_t offset, bool temp_access = false); + virtual uint32_t 
offset() { return m_offset; } + uint8_t* bytes() { return (m_data.size() > 0) ? &m_data[0] : nullptr; } + uint32_t size() { return m_data.size(); } + +private: + void update_offset(uint32_t offset, bool temp_access = false); + +protected: + std::vector m_data; + + uint32_t m_offset; + uint32_t m_old_offset; +}; + +class pin64_dummy_data_t : public pin64_data_t { +public: + void put8(uint8_t data) override { } + void put16(uint16_t data) override { } + void put32(uint32_t data) override { } + void put64(uint64_t data) override { } + + uint8_t get8() override { return 0; } + uint8_t get8(uint32_t offset, bool update_current = true) override { return 0; } + uint16_t get16() override { return 0; } + uint16_t get16(uint32_t offset, bool update_current = true) override { return 0; } + uint32_t get32() override { return 0; } + uint32_t get32(uint32_t offset, bool update_current = true) override { return 0; } + uint64_t get64() override { return 0; } + uint64_t get64(uint32_t offset, bool update_current = true) override { return 0; } + + uint32_t offset() override { return 0; } +}; + +class pin64_block_t { +public: + pin64_block_t() + : m_crc32{0} { } + virtual ~pin64_block_t() { } + + void finalize(); + void clear(); + + void write(FILE* file); + + // getters + uint32_t size(); + pin64_data_t* data() { return &m_data; } + util::crc32_t crc32() const { return m_crc32; } + +protected: + util::crc32_t m_crc32; + pin64_data_t m_data; +}; + +class pin64_printer_t { +public: + static void print_data(pin64_block_t* block); + static void print_command(int cmd_start, int cmd, std::unordered_map& blocks, std::vector& commands); +}; + +class pin64_t +{ +public: + pin64_t() + : m_capture_file(nullptr) + , m_capture_index(~0) + , m_capture_frames(0) + , m_current_data(nullptr) + , m_current_command(nullptr) + , m_playing(false) + { } + ~pin64_t(); + + void start(int frames = 0); + void finish(); + void clear(); + void print(); + + void mark_frame(running_machine& machine); + void 
play(int index); + + void command(uint64_t* cmd_data, uint32_t size); + + void data_begin(); + pin64_data_t* data_block(); + pin64_block_t& block() { return *m_current_data; } + void data_end(); + + bool capturing() const { return m_capture_file != nullptr; } + bool playing() const { return m_playing; } + + size_t size(); + +private: + void start_command_block(); + + void write(FILE* file); + + size_t header_size(); + size_t block_directory_size(); + size_t cmdlist_directory_size(); + size_t blocks_size(); + size_t cmdlist_size(); + + void finish_command(); + + void write_data_directory(FILE* file); + void write_cmdlist_directory(FILE* file); + void init_capture_index(); + + void finalize(); + + FILE *m_capture_file; + int32_t m_capture_index; + int m_capture_frames; + + pin64_block_t* m_current_data; + pin64_block_t* m_current_command; + std::unordered_map m_blocks; + + std::vector m_commands; + std::vector m_frames; + + bool m_playing; + + pin64_dummy_data_t m_dummy_data; + static const uint8_t CAP_ID[8]; +}; + +#endif // MAME_VIDEO_PIN64_H diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpblend.cpp b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpblend.cpp new file mode 100644 index 0000000000..7e20b68aa6 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpblend.cpp @@ -0,0 +1,461 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +/****************************************************************************** + + + SGI/Nintendo Reality Display Processor Blend Unit (BL) + ------------------- + + by Ryan Holtz + based on initial C code by Ville Linde + contains additional improvements from angrylion, Ziggy, Gonetz and Orkin + + +******************************************************************************/ + +#include "emu.h" +#include "includes/n64.h" +#include "video/n64.h" + +n64_blender_t::n64_blender_t() +{ + blend1[0] = &n64_blender_t::cycle1_noblend_noacvg_nodither; + blend1[1] = 
&n64_blender_t::cycle1_noblend_noacvg_dither; + blend1[2] = &n64_blender_t::cycle1_noblend_acvg_nodither; + blend1[3] = &n64_blender_t::cycle1_noblend_acvg_dither; + blend1[4] = &n64_blender_t::cycle1_blend_noacvg_nodither; + blend1[5] = &n64_blender_t::cycle1_blend_noacvg_dither; + blend1[6] = &n64_blender_t::cycle1_blend_acvg_nodither; + blend1[7] = &n64_blender_t::cycle1_blend_acvg_dither; + + blend2[0] = &n64_blender_t::cycle2_noblend_noacvg_nodither; + blend2[1] = &n64_blender_t::cycle2_noblend_noacvg_dither; + blend2[2] = &n64_blender_t::cycle2_noblend_acvg_nodither; + blend2[3] = &n64_blender_t::cycle2_noblend_acvg_dither; + blend2[4] = &n64_blender_t::cycle2_blend_noacvg_nodither; + blend2[5] = &n64_blender_t::cycle2_blend_noacvg_dither; + blend2[6] = &n64_blender_t::cycle2_blend_acvg_nodither; + blend2[7] = &n64_blender_t::cycle2_blend_acvg_dither; + + for (int value = 0; value < 256; value++) + { + for (int dither = 0; dither < 8; dither++) + { + m_color_dither[(value << 3) | dither] = (uint8_t)dither_color(value, dither); + m_alpha_dither[(value << 3) | dither] = (uint8_t)dither_alpha(value, dither); + } + } +} + +int32_t n64_blender_t::dither_alpha(int32_t alpha, int32_t dither) +{ + return min(alpha + dither, 0xff); +} + +int32_t n64_blender_t::dither_color(int32_t color, int32_t dither) +{ + if ((color & 7) > dither) + { + color = (color & 0xf8) + 8; + if (color > 247) + { + color = 255; + } + } + return color; +} + +bool n64_blender_t::test_for_reject(rdp_span_aux* userdata, const rdp_poly_state& object) +{ + if (alpha_reject(userdata, object)) + { + return true; + } + if (object.m_other_modes.antialias_en ? 
!userdata->m_current_pix_cvg : !userdata->m_current_cvg_bit) + { + return true; + } + return false; +} + +bool n64_blender_t::alpha_reject(rdp_span_aux* userdata, const rdp_poly_state& object) +{ + switch (object.m_other_modes.alpha_dither_mode) + { + case 0: + case 1: + return false; + + case 2: + return userdata->m_pixel_color.get_a() < userdata->m_blend_color.get_a(); + + case 3: + return userdata->m_pixel_color.get_a() < (machine().rand() & 0xff); + + default: + return false; + } +} + +bool n64_blender_t::cycle1_noblend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[((uint8_t)userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + if (test_for_reject(userdata, object)) + { + return false; + } + blended_pixel.set(*userdata->m_color_inputs.blender1a_rgb[0]); + + return true; +} + +bool n64_blender_t::cycle1_noblend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[((uint8_t)userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + if (test_for_reject(userdata, object)) + { + return false; + } + + rgbaint_t index(*userdata->m_color_inputs.blender1a_rgb[0]); + index.shl_imm(3); + index.or_imm(dith); + index.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[index.get_r32()], m_color_dither[index.get_g32()], m_color_dither[index.get_b32()]); + + return true; +} + +bool n64_blender_t::cycle1_noblend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + 
userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + blended_pixel.set(*userdata->m_color_inputs.blender1a_rgb[0]); + + return true; +} + +bool n64_blender_t::cycle1_noblend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + rgbaint_t index(*userdata->m_color_inputs.blender1a_rgb[0]); + index.shl_imm(3); + index.or_imm(dith); + index.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[index.get_r32()], m_color_dither[index.get_g32()], m_color_dither[index.get_b32()]); + + return true; +} + +bool n64_blender_t::cycle1_blend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[((uint8_t)userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + blend_with_partial_reject(blended_pixel, 0, partialreject, sel0, userdata, object); + + return true; +} + +bool n64_blender_t::cycle1_blend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[((uint8_t)userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + color_t rgb; + blend_with_partial_reject(rgb, 0, partialreject, sel0, 
userdata, object); + + rgb.shl_imm(3); + rgb.or_imm(dith); + rgb.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[rgb.get_r32()], m_color_dither[rgb.get_g32()], m_color_dither[rgb.get_b32()]); + + return true; +} + +bool n64_blender_t::cycle1_blend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + blend_with_partial_reject(blended_pixel, 0, partialreject, sel0, userdata, object); + + return true; +} + +bool n64_blender_t::cycle1_blend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + color_t rgb; + blend_with_partial_reject(rgb, 0, partialreject, sel0, userdata, object); + + rgb.shl_imm(3); + rgb.or_imm(dith); + rgb.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[rgb.get_r32()], m_color_dither[rgb.get_g32()], m_color_dither[rgb.get_b32()]); + + return true; +} + +bool n64_blender_t::cycle2_noblend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[((uint8_t)userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + 
userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + blended_pixel.set(*userdata->m_color_inputs.blender1a_rgb[1]); + + return true; +} + +bool n64_blender_t::cycle2_noblend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[((uint8_t)userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - (uint8_t)userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + rgbaint_t index(*userdata->m_color_inputs.blender1a_rgb[1]); + index.shl_imm(3); + index.or_imm(dith); + index.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[index.get_r32()], m_color_dither[index.get_g32()], m_color_dither[index.get_b32()]); + + return true; +} + +bool n64_blender_t::cycle2_noblend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + blended_pixel.set(*userdata->m_color_inputs.blender1a_rgb[1]); + + return true; +} + +bool n64_blender_t::cycle2_noblend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, 
rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + rgbaint_t index(*userdata->m_color_inputs.blender1a_rgb[1]); + index.shl_imm(3); + index.or_imm(dith); + index.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[index.get_r32()], m_color_dither[index.get_g32()], m_color_dither[index.get_b32()]); + + return true; +} + +bool n64_blender_t::cycle2_blend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[((uint8_t)userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[((uint8_t)userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + blend_with_partial_reject(blended_pixel, 1, partialreject, sel1, userdata, object); + + return true; +} + +bool n64_blender_t::cycle2_blend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_pixel_color.set_a(m_alpha_dither[(userdata->m_pixel_color.get_a() << 3) | adseed]); + userdata->m_shade_color.set_a(m_alpha_dither[(userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, 
object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + color_t rgb; + blend_with_partial_reject(rgb, 1, partialreject, sel1, userdata, object); + + rgb.shl_imm(3); + rgb.or_imm(dith); + rgb.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[rgb.get_r32()], m_color_dither[rgb.get_g32()], m_color_dither[rgb.get_b32()]); + + return true; +} + +bool n64_blender_t::cycle2_blend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_shade_color.set_a(m_alpha_dither[(userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + blend_with_partial_reject(blended_pixel, 1, partialreject, sel1, userdata, object); + + return true; +} + +bool n64_blender_t::cycle2_blend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + userdata->m_shade_color.set_a(m_alpha_dither[(userdata->m_shade_color.get_a() << 3) | adseed]); + + if (test_for_reject(userdata, object)) + { + return false; + } + + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[0]->get_a()); + blend_pipe(0, sel0, userdata->m_blended_pixel_color, userdata, object); + userdata->m_blended_pixel_color.set_a(userdata->m_pixel_color.get_a()); + + color_t rgb; + blend_with_partial_reject(rgb, 1, partialreject, sel1, userdata, object); + + rgb.shl_imm(3); + 
rgb.or_imm(dith); + rgb.and_imm(0x7ff); + blended_pixel.set(0, m_color_dither[rgb.get_r32()], m_color_dither[rgb.get_g32()], m_color_dither[rgb.get_b32()]); + + return true; +} + +void n64_blender_t::blend_with_partial_reject(color_t& out, int32_t cycle, int32_t partialreject, int32_t select, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + if (partialreject && userdata->m_pixel_color.get_a() >= 0xff) + { + out.set(*userdata->m_color_inputs.blender1a_rgb[cycle]); + } + else + { + userdata->m_inv_pixel_color.set_a(0xff - userdata->m_color_inputs.blender1b_a[cycle]->get_a()); + blend_pipe(cycle, select, out, userdata, object); + } +} + +void n64_blender_t::blend_pipe(const int cycle, const int special, color_t& out, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const int32_t mask = 0xff &~ (0x73 * special); + const int32_t shift_a = 3 + userdata->m_shift_a * special; + const int32_t shift_b = 3 + userdata->m_shift_b * special; + const int32_t blend1a = (userdata->m_color_inputs.blender1b_a[cycle]->get_a() >> shift_a) & mask; + const int32_t blend2a = (userdata->m_color_inputs.blender2b_a[cycle]->get_a() >> shift_b) & mask; + const int32_t special_shift = special << 1; + + rgbaint_t temp(*userdata->m_color_inputs.blender1a_rgb[cycle]); + temp.mul_imm(blend1a); + + rgbaint_t secondary(*userdata->m_color_inputs.blender2a_rgb[cycle]); + rgbaint_t other(*userdata->m_color_inputs.blender2a_rgb[cycle]); + other.mul_imm(blend2a); + + temp.add(other); + secondary.shl_imm(special_shift); + temp.add(secondary); + temp.shr_imm(object.m_other_modes.blend_shift); + + int32_t factor_sum = 0; + if (!object.m_other_modes.force_blend) + { + factor_sum = ((blend1a >> 2) + (blend2a >> 2) + 1) & 0xf; + if (factor_sum) + { + temp.set_r(temp.get_r32() / factor_sum); + temp.set_g(temp.get_g32() / factor_sum); + temp.set_b(temp.get_b32() / factor_sum); + } + else + { + temp.set(0, 0xff, 0xff, 0xff); + } + } + + temp.min(255); + out.set(temp); +} + +inline int32_t 
n64_blender_t::min(const int32_t x, const int32_t min) +{ + if (x < min) + { + return x; + } + return min; +} diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpblend.h b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpblend.h new file mode 100644 index 0000000000..b81e817e2b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpblend.h @@ -0,0 +1,72 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +/****************************************************************************** + + + SGI/Nintendo Reality Display Processor Blend Unit (BL) + ------------------- + + by Ryan Holtz + based on initial C code by Ville Linde + contains additional improvements from angrylion, Ziggy, Gonetz and Orkin + + +******************************************************************************/ + +#ifndef _VIDEO_RDPBLEND_H_ +#define _VIDEO_RDPBLEND_H_ + +#include "video/n64.h" + +class n64_blender_t +{ + public: + typedef bool (n64_blender_t::*blender1)(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + typedef bool (n64_blender_t::*blender2)(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + + n64_blender_t(); + + blender1 blend1[8]; + blender2 blend2[8]; + + void set_machine(running_machine& machine) { m_machine = &machine; } + void set_processor(n64_rdp* rdp) { m_rdp = rdp; } + + running_machine &machine() const { assert(m_machine != nullptr); return *m_machine; } + + private: + running_machine* m_machine; + n64_rdp* m_rdp; + + int32_t min(const int32_t x, const int32_t min); + bool alpha_reject(rdp_span_aux* userdata, const rdp_poly_state& object); + bool test_for_reject(rdp_span_aux* userdata, const rdp_poly_state& object); + void blend_pipe(const int cycle, const int special, color_t& out, rdp_span_aux* userdata, const rdp_poly_state& object); + void 
blend_with_partial_reject(color_t& out, int32_t cycle, int32_t partialreject, int32_t select, rdp_span_aux* userdata, const rdp_poly_state& object); + + bool cycle1_noblend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle1_noblend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle1_noblend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle1_noblend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle1_blend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle1_blend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle1_blend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle1_blend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, rdp_span_aux* userdata, const rdp_poly_state& object); + + bool cycle2_noblend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle2_noblend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle2_noblend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& 
object); + bool cycle2_noblend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle2_blend_noacvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle2_blend_noacvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle2_blend_acvg_nodither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + bool cycle2_blend_acvg_dither(color_t& blended_pixel, int dith, int adseed, int partialreject, int sel0, int sel1, rdp_span_aux* userdata, const rdp_poly_state& object); + + int32_t dither_alpha(int32_t alpha, int32_t dither); + int32_t dither_color(int32_t color, int32_t dither); + + uint8_t m_color_dither[256 * 8]; + uint8_t m_alpha_dither[256 * 8]; +}; + +#endif // _VIDEO_RDPBLEND_H_ diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpfiltr.hxx b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpfiltr.hxx new file mode 100644 index 0000000000..ff8fbfb371 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdpfiltr.hxx @@ -0,0 +1,617 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +#if 0 +static inline void video_filter16(int *out_r, int *out_g, int *out_b, uint16_t* vbuff, uint8_t* hbuff, const uint32_t hres); +static inline void divot_filter16(uint8_t* r, uint8_t* g, uint8_t* b, uint16_t* fbuff, uint32_t fbuff_index); +static inline void restore_filter16(int32_t* r, int32_t* g, int32_t* b, uint16_t* fbuff, uint32_t fbuff_index, uint32_t hres); +static inline void divot_filter16_buffer(int32_t* r, int32_t* g, int32_t* b, color_t* vibuffer); +static inline void restore_filter16_buffer(int32_t* r, int32_t* g, int32_t* b, 
color_t* vibuff, uint32_t hres); +static inline void restore_two(color_t* filtered, color_t* neighbour); +static inline void video_max(uint32_t* Pixels, uint8_t* max, uint32_t* enb); +static inline uint32_t ge_two(uint32_t enb); + +static inline void video_filter16(int *out_r, int *out_g, int *out_b, uint16_t* vbuff, uint8_t* hbuff, const uint32_t hres) +{ + color_t penumax, penumin, max, min; + uint16_t pix = *vbuff; + const uint8_t centercvg = (*hbuff & 3) + ((pix & 1) << 2) + 1; + uint32_t numoffull = 1; + uint32_t cvg; + uint32_t backr[7], backg[7], backb[7]; + uint32_t invr[7], invg[7], invb[7]; + int32_t coeff; + int32_t leftup = -hres - 2; + int32_t leftdown = hres - 2; + int32_t toleft = -2; + uint32_t colr, colg, colb; + uint32_t enb; + uint32_t r = ((pix >> 8) & 0xf8) | (pix >> 13); + uint32_t g = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + uint32_t b = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + + *out_r = *out_g = *out_b = 0; + + backr[0] = r; + backg[0] = g; + backb[0] = b; + invr[0] = ~r; + invg[0] = ~g; + invb[0] = ~b; + + if (centercvg == 8) + { + *out_r = r; + *out_g = g; + *out_b = b; + return; + } + + for(int i = 0; i < 5; i++) + { + pix = vbuff[leftup ^ WORD_ADDR_XOR]; + cvg = hbuff[leftup ^ BYTE_ADDR_XOR] & 3; + if(i & 1) + { + if (cvg == 3 && (pix & 1)) + { + backr[numoffull] = ((pix >> 8) & 0xf8) | (pix >> 13); + backg[numoffull] = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + backb[numoffull] = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + invr[numoffull] = ~backr[numoffull]; + invg[numoffull] = ~backg[numoffull]; + invb[numoffull] = ~backb[numoffull]; + } + else + { + backr[numoffull] = invr[numoffull] = 0; + backg[numoffull] = invg[numoffull] = 0; + backb[numoffull] = invb[numoffull] = 0; + } + numoffull++; + } + leftup++; + } + + for(int i = 0; i < 5; i++) + { + pix = vbuff[leftdown ^ WORD_ADDR_XOR]; + cvg = hbuff[leftdown ^ BYTE_ADDR_XOR] & 3; + if (i&1) + { + if (cvg == 3 && (pix & 1)) + { + backr[numoffull] = ((pix >> 8) & 0xf8) | (pix 
>> 13); + backg[numoffull] = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + backb[numoffull] = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + invr[numoffull] = ~backr[numoffull]; + invg[numoffull] = ~backg[numoffull]; + invb[numoffull] = ~backb[numoffull]; + } + else + { + backr[numoffull] = invr[numoffull] = 0; + backg[numoffull] = invg[numoffull] = 0; + backb[numoffull] = invb[numoffull] = 0; + } + numoffull++; + } + leftdown++; + } + + for(int i = 0; i < 5; i++) + { + pix = vbuff[toleft ^ WORD_ADDR_XOR]; + cvg = hbuff[toleft ^ BYTE_ADDR_XOR] & 3; + if (!(i&3)) + { + if (cvg == 3 && (pix & 1)) + { + backr[numoffull] = ((pix >> 8) & 0xf8) | (pix >> 13); + backg[numoffull] = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + backb[numoffull] = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + invr[numoffull] = ~backr[numoffull]; + invg[numoffull] = ~backg[numoffull]; + invb[numoffull] = ~backb[numoffull]; + } + else + { + backr[numoffull] = invr[numoffull] = 0; + backg[numoffull] = invg[numoffull] = 0; + backb[numoffull] = invb[numoffull] = 0; + } + numoffull++; + } + toleft++; + } + + video_max(&backr[0], &max.i.r, &enb); + for(int i = 1; i < 7; i++) + { + if (!((enb >> i) & 1)) + { + backr[i] = 0; + } + } + video_max(&backg[0], &max.i.g, &enb); + for (int i = 1; i < 7; i++) + { + if (!((enb >> i) & 1)) + { + backg[i] = 0; + } + } + video_max(&backb[0], &max.i.b, &enb); + for (int i = 1; i < 7; i++) + { + if (!((enb >> i) & 1)) + { + backb[i] = 0; + } + } + video_max(&invr[0], &min.i.r, &enb); + for (int i = 1; i < 7; i++) + { + if (!((enb >> i) & 1)) + { + backr[i] = 0; + } + } + video_max(&invg[0], &min.i.g, &enb); + for (int i = 1; i < 7; i++) + { + if (!((enb >> i) & 1)) + { + backg[i] = 0; + } + } + video_max(&invb[0], &min.i.b, &enb); + for (int i = 1; i < 7; i++) + { + if (!((enb >> i) & 1)) + { + backb[i] = 0; + } + } + + video_max(&backr[0], &penumax.i.r, &enb); + penumax.i.r = ge_two(enb) ? 
max.i.r : penumax.i.r; + + video_max(&backg[0], &penumax.i.g, &enb); + penumax.i.g = ge_two(enb) ? max.i.g : penumax.i.g; + + video_max(&backb[0], &penumax.i.b, &enb); + penumax.i.b = ge_two(enb) ? max.i.b : penumax.i.b; + + video_max(&invr[0], &penumin.i.r, &enb); + penumin.i.r = ge_two(enb) ? min.i.r : penumin.i.r; + + video_max(&invg[0], &penumin.i.g, &enb); + penumin.i.g = ge_two(enb) ? min.i.g : penumin.i.g; + + video_max(&invb[0], &penumin.i.b, &enb); + penumin.i.b = ge_two(enb) ? min.i.b : penumin.i.b; + + penumin.i.r = ~penumin.i.r; + penumin.i.g = ~penumin.i.g; + penumin.i.b = ~penumin.i.b; + + colr = (uint32_t)penumin.i.r + (uint32_t)penumax.i.r - (r << 1); + colg = (uint32_t)penumin.i.g + (uint32_t)penumax.i.g - (g << 1); + colb = (uint32_t)penumin.i.b + (uint32_t)penumax.i.b - (b << 1); + coeff = 8 - centercvg; + colr = (((colr * coeff) + 4) >> 3) + r; + colg = (((colg * coeff) + 4) >> 3) + g; + colb = (((colb * coeff) + 4) >> 3) + b; + + *out_r = colr & 0xff; + *out_g = colg & 0xff; + *out_b = colb & 0xff; + return; +} + +// This needs to be fixed for endianness. +static inline void divot_filter16(uint8_t* r, uint8_t* g, uint8_t* b, uint16_t* fbuff, uint32_t fbuff_index) +{ + uint8_t leftr, leftg, leftb, rightr, rightg, rightb; + uint16_t leftpix, rightpix; + uint16_t* next, *prev; + uint32_t Lsw = fbuff_index & 1; + next = (Lsw) ? (uint16_t*)(fbuff - 1) : (uint16_t*)(fbuff + 3); + prev = (Lsw) ? 
(uint16_t*)(fbuff - 3) : (uint16_t*)(fbuff + 1); + leftpix = *prev; + rightpix = *next; + + //leftpix = *(fbuff - 1); //for BE targets + //rightpix = *(fbuff + 1); + + leftr = ((leftpix >> 8) & 0xf8) | (leftpix >> 13); + leftg = ((leftpix >> 3) & 0xf8) | ((leftpix >> 8) & 0x07); + leftb = ((leftpix << 2) & 0xf8) | ((leftpix >> 3) & 0x07); + rightr = ((rightpix >> 8) & 0xf8) | (rightpix >> 13); + rightg = ((rightpix >> 3) & 0xf8) | ((rightpix >> 8) & 0x07); + rightb = ((rightpix << 2) & 0xf8) | ((rightpix >> 3) & 0x07); + if ((leftr >= *r && rightr >= leftr) || (leftr >= rightr && *r >= leftr)) + { + *r = leftr; //left = median value + } + if ((rightr >= *r && leftr >= rightr) || (rightr >= leftr && *r >= rightr)) + { + *r = rightr; //right = median, else *r itself is median + } + if ((leftg >= *g && rightg >= leftg) || (leftg >= rightg && *g >= leftg)) + { + *g = leftg; + } + if ((rightg >= *g && leftg >= rightg) || (rightg >= leftg && *g >= rightg)) + { + *g = rightg; + } + if ((leftb >= *b && rightb >= leftb) || (leftb >= rightb && *b >= leftb)) + { + *b = leftb; + } + if ((rightb >= *b && leftb >= rightb) || (rightb >= leftb && *b >= rightb)) + { + *b = rightb; + } +} + +static inline void divot_filter16_buffer(int* r, int* g, int* b, color_t* vibuffer) +{ + color_t leftpix = vibuffer[-1]; + color_t rightpix = vibuffer[1]; + color_t filtered = *vibuffer; + + *r = filtered.i.r; + *g = filtered.i.g; + *b = filtered.i.b; + uint32_t leftr = leftpix.i.r; + uint32_t leftg = leftpix.i.g; + uint32_t leftb = leftpix.i.b; + uint32_t rightr = rightpix.i.r; + uint32_t rightg = rightpix.i.g; + uint32_t rightb = rightpix.i.b; + + if ((leftr >= *r && rightr >= leftr) || (leftr >= rightr && *r >= leftr)) + { + *r = leftr; //left = median value + } + if ((rightr >= *r && leftr >= rightr) || (rightr >= leftr && *r >= rightr)) + { + *r = rightr; //right = median, else *r itself is median + } + if ((leftg >= *g && rightg >= leftg) || (leftg >= rightg && *g >= leftg)) + { + *g = 
leftg; + } + if ((rightg >= *g && leftg >= rightg) || (rightg >= leftg && *g >= rightg)) + { + *g = rightg; + } + if ((leftb >= *b && rightb >= leftb) || (leftb >= rightb && *b >= leftb)) + { + *b = leftb; + } + if ((rightb >= *b && leftb >= rightb) || (rightb >= leftb && *b >= rightb)) + { + *b = rightb; + } + + filtered.i.r = *r; + filtered.i.g = *g; + filtered.i.b = *b; +} + +// Fix me. +static inline void restore_filter16(int* r, int* g, int* b, uint16_t* fbuff, uint32_t fbuff_index, uint32_t hres) +{ + int32_t leftuppix = -hres - 1; + int32_t leftdownpix = hres - 1; + int32_t toleftpix = -1; + uint8_t tempr, tempg, tempb; + uint16_t pix; + int i; + + uint8_t r5 = *r; + uint8_t g5 = *g; + uint8_t b5 = *b; + r5 &= ~7; + g5 &= ~7; + b5 &= ~7; + + for (i = 0; i < 3; i++) + { + pix = fbuff[leftuppix ^ 1]; + tempr = ((pix >> 8) & 0xf8) | (pix >> 13); + tempg = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + tempb = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + tempr &= ~7; + tempg &= ~7; + tempb &= ~7; + if (tempr > r5) + { + *r += 1; + } + if (tempr < r5) + { + *r -= 1; + } + if (tempg > g5) + { + *g += 1; + } + if (tempg < g5) + { + *g -= 1; + } + if (tempb > b5) + { + *b += 1; + } + if (tempb < b5) + { + *b -= 1; + } + leftuppix++; + } + + for (i = 0; i < 3; i++) + { + pix = fbuff[leftdownpix ^ 1]; + tempr = ((pix >> 8) & 0xf8) | (pix >> 13); + tempg = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + tempb = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + tempr &= ~7; + tempg &= ~7; + tempb &= ~7; + if (tempr > r5) + { + *r += 1; + } + if (tempr < r5) + { + *r -= 1; + } + if (tempg > g5) + { + *g += 1; + } + if (tempg < g5) + { + *g -= 1; + } + if (tempb > b5) + { + *b += 1; + } + if (tempb < b5) + { + *b -= 1; + } + leftdownpix++; + } + for(i = 0; i < 3; i++) + { + if (!(i & 1)) + { + pix = fbuff[toleftpix ^ 1]; + tempr = ((pix >> 8) & 0xf8) | (pix >> 13); + tempg = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07); + tempb = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07); + tempr &= 
~7; + tempg &= ~7; + tempb &= ~7; + if (tempr > r5) + { + *r += 1; + } + if (tempr < r5) + { + *r -= 1; + } + if (tempg > g5) + { + *g += 1; + } + if (tempg < g5) + { + *g -= 1; + } + if (tempb > b5) + { + *b += 1; + } + if (tempb < b5) + { + *b -= 1; + } + } + toleftpix++; + } +} + +static inline void restore_filter16_buffer(int32_t* r, int32_t* g, int32_t* b, color_t* vibuff, uint32_t hres) +{ + color_t filtered; + color_t leftuppix, leftdownpix, leftpix; + color_t rightuppix, rightdownpix, rightpix; + color_t uppix, downpix; + int32_t ihres = (int32_t)hres; //can't apply unary minus to unsigned + + leftuppix = vibuff[-ihres - 1]; + leftdownpix = vibuff[ihres - 1]; + leftpix = vibuff[-1]; + + rightuppix = vibuff[-ihres + 1]; + rightdownpix = vibuff[ihres + 1]; + rightpix = vibuff[1]; + + uppix = vibuff[-ihres]; + downpix = vibuff[ihres]; + filtered = *vibuff; + + restore_two(&filtered, &leftuppix); + restore_two(&filtered, &uppix); + restore_two(&filtered, &rightuppix); + + restore_two(&filtered, &leftpix); + restore_two(&filtered, &rightpix); + + restore_two(&filtered, &leftdownpix); + restore_two(&filtered, &downpix); + restore_two(&filtered, &rightdownpix); + + *r = filtered.i.r; + *g = filtered.i.g; + *b = filtered.i.b; + + if(*r < 0) *r = 0; + else if(*r > 255) *r = 255; + if(*g < 0) *g = 0; + else if(*g > 255) *g = 255; + if(*b < 0) *b = 0; + else if(*b > 255) *b = 255; +} + +// This is wrong, only the 5 upper bits are compared. 
+static inline void restore_two(color_t* filtered, color_t* neighbour) +{ + if (neighbour->i.r > filtered->i.r) + { + filtered->i.r += 1; + } + if (neighbour->i.r < filtered->i.r) + { + filtered->i.r -= 1; + } + if (neighbour->i.g > filtered->i.g) + { + filtered->i.g += 1; + } + if (neighbour->i.g < filtered->i.g) + { + filtered->i.g -= 1; + } + if (neighbour->i.b > filtered->i.b) + { + filtered->i.b += 1; + } + if (neighbour->i.b < filtered->i.b) + { + filtered->i.b -= 1; + } +} + +static inline void video_max(uint32_t* Pixels, uint8_t* max, uint32_t* enb) +{ + int i; + int pos = 0; + *enb = 0; + for(i = 0; i < 7; i++) + { + if (Pixels[i] > Pixels[pos]) + { + *enb += (1 << i); + pos = i; + } + else if (Pixels[i] < Pixels[pos]) + { + *enb += (1 << i); + } + else + { + pos = i; + } + } + *max = Pixels[pos]; +} + +static inline uint32_t ge_two(uint32_t enb) +{ + if(enb & 1) + { + if(enb & 2) + return 1; + if(enb & 4) + return 1; + if(enb & 8) + return 1; + if(enb & 16) + return 1; + if(enb & 32) + return 1; + if(enb & 64) + return 1; + if(enb & 128) + return 1; + return 0; + } + else if(enb & 2) + { + if(enb & 4) + return 1; + if(enb & 8) + return 1; + if(enb & 16) + return 1; + if(enb & 32) + return 1; + if(enb & 64) + return 1; + if(enb & 128) + return 1; + return 0; + } + else if(enb & 4) + { + if(enb & 8) + return 1; + if(enb & 16) + return 1; + if(enb & 32) + return 1; + if(enb & 64) + return 1; + if(enb & 128) + return 1; + return 0; + } + else if(enb & 8) + { + if(enb & 16) + return 1; + if(enb & 32) + return 1; + if(enb & 64) + return 1; + if(enb & 128) + return 1; + return 0; + } + else if(enb & 16) + { + if(enb & 32) + return 1; + if(enb & 64) + return 1; + if(enb & 128) + return 1; + return 0; + } + else if(enb & 32) + { + if(enb & 64) + return 1; + if(enb & 128) + return 1; + return 0; + } + else if(enb & 64) + { + if(enb & 128) + return 1; + return 0; + } + return 0; +} +#endif diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/rdptpipe.cpp 
b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdptpipe.cpp new file mode 100644 index 0000000000..4cb556eb1c --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdptpipe.cpp @@ -0,0 +1,1023 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +/****************************************************************************** + + + SGI/Nintendo Reality Display Texture Fetch Unit (TF) + ------------------- + + by Ryan Holtz + based on initial C code by Ville Linde + contains additional improvements from angrylion, Ziggy, Gonetz and Orkin + + +******************************************************************************/ + +#include "emu.h" +#include "includes/n64.h" +#include "video/rdptpipe.h" +#include "video/n64.h" +#include "video/rgbutil.h" + +#define RELATIVE(x, y) ((((x) >> 3) - (y)) << 3) | (x & 7); + +void n64_texture_pipe_t::set_machine(running_machine &machine) +{ + n64_state* state = machine.driver_data(); + + m_rdp = state->rdp(); + + for(int32_t i = 0; i < 0x10000; i++) + { + m_expand_16to32_table[i] = color_t((i & 1) ? 
0xff : 0x00, m_rdp->m_replicated_rgba[(i >> 11) & 0x1f], m_rdp->m_replicated_rgba[(i >> 6) & 0x1f], m_rdp->m_replicated_rgba[(i >> 1) & 0x1f]); + } + + for(uint32_t i = 0; i < 0x80000; i++) + { + if (i & 0x40000) + { + m_lod_lookup[i] = 0x7fff; + } + else if (i & 0x20000) + { + m_lod_lookup[i] = 0x8000; + } + else + { + if ((i & 0x18000) == 0x8000) + { + m_lod_lookup[i] = 0x7fff; + } + else if ((i & 0x18000) == 0x10000) + { + m_lod_lookup[i] = 0x8000; + } + else + { + m_lod_lookup[i] = i & 0xffff; + } + } + } + + m_st2_add.set(1, 0, 1, 0); + m_v1.set(1, 1, 1, 1); +} + +void n64_texture_pipe_t::mask(rgbaint_t& sstt, const n64_tile_t& tile) +{ + uint32_t s_mask_bits = m_maskbits_table[tile.mask_s]; + uint32_t t_mask_bits = m_maskbits_table[tile.mask_t]; + rgbaint_t maskbits(s_mask_bits, s_mask_bits, t_mask_bits, t_mask_bits); + + rgbaint_t do_wrap(sstt); + do_wrap.sra(tile.wrapped_mask); + do_wrap.and_reg(m_v1); + do_wrap.cmpeq(m_v1); + do_wrap.and_reg(tile.mm); + + rgbaint_t wrapped(sstt); + wrapped.xor_reg(do_wrap); + wrapped.and_reg(maskbits); + wrapped.and_reg(tile.mask); + sstt.and_reg(tile.invmask); + sstt.or_reg(wrapped); +} + +rgbaint_t n64_texture_pipe_t::shift_cycle(rgbaint_t& st, const n64_tile_t& tile) +{ + st.sign_extend(0x00008000, 0xffff8000); + st.sra(tile.rshift); + st.shl(tile.lshift); + + rgbaint_t maxst(st); + maxst.sra_imm(3); + rgbaint_t maxst_eq(maxst); + maxst.cmpgt(tile.sth); + maxst_eq.cmpeq(tile.sth); + maxst.or_reg(maxst_eq); + + rgbaint_t stlsb(st); + stlsb.and_imm(7); + + st.sra_imm(3); + st.sub(tile.stl); + st.shl_imm(3); + st.or_reg(stlsb); + + return maxst; +} + +inline void n64_texture_pipe_t::shift_copy(rgbaint_t& st, const n64_tile_t& tile) +{ + st.shr(tile.rshift); + st.shl(tile.lshift); +} + +void n64_texture_pipe_t::clamp_cycle(rgbaint_t& st, rgbaint_t& stfrac, rgbaint_t& maxst, const int32_t tilenum, const n64_tile_t& tile, rdp_span_aux* userdata) +{ + rgbaint_t not_clamp(tile.clamp_st); + not_clamp.xor_imm(0xffffffff); + + 
rgbaint_t highbit_mask(0x10000, 0x10000, 0x10000, 0x10000); + rgbaint_t highbit(st); + highbit.and_reg(highbit_mask); + highbit.cmpeq(highbit_mask); + + rgbaint_t not_highbit(highbit); + not_highbit.xor_imm(0xffffffff); + + rgbaint_t not_maxst(maxst); + not_maxst.xor_imm(0xffffffff); + not_maxst.and_reg(not_highbit); + not_maxst.or_reg(not_clamp); + + rgbaint_t shifted_st(st); + shifted_st.sign_extend(0x00010000, 0xffff0000); + shifted_st.shr_imm(5); + shifted_st.and_imm(0x1fff); + shifted_st.and_reg(not_maxst); + stfrac.and_reg(not_maxst); + + rgbaint_t clamp_diff(userdata->m_clamp_diff[tilenum]); + clamp_diff.and_reg(tile.clamp_st); + clamp_diff.and_reg(maxst); + + st.set(shifted_st); + st.or_reg(clamp_diff); +} + +void n64_texture_pipe_t::clamp_cycle_light(rgbaint_t& st, rgbaint_t& maxst, const int32_t tilenum, const n64_tile_t& tile, rdp_span_aux* userdata) +{ + rgbaint_t not_clamp(tile.clamp_st); + not_clamp.xor_imm(0xffffffff); + + rgbaint_t highbit_mask(0x10000, 0x10000, 0x10000, 0x10000); + rgbaint_t highbit(st); + highbit.and_reg(highbit_mask); + highbit.cmpeq(highbit_mask); + + rgbaint_t not_highbit(highbit); + not_highbit.xor_imm(0xffffffff); + + rgbaint_t not_maxst(maxst); + not_maxst.xor_imm(0xffffffff); + not_maxst.and_reg(not_highbit); + not_maxst.or_reg(not_clamp); + + rgbaint_t shifted_st(st); + shifted_st.sign_extend(0x00010000, 0xffff0000); + shifted_st.shr_imm(5); + shifted_st.and_imm(0x1fff); + shifted_st.and_reg(not_maxst); + + rgbaint_t clamp_diff(userdata->m_clamp_diff[tilenum]); + clamp_diff.and_reg(tile.clamp_st); + clamp_diff.and_reg(maxst); + + st.set(shifted_st); + st.or_reg(clamp_diff); +} + +void n64_texture_pipe_t::cycle_nearest(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const n64_tile_t& tile = object.m_tiles[tilenum]; + const uint32_t index = (tile.format << 4) | (tile.size << 2) | ((uint32_t) object.m_other_modes.en_tlut << 1) 
| (uint32_t) object.m_other_modes.tlut_type; + + rgbaint_t st(0, SSS, 0, SST); + rgbaint_t maxst = shift_cycle(st, tile); + clamp_cycle_light(st, maxst, tilenum, tile, userdata); + mask(st, tile); + + uint32_t tbase = tile.tmem + ((tile.line * st.get_b32()) & 0x1ff); + + rgbaint_t t0; + ((this)->*(m_texel_fetch[index]))(t0, st.get_r32(), st.get_b32(), tbase, tile.palette, userdata); + if (object.m_other_modes.convert_one && cycle) + { + t0.set(*prev); + } + + t0.sign_extend(0x00000100, 0xffffff00); + + rgbaint_t k13r(m_rdp->get_k13()); + k13r.mul_imm(t0.get_r32()); + + TEX->set(m_rdp->get_k02()); + TEX->mul_imm(t0.get_g32()); + TEX->add(k13r); + TEX->add_imm(0x80); + TEX->shr_imm(8); + TEX->add_imm(t0.get_b32()); + TEX->and_imm(0x1ff); +} + +void n64_texture_pipe_t::cycle_nearest_lerp(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const n64_tile_t& tile = object.m_tiles[tilenum]; + const uint32_t index = (tile.format << 4) | (tile.size << 2) | ((uint32_t) object.m_other_modes.en_tlut << 1) | (uint32_t) object.m_other_modes.tlut_type; + + rgbaint_t st(0, SSS, 0, SST); + rgbaint_t maxst = shift_cycle(st, tile); + clamp_cycle_light(st, maxst, tilenum, tile, userdata); + mask(st, tile); + + uint32_t tbase = tile.tmem + ((tile.line * st.get_b32()) & 0x1ff); + + ((this)->*(m_texel_fetch[index]))(*TEX, st.get_r32(), st.get_b32(), tbase, tile.palette, userdata); +} + +void n64_texture_pipe_t::cycle_linear(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const n64_tile_t& tile = object.m_tiles[tilenum]; + const uint32_t index = (tile.format << 4) | (tile.size << 2) | ((uint32_t) object.m_other_modes.en_tlut << 1) | (uint32_t) object.m_other_modes.tlut_type; + + rgbaint_t st(0, SSS, 0, SST); + rgbaint_t maxst = shift_cycle(st, tile); + + clamp_cycle_light(st, maxst, tilenum, 
tile, userdata); + + mask(st, tile); + + const uint32_t tbase = tile.tmem + ((tile.line * st.get_b32()) & 0x1ff); + + rgbaint_t t0; + ((this)->*(m_texel_fetch[index]))(t0, st.get_r32(), st.get_b32(), tbase, tile.palette, userdata); + if (object.m_other_modes.convert_one && cycle) + { + t0.set(*prev); + } + + t0.sign_extend(0x00000100, 0xffffff00); + + rgbaint_t k13r(m_rdp->get_k13()); + k13r.mul_imm(t0.get_r32()); + + TEX->set(m_rdp->get_k02()); + TEX->mul_imm(t0.get_g32()); + TEX->add(k13r); + TEX->add_imm(0x80); + TEX->shr_imm(8); + TEX->add_imm(t0.get_b32()); + TEX->and_imm(0x1ff); +} + +void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const n64_tile_t& tile = object.m_tiles[tilenum]; + + uint32_t tpal = tile.palette; + uint32_t index = (tile.format << 4) | (tile.size << 2) | ((uint32_t) object.m_other_modes.en_tlut << 1) | (uint32_t) object.m_other_modes.tlut_type; + + rgbaint_t sstt(SSS, SSS, SST, SST); + rgbaint_t maxst = shift_cycle(sstt, tile); + rgbaint_t stfrac = sstt; + stfrac.and_imm(0x1f); + + clamp_cycle(sstt, stfrac, maxst, tilenum, tile, userdata); + + sstt.add(m_st2_add); + + mask(sstt, tile); + + const uint32_t tbase1 = tile.tmem + ((tile.line * sstt.get_b32()) & 0x1ff); + const uint32_t tbase2 = tile.tmem + ((tile.line * sstt.get_g32()) & 0x1ff); + + bool upper = ((stfrac.get_r32() + stfrac.get_b32()) >= 0x20); + + rgbaint_t invstf(stfrac); + if (upper) + { + invstf.subr_imm(0x20); + invstf.shl_imm(3); + } + + stfrac.shl_imm(3); + + bool center = (stfrac.get_r32() == 0x10) && (stfrac.get_b32() == 0x10) && object.m_other_modes.mid_texel; + + rgbaint_t t2; + ((this)->*(m_texel_fetch[index]))(*TEX, sstt.get_a32(), sstt.get_b32(), tbase1, tpal, userdata); + ((this)->*(m_texel_fetch[index]))(t2, sstt.get_r32(), sstt.get_g32(), tbase2, tpal, userdata); + + if (!center) + { + if (upper) + { + rgbaint_t t3; + 
((this)->*(m_texel_fetch[index]))(t3, sstt.get_a32(), sstt.get_g32(), tbase2, tpal, userdata); + + TEX->sub(t3); + t2.sub(t3); + + TEX->mul_imm(invstf.get_b32()); + t2.mul_imm(invstf.get_r32()); + + TEX->add(t2); + TEX->add_imm(0x0080); + TEX->sra_imm(8); + TEX->add(t3); + } + else + { + rgbaint_t t0; + ((this)->*(m_texel_fetch[index]))(t0, sstt.get_r32(), sstt.get_b32(), tbase1, tpal, userdata); + + TEX->sub(t0); + t2.sub(t0); + + TEX->mul_imm(stfrac.get_r32()); + t2.mul_imm(stfrac.get_b32()); + + TEX->add(t2); + TEX->add_imm(0x80); + TEX->sra_imm(8); + TEX->add(t0); + } + } + else + { + rgbaint_t t0, t3; + ((this)->*(m_texel_fetch[index]))(t0, sstt.get_r32(), sstt.get_b32(), tbase1, tpal, userdata); + ((this)->*(m_texel_fetch[index]))(t3, sstt.get_a32(), sstt.get_g32(), tbase2, tpal, userdata); + TEX->add(t0); + TEX->add(t2); + TEX->add(t3); + TEX->sra_imm(2); + } +} + +void n64_texture_pipe_t::copy(color_t* TEX, int32_t SSS, int32_t SST, uint32_t tilenum, const rdp_poly_state& object, rdp_span_aux* userdata) +{ + const n64_tile_t* tiles = object.m_tiles; + const n64_tile_t& tile = tiles[tilenum]; + + rgbaint_t st(0, SSS, 0, SST); + shift_copy(st, tile); + rgbaint_t stlsb(st); + stlsb.and_imm(7); + st.shr_imm(3); + st.sub(rgbaint_t(0, tile.sl, 0, tile.tl)); + st.shl_imm(3); + st.add(stlsb); + st.sign_extend(0x00010000, 0xffff0000); + st.shr_imm(5); + st.and_imm(0x1fff); + mask(st, tile); + + const uint32_t index = (tile.format << 4) | (tile.size << 2) | ((uint32_t) object.m_other_modes.en_tlut << 1) | (uint32_t) object.m_other_modes.tlut_type; + const uint32_t tbase = tile.tmem + ((tile.line * st.get_b32()) & 0x1ff); + ((this)->*(m_texel_fetch[index]))(*TEX, st.get_r32(), st.get_b32(), tbase, tile.palette, userdata); +} + +void n64_texture_pipe_t::lod_1cycle(int32_t* sss, int32_t* sst, const int32_t s, const int32_t t, const int32_t w, const int32_t dsinc, const int32_t dtinc, const int32_t dwinc, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const 
int32_t nextsw = (w + dwinc) >> 16; + int32_t nexts = (s + dsinc) >> 16; + int32_t nextt = (t + dtinc) >> 16; + + if (object.m_other_modes.persp_tex_en) + { + m_rdp->tc_div(nexts, nextt, nextsw, &nexts, &nextt); + } + else + { + m_rdp->tc_div_no_perspective(nexts, nextt, nextsw, &nexts, &nextt); + } + + userdata->m_start_span = false; + userdata->m_precomp_s = nexts; + userdata->m_precomp_t = nextt; + + const int32_t lodclamp = (((*sst & 0x60000) > 0) | ((nextt & 0x60000) > 0)) || (((*sss & 0x60000) > 0) | ((nexts & 0x60000) > 0)); + + int32_t horstep = SIGN17(nexts & 0x1ffff) - SIGN17(*sss & 0x1ffff); + int32_t vertstep = SIGN17(nextt & 0x1ffff) - SIGN17(*sst & 0x1ffff); + if (horstep & 0x20000) + { + horstep = ~horstep & 0x1ffff; + } + if (vertstep & 0x20000) + { + vertstep = ~vertstep & 0x1ffff; + } + + int32_t lod = (horstep >= vertstep) ? horstep : vertstep; + + *sss = m_lod_lookup[*sss & 0x7ffff]; + *sst = m_lod_lookup[*sst & 0x7ffff]; + + if ((lod & 0x4000) || lodclamp) + { + lod = 0x7fff; + } + else if (lod < object.m_misc_state.m_min_level) + { + lod = object.m_misc_state.m_min_level; + } + + int32_t l_tile = m_rdp->get_log2((lod >> 5) & 0xff); + const bool magnify = (lod < 32); + const bool distant = ((lod & 0x6000) || (l_tile >= object.m_misc_state.m_max_level)); + + uint8_t lod_fraction = ((lod << 3) >> l_tile) & 0xff; + + if(!object.m_other_modes.sharpen_tex_en && !object.m_other_modes.detail_tex_en) + { + if (distant) + { + lod_fraction = 0xff; + } + else if (magnify) + { + lod_fraction = 0; + } + } + + userdata->m_lod_fraction.set(lod_fraction, lod_fraction, lod_fraction, lod_fraction); + /* FIXME: ??? 
+ if(object.m_other_modes.sharpen_tex_en && magnify) + { + userdata->m_lod_fraction |= 0x100; + } + */ +} + +void n64_texture_pipe_t::lod_2cycle(int32_t* sss, int32_t* sst, const int32_t s, const int32_t t, const int32_t w, const int32_t dsinc, const int32_t dtinc, const int32_t dwinc, const int32_t prim_tile, int32_t* t1, int32_t* t2, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const int32_t nextsw = (w + dwinc) >> 16; + int32_t nexts = (s + dsinc) >> 16; + int32_t nextt = (t + dtinc) >> 16; + + if (object.m_other_modes.persp_tex_en) + { + m_rdp->tc_div(nexts, nextt, nextsw, &nexts, &nextt); + } + else + { + m_rdp->tc_div_no_perspective(nexts, nextt, nextsw, &nexts, &nextt); + } + + userdata->m_start_span = false; + userdata->m_precomp_s = nexts; + userdata->m_precomp_t = nextt; + + const int32_t lodclamp = (((*sst & 0x60000) > 0) | ((nextt & 0x60000) > 0)) || (((*sss & 0x60000) > 0) | ((nexts & 0x60000) > 0)); + + int32_t horstep = SIGN17(nexts & 0x1ffff) - SIGN17(*sss & 0x1ffff); + int32_t vertstep = SIGN17(nextt & 0x1ffff) - SIGN17(*sst & 0x1ffff); + if (horstep & 0x20000) + { + horstep = ~horstep & 0x1ffff; + } + if (vertstep & 0x20000) + { + vertstep = ~vertstep & 0x1ffff; + } + + int32_t lod = (horstep >= vertstep) ? 
horstep : vertstep; + + *sss = m_lod_lookup[*sss & 0x7ffff]; + *sst = m_lod_lookup[*sst & 0x7ffff]; + + if ((lod & 0x4000) || lodclamp) + { + lod = 0x7fff; + } + else if (lod < object.m_misc_state.m_min_level) + { + lod = object.m_misc_state.m_min_level; + } + + int32_t l_tile = m_rdp->get_log2((lod >> 5) & 0xff); + const bool magnify = (lod < 32); + const bool distant = ((lod & 0x6000) || (l_tile >= object.m_misc_state.m_max_level)); + + uint8_t lod_fraction = ((lod << 3) >> l_tile) & 0xff; + + if(!object.m_other_modes.sharpen_tex_en && !object.m_other_modes.detail_tex_en) + { + if (distant) + { + lod_fraction = 0xff; + } + else if (magnify) + { + lod_fraction = 0; + } + } + + userdata->m_lod_fraction.set(lod_fraction, lod_fraction, lod_fraction, lod_fraction); + + /* FIXME: ??? + if(object.m_other_modes.sharpen_tex_en && magnify) + { + userdata->m_lod_fraction |= 0x100; + }*/ + + if (object.m_other_modes.tex_lod_en) + { + if (distant) + { + l_tile = object.m_misc_state.m_max_level; + } + if (!object.m_other_modes.detail_tex_en) + { + *t1 = (prim_tile + l_tile) & 7; + if (!(distant || (!object.m_other_modes.sharpen_tex_en && magnify))) + { + *t2 = (*t1 + 1) & 7; + } + else + { + *t2 = *t1; // World Driver Championship, Stunt Race 64, Beetle Adventure Racing + } + } + else // Beetle Adventure Racing, World Driver Championship (ingame_, NFL Blitz 2001, Pilotwings + { + if (!magnify) + { + *t1 = (prim_tile + l_tile + 1); + } + else + { + *t1 = (prim_tile + l_tile); + } + *t1 &= 7; + if (!distant && !magnify) + { + *t2 = (prim_tile + l_tile + 2) & 7; + } + else + { + *t2 = (prim_tile + l_tile + 1) & 7; + } + } + } +} + +void n64_texture_pipe_t::lod_2cycle_limited(int32_t* sss, int32_t* sst, const int32_t s, const int32_t t, const int32_t w, const int32_t dsinc, const int32_t dtinc, const int32_t dwinc, const int32_t prim_tile, int32_t* t1, const rdp_poly_state& object) +{ + const int32_t nextsw = (w + dwinc) >> 16; + int32_t nexts = (s + dsinc) >> 16; + int32_t nextt 
= (t + dtinc) >> 16; + + if (object.m_other_modes.persp_tex_en) + { + m_rdp->tc_div(nexts, nextt, nextsw, &nexts, &nextt); + } + else + { + m_rdp->tc_div_no_perspective(nexts, nextt, nextsw, &nexts, &nextt); + } + + const int32_t lodclamp = (((*sst & 0x60000) > 0) | ((nextt & 0x60000) > 0)) || (((*sss & 0x60000) > 0) | ((nexts & 0x60000) > 0)); + + int32_t horstep = SIGN17(nexts & 0x1ffff) - SIGN17(*sss & 0x1ffff); + int32_t vertstep = SIGN17(nextt & 0x1ffff) - SIGN17(*sst & 0x1ffff); + if (horstep & 0x20000) + { + horstep = ~horstep & 0x1ffff; + } + if (vertstep & 0x20000) + { + vertstep = ~vertstep & 0x1ffff; + } + + int32_t lod = (horstep >= vertstep) ? horstep : vertstep; + + *sss = m_lod_lookup[*sss & 0x7ffff]; + *sst = m_lod_lookup[*sst & 0x7ffff]; + + if ((lod & 0x4000) || lodclamp) + { + lod = 0x7fff; + } + else if (lod < object.m_misc_state.m_min_level) + { + lod = object.m_misc_state.m_min_level; + } + + int32_t l_tile = m_rdp->get_log2((lod >> 5) & 0xff); + const bool magnify = (lod < 32); + const bool distant = (lod & 0x6000) || (l_tile >= object.m_misc_state.m_max_level); + + if (object.m_other_modes.tex_lod_en) + { + if (distant) + { + l_tile = object.m_misc_state.m_max_level; + } + if (!object.m_other_modes.detail_tex_en) + { + *t1 = (prim_tile + l_tile) & 7; + } + else + { + if (!magnify) + { + *t1 = (prim_tile + l_tile + 1); + } + else + { + *t1 = (prim_tile + l_tile); + } + *t1 &= 7; + } + } +} + +void n64_texture_pipe_t::calculate_clamp_diffs(uint32_t prim_tile, rdp_span_aux* userdata, const rdp_poly_state& object) +{ + const n64_tile_t* tiles = object.m_tiles; + if (object.m_other_modes.cycle_type == CYCLE_TYPE_2) + { + if (object.m_other_modes.tex_lod_en) + { + for (int32_t start = 0; start <= 7; start++) + { + userdata->m_clamp_diff[start].set((tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2)); + } + 
} + else + { + const int32_t start = prim_tile; + const int32_t end = (prim_tile + 1) & 7; + userdata->m_clamp_diff[start].set((tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2)); + userdata->m_clamp_diff[end].set((tiles[end].sh >> 2) - (tiles[end].sl >> 2), (tiles[end].sh >> 2) - (tiles[end].sl >> 2), (tiles[end].th >> 2) - (tiles[end].tl >> 2), (tiles[end].th >> 2) - (tiles[end].tl >> 2)); + } + } + else//1-cycle or copy + { + userdata->m_clamp_diff[prim_tile].set((tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), (tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2), (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2)); + } +} + +#define USE_64K_LUT (1) + +static int32_t sTexAddrSwap16[2] = { WORD_ADDR_XOR, WORD_XOR_DWORD_SWAP }; +static int32_t sTexAddrSwap8[2] = { BYTE_ADDR_XOR, BYTE_XOR_DWORD_SWAP }; + +void n64_texture_pipe_t::fetch_rgba16_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; + + uint16_t c = ((uint16_t*)userdata->m_tmem)[taddr]; + c = ((uint16_t*)(userdata->m_tmem + 0x800))[(c >> 8) << 2]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_rgba16_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; + + uint16_t c = ((uint16_t*)userdata->m_tmem)[taddr]; + c = ((uint16_t*)(userdata->m_tmem + 0x800))[(c >> 8) << 2]; + + const uint8_t k = (c >> 8) & 0xff; + out.set(c & 0xff, k, k, k); +} + +void n64_texture_pipe_t::fetch_rgba16_raw(rgbaint_t& 
out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; + + const uint16_t c = ((uint16_t*)userdata->m_tmem)[taddr]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_rgba32_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint32_t *tc = ((uint32_t*)userdata->m_tmem); + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; + + uint32_t c = tc[taddr]; + c = ((uint16_t*)(userdata->m_tmem + 0x800))[(c >> 24) << 2]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_rgba32_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint32_t *tc = ((uint32_t*)userdata->m_tmem); + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; + + uint32_t c = tc[taddr]; + c = ((uint16_t*)(userdata->m_tmem + 0x800))[(c >> 24) << 2]; + + const uint8_t k = (c >> 8) & 0xff; + out.set(c & 0xff, k, k, k); +} + +void n64_texture_pipe_t::fetch_rgba32_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; + + const uint16_t cl = ((uint16_t*)userdata->m_tmem)[taddr]; + const uint16_t ch = ((uint16_t*)userdata->m_tmem)[taddr | 0x400]; + + out.set(ch & 0xff, cl >> 8, cl & 0xff, ch >> 8); +} + +void n64_texture_pipe_t::fetch_nop(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) { } + +void n64_texture_pipe_t::fetch_yuv(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, 
int32_t tpal, rdp_span_aux* userdata) +{ + const uint16_t *tc = ((uint16_t*)userdata->m_tmem); + + const int32_t taddr = (tbase << 3) + s; + const int32_t taddrhi = (taddr ^ sTexAddrSwap8[t & 1]) & 0x7ff; + const int32_t taddrlow = ((taddr >> 1) ^ sTexAddrSwap16[t & 1]) & 0x3ff; + + const uint16_t c = tc[taddrlow]; + + int32_t y = userdata->m_tmem[taddrhi | 0x800]; + int32_t u = c >> 8; + int32_t v = c & 0xff; + + v ^= 0x80; u ^= 0x80; + u |= ((u & 0x80) << 1); + v |= ((v & 0x80) << 1); + + out.set(y, u, v, y); +} + +void n64_texture_pipe_t::fetch_ci4_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = (s & 1) ? (tc[taddr] & 0xf) : (tc[taddr] >> 4); + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[((tpal << 4) | p) << 2]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_ci4_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = (s & 1) ? (tc[taddr] & 0xf) : (tc[taddr] >> 4); + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[((tpal << 4) | p) << 2]; + + const uint8_t k = (c >> 8) & 0xff; + out.set(c & 0xff, k, k, k); +} + +void n64_texture_pipe_t::fetch_ci4_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0xfff; + + uint8_t p = (s & 1) ? 
(tc[taddr] & 0xf) : (tc[taddr] >> 4); + p = (tpal << 4) | p; + + out.set(p, p, p, p); +} + +void n64_texture_pipe_t::fetch_ci8_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = tc[taddr]; + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[p << 2]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_ci8_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = tc[taddr]; + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[p << 2]; + + const uint8_t k = (c >> 8) & 0xff; + out.set(c & 0xff, k, k, k); +} + +void n64_texture_pipe_t::fetch_ci8_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0xfff; + + const uint8_t p = tc[taddr]; + out.set(p, p, p, p); +} + +void n64_texture_pipe_t::fetch_ia4_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = ((s) & 1) ? 
(tc[taddr] & 0xf) : (tc[taddr] >> 4); + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[((tpal << 4) | p) << 2]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_ia4_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = ((s) & 1) ? (tc[taddr] & 0xf) : (tc[taddr] >> 4); + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[((tpal << 4) | p) << 2]; + + const uint8_t k = (c >> 8) & 0xff; + out.set(c & 0xff, k, k, k); +} + +void n64_texture_pipe_t::fetch_ia4_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0xfff; + + const uint8_t p = ((s) & 1) ? 
(tc[taddr] & 0xf) : (tc[taddr] >> 4); + uint8_t i = p & 0xe; + i = (i << 4) | (i << 1) | (i >> 2); + + out.set((p & 1) * 0xff, i, i, i); +} + +void n64_texture_pipe_t::fetch_ia8_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = tc[taddr]; + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[p << 2]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_ia8_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t p = tc[taddr]; + const uint16_t c = ((uint16_t*)(userdata->m_tmem + 0x800))[p << 2]; + + const uint8_t k = (c >> 8) & 0xff; + out.set(c & 0xff, k, k, k); +} + +void n64_texture_pipe_t::fetch_ia8_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0xfff; + + const uint8_t p = tc[taddr]; + uint8_t i = p & 0xf0; + i |= (i >> 4); + + out.set(((p << 4) | (p & 0xf)) & 0xff, i, i, i); +} + +void n64_texture_pipe_t::fetch_ia16_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint16_t *tc = ((uint16_t*)userdata->m_tmem); + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; + + uint16_t c = tc[taddr]; + c = ((uint16_t*)(userdata->m_tmem + 0x800))[(c >> 8) << 2]; + +#if USE_64K_LUT + out.set(m_expand_16to32_table[c]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), 
GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_ia16_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint16_t *tc = ((uint16_t*)userdata->m_tmem); + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x3ff; + + uint16_t c = tc[taddr]; + c = ((uint16_t*)(userdata->m_tmem + 0x800))[(c >> 8) << 2]; + + const uint8_t k = (c >> 8) & 0xff; + out.set(c & 0xff, k, k, k); +} + +void n64_texture_pipe_t::fetch_ia16_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint16_t *tc = ((uint16_t*)userdata->m_tmem); + const int32_t taddr = (((tbase << 2) + s) ^ sTexAddrSwap16[t & 1]) & 0x7ff; + + const uint16_t c = tc[taddr]; + const uint8_t i = (c >> 8); + out.set(c & 0xff, i, i, i); +} + +void n64_texture_pipe_t::fetch_i4_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t byteval = tc[taddr]; + const uint8_t c = ((s & 1)) ? (byteval & 0xf) : ((byteval >> 4) & 0xf); + +#if USE_64K_LUT + const uint16_t k = ((uint16_t*)(userdata->m_tmem + 0x800))[((tpal << 4) | c) << 2]; + out.set(m_expand_16to32_table[k]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_i4_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t byteval = tc[taddr]; + const uint8_t c = ((s & 1)) ? 
(byteval & 0xf) : ((byteval >> 4) & 0xf); + const uint16_t k = ((uint16_t*)(userdata->m_tmem + 0x800))[((tpal << 4) | c) << 2]; + + const uint8_t i = (k >> 8) & 0xff; + out.set(k & 0xff, i, i, i); +} + +void n64_texture_pipe_t::fetch_i4_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = ((((tbase << 4) + s) >> 1) ^ sTexAddrSwap8[t & 1]) & 0xfff; + + const uint8_t byteval = tc[taddr]; + uint8_t c = ((s & 1)) ? (byteval & 0xf) : ((byteval >> 4) & 0xf); + c |= (c << 4); + + out.set(c, c, c, c); +} + +void n64_texture_pipe_t::fetch_i8_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t c = tc[taddr]; + +#if USE_64K_LUT + const uint16_t k = ((uint16_t*)(userdata->m_tmem + 0x800))[c << 2]; + out.set(m_expand_16to32_table[k]); +#else + out.set((c & 1) * 0xff, GET_HI_RGBA16_TMEM(c), GET_MED_RGBA16_TMEM(c), GET_LOW_RGBA16_TMEM(c)); +#endif +} + +void n64_texture_pipe_t::fetch_i8_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0x7ff; + + const uint8_t c = tc[taddr]; + const uint16_t k = ((uint16_t*)(userdata->m_tmem + 0x800))[c << 2]; + + const uint8_t i = (k >> 8) & 0xff; + out.set(k & 0xff, i, i, i); +} + +void n64_texture_pipe_t::fetch_i8_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata) +{ + const uint8_t *tc = userdata->m_tmem; + const int32_t taddr = (((tbase << 3) + s) ^ sTexAddrSwap8[t & 1]) & 0xfff; + + const uint8_t c = tc[taddr]; + + out.set(c, c, c, c); +} diff --git a/waterbox/ares64/ares/thirdparty/mame/mame/video/rdptpipe.h 
b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdptpipe.h new file mode 100644 index 0000000000..b9c80e1535 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/mame/video/rdptpipe.h @@ -0,0 +1,165 @@ +// license:BSD-3-Clause +// copyright-holders:Ryan Holtz +/****************************************************************************** + + + SGI/Nintendo Reality Display Texture Fetch Unit (TF) + ------------------- + + by Ryan Holtz + based on initial C code by Ville Linde + contains additional improvements from angrylion, Ziggy, Gonetz and Orkin + + +******************************************************************************/ + +#ifndef _VIDEO_RDPTEXPIPE_H_ +#define _VIDEO_RDPTEXPIPE_H_ + +#include "video/n64types.h" + +class n64_texture_pipe_t +{ + public: + typedef void (n64_texture_pipe_t::*texel_fetcher_t) (rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + typedef void (n64_texture_pipe_t::*texel_cycler_t) (color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object); + + n64_texture_pipe_t() + { + m_maskbits_table[0] = 0xffff; + for(int i = 1; i < 16; i++) + { + m_maskbits_table[i] = ((uint16_t)(0xffff) >> (16 - i)) & 0x3ff; + } + m_start_span = false; + + for (auto & elem : m_texel_fetch) + { + elem = &n64_texture_pipe_t::fetch_nop; + } + + m_texel_fetch[ 8] = &n64_texture_pipe_t::fetch_rgba16_raw; + m_texel_fetch[ 9] = &n64_texture_pipe_t::fetch_rgba16_raw; + m_texel_fetch[10] = &n64_texture_pipe_t::fetch_rgba16_tlut0; + m_texel_fetch[11] = &n64_texture_pipe_t::fetch_rgba16_tlut1; + m_texel_fetch[12] = &n64_texture_pipe_t::fetch_rgba32_raw; + m_texel_fetch[13] = &n64_texture_pipe_t::fetch_rgba32_raw; + m_texel_fetch[14] = &n64_texture_pipe_t::fetch_rgba32_tlut0; + m_texel_fetch[15] = &n64_texture_pipe_t::fetch_rgba32_tlut1; + + m_texel_fetch[24] = &n64_texture_pipe_t::fetch_yuv; + m_texel_fetch[25] = 
&n64_texture_pipe_t::fetch_yuv; + m_texel_fetch[26] = &n64_texture_pipe_t::fetch_yuv; + m_texel_fetch[27] = &n64_texture_pipe_t::fetch_yuv; + + m_texel_fetch[32] = &n64_texture_pipe_t::fetch_ci4_raw; + m_texel_fetch[33] = &n64_texture_pipe_t::fetch_ci4_raw; + m_texel_fetch[34] = &n64_texture_pipe_t::fetch_ci4_tlut0; + m_texel_fetch[35] = &n64_texture_pipe_t::fetch_ci4_tlut1; + m_texel_fetch[36] = &n64_texture_pipe_t::fetch_ci8_raw; + m_texel_fetch[37] = &n64_texture_pipe_t::fetch_ci8_raw; + m_texel_fetch[38] = &n64_texture_pipe_t::fetch_ci8_tlut0; + m_texel_fetch[39] = &n64_texture_pipe_t::fetch_ci8_tlut1; + + m_texel_fetch[48] = &n64_texture_pipe_t::fetch_ia4_raw; + m_texel_fetch[49] = &n64_texture_pipe_t::fetch_ia4_raw; + m_texel_fetch[50] = &n64_texture_pipe_t::fetch_ia4_tlut0; + m_texel_fetch[51] = &n64_texture_pipe_t::fetch_ia4_tlut1; + m_texel_fetch[52] = &n64_texture_pipe_t::fetch_ia8_raw; + m_texel_fetch[53] = &n64_texture_pipe_t::fetch_ia8_raw; + m_texel_fetch[54] = &n64_texture_pipe_t::fetch_ia8_tlut0; + m_texel_fetch[55] = &n64_texture_pipe_t::fetch_ia8_tlut1; + m_texel_fetch[56] = &n64_texture_pipe_t::fetch_ia16_raw; + m_texel_fetch[57] = &n64_texture_pipe_t::fetch_ia16_raw; + m_texel_fetch[58] = &n64_texture_pipe_t::fetch_ia16_tlut0; + m_texel_fetch[59] = &n64_texture_pipe_t::fetch_ia16_tlut1; + + m_texel_fetch[64] = &n64_texture_pipe_t::fetch_i4_raw; + m_texel_fetch[65] = &n64_texture_pipe_t::fetch_i4_raw; + m_texel_fetch[66] = &n64_texture_pipe_t::fetch_i4_tlut0; + m_texel_fetch[67] = &n64_texture_pipe_t::fetch_i4_tlut1; + m_texel_fetch[68] = &n64_texture_pipe_t::fetch_i8_raw; + m_texel_fetch[69] = &n64_texture_pipe_t::fetch_i8_raw; + m_texel_fetch[70] = &n64_texture_pipe_t::fetch_i8_tlut0; + m_texel_fetch[71] = &n64_texture_pipe_t::fetch_i8_tlut1; + + m_cycle[0] = &n64_texture_pipe_t::cycle_nearest; + m_cycle[1] = &n64_texture_pipe_t::cycle_nearest_lerp; + m_cycle[2] = &n64_texture_pipe_t::cycle_linear; + m_cycle[3] = 
&n64_texture_pipe_t::cycle_linear_lerp; + } + + void cycle_nearest(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object); + void cycle_nearest_lerp(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object); + void cycle_linear(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object); + void cycle_linear_lerp(color_t* TEX, color_t* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle, rdp_span_aux* userdata, const rdp_poly_state& object); + + texel_cycler_t m_cycle[4]; + + void copy(color_t* TEX, int32_t SSS, int32_t SST, uint32_t tilenum, const rdp_poly_state& object, rdp_span_aux* userdata); + void calculate_clamp_diffs(uint32_t prim_tile, rdp_span_aux* userdata, const rdp_poly_state& object); + void lod_1cycle(int32_t* sss, int32_t* sst, const int32_t s, const int32_t t, const int32_t w, const int32_t dsinc, const int32_t dtinc, const int32_t dwinc, rdp_span_aux* userdata, const rdp_poly_state& object); + void lod_2cycle(int32_t* sss, int32_t* sst, const int32_t s, const int32_t t, const int32_t w, const int32_t dsinc, const int32_t dtinc, const int32_t dwinc, const int32_t prim_tile, int32_t* t1, int32_t* t2, rdp_span_aux* userdata, const rdp_poly_state& object); + void lod_2cycle_limited(int32_t* sss, int32_t* sst, const int32_t s, const int32_t t, int32_t w, const int32_t dsinc, const int32_t dtinc, const int32_t dwinc, const int32_t prim_tile, int32_t* t1, const rdp_poly_state& object); + + void set_machine(running_machine& machine); + + bool m_start_span; + + private: + void mask(rgbaint_t& sstt, const n64_tile_t& tile); + + rgbaint_t shift_cycle(rgbaint_t& st, const n64_tile_t& tile); + void shift_copy(rgbaint_t& st, const n64_tile_t& tile); + + void clamp_cycle(rgbaint_t& st, rgbaint_t& 
stfrac, rgbaint_t& maxst, const int32_t tilenum, const n64_tile_t& tile, rdp_span_aux* userdata); + void clamp_cycle_light(rgbaint_t& st, rgbaint_t& maxst, const int32_t tilenum, const n64_tile_t& tile, rdp_span_aux* userdata); + + void fetch_nop(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + + void fetch_rgba16_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_rgba16_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_rgba16_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_rgba32_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_rgba32_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_rgba32_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + + void fetch_yuv(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + + void fetch_ci4_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ci4_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ci4_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ci8_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ci8_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ci8_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + + void fetch_ia4_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia4_tlut1(rgbaint_t& out, 
int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia4_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia8_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia8_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia8_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia16_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia16_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_ia16_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + + void fetch_i4_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_i4_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_i4_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_i8_tlut0(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_i8_tlut1(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + void fetch_i8_raw(rgbaint_t& out, int32_t s, int32_t t, int32_t tbase, int32_t tpal, rdp_span_aux* userdata); + + texel_fetcher_t m_texel_fetch[16*5]; + + n64_rdp* m_rdp; + + int32_t m_maskbits_table[16]; + color_t m_expand_16to32_table[0x10000]; + uint16_t m_lod_lookup[0x80000]; + + rgbaint_t m_st2_add; + rgbaint_t m_v1; +}; + +#endif // _VIDEO_RDPTEXPIPE_H_ diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eigcc.h b/waterbox/ares64/ares/thirdparty/mame/osd/eigcc.h new file mode 100644 index 0000000000..2c5bf34ba7 --- /dev/null +++ 
b/waterbox/ares64/ares/thirdparty/mame/osd/eigcc.h @@ -0,0 +1,148 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/*************************************************************************** + + eigccppc.h + + Inline implementations for GCC compilers. This code is automatically + included if appropriate by eminline.h. + +***************************************************************************/ + +#ifndef MAME_OSD_EIGCC_H +#define MAME_OSD_EIGCC_H + +#include + + +/*************************************************************************** + INLINE MATH FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + addu_32x32_co - perform an unsigned 32 bit + 32 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#ifndef addu_32x32_co +#define addu_32x32_co _addu_32x32_co +inline bool _addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum) +{ + return __builtin_add_overflow(a, b, &sum); +} +#endif + + +/*------------------------------------------------- + addu_64x64_co - perform an unsigned 64 bit + 64 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#ifndef addu_64x64_co +#define addu_64x64_co _addu_64x64_co +inline bool _addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum) +{ + return __builtin_add_overflow(a, b, &sum); +} +#endif + + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros_32 - return the number of + leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef count_leading_zeros_32 +#define count_leading_zeros_32 _count_leading_zeros_32 +inline uint8_t _count_leading_zeros_32(uint32_t 
val) +{ + // uses CPU feature if available, otherwise falls back to runtime library call + static_assert(sizeof(val) == sizeof(unsigned), "expected 32-bit unsigned int"); + return uint8_t(unsigned(val ? __builtin_clz(val) : 32)); +} +#endif + + +/*------------------------------------------------- + count_leading_ones_32 - return the number of + leading one bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef count_leading_ones_32 +#define count_leading_ones_32 _count_leading_ones_32 +inline uint8_t _count_leading_ones_32(uint32_t val) +{ + return count_leading_zeros_32(~val); +} +#endif + + +/*------------------------------------------------- + count_leading_zeros_64 - return the number of + leading zero bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef count_leading_zeros_64 +#define count_leading_zeros_64 _count_leading_zeros_64 +inline uint8_t _count_leading_zeros_64(uint64_t val) +{ + // uses CPU feature if available, otherwise falls back to runtime library call + static_assert(sizeof(val) == sizeof(unsigned long long), "expected 64-bit unsigned long long int"); + return uint8_t(unsigned(val ? 
__builtin_clzll(val) : 64)); +} +#endif + + +/*------------------------------------------------- + count_leading_ones_64 - return the number of + leading one bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef count_leading_ones_64 +#define count_leading_ones_64 _count_leading_ones_64 +inline uint8_t _count_leading_ones_64(uint64_t val) +{ + return count_leading_zeros_64(~val); +} +#endif + + +/*------------------------------------------------- + population_count_32 - return the number of + one bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef population_count_32 +#define population_count_32 _population_count_32 +inline unsigned _population_count_32(uint32_t val) +{ + // uses CPU feature if available, otherwise falls back to implementation similar to eminline.h + static_assert(sizeof(val) == sizeof(unsigned), "expected 32-bit unsigned int"); + return unsigned(__builtin_popcount(static_cast(val))); +} +#endif + + +/*------------------------------------------------- + population_count_64 - return the number of + one bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef population_count_64 +#define population_count_64 _population_count_64 +inline unsigned _population_count_64(uint64_t val) +{ + // uses CPU feature if available, otherwise falls back to implementation similar to eminline.h + static_assert(sizeof(val) == sizeof(unsigned long long), "expected 64-bit unsigned long long int"); + return unsigned(__builtin_popcountll(static_cast(val))); +} +#endif + +#endif // MAME_OSD_EIGCC_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eigccarm.h b/waterbox/ares64/ares/thirdparty/mame/osd/eigccarm.h new file mode 100644 index 0000000000..90004daf9d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/eigccarm.h @@ -0,0 +1,332 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb 
+/*************************************************************************** + + eigccarm.h + + ARM/AArch64 inline implementations for GCC compilers. This code is + automatically included if appropriate by eminline.h. + +***************************************************************************/ + +#ifndef MAME_OSD_EIGCCARM_H +#define MAME_OSD_EIGCCARM_H + + +/*************************************************************************** + INLINE MATH FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + mul_32x32 - perform a signed 32 bit x 32 bit + multiply and return the full 64 bit result +-------------------------------------------------*/ + +// GCC can do a good job of this. + + +/*------------------------------------------------- + mulu_32x32 - perform an unsigned 32 bit x + 32 bit multiply and return the full 64 bit + result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mul_32x32_hi - perform a signed 32 bit x 32 bit + multiply and return the upper 32 bits of the + result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mulu_32x32_hi - perform an unsigned 32 bit x + 32 bit multiply and return the upper 32 bits + of the result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mul_32x32_shift - perform a signed 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#if !defined(__aarch64__) +#define mul_32x32_shift _mul_32x32_shift +inline int32_t ATTR_CONST ATTR_FORCE_INLINE +_mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift) +{ + uint32_t l, h; + + 
__asm__ ( + " smull %[l], %[h], %[val1], %[val2] \n" + : [l] "=r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) + ); + + // Valid for (0 <= shift <= 31) + return int32_t((l >> shift) | (h << (32 - shift))); +} +#endif + + +/*------------------------------------------------- + mulu_32x32_shift - perform an unsigned 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#if !defined(__aarch64__) +#define mulu_32x32_shift _mulu_32x32_shift +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +_mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) +{ + uint32_t l, h; + + __asm__ ( + " umull %[l], %[h], %[val1], %[val2] \n" + : [l] "=r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) + ); + + // Valid for (0 <= shift <= 31) + return (l >> shift) | (h << (32 - shift)); +} +#endif + + +/*------------------------------------------------- + div_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + div_64x32_rem - perform a signed 64 bit x 32 + bit divide and return the 32 bit quotient and + 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_64x32_rem - perform an unsigned 64 bit x + 32 bit divide and return the 32 bit quotient + and 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + div_32x32_shift - perform a signed divide of + two 32 bit values, shifting the first before + division, 
and returning the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_32x32_shift - perform an unsigned divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + mod_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + modu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + recip_approx - compute an approximate floating + point reciprocal +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define recip_approx _recip_approx +inline float ATTR_CONST ATTR_FORCE_INLINE +_recip_approx(float value) +{ + float result; + + __asm__ ( + " frecpe %s[result], %s[value] \n" + : [result] "=w" (result) + : [value] "w" (value) + ); + + return result; +} +#endif + + +/*------------------------------------------------- + mul_64x64 - perform a signed 64 bit x 64 bit + multiply and return the full 128 bit result +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define mul_64x64 _mul_64x64 +inline int64_t ATTR_FORCE_INLINE +_mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + __int128 const r(__int128(a) * b); + hi = int64_t(uint64_t((unsigned __int128)r >> 64)); + return int64_t(uint64_t((unsigned __int128)r)); +} +#endif + + +/*------------------------------------------------- + mulu_64x64 - perform an unsigned 64 bit x 64 + bit multiply and return the full 128 bit result +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define 
mulu_64x64 _mulu_64x64 +inline uint64_t ATTR_FORCE_INLINE +_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + unsigned __int128 const r((unsigned __int128)a * b); + hi = uint64_t(r >> 64); + return uint64_t(r); +} +#endif + + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros_32 - return the number of + leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define count_leading_zeros_32 _count_leading_zeros_32 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_zeros_32(uint32_t value) +{ + uint32_t result; + + __asm__ ( + " clz %w[result], %w[value] \n" + : [result] "=r" (result) + : [value] "r" (value) + ); + + return uint8_t(result); +} +#endif + + +/*------------------------------------------------- + count_leading_ones_32 - return the number of + leading one bits in a 32-bit value +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define count_leading_ones_32 _count_leading_ones_32 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_ones_32(uint32_t value) +{ + uint32_t result; + + __asm__ ( + " clz %w[result], %w[value] \n" + : [result] "=r" (result) + : [value] "r" (~value) + ); + + return uint8_t(result); +} +#endif + + +/*------------------------------------------------- + count_leading_zeros_64 - return the number of + leading zero bits in a 64-bit value +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define count_leading_zeros_64 _count_leading_zeros_64 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_zeros_64(uint64_t value) +{ + uint64_t result; + + __asm__ ( + " clz %[result], %[value] \n" + : [result] "=r" (result) + : [value] "r" (value) + ); + + return uint8_t(result); 
+} +#endif + + +/*------------------------------------------------- + count_leading_ones_64 - return the number of + leading one bits in a 64-bit value +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define count_leading_ones_64 _count_leading_ones_64 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_ones_64(uint64_t value) +{ + uint64_t result; + + __asm__ ( + " clz %[result], %[value] \n" + : [result] "=r" (result) + : [value] "r" (~value) + ); + + return uint8_t(result); +} +#endif + +#endif // MAME_OSD_EIGCCARM_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eigccppc.h b/waterbox/ares64/ares/thirdparty/mame/osd/eigccppc.h new file mode 100644 index 0000000000..b1f0fc638a --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/eigccppc.h @@ -0,0 +1,328 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/*************************************************************************** + + eigccppc.h + + PowerPC (32 and 64-bit) inline implementations for GCC compilers. This + code is automatically included if appropriate by eminline.h. + +***************************************************************************/ + +#ifndef MAME_OSD_EIGCCPPC_H +#define MAME_OSD_EIGCCPPC_H + + +/*************************************************************************** + INLINE MATH FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + mul_32x32 - perform a signed 32 bit x 32 bit + multiply and return the full 64 bit result +-------------------------------------------------*/ + +// GCC can do a good job of this. 
+ + +/*------------------------------------------------- + mulu_32x32 - perform an unsigned 32 bit x + 32 bit multiply and return the full 64 bit + result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mul_32x32_hi - perform a signed 32 bit x 32 bit + multiply and return the upper 32 bits of the + result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mulu_32x32_hi - perform an unsigned 32 bit x + 32 bit multiply and return the upper 32 bits + of the result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mul_32x32_shift - perform a signed 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64) +#define mul_32x32_shift _mul_32x32_shift +inline int32_t ATTR_CONST ATTR_FORCE_INLINE +_mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift) +{ + uint32_t l, h; + + __asm__ ( + " mullw %[l], %[val1], %[val2] \n" + " mulhw %[h], %[val1], %[val2] \n" + : [l] "=&r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) + ); + + // Valid for (0 <= shift <= 31) + return int32_t((l >> shift) | (h << (32 - shift))); +} +#endif + + +/*------------------------------------------------- + mulu_32x32_shift - perform an unsigned 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64) +#define mulu_32x32_shift _mulu_32x32_shift +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +_mulu_32x32_shift(uint32_t 
val1, uint32_t val2, uint8_t shift) +{ + uint32_t l, h; + + __asm__ ( + " mullw %[l], %[val1], %[val2] \n" + " mulhwu %[h], %[val1], %[val2] \n" + : [l] "=&r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) + ); + + // Valid for (0 <= shift <= 31) + return (l >> shift) | (h << (32 - shift)); +} +#endif + + +/*------------------------------------------------- + div_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + div_64x32_rem - perform a signed 64 bit x 32 + bit divide and return the 32 bit quotient and + 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_64x32_rem - perform an unsigned 64 bit x + 32 bit divide and return the 32 bit quotient + and 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + div_32x32_shift - perform a signed divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_32x32_shift - perform an unsigned divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + mod_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +// TBD + + 
+/*------------------------------------------------- + modu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + recip_approx - compute an approximate floating + point reciprocal +-------------------------------------------------*/ + +#define recip_approx _recip_approx +inline float ATTR_CONST ATTR_FORCE_INLINE +_recip_approx(float value) +{ + float result; + + __asm__ ( + " fres %[result], %[value] \n" + : [result] "=f" (result) + : [value] "f" (value) + ); + + return result; +} + + +/*------------------------------------------------- + mul_64x64 - perform a signed 64 bit x 64 bit + multiply and return the full 128 bit result +-------------------------------------------------*/ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(_ARCH_PPC64) +#define mul_64x64 _mul_64x64 +inline int64_t ATTR_FORCE_INLINE +_mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + __int128 const r(__int128(a) * b); + hi = int64_t(uint64_t((unsigned __int128)r >> 64)); + return int64_t(uint64_t((unsigned __int128)r)); +} +#endif + + +/*------------------------------------------------- + mulu_64x64 - perform an unsigned 64 bit x 64 + bit multiply and return the full 128 bit result +-------------------------------------------------*/ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(_ARCH_PPC64) +#define mulu_64x64 _mulu_64x64 +inline uint64_t ATTR_FORCE_INLINE +_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + unsigned __int128 const r((unsigned __int128)a * b); + hi = uint64_t(r >> 64); + return uint64_t(r); +} +#endif + + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros_32 - return the number of + 
leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#define count_leading_zeros_32 _count_leading_zeros_32 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_zeros_32(uint32_t value) +{ + uint32_t result; + + __asm__ ( + " cntlzw %[result], %[value] \n" + : [result] "=r" (result) + : [value] "r" (value) + ); + + return uint8_t(result); +} + + +/*------------------------------------------------- + count_leading_ones_32 - return the number of + leading one bits in a 32-bit value +-------------------------------------------------*/ + +#define count_leading_ones_32 _count_leading_ones_32 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_ones_32(uint32_t value) +{ + uint32_t result; + + __asm__ ( + " cntlzw %[result], %[value] \n" + : [result] "=r" (result) + : [value] "r" (~value) + ); + + return uint8_t(result); +} + + +/*------------------------------------------------- + count_leading_zeros_64 - return the number of + leading zero bits in a 64-bit value +-------------------------------------------------*/ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(_ARCH_PPC64) +#define count_leading_zeros_64 _count_leading_zeros_64 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_zeros_64(uint64_t value) +{ + uint64_t result; + + __asm__ ( + " cntlzd %[result], %[value] \n" + : [result] "=r" (result) + : [value] "r" (value) + ); + + return uint8_t(result); +} +#endif + + +/*------------------------------------------------- + count_leading_ones_64 - return the number of + leading one bits in a 64-bit value +-------------------------------------------------*/ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(_ARCH_PPC64) +#define count_leading_ones_64 _count_leading_ones_64 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_ones_64(uint64_t value) +{ + uint64_t result; + + __asm__ ( + " cntlzd %[result], %[value] \n" + : [result] "=r" (result) + : [value] "r" (~value) + ); + + 
return uint8_t(result); +} +#endif + +#endif // MAME_OSD_EIGCCPPC_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eigccx86.h b/waterbox/ares64/ares/thirdparty/mame/osd/eigccx86.h new file mode 100644 index 0000000000..35f02e12f5 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/eigccx86.h @@ -0,0 +1,526 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/*************************************************************************** + + eigccx86.h + + x86 (32 and 64-bit) inline implementations for GCC compilers. This + code is automatically included if appropriate by eminline.h. + +***************************************************************************/ + +#ifndef MAME_OSD_EIGCCX86_H +#define MAME_OSD_EIGCCX86_H + +// Include MMX/SSE intrinsics headers + +#ifdef __SSE2__ +#include +#include // MMX +#include // SSE +#include // SSE2 +#endif + + +/*************************************************************************** + INLINE MATH FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + mul_32x32 - perform a signed 32 bit x 32 bit + multiply and return the full 64 bit result +-------------------------------------------------*/ + +// GCC can do a good job of this. + + +/*------------------------------------------------- + mulu_32x32 - perform an unsigned 32 bit x + 32 bit multiply and return the full 64 bit + result +-------------------------------------------------*/ + +// GCC can do a good job of this. + + +/*------------------------------------------------- + mul_32x32_hi - perform a signed 32 bit x 32 bit + multiply and return the upper 32 bits of the + result +-------------------------------------------------*/ + +// GCC can do a good job of this. 
+ + +/*------------------------------------------------- + mulu_32x32_hi - perform an unsigned 32 bit x + 32 bit multiply and return the upper 32 bits + of the result +-------------------------------------------------*/ + +// GCC can do a good job of this. + + +/*------------------------------------------------- + mul_32x32_shift - perform a signed 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define mul_32x32_shift _mul_32x32_shift +inline int32_t ATTR_CONST ATTR_FORCE_INLINE +_mul_32x32_shift(int32_t a, int32_t b, uint8_t shift) +{ + int32_t result; + + // Valid for (0 <= shift <= 31) + __asm__ ( + " imull %[b] ;" + " shrdl %[shift], %%edx, %[result] ;" + : [result] "=a" (result) // result ends up in eax + : [a] "%0" (a) // 'a' should also be in eax on entry + , [b] "rm" (b) // 'b' can be memory or register + , [shift] "Ic" (shift) // 'shift' must be constant in 0-31 range or in cl + : "%edx", "cc" // clobbers edx and condition codes + ); + + return result; +} +#endif + + +/*------------------------------------------------- + mulu_32x32_shift - perform an unsigned 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define mulu_32x32_shift _mulu_32x32_shift +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +_mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +{ + uint32_t result; + + // Valid for (0 <= shift <= 31) + __asm__ ( + " mull %[b] ;" + " shrdl %[shift], %%edx, %[result] ;" + : [result] "=a" (result) // result ends up in eax + : [a] "%0" (a) // 'a' should also be in eax on entry + , [b] "rm" (b) // 'b' can be memory or register + , [shift] "Ic" (shift) // 'shift' must be constant in 0-31 range or in cl + : "%edx", "cc" // clobbers edx and condition codes + ); + + 
return result; +} +#endif + + +/*------------------------------------------------- + div_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define div_64x32 _div_64x32 +inline int32_t ATTR_CONST ATTR_FORCE_INLINE +_div_64x32(int64_t a, int32_t b) +{ + int32_t result, temp; + + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " idivl %[b] ;" + : [result] "=a" (result) // result ends up in eax + , [temp] "=d" (temp) // this is effectively a clobber + : [a] "A" (a) // 'a' in edx:eax + , [b] "rm" (b) // 'b' in register or memory + : "cc" // clobbers condition codes + ); + + return result; +} +#endif + + +/*------------------------------------------------- + divu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define divu_64x32 _divu_64x32 +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +_divu_64x32(uint64_t a, uint32_t b) +{ + uint32_t result, temp; + + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " divl %[b] ;" + : [result] "=a" (result) // result ends up in eax + , [temp] "=d" (temp) // this is effectively a clobber + : [a] "A" (a) // 'a' in edx:eax + , [b] "rm" (b) // 'b' in register or memory + : "cc" // clobbers condition codes + ); + + return result; +} +#endif + + +/*------------------------------------------------- + div_64x32_rem - perform a signed 64 bit x 32 + bit divide and return the 32 bit quotient and + 32 bit remainder +-------------------------------------------------*/ + +#define div_64x32_rem _div_64x32_rem +inline int32_t ATTR_FORCE_INLINE +_div_64x32_rem(int64_t dividend, int32_t divisor, int32_t &remainder) +{ + int32_t quotient; +#ifndef __x86_64__ + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " idivl %[divisor] ;" + : [result] "=a" 
(quotient) // quotient ends up in eax + , [remainder] "=d" (remainder) // remainder ends up in edx + : [dividend] "A" (dividend) // 'dividend' in edx:eax + , [divisor] "rm" (divisor) // 'divisor' in register or memory + : "cc" // clobbers condition codes + ); +#else + int32_t const divh{ int32_t(uint32_t(uint64_t(dividend) >> 32)) }; + int32_t const divl{ int32_t(uint32_t(uint64_t(dividend))) }; + + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " idivl %[divisor] ;" + : [result] "=a" (quotient) // quotient ends up in eax + , [remainder] "=d" (remainder) // remainder ends up in edx + : [divl] "a" (divl) // 'dividend' in edx:eax + , [divh] "d" (divh) + , [divisor] "rm" (divisor) // 'divisor' in register or memory + : "cc" // clobbers condition codes + ); +#endif + return quotient; +} + + +/*------------------------------------------------- + divu_64x32_rem - perform an unsigned 64 bit x + 32 bit divide and return the 32 bit quotient + and 32 bit remainder +-------------------------------------------------*/ + +#define divu_64x32_rem _divu_64x32_rem +inline uint32_t ATTR_FORCE_INLINE +_divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t &remainder) +{ + uint32_t quotient; +#ifndef __x86_64__ + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " divl %[divisor] ;" + : [result] "=a" (quotient) // quotient ends up in eax + , [remainder] "=d" (remainder) // remainder ends up in edx + : [dividend] "A" (dividend) // 'dividend' in edx:eax + , [divisor] "rm" (divisor) // 'divisor' in register or memory + : "cc" // clobbers condition codes + ); +#else + uint32_t const divh{ uint32_t(dividend >> 32) }; + uint32_t const divl{ uint32_t(dividend) }; + + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " divl %[divisor] ;" + : [result] "=a" (quotient) // quotient ends up in eax + , [remainder] "=d" (remainder) // remainder ends up in edx + : [divl] "a" (divl) // 'dividend' in edx:eax + 
, [divh] "d" (divh) + , [divisor] "rm" (divisor) // 'divisor' in register or memory + : "cc" // clobbers condition codes + ); + +#endif + return quotient; +} + + +/*------------------------------------------------- + div_32x32_shift - perform a signed divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define div_32x32_shift _div_32x32_shift +inline int32_t ATTR_CONST ATTR_FORCE_INLINE +_div_32x32_shift(int32_t a, int32_t b, uint8_t shift) +{ + int32_t result; + + // Valid for (0 <= shift <= 31) + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " cdq ;" + " shldl %[shift], %[a], %%edx ;" + " shll %[shift], %[a] ;" + " idivl %[b] ;" + : [result] "=&a" (result) // result ends up in eax + : [a] "0" (a) // 'a' should also be in eax on entry + , [b] "rm" (b) // 'b' can be memory or register + , [shift] "Ic" (shift) // 'shift' must be constant in 0-31 range or in cl + : "%edx", "cc" // clobbers edx and condition codes + ); + + return result; +} +#endif + + +/*------------------------------------------------- + divu_32x32_shift - perform an unsigned divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define divu_32x32_shift _divu_32x32_shift +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +_divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +{ + int32_t result; + + // Valid for (0 <= shift <= 31) + // Throws arithmetic exception if result doesn't fit in 32 bits + __asm__ ( + " clr %%edx ;" + " shldl %[shift], %[a], %%edx ;" + " shll %[shift], %[a] ;" + " divl %[b] ;" + : [result] "=&a" (result) // result ends up in eax + : [a] "0" (a) // 'a' should also be in eax on entry + , [b] "rm" (b) // 'b' can be memory or register + , [shift] "Ic" (shift) // 'shift' must be constant in 0-31 range 
or in cl + : "%edx", "cc" // clobbers edx and condition codes + ); + + return result; +} +#endif + + +/*------------------------------------------------- + mod_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define mod_64x32 _mod_64x32 +inline int32_t ATTR_CONST ATTR_FORCE_INLINE +_mod_64x32(int64_t a, int32_t b) +{ + int32_t result, temp; + + // Throws arithmetic exception if quotient doesn't fit in 32 bits + __asm__ ( + " idivl %[b] ;" + : [result] "=d" (result) // Result ends up in edx + , [temp] "=a" (temp) // This is effectively a clobber + : [a] "A" (a) // 'a' in edx:eax + , [b] "rm" (b) // 'b' in register or memory + : "cc" // Clobbers condition codes + ); + + return result; +} +#endif + + +/*------------------------------------------------- + modu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +#ifndef __x86_64__ +#define modu_64x32 _modu_64x32 +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +_modu_64x32(uint64_t a, uint32_t b) +{ + uint32_t result, temp; + + // Throws arithmetic exception if quotient doesn't fit in 32 bits + __asm__ ( + " divl %[b] ;" + : [result] "=d" (result) // Result ends up in edx + , [temp] "=a" (temp) // This is effectively a clobber + : [a] "A" (a) // 'a' in edx:eax + , [b] "rm" (b) // 'b' in register or memory + : "cc" // Clobbers condition codes + ); + + return result; +} +#endif + + +/*------------------------------------------------- + recip_approx - compute an approximate floating + point reciprocal +-------------------------------------------------*/ + +#ifdef __SSE2__ +#define recip_approx _recip_approx +inline float ATTR_CONST ATTR_FORCE_INLINE +_recip_approx(float value) +{ + __m128 const value_xmm(_mm_set_ss(value)); + __m128 const result_xmm(_mm_rcp_ss(value_xmm)); + float result; + _mm_store_ss(&result, result_xmm); + 
return result; +} +#endif + + +/*------------------------------------------------- + mul_64x64 - perform a signed 64 bit x 64 bit + multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef __x86_64__ +#define mul_64x64 _mul_64x64 +inline int64_t ATTR_FORCE_INLINE +_mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + __int128 const r(__int128(a) * b); + hi = int64_t(uint64_t((unsigned __int128)r >> 64)); + return int64_t(uint64_t((unsigned __int128)r)); +} +#endif + + +/*------------------------------------------------- + mulu_64x64 - perform an unsigned 64 bit x 64 + bit multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef __x86_64__ +#define mulu_64x64 _mulu_64x64 +inline uint64_t ATTR_FORCE_INLINE +_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + unsigned __int128 const r((unsigned __int128)a * b); + hi = uint64_t(r >> 64); + return uint64_t(r); +} +#endif + + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros_32 - return the number of + leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#define count_leading_zeros_32 _count_leading_zeros_32 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_zeros_32(uint32_t value) +{ + uint32_t result; + __asm__ ( + " bsrl %[value], %[result] ;" + " cmovzl %[bias], %[result] ;" + : [result] "=&r" (result) // result can be in any register + : [value] "rm" (value) // 'value' can be register or memory + , [bias] "rm" (~uint32_t(0)) // 'bias' can be register or memory + : "cc" // clobbers condition codes + ); + return uint8_t(31U - result); +} + + +/*------------------------------------------------- + count_leading_ones_32 - return the number of + leading one 
bits in a 32-bit value +-------------------------------------------------*/ + +#define count_leading_ones_32 _count_leading_ones_32 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_ones_32(uint32_t value) +{ + uint32_t result; + __asm__ ( + " bsrl %[value], %[result] ;" + " cmovzl %[bias], %[result] ;" + : [result] "=&r" (result) // result can be in any register + : [value] "rm" (~value) // 'value' can be register or memory + , [bias] "rm" (~uint32_t(0)) // 'bias' can be register or memory + : "cc" // clobbers condition codes + ); + return uint8_t(31U - result); +} + + +/*------------------------------------------------- + count_leading_zeros_64 - return the number of + leading zero bits in a 64-bit value +-------------------------------------------------*/ + +#ifdef __x86_64__ +#define count_leading_zeros_64 _count_leading_zeros_64 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_zeros_64(uint64_t value) +{ + uint64_t result; + __asm__ ( + " bsrq %[value], %[result] ;" + " cmovzq %[bias], %[result] ;" + : [result] "=&r" (result) // result can be in any register + : [value] "rm" (value) // 'value' can be register or memory + , [bias] "rm" (~uint64_t(0)) // 'bias' can be register or memory + : "cc" // clobbers condition codes + ); + return uint8_t(63U - result); +} +#endif + + +/*------------------------------------------------- + count_leading_ones_64 - return the number of + leading one bits in a 64-bit value +-------------------------------------------------*/ + +#ifdef __x86_64__ +#define count_leading_ones_64 _count_leading_ones_64 +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_ones_64(uint64_t value) +{ + uint64_t result; + __asm__ ( + " bsrq %[value], %[result] ;" + " cmovzq %[bias], %[result] ;" + : [result] "=&r" (result) // result can be in any register + : [value] "rm" (~value) // 'value' can be register or memory + , [bias] "rm" (~uint64_t(0)) // 'bias' can be register or memory + : "cc" // clobbers condition codes + 
); + return uint8_t(63U - result); +} +#endif + +#endif // MAME_OSD_EIGCCX86_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eivc.h b/waterbox/ares64/ares/thirdparty/mame/osd/eivc.h new file mode 100644 index 0000000000..2ffd0e7633 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/eivc.h @@ -0,0 +1,94 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +//============================================================ +// +// eivc.h +// +// Inline implementations for MSVC compiler. +// +//============================================================ + +#ifndef MAME_OSD_EIVC_H +#define MAME_OSD_EIVC_H + +#pragma once + +#include +#pragma intrinsic(_BitScanReverse) +#ifdef PTR64 +#pragma intrinsic(_BitScanReverse64) +#endif + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros_32 - return the number of + leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef count_leading_zeros_32 +#define count_leading_zeros_32 _count_leading_zeros_32 +__forceinline uint8_t _count_leading_zeros_32(uint32_t value) +{ + unsigned long index; + return _BitScanReverse(&index, value) ? (31U - index) : 32U; +} +#endif + + +/*------------------------------------------------- + count_leading_ones_32 - return the number of + leading one bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef count_leading_ones_32 +#define count_leading_ones_32 _count_leading_ones_32 +__forceinline uint8_t _count_leading_ones_32(uint32_t value) +{ + unsigned long index; + return _BitScanReverse(&index, ~value) ? 
(31U - index) : 32U; +} +#endif + + +/*------------------------------------------------- + count_leading_zeros_64 - return the number of + leading zero bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef count_leading_zeros_64 +#define count_leading_zeros_64 _count_leading_zeros_64 +__forceinline uint8_t _count_leading_zeros_64(uint64_t value) +{ + unsigned long index; +#ifdef PTR64 + return _BitScanReverse64(&index, value) ? (63U - index) : 64U; +#else + return _BitScanReverse(&index, uint32_t(value >> 32)) ? (31U - index) : _BitScanReverse(&index, uint32_t(value)) ? (63U - index) : 64U; +#endif +} +#endif + + +/*------------------------------------------------- + count_leading_ones_64 - return the number of + leading one bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef count_leading_ones_64 +#define count_leading_ones_64 _count_leading_ones_64 +__forceinline uint8_t _count_leading_ones_64(uint64_t value) +{ + unsigned long index; +#ifdef PTR64 + return _BitScanReverse64(&index, ~value) ? (63U - index) : 64U; +#else + return _BitScanReverse(&index, ~uint32_t(value >> 32)) ? (31U - index) : _BitScanReverse(&index, ~uint32_t(value)) ? (63U - index) : 64U; +#endif +} +#endif + +#endif // MAME_OSD_EIVC_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eivcarm.h b/waterbox/ares64/ares/thirdparty/mame/osd/eivcarm.h new file mode 100644 index 0000000000..5e612cb25f --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/eivcarm.h @@ -0,0 +1,75 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +//============================================================ +// +// eivcarm.h +// +// ARM/AArch64 inline implementations for MSVC compiler. 
+// +//============================================================ + +#ifndef MAME_OSD_EIVCARM_H +#define MAME_OSD_EIVCARM_H + +#pragma once + +#include + +#pragma intrinsic(_CountLeadingZeros) +#pragma intrinsic(_CountLeadingZeros64) +#pragma intrinsic(_CountLeadingOnes) +#pragma intrinsic(_CountLeadingOnes64) + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros_32 - return the number of + leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#define count_leading_zeros_32 _count_leading_zeros_32 +__forceinline uint8_t _count_leading_zeros_32(uint32_t value) +{ + return uint8_t(_CountLeadingZeros(value)); +} + + +/*------------------------------------------------- + count_leading_ones_32 - return the number of + leading one bits in a 32-bit value +-------------------------------------------------*/ + +#define count_leading_ones_32 _count_leading_ones_32 +__forceinline uint8_t _count_leading_ones_32(uint32_t value) +{ + return uint8_t(_CountLeadingOnes(value)); +} + + +/*------------------------------------------------- + count_leading_zeros_64 - return the number of + leading zero bits in a 64-bit value +-------------------------------------------------*/ + +#define count_leading_zeros_64 _count_leading_zeros_64 +__forceinline uint8_t _count_leading_zeros_64(uint64_t value) +{ + return uint8_t(_CountLeadingZeros64(value)); +} + + +/*------------------------------------------------- + count_leading_ones_64 - return the number of + leading one bits in a 64-bit value +-------------------------------------------------*/ + +#define count_leading_ones_64 _count_leading_ones_64 +__forceinline uint8_t _count_leading_ones_64(uint64_t value) +{ + return uint8_t(_CountLeadingOnes64(value)); +} + +#endif // 
MAME_OSD_EIVCARM_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eivcx86.h b/waterbox/ares64/ares/thirdparty/mame/osd/eivcx86.h new file mode 100644 index 0000000000..eb8811ad8b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/eivcx86.h @@ -0,0 +1,468 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +//============================================================ +// +// eivcx86.h +// +// x86 inline implementations for MSVC compiler. +// +//============================================================ + +#ifndef MAME_OSD_EIVCX86_H +#define MAME_OSD_EIVCX86_H + +#pragma once + +#ifdef PTR64 +#include +#endif + +#include + + +/*************************************************************************** + INLINE MATH FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + mul_32x32 - perform a signed 32 bit x 32 bit + multiply and return the full 64 bit result +-------------------------------------------------*/ + +#ifndef PTR64 +#define mul_32x32 _mul_32x32 +inline int64_t _mul_32x32(int32_t a, int32_t b) +{ + // in theory this should work, but it is untested + __asm + { + mov eax,a + imul b + // leave results in edx:eax + } +} +#endif + + +/*------------------------------------------------- + mulu_32x32 - perform an unsigned 32 bit x + 32 bit multiply and return the full 64 bit + result +-------------------------------------------------*/ + +#ifndef PTR64 +#define mulu_32x32 _mulu_32x32 +inline uint64_t _mulu_32x32(uint32_t a, uint32_t b) +{ + // in theory this should work, but it is untested + __asm + { + mov eax,a + mul b + // leave results in edx:eax + } +} +#endif + + +/*------------------------------------------------- + mul_32x32_hi - perform a signed 32 bit x 32 bit + multiply and return the upper 32 bits of the + result +-------------------------------------------------*/ + +#ifndef PTR64 +#define mul_32x32_hi _mul_32x32_hi +inline int32_t 
_mul_32x32_hi(int32_t a, int32_t b) +{ + int32_t result; + + __asm + { + mov eax,a + imul b + mov result,edx + } + + return result; +} +#endif + + +/*------------------------------------------------- + mulu_32x32_hi - perform an unsigned 32 bit x + 32 bit multiply and return the upper 32 bits + of the result +-------------------------------------------------*/ + +#ifndef PTR64 +#define mulu_32x32_hi _mulu_32x32_hi +inline uint32_t _mulu_32x32_hi(uint32_t a, uint32_t b) +{ + int32_t result; + + __asm + { + mov eax,a + mul b + mov result,edx + } + + return result; +} +#endif + + +/*------------------------------------------------- + mul_32x32_shift - perform a signed 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#ifndef PTR64 +#define mul_32x32_shift _mul_32x32_shift +static inline int32_t _mul_32x32_shift(int32_t a, int32_t b, uint8_t shift) +{ + int32_t result; + + __asm + { + mov eax,a + imul b + mov cl,shift + shrd eax,edx,cl + mov result,eax + } + + return result; +} +#endif + + +/*------------------------------------------------- + mulu_32x32_shift - perform an unsigned 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#ifndef PTR64 +#define mulu_32x32_shift _mulu_32x32_shift +inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +{ + int32_t result; + + __asm + { + mov eax,a + mul b + mov cl,shift + shrd eax,edx,cl + mov result,eax + } + + return result; +} +#endif + + +/*------------------------------------------------- + div_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +#ifndef PTR64 +#define div_64x32 _div_64x32 +inline int32_t _div_64x32(int64_t a, int32_t b) +{ + int32_t result; + int32_t alow = 
a; + int32_t ahigh = a >> 32; + + __asm + { + mov eax,alow + mov edx,ahigh + idiv b + mov result,eax + } + + return result; +} +#endif + + +/*------------------------------------------------- + divu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +#ifndef PTR64 +#define divu_64x32 _divu_64x32 +inline uint32_t _divu_64x32(uint64_t a, uint32_t b) +{ + uint32_t result; + uint32_t alow = a; + uint32_t ahigh = a >> 32; + + __asm + { + mov eax,alow + mov edx,ahigh + div b + mov result,eax + } + + return result; +} +#endif + + +/*------------------------------------------------- + div_64x32_rem - perform a signed 64 bit x 32 + bit divide and return the 32 bit quotient and + 32 bit remainder +-------------------------------------------------*/ + +#ifndef PTR64 +#define div_64x32_rem _div_64x32_rem +inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t &remainder) +{ + int32_t result; + int32_t alow = a; + int32_t ahigh = a >> 32; + int32_t rem; + + __asm + { + mov eax,alow + mov edx,ahigh + idiv b + mov result,eax + mov rem,edx + } + + remainder = rem; + return result; +} +#endif + + +/*------------------------------------------------- + divu_64x32_rem - perform an unsigned 64 bit x + 32 bit divide and return the 32 bit quotient + and 32 bit remainder +-------------------------------------------------*/ + +#ifndef PTR64 +#define divu_64x32_rem _divu_64x32_rem +inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder) +{ + uint32_t result; + uint32_t alow = a; + uint32_t ahigh = a >> 32; + uint32_t rem; + + __asm + { + mov eax,alow + mov edx,ahigh + div b + mov result,eax + mov rem,edx + } + + remainder = rem; + return result; +} +#endif + + +/*------------------------------------------------- + div_32x32_shift - perform a signed divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient 
+-------------------------------------------------*/ + +#ifndef PTR64 +#define div_32x32_shift _div_32x32_shift +inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift) +{ + int32_t result; + + __asm + { + mov eax,a + cdq + mov cl,shift + shld edx,eax,cl + shl eax,cl + idiv b + mov result,eax + } + + return result; +} +#endif + + +/*------------------------------------------------- + divu_32x32_shift - perform an unsigned divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +#ifndef PTR64 +#define divu_32x32_shift _divu_32x32_shift +inline uint32_t _divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +{ + uint32_t result; + + __asm + { + mov eax,a + xor edx,edx + mov cl,shift + shld edx,eax,cl + shl eax,cl + div b + mov result,eax + } + + return result; +} +#endif + + +/*------------------------------------------------- + mod_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +#ifndef PTR64 +#define mod_64x32 _mod_64x32 +static inline int32_t _mod_64x32(int64_t a, int32_t b) +{ + int32_t result; + int32_t alow = a; + int32_t ahigh = a >> 32; + + __asm + { + mov eax,alow + mov edx,ahigh + idiv b + mov result,edx + } + + return result; +} +#endif + + +/*------------------------------------------------- + modu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +#ifndef PTR64 +#define modu_64x32 _modu_64x32 +inline uint32_t _modu_64x32(uint64_t a, uint32_t b) +{ + uint32_t result; + uint32_t alow = a; + uint32_t ahigh = a >> 32; + + __asm + { + mov eax,alow + mov edx,ahigh + div b + mov result,edx + } + + return result; +} +#endif + + +/*------------------------------------------------- + recip_approx - compute an approximate floating + point reciprocal 
+-------------------------------------------------*/ + +#ifdef PTR64 +#define recip_approx _recip_approx +inline float _recip_approx(float z) +{ + __m128 const mz = _mm_set_ss(z); + __m128 const mooz = _mm_rcp_ss(mz); + float ooz; + _mm_store_ss(&ooz, mooz); + return ooz; +} +#endif + + +/*------------------------------------------------- + mul_64x64 - perform a signed 64 bit x 64 bit + multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef PTR64 +#define mul_64x64 _mul_64x64 +__forceinline int64_t _mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + return _mul128(a, b, &hi); +} +#endif + + +/*------------------------------------------------- + mulu_64x64 - perform an unsigned 64 bit x 64 + bit multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef PTR64 +#define mulu_64x64 _mulu_64x64 +__forceinline int64_t _mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + return _umul128(a, b, &hi); +} +#endif + + +/*------------------------------------------------- + addu_32x32_co - perform an unsigned 32 bit + 32 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#define addu_32x32_co _addu_32x32_co +__forceinline bool _addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum) +{ + return _addcarry_u32(0, a, b, &sum); +} + + +/*------------------------------------------------- + addu_64x64_co - perform an unsigned 64 bit + 64 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#define addu_64x64_co _addu_64x64_co +__forceinline bool _addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum) +{ +#ifdef PTR64 + return _addcarry_u64(0, a, b, &sum); +#else + uint32_t l, h; + bool const result = _addcarry_u32(_addcarry_u32(0, uint32_t(a), uint32_t(b), &l), uint32_t(a >> 32), uint32_t(b >> 32), &h); + sum = (uint64_t(h) << 32) | l; + return result; +#endif +} + +#endif // 
MAME_OSD_EIVCX86_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/eminline.h b/waterbox/ares64/ares/thirdparty/mame/osd/eminline.h new file mode 100644 index 0000000000..02bd22b3fd --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/eminline.h @@ -0,0 +1,495 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + eminline.h + + Definitions for inline functions that can be overridden by OSD- + specific code. + +***************************************************************************/ + +#ifndef MAME_OSD_EMINLINE_H +#define MAME_OSD_EMINLINE_H + +#pragma once + +#include "osdcomm.h" +#include "osdcore.h" + +#if !defined(MAME_NOASM) + +#if defined(__GNUC__) + +#if defined(__i386__) || defined(__x86_64__) +#include "eigccx86.h" +#elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__) +#include "eigccppc.h" +#elif defined(__arm__) || defined(__aarch64__) +#include "eigccarm.h" +#endif + +#include "eigcc.h" + +#elif defined(_MSC_VER) + +#if defined(_M_IX86) || defined(_M_X64) +#include "eivcx86.h" +#elif defined(_M_ARM) || defined(_M_ARM64) +#include "eivcarm.h" +#endif + +#include "eivc.h" + +#endif + +#endif // !defined(MAME_NOASM) + + +/*************************************************************************** + INLINE MATH FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + mul_32x32 - perform a signed 32 bit x 32 bit + multiply and return the full 64 bit result +-------------------------------------------------*/ + +#ifndef mul_32x32 +constexpr int64_t mul_32x32(int32_t a, int32_t b) +{ + return int64_t(a) * int64_t(b); +} +#endif + + +/*------------------------------------------------- + mulu_32x32 - perform an unsigned 32 bit x + 32 bit multiply and return the full 64 bit + result +-------------------------------------------------*/ + 
+#ifndef mulu_32x32 +constexpr uint64_t mulu_32x32(uint32_t a, uint32_t b) +{ + return uint64_t(a) * uint64_t(b); +} +#endif + + +/*------------------------------------------------- + mul_32x32_hi - perform a signed 32 bit x 32 bit + multiply and return the upper 32 bits of the + result +-------------------------------------------------*/ + +#ifndef mul_32x32_hi +constexpr int32_t mul_32x32_hi(int32_t a, int32_t b) +{ + return uint32_t((int64_t(a) * int64_t(b)) >> 32); +} +#endif + + +/*------------------------------------------------- + mulu_32x32_hi - perform an unsigned 32 bit x + 32 bit multiply and return the upper 32 bits + of the result +-------------------------------------------------*/ + +#ifndef mulu_32x32_hi +constexpr uint32_t mulu_32x32_hi(uint32_t a, uint32_t b) +{ + return uint32_t((uint64_t(a) * uint64_t(b)) >> 32); +} +#endif + + +/*------------------------------------------------- + mul_32x32_shift - perform a signed 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#ifndef mul_32x32_shift +constexpr int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift) +{ + return int32_t((int64_t(a) * int64_t(b)) >> shift); +} +#endif + + +/*------------------------------------------------- + mulu_32x32_shift - perform an unsigned 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#ifndef mulu_32x32_shift +constexpr uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +{ + return uint32_t((uint64_t(a) * uint64_t(b)) >> shift); +} +#endif + + +/*------------------------------------------------- + div_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +#ifndef div_64x32 +constexpr int32_t div_64x32(int64_t a, 
int32_t b) +{ + return a / int64_t(b); +} +#endif + + +/*------------------------------------------------- + divu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +#ifndef divu_64x32 +constexpr uint32_t divu_64x32(uint64_t a, uint32_t b) +{ + return a / uint64_t(b); +} +#endif + + +/*------------------------------------------------- + div_64x32_rem - perform a signed 64 bit x 32 + bit divide and return the 32 bit quotient and + 32 bit remainder +-------------------------------------------------*/ + +#ifndef div_64x32_rem +inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t &remainder) +{ + int32_t const res(div_64x32(a, b)); + remainder = a - (int64_t(b) * res); + return res; +} +#endif + + +/*------------------------------------------------- + divu_64x32_rem - perform an unsigned 64 bit x + 32 bit divide and return the 32 bit quotient + and 32 bit remainder +-------------------------------------------------*/ + +#ifndef divu_64x32_rem +inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder) +{ + uint32_t const res(divu_64x32(a, b)); + remainder = a - (uint64_t(b) * res); + return res; +} +#endif + + +/*------------------------------------------------- + div_32x32_shift - perform a signed divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +#ifndef div_32x32_shift +constexpr int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift) +{ + return (int64_t(a) << shift) / int64_t(b); +} +#endif + + +/*------------------------------------------------- + divu_32x32_shift - perform an unsigned divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +#ifndef divu_32x32_shift +constexpr uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +{ 
+ return (uint64_t(a) << shift) / uint64_t(b); +} +#endif + + +/*------------------------------------------------- + mod_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +#ifndef mod_64x32 +constexpr int32_t mod_64x32(int64_t a, int32_t b) +{ + return a - (b * div_64x32(a, b)); +} +#endif + + +/*------------------------------------------------- + modu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +#ifndef modu_64x32 +constexpr uint32_t modu_64x32(uint64_t a, uint32_t b) +{ + return a - (b * divu_64x32(a, b)); +} +#endif + + +/*------------------------------------------------- + recip_approx - compute an approximate floating + point reciprocal +-------------------------------------------------*/ + +#ifndef recip_approx +constexpr float recip_approx(float value) +{ + return 1.0f / value; +} +#endif + + +/*------------------------------------------------- + mul_64x64 - perform a signed 64 bit x 64 bit + multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifndef mul_64x64 +inline int64_t mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + uint64_t const a_hi = uint64_t(a) >> 32; + uint64_t const b_hi = uint64_t(b) >> 32; + uint64_t const a_lo = uint32_t(uint64_t(a)); + uint64_t const b_lo = uint32_t(uint64_t(b)); + + uint64_t const ab_lo = a_lo * b_lo; + uint64_t const ab_m1 = a_hi * b_lo; + uint64_t const ab_m2 = a_lo * b_hi; + uint64_t const ab_hi = a_hi * b_hi; + uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32; + + hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry; + + // adjust for sign + if (a < 0) + hi -= b; + if (b < 0) + hi -= a; + + return ab_lo + (ab_m1 << 32) + (ab_m2 << 32); +} +#endif + + +/*------------------------------------------------- + mulu_64x64 - perform an unsigned 64 bit x 64 + bit 
multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifndef mulu_64x64 +inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + uint64_t const a_hi = uint32_t(a >> 32); + uint64_t const b_hi = uint32_t(b >> 32); + uint64_t const a_lo = uint32_t(a); + uint64_t const b_lo = uint32_t(b); + + uint64_t const ab_lo = a_lo * b_lo; + uint64_t const ab_m1 = a_hi * b_lo; + uint64_t const ab_m2 = a_lo * b_hi; + uint64_t const ab_hi = a_hi * b_hi; + uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32; + + hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry; + + return ab_lo + (ab_m1 << 32) + (ab_m2 << 32); +} +#endif + + +/*------------------------------------------------- + addu_32x32_co - perform an unsigned 32 bit + 32 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#ifndef addu_32x32_co +inline bool addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum) +{ + sum = a + b; + return (a > sum) || (b > sum); +} +#endif + + +/*------------------------------------------------- + addu_64x64_co - perform an unsigned 64 bit + 64 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#ifndef addu_64x64_co +inline bool addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum) +{ + sum = a + b; + return (a > sum) || (b > sum); +} +#endif + + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros_32 - return the number of + leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef count_leading_zeros_32 +inline uint8_t count_leading_zeros_32(uint32_t val) +{ + if (!val) return 32U; + uint8_t count; + for (count = 0; int32_t(val) >= 0; 
count++) val <<= 1; + return count; +} +#endif + + +/*------------------------------------------------- + count_leading_ones_32 - return the number of + leading one bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef count_leading_ones_32 +inline uint8_t count_leading_ones_32(uint32_t val) +{ + uint8_t count; + for (count = 0; int32_t(val) < 0; count++) val <<= 1; + return count; +} +#endif + + +/*------------------------------------------------- + count_leading_zeros_64 - return the number of + leading zero bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef count_leading_zeros_64 +inline uint8_t count_leading_zeros_64(uint64_t val) +{ + if (!val) return 64U; + uint8_t count; + for (count = 0; int64_t(val) >= 0; count++) val <<= 1; + return count; +} +#endif + + +/*------------------------------------------------- + count_leading_ones_64 - return the number of + leading one bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef count_leading_ones_64 +inline uint8_t count_leading_ones_64(uint64_t val) +{ + uint8_t count; + for (count = 0; int64_t(val) < 0; count++) val <<= 1; + return count; +} +#endif + + +/*------------------------------------------------- + population_count_32 - return the number of + one bits in a 32-bit value +-------------------------------------------------*/ + +#ifndef population_count_32 +inline unsigned population_count_32(uint32_t val) +{ +#if defined(__NetBSD__) + return popcount32(val); +#else + // optimal Hamming weight assuming fast 32*32->32 + constexpr uint32_t m1(0x55555555); + constexpr uint32_t m2(0x33333333); + constexpr uint32_t m4(0x0f0f0f0f); + constexpr uint32_t h01(0x01010101); + val -= (val >> 1) & m1; + val = (val & m2) + ((val >> 2) & m2); + val = (val + (val >> 4)) & m4; + return unsigned((val * h01) >> 24); +#endif +} +#endif + + +/*------------------------------------------------- + population_count_64 - return the 
number of + one bits in a 64-bit value +-------------------------------------------------*/ + +#ifndef population_count_64 +inline unsigned population_count_64(uint64_t val) +{ +#if defined(__NetBSD__) + return popcount64(val); +#else + // guess that architectures with 64-bit pointers have 64-bit multiplier + if (sizeof(void *) >= sizeof(uint64_t)) + { + // optimal Hamming weight assuming fast 64*64->64 + constexpr uint64_t m1(0x5555555555555555); + constexpr uint64_t m2(0x3333333333333333); + constexpr uint64_t m4(0x0f0f0f0f0f0f0f0f); + constexpr uint64_t h01(0x0101010101010101); + val -= (val >> 1) & m1; + val = (val & m2) + ((val >> 2) & m2); + val = (val + (val >> 4)) & m4; + return unsigned((val * h01) >> 56); + } + else + { + // fall back to two 32-bit operations to avoid slow multiply + return population_count_32(uint32_t(val)) + population_count_32(uint32_t(val >> 32)); + } +#endif +} +#endif + + +/*************************************************************************** + INLINE TIMING FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + get_profile_ticks - return a tick counter + from the processor that can be used for + profiling. It does not need to run at any + particular rate. +-------------------------------------------------*/ + +#ifndef get_profile_ticks +inline int64_t get_profile_ticks() +{ + return osd_ticks(); +} +#endif + +#endif // MAME_OSD_EMINLINE_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/osdcomm.h b/waterbox/ares64/ares/thirdparty/mame/osd/osdcomm.h new file mode 100644 index 0000000000..75370f1b81 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/osdcomm.h @@ -0,0 +1,119 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/*************************************************************************** + + osdcomm.h + + Common definitions shared by the OSD layer. 
This includes the most + fundamental integral types as well as compiler-specific tweaks. + +***************************************************************************/ +#ifndef MAME_OSD_OSDCOMM_H +#define MAME_OSD_OSDCOMM_H + +#pragma once + +#include +#include +#include + +#include + + +/*************************************************************************** + COMPILER-SPECIFIC NASTINESS +***************************************************************************/ + +// The Win32 port requires this constant for variable arg routines. +#ifndef CLIB_DECL +#define CLIB_DECL +#endif + + +// Some optimizations/warnings cleanups for GCC +#if defined(__GNUC__) +#define ATTR_PRINTF(x,y) __attribute__((format(printf, x, y))) +#define ATTR_CONST __attribute__((const)) +#define ATTR_FORCE_INLINE __attribute__((always_inline)) +#define ATTR_HOT __attribute__((hot)) +#define ATTR_COLD __attribute__((cold)) +#define UNEXPECTED(exp) __builtin_expect(!!(exp), 0) +#define EXPECTED(exp) __builtin_expect(!!(exp), 1) +#define RESTRICT __restrict__ +#else +#define ATTR_PRINTF(x,y) +#define ATTR_CONST +#define ATTR_FORCE_INLINE __forceinline +#define ATTR_HOT +#define ATTR_COLD +#define UNEXPECTED(exp) (exp) +#define EXPECTED(exp) (exp) +#define RESTRICT +#endif + + + +/*************************************************************************** + FUNDAMENTAL TYPES +***************************************************************************/ + +namespace osd { + +using u8 = std::uint8_t; +using u16 = std::uint16_t; +using u32 = std::uint32_t; +using u64 = std::uint64_t; + +using s8 = std::int8_t; +using s16 = std::int16_t; +using s32 = std::int32_t; +using s64 = std::int64_t; + +} // namespace OSD + +/*************************************************************************** + FUNDAMENTAL MACROS +***************************************************************************/ + +// Concatenate/extract 32-bit halves of 64-bit values +constexpr uint64_t concat_64(uint32_t hi, uint32_t 
lo) { return (uint64_t(hi) << 32) | uint32_t(lo); } +constexpr uint32_t extract_64hi(uint64_t val) { return uint32_t(val >> 32); } +constexpr uint32_t extract_64lo(uint64_t val) { return uint32_t(val); } + +// Macros for normalizing data into big or little endian formats +constexpr uint16_t swapendian_int16(uint16_t val) { return (val << 8) | (val >> 8); } + +constexpr uint32_t swapendian_int32_partial16(uint32_t val) { return ((val << 8) & 0xFF00FF00U) | ((val >> 8) & 0x00FF00FFU); } +constexpr uint32_t swapendian_int32(uint32_t val) { return (swapendian_int32_partial16(val) << 16) | (swapendian_int32_partial16(val) >> 16); } + +constexpr uint64_t swapendian_int64_partial16(uint64_t val) { return ((val << 8) & 0xFF00FF00FF00FF00U) | ((val >> 8) & 0x00FF00FF00FF00FFU); } +constexpr uint64_t swapendian_int64_partial32(uint64_t val) { return ((swapendian_int64_partial16(val) << 16) & 0xFFFF0000FFFF0000U) | ((swapendian_int64_partial16(val) >> 16) & 0x0000FFFF0000FFFFU); } +constexpr uint64_t swapendian_int64(uint64_t val) { return (swapendian_int64_partial32(val) << 32) | (swapendian_int64_partial32(val) >> 32); } + +#ifdef LSB_FIRST +constexpr uint16_t big_endianize_int16(uint16_t x) { return swapendian_int16(x); } +constexpr uint32_t big_endianize_int32(uint32_t x) { return swapendian_int32(x); } +constexpr uint64_t big_endianize_int64(uint64_t x) { return swapendian_int64(x); } +constexpr uint16_t little_endianize_int16(uint16_t x) { return x; } +constexpr uint32_t little_endianize_int32(uint32_t x) { return x; } +constexpr uint64_t little_endianize_int64(uint64_t x) { return x; } +#else +constexpr uint16_t big_endianize_int16(uint16_t x) { return x; } +constexpr uint32_t big_endianize_int32(uint32_t x) { return x; } +constexpr uint64_t big_endianize_int64(uint64_t x) { return x; } +constexpr uint16_t little_endianize_int16(uint16_t x) { return swapendian_int16(x); } +constexpr uint32_t little_endianize_int32(uint32_t x) { return swapendian_int32(x); } +constexpr 
uint64_t little_endianize_int64(uint64_t x) { return swapendian_int64(x); } +#endif // LSB_FIRST + +#ifdef _MSC_VER +using ssize_t = std::make_signed_t; +#endif + +#ifdef __GNUC__ +#ifndef alloca +#define alloca(size) __builtin_alloca(size) +#endif +#endif + +#endif // MAME_OSD_OSDCOMM_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/osdcore.cpp b/waterbox/ares64/ares/thirdparty/mame/osd/osdcore.cpp new file mode 100644 index 0000000000..fc73218ff2 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/osdcore.cpp @@ -0,0 +1,223 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles + +#include "osdcore.h" +#include +#include + +#if defined(SDLMAME_ANDROID) +#include +#endif + +#ifdef _WIN32 +#include +#include +#include +#if !defined(MAME_RDP) +#include "strconv.h" +#endif +#endif + +static const int MAXSTACK = 10; +static osd_output *m_stack[MAXSTACK]; +static int m_ptr = -1; + +/*------------------------------------------------- + osd_output +-------------------------------------------------*/ + +void osd_output::push(osd_output *delegate) +{ + if (m_ptr < MAXSTACK - 1) + { + delegate->m_chain = (m_ptr >= 0 ? 
m_stack[m_ptr] : nullptr); + m_ptr++; + m_stack[m_ptr] = delegate; + } +} + +void osd_output::pop(osd_output *delegate) +{ + int f = -1; + for (int i = 0; i <= m_ptr; i++) + if (m_stack[i] == delegate) + { + f = i; + break; + } + if (f >= 0) + { + if (f < m_ptr) + m_stack[f+1]->m_chain = m_stack[f]->m_chain; + m_ptr--; + for (int i = f; i <= m_ptr; i++) + m_stack[i] = m_stack[i+1]; + } +} + + +/*************************************************************************** + OUTPUT MANAGEMENT +***************************************************************************/ + +/*------------------------------------------------- + osd_vprintf_error - output an error to the + appropriate callback +-------------------------------------------------*/ + +void osd_vprintf_error(util::format_argument_pack const &args) +{ +#if defined(SDLMAME_ANDROID) + __android_log_write(ANDROID_LOG_ERROR, "%s", util::string_format(args).c_str()); +#else + if (m_ptr >= 0) m_stack[m_ptr]->output_callback(OSD_OUTPUT_CHANNEL_ERROR, args); +#endif +} + + +/*------------------------------------------------- + osd_vprintf_warning - output a warning to the + appropriate callback +-------------------------------------------------*/ + +void osd_vprintf_warning(util::format_argument_pack const &args) +{ +#if defined(SDLMAME_ANDROID) + __android_log_write(ANDROID_LOG_WARN, "%s", util::string_format(args).c_str()); +#else + if (m_ptr >= 0) m_stack[m_ptr]->output_callback(OSD_OUTPUT_CHANNEL_WARNING, args); +#endif +} + + +/*------------------------------------------------- + osd_vprintf_info - output info text to the + appropriate callback +-------------------------------------------------*/ + +void osd_vprintf_info(util::format_argument_pack const &args) +{ +#if defined(SDLMAME_ANDROID) + __android_log_write(ANDROID_LOG_INFO, "%s", util::string_format(args).c_str()); +#else + if (m_ptr >= 0) m_stack[m_ptr]->output_callback(OSD_OUTPUT_CHANNEL_INFO, args); +#endif +} + + 
+/*------------------------------------------------- + osd_vprintf_verbose - output verbose text to + the appropriate callback +-------------------------------------------------*/ + +void osd_vprintf_verbose(util::format_argument_pack const &args) +{ +#if defined(SDLMAME_ANDROID) + __android_log_write( ANDROID_LOG_VERBOSE, "%s", util::string_format(args).c_str()); +#else + if (m_ptr >= 0) m_stack[m_ptr]->output_callback(OSD_OUTPUT_CHANNEL_VERBOSE, args); +#endif +} + + +/*------------------------------------------------- + osd_vprintf_debug - output debug text to the + appropriate callback +-------------------------------------------------*/ + +void osd_vprintf_debug(util::format_argument_pack const &args) +{ +#if defined(SDLMAME_ANDROID) + __android_log_write(ANDROID_LOG_DEBUG, "%s", util::string_format(args).c_str()); +#else + if (m_ptr >= 0) m_stack[m_ptr]->output_callback(OSD_OUTPUT_CHANNEL_DEBUG, args); +#endif +} + + +//============================================================ +// osd_ticks +//============================================================ + +osd_ticks_t osd_ticks() +{ +#ifdef _WIN32 + LARGE_INTEGER val; + QueryPerformanceCounter(&val); + return val.QuadPart; +#else + return std::chrono::high_resolution_clock::now().time_since_epoch().count(); +#endif +} + + +//============================================================ +// osd_ticks_per_second +//============================================================ + +osd_ticks_t osd_ticks_per_second() +{ +#ifdef _WIN32 + LARGE_INTEGER val; + QueryPerformanceFrequency(&val); + return val.QuadPart; +#else + return std::chrono::high_resolution_clock::period::den / std::chrono::high_resolution_clock::period::num; +#endif +} + +//============================================================ +// osd_sleep +//============================================================ + +void osd_sleep(osd_ticks_t duration) +{ +#ifdef _WIN32 +// sleep_for appears to oversleep on Windows with gcc 8 + Sleep(duration / 
(osd_ticks_per_second() / 1000)); +#else + std::this_thread::sleep_for(std::chrono::high_resolution_clock::duration(duration)); +#endif +} + + +#if !defined(MAME_RDP) +//============================================================ +// osd_get_command_line - returns command line arguments +// in an std::vector in UTF-8 +// +// The real purpose of this call is to hide details necessary +// on Windows (provided that one wants to avoid using wmain) +//============================================================ + +std::vector osd_get_command_line(int argc, char *argv[]) +{ + std::vector results; +#ifdef _WIN32 + { + // Get the command line from Windows + int count; + LPWSTR *wide_args = CommandLineToArgvW(GetCommandLineW(), &count); + + // Convert the returned command line arguments to UTF8 std::vector + results.reserve(count); + for (int i = 0; i < count; i++) + { + std::string arg = osd::text::from_wstring(wide_args[i]); + results.push_back(std::move(arg)); + } + + LocalFree(wide_args); + } +#else // !_WIN32 + { + // for non Windows platforms, we are assuming that arguments are + // already UTF-8; we just need to convert to std::vector + results.reserve(argc); + for (int i = 0; i < argc; i++) + results.emplace_back(argv[i]); + } +#endif // _WIN32 + return results; +} +#endif diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/osdcore.h b/waterbox/ares64/ares/thirdparty/mame/osd/osdcore.h new file mode 100644 index 0000000000..a7f652470d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/osdcore.h @@ -0,0 +1,676 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/// \file +/// \brief Core OS-dependent code interface +/// +/// The prototypes in this file describe the interfaces that the MAME +/// core and various tools rely on to interact with the outside world. +/// They are broken out into several categories. 
+#ifndef MAME_OSD_OSDCORE_H +#define MAME_OSD_OSDCORE_H + +#pragma once + + +#include "osdcomm.h" + +#include "strformat.h" + +#include +#include +#include +#include +#include +#include + + +/// \brief Get environment variable value +/// +/// \param [in] name Name of the environment variable as a +/// NUL-terminated string. +/// \return Pointer to environment variable value as a NUL-terminated +/// string if found, or nullptr if not found. +const char *osd_getenv(const char *name); + + +/// \brief Get current process ID +/// +/// \return The process ID of the current process. +int osd_getpid(); + + +/*----------------------------------------------------------------------------- + osd_uchar_from_osdchar: convert the given character or sequence of + characters from the OS-default encoding to a Unicode character + + Parameters: + + uchar - pointer to a uint32_t to receive the resulting unicode + character + + osdchar - pointer to one or more chars that are in the OS-default + encoding + + count - number of characters provided in the OS-default encoding + + Return value: + + The number of characters required to form a Unicode character. +-----------------------------------------------------------------------------*/ +int osd_uchar_from_osdchar(char32_t *uchar, const char *osdchar, size_t count); + + + +/*************************************************************************** + TIMING INTERFACES +***************************************************************************/ + +/* a osd_ticks_t is a 64-bit unsigned integer that is used as a core type in timing interfaces */ +typedef uint64_t osd_ticks_t; + + +/*----------------------------------------------------------------------------- + osd_ticks: return the current running tick counter + + Parameters: + + None + + Return value: + + an osd_ticks_t value which represents the current tick counter + + Notes: + + The resolution of this counter should be 1ms or better for accurate + performance. 
It is also important that the source of this timer be + accurate. It is ok if this call is not ultra-fast, since it is + primarily used for once/frame synchronization. +-----------------------------------------------------------------------------*/ +osd_ticks_t osd_ticks(); + + +/*----------------------------------------------------------------------------- + osd_ticks_per_second: return the number of ticks per second + + Parameters: + + None + + Return value: + + an osd_ticks_t value which represents the number of ticks per + second +-----------------------------------------------------------------------------*/ +osd_ticks_t osd_ticks_per_second(); + + +/*----------------------------------------------------------------------------- + osd_sleep: sleep for the specified time interval + + Parameters: + + duration - an osd_ticks_t value that specifies how long we should + sleep + + Return value: + + None + + Notes: + + The OSD layer should try to sleep for as close to the specified + duration as possible, or less. This is used as a mechanism to + "give back" unneeded time to other programs running in the system. + On a simple, non multitasking system, this routine can be a no-op. + If there is significant volatility in the amount of time that the + sleep occurs for, the OSD layer should strive to sleep for less time + than specified rather than sleeping too long. 
+-----------------------------------------------------------------------------*/ +void osd_sleep(osd_ticks_t duration); + +/*************************************************************************** + WORK ITEM INTERFACES +***************************************************************************/ + +/* this is the maximum number of supported threads for a single work queue */ +/* threadid values are expected to range from 0..WORK_MAX_THREADS-1 */ +#define WORK_MAX_THREADS 16 + +/* these flags can be set when creating a queue to give hints to the code about + how to configure the queue */ +#define WORK_QUEUE_FLAG_IO 0x0001 +#define WORK_QUEUE_FLAG_MULTI 0x0002 +#define WORK_QUEUE_FLAG_HIGH_FREQ 0x0004 + +/* these flags can be set when queueing a work item to indicate how to handle + its deconstruction */ +#define WORK_ITEM_FLAG_AUTO_RELEASE 0x0001 + +/* osd_work_queue is an opaque type which represents a queue of work items */ +struct osd_work_queue; + +/* osd_work_item is an opaque type which represents a single work item */ +struct osd_work_item; + +/* osd_work_callback is a callback function that does work */ +typedef void *(*osd_work_callback)(void *param, int threadid); + + +/*----------------------------------------------------------------------------- + osd_work_queue_alloc: create a new work queue + + Parameters: + + flags - one or more of the WORK_QUEUE_FLAG_* values ORed together: + + WORK_QUEUE_FLAG_IO - indicates that the work queue will do some + I/O; this may be a useful hint so that threads are created + even on single-processor systems since I/O can often be + overlapped with other work + + WORK_QUEUE_FLAG_MULTI - indicates that the work queue should + take advantage of as many processors as it can; items queued + here are assumed to be fully independent or shared + + WORK_QUEUE_FLAG_HIGH_FREQ - indicates that items are expected + to be queued at high frequency and acted upon quickly; in + general, this implies doing some spin-waiting internally + 
before falling back to OS-specific synchronization + + Return value: + + A pointer to an allocated osd_work_queue object. + + Notes: + + A work queue abstracts the notion of how potentially threaded work + can be performed. If no threading support is available, it is a + simple matter to execute the work items as they are queued. +-----------------------------------------------------------------------------*/ +osd_work_queue *osd_work_queue_alloc(int flags); + + +/*----------------------------------------------------------------------------- + osd_work_queue_items: return the number of pending items in the queue + + Parameters: + + queue - pointer to an osd_work_queue that was previously created via + osd_work_queue_alloc + + Return value: + + The number of incomplete items remaining in the queue. +-----------------------------------------------------------------------------*/ +int osd_work_queue_items(osd_work_queue *queue); + + +/*----------------------------------------------------------------------------- + osd_work_queue_wait: wait for the queue to be empty + + Parameters: + + queue - pointer to an osd_work_queue that was previously created via + osd_work_queue_alloc + + timeout - a timeout value in osd_ticks_per_second() + + Return value: + + true if the queue is empty; false if the wait timed out before the + queue was emptied. +-----------------------------------------------------------------------------*/ +bool osd_work_queue_wait(osd_work_queue *queue, osd_ticks_t timeout); + + +/*----------------------------------------------------------------------------- + osd_work_queue_free: free a work queue, waiting for all items to complete + + Parameters: + + queue - pointer to an osd_work_queue that was previously created via + osd_work_queue_alloc + + Return value: + + None. 
+-----------------------------------------------------------------------------*/ +void osd_work_queue_free(osd_work_queue *queue); + + +/*----------------------------------------------------------------------------- + osd_work_item_queue_multiple: queue a set of work items + + Parameters: + + queue - pointer to an osd_work_queue that was previously created via + osd_work_queue_alloc + + callback - pointer to a function that will do the work + + numitems - number of work items to queue + + param - a void * parameter that can be used to pass data to the + function + + paramstep - the number of bytes to increment param by for each item + queued; for example, if you have an array of work_unit objects, + you can point param to the base of the array and set paramstep to + sizeof(work_unit) + + flags - one or more of the WORK_ITEM_FLAG_* values ORed together: + + WORK_ITEM_FLAG_AUTO_RELEASE - indicates that the work item + should be automatically freed when it is complete + + Return value: + + A pointer to the final allocated osd_work_item in the list. + + Notes: + + On single-threaded systems, this function may actually execute the + work item immediately before returning. 
+-----------------------------------------------------------------------------*/ +osd_work_item *osd_work_item_queue_multiple(osd_work_queue *queue, osd_work_callback callback, int32_t numitems, void *parambase, int32_t paramstep, uint32_t flags); + + +/* inline helper to queue a single work item using the same interface */ +static inline osd_work_item *osd_work_item_queue(osd_work_queue *queue, osd_work_callback callback, void *param, uint32_t flags) +{ + return osd_work_item_queue_multiple(queue, callback, 1, param, 0, flags); +} + + +/*----------------------------------------------------------------------------- + osd_work_item_wait: wait for a work item to complete + + Parameters: + + item - pointer to an osd_work_item that was previously returned from + osd_work_item_queue + + timeout - a timeout value in osd_ticks_per_second() + + Return value: + + true if the item completed; false if the wait timed out before the + item completed. +-----------------------------------------------------------------------------*/ +bool osd_work_item_wait(osd_work_item *item, osd_ticks_t timeout); + + +/*----------------------------------------------------------------------------- + osd_work_item_result: get the result of a work item + + Parameters: + + item - pointer to an osd_work_item that was previously returned from + osd_work_item_queue + + Return value: + + A void * that represents the work item's result. +-----------------------------------------------------------------------------*/ +void *osd_work_item_result(osd_work_item *item); + + +/*----------------------------------------------------------------------------- + osd_work_item_release: release the memory allocated to a work item + + Parameters: + + item - pointer to an osd_work_item that was previously returned from + osd_work_item_queue + + Return value: + + None. + + Notes: + + The osd_work_item exists until explicitly released, even if it has + long since completed. 
It is the queuer's responsibility to release + any work items it has queued. +-----------------------------------------------------------------------------*/ +void osd_work_item_release(osd_work_item *item); + + + +/*************************************************************************** + MISCELLANEOUS INTERFACES +***************************************************************************/ + +/// \brief Break into host debugger if attached +/// +/// This function is called when a fatal error occurs. If a debugger is +/// attached, it should break and display the specified message. +/// \param [in] message Message to output to the debugger as a +/// NUL-terminated string. +void osd_break_into_debugger(const char *message); + + +/// \brief Get clipboard text +/// +/// Gets current clipboard content as UTF-8 text. Returns an empty +/// string if the clipboard contents cannot be converted to plain text. +/// \return Clipboard contents or an empty string. +std::string osd_get_clipboard_text(); + + +/*************************************************************************** + MIDI I/O INTERFACES +***************************************************************************/ + +class osd_midi_device +{ +public: + virtual ~osd_midi_device() { } + // free result with osd_close_midi_channel() + virtual bool open_input(const char *devname) = 0; + // free result with osd_close_midi_channel() + virtual bool open_output(const char *devname) = 0; + virtual void close() = 0; + virtual bool poll() = 0; + virtual int read(uint8_t *pOut) = 0; + virtual void write(uint8_t data) = 0; +}; + +//FIXME: really needed here? 
+void osd_list_network_adapters(); + + +/*************************************************************************** + UNCATEGORIZED INTERFACES +***************************************************************************/ + +/*----------------------------------------------------------------------------- + osd_subst_env: substitute environment variables with values + + Parameters: + + dst - result pointer + src - source string + +-----------------------------------------------------------------------------*/ +void osd_subst_env(std::string &dst, std::string const &src); + +class osd_gpu +{ +public: + osd_gpu() { } + virtual ~osd_gpu() { } + + typedef uint64_t handle_t; + + class vertex_decl + { + public: + enum attr_type : uint32_t + { + FLOAT32, + FLOAT16, + UINT32, + UINT16, + UINT8, + + MAX_TYPES + }; + + static constexpr size_t TYPE_SIZES[MAX_TYPES] = { 4, 2, 4, 2, 1 }; + + static constexpr uint32_t MAX_COLORS = 2; + static constexpr uint32_t MAX_TEXCOORDS = 8; + + enum attr_usage : uint32_t + { + POSITION, + COLOR, + TEXCOORD = COLOR + MAX_COLORS, + NORMAL = TEXCOORD + MAX_TEXCOORDS, + BINORMAL, + TANGENT, + + MAX_ATTRS + }; + + class attr_entry + { + public: + attr_entry() : m_usage(POSITION), m_type(FLOAT32), m_count(3), m_size(12) { } + attr_entry(attr_usage usage, attr_type type, size_t count) : m_usage(usage), m_type(type), m_count(count), m_size(TYPE_SIZES[type] * count) { } + + attr_usage usage() const { return m_usage; } + attr_type type() const { return m_type; } + size_t count() const { return m_count; } + size_t size() const { return m_size; } + + private: + attr_usage m_usage; + attr_type m_type; + size_t m_count; + size_t m_size; + }; + + vertex_decl() + : m_entry_count(0) + , m_size(0) + { + } + + vertex_decl & add_attr(attr_usage usage, attr_type type, size_t count) + { + m_entries[m_entry_count] = attr_entry(usage, type, count); + m_size += m_entries[m_entry_count].size(); + m_entry_count++; + return *this; + } + + size_t entry_count() const { 
return m_entry_count; } + size_t size() const { return m_size; } + const attr_entry &entry(const uint32_t index) const { return m_entries[index]; } + + protected: + attr_entry m_entries[MAX_ATTRS]; + size_t m_entry_count; + size_t m_size; + }; + + class vertex_buffer_interface + { + public: + vertex_buffer_interface(vertex_decl &decl, uint32_t flags) + : m_decl(decl) + , m_flags(flags) + { + } + virtual ~vertex_buffer_interface() {} + + const vertex_decl &decl() const { return m_decl; } + uint32_t flags() const { return m_flags; } + handle_t handle() { return m_handle; } + + virtual size_t count() const = 0; + virtual size_t size() const = 0; + virtual void upload() = 0; + + protected: + const vertex_decl &m_decl; + const uint32_t m_flags; + handle_t m_handle; + }; + + class static_vertex_buffer_interface : public vertex_buffer_interface + { + public: + enum vertex_buffer_flags : uint32_t + { + RETAIN_ON_CPU = 0x00000001 + }; + + static_vertex_buffer_interface(vertex_decl &decl, size_t count, uint32_t flags) + : vertex_buffer_interface(decl, flags) + , m_count(count) + , m_size(decl.size() * count) + { + } + + virtual ~static_vertex_buffer_interface() + { + if (m_data) + delete [] m_data; + } + + size_t count() const override { return m_count; } + size_t size() const override { return m_size; } + + void set_data(void *data) + { + allocate_if_needed(); + memcpy(m_data, data, m_size); + } + + protected: + void allocate_if_needed() + { + if ((m_flags & RETAIN_ON_CPU) != 0 && m_data == nullptr) + m_data = new uint8_t[m_size]; + } + + const size_t m_count; + const size_t m_size; + uint8_t *m_data; + }; + + virtual void bind_buffer(vertex_buffer_interface *vb) = 0; + virtual void unbind_buffer(vertex_buffer_interface *vb) = 0; +}; + + +/// \defgroup osd_printf Diagnostic output functions +/// \{ + +// output channels +enum osd_output_channel +{ + OSD_OUTPUT_CHANNEL_ERROR, + OSD_OUTPUT_CHANNEL_WARNING, + OSD_OUTPUT_CHANNEL_INFO, + OSD_OUTPUT_CHANNEL_DEBUG, + 
OSD_OUTPUT_CHANNEL_VERBOSE, + OSD_OUTPUT_CHANNEL_LOG, + OSD_OUTPUT_CHANNEL_COUNT +}; + +class osd_output +{ +public: + osd_output() { } + virtual ~osd_output() { } + + virtual void output_callback(osd_output_channel channel, util::format_argument_pack const &args) = 0; + + static void push(osd_output *delegate); + static void pop(osd_output *delegate); + +protected: + + void chain_output(osd_output_channel channel, util::format_argument_pack const &args) const + { + if (m_chain) + m_chain->output_callback(channel, args); + } + +private: + osd_output *m_chain = nullptr; +}; + +void osd_vprintf_error(util::format_argument_pack const &args); +void osd_vprintf_warning(util::format_argument_pack const &args); +void osd_vprintf_info(util::format_argument_pack const &args); +void osd_vprintf_verbose(util::format_argument_pack const &args); +void osd_vprintf_debug(util::format_argument_pack const &args); + +/// \brief Print error message +/// +/// By default, error messages are sent to standard error. The relaxed +/// format rules used by util::string_format apply. +/// \param [in] fmt Message format string. +/// \param [in] args Optional message format arguments. +/// \sa util::string_format +template void osd_printf_error(Format &&fmt, Params &&...args) +{ + return osd_vprintf_error(util::make_format_argument_pack(std::forward(fmt), std::forward(args)...)); +} + +/// \brief Print warning message +/// +/// By default, warning messages are sent to standard error. The +/// relaxed format rules used by util::string_format apply. +/// \param [in] fmt Message format string. +/// \param [in] args Optional message format arguments. +/// \sa util::string_format +template void osd_printf_warning(Format &&fmt, Params &&...args) +{ + return osd_vprintf_warning(util::make_format_argument_pack(std::forward(fmt), std::forward(args)...)); +} + +/// \brief Print informational message +/// +/// By default, informational messages are sent to standard output. 
+/// The relaxed format rules used by util::string_format apply. +/// \param [in] fmt Message format string. +/// \param [in] args Optional message format arguments. +/// \sa util::string_format +template void osd_printf_info(Format &&fmt, Params &&...args) +{ + return osd_vprintf_info(util::make_format_argument_pack(std::forward(fmt), std::forward(args)...)); +} + +/// \brief Print verbose diagnostic message +/// +/// Verbose diagnostic messages are disabled by default. If enabled, +/// they are sent to standard output by default. The relaxed format +/// rules used by util::string_format apply. Note that the format +/// string and arguments will always be evaluated, even if verbose +/// diagnostic messages are disabled. +/// \param [in] fmt Message format string. +/// \param [in] args Optional message format arguments. +/// \sa util::string_format +template void osd_printf_verbose(Format &&fmt, Params &&...args) +{ + return osd_vprintf_verbose(util::make_format_argument_pack(std::forward(fmt), std::forward(args)...)); +} + +/// \brief Print debug message +/// +/// By default, debug messages are sent to standard output for debug +/// builds only. The relaxed format rules used by util::string_format +/// apply. Note that the format string and arguments will always be +/// evaluated, even if debug messages are disabled. +/// \param [in] fmt Message format string. +/// \param [in] args Optional message format arguments. +/// \sa util::string_format +template void osd_printf_debug(Format &&fmt, Params &&...args) +{ + return osd_vprintf_debug(util::make_format_argument_pack(std::forward(fmt), std::forward(args)...)); +} + +/// \} + + +// returns command line arguments as an std::vector in UTF-8 +std::vector osd_get_command_line(int argc, char *argv[]); + +/* discourage the use of printf directly */ +/* sadly, can't do this because of the ATTR_PRINTF under GCC */ +/* +#undef printf +#define printf !MUST_USE_osd_printf_*_CALLS_WITHIN_THE_CORE! 
+*/ + +// specifies "aggressive focus" - should MAME capture input for any windows co-habiting a MAME window? +void osd_set_aggressive_input_focus(bool aggressive_focus); + +#endif // MAME_OSD_OSDCORE_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/osdfile.h b/waterbox/ares64/ares/thirdparty/mame/osd/osdfile.h new file mode 100644 index 0000000000..bc3efbde80 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/osdfile.h @@ -0,0 +1,287 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +/// \file +/// \brief Core OS-dependent file interface +/// +/// The prototypes in this file describe the interfaces that the MAME +/// core and various tools rely on to interact with the outside world. +/// They are broken out into several categories. + +#ifndef MAME_OSD_OSDFILE_H +#define MAME_OSD_OSDFILE_H + +#pragma once + +#include +#include +#include +#include +#include +#include + + +/*************************************************************************** + FILE I/O INTERFACES +***************************************************************************/ + +/* Make sure we have a path separator (default to /) */ +#ifndef PATH_SEPARATOR +#if defined(_WIN32) +#define PATH_SEPARATOR "\\" +#else +#define PATH_SEPARATOR "/" +#endif +#endif + +/// \defgroup openflags File open flags +/// \{ + +/// Open file for reading. +constexpr uint32_t OPEN_FLAG_READ = 0x0001; + +/// Open file for writing. +constexpr uint32_t OPEN_FLAG_WRITE = 0x0002; + +/// Create the file, or truncate it if it exists. +constexpr uint32_t OPEN_FLAG_CREATE = 0x0004; + +/// Create non-existent directories in the path. +constexpr uint32_t OPEN_FLAG_CREATE_PATHS = 0x0008; + +/// Do not decompress into memory on open. +constexpr uint32_t OPEN_FLAG_NO_PRELOAD = 0x0010; + +/// \} + + +/// \brief Interface to file-like resources +/// +/// This interface is used to access file-like and stream-like +/// resources. 
Examples include plain files, TCP socket, named pipes, +/// pseudo-terminals, and compressed archive members. +class osd_file +{ +public: + /// \brief Smart pointer to a file handle + typedef std::unique_ptr ptr; + + /// \brief Open a new file handle + /// + /// This function is called by core_fopen and several other places + /// in the core to access files. These functions will construct + /// paths by concatenating various search paths held in the + /// options.c options database with partial paths specified by the + /// core. The core assumes that the path separator is the first + /// character of the string PATH_SEPARATOR, but does not interpret + /// any path separators in the search paths, so if you use a + /// different path separator in a search path, you may get a mixture + /// of PATH_SEPARATORs (from the core) and alternate path separators + /// (specified by users and placed into the options database). + /// \param [in] path Path to the file to open. + /// \param [in] openflags Combination of #OPEN_FLAG_READ, + /// #OPEN_FLAG_WRITE, #OPEN_FLAG_CREATE and + /// #OPEN_FLAG_CREATE_PATHS specifying the requested access mode + /// and open behaviour. + /// \param [out] file Receives the file handle if the operation + /// succeeds. Not valid if the operation fails. + /// \param [out] filesize Receives the size of the opened file if + /// the operation succeeded. Not valid if the operation failed. + /// Will be zero for stream-like objects (e.g. TCP sockets or + /// named pipes). + /// \return Result of the operation. + static std::error_condition open(std::string const &path, std::uint32_t openflags, ptr &file, std::uint64_t &filesize) noexcept; + + /// \brief Create a new pseudo-terminal (PTY) pair + /// + /// \param [out] file Receives the handle of the master side of the + /// pseudo-terminal if the operation succeeds. Not valid if the + /// operation fails. 
+ /// \param [out] name Receives the name of the slave side of the + /// pseudo-terminal if the operation succeeds. Not valid if the + /// operation fails. + /// \return Result of the operation. + static std::error_condition openpty(ptr &file, std::string &name) noexcept; + + /// \brief Close an open file + virtual ~osd_file() { } + + /// \brief Read from an open file + /// + /// Read data from an open file at specified offset. Note that the + /// seek and read are not guaranteed to be atomic, which may cause + /// issues in multi-threaded applications. + /// \param [out] buffer Pointer to memory that will receive the data + /// read. + /// \param [in] offset Byte offset within the file to read at, + /// relative to the start of the file. Ignored for stream-like + /// objects (e.g. TCP sockets or named pipes). + /// \param [in] length Number of bytes to read. Fewer bytes may be + /// read if the end of file is reached, or if no data is + /// available. + /// \param [out] actual Receives the number of bytes read if the + /// operation succeeds. Not valid if the operation fails. + /// \return Result of the operation. + virtual std::error_condition read(void *buffer, std::uint64_t offset, std::uint32_t length, std::uint32_t &actual) noexcept = 0; + + /// \brief Write to an open file + /// + /// Write data to an open file at specified offset. Note that the + /// seek and write are not guaranteed to be atomic, which may cause + /// issues in multi-threaded applications. + /// \param [in] buffer Pointer to memory containing data to write. + /// \param [in] offset Byte offset within the file to write at, + /// relative to the start of the file. Ignored for stream-like + /// objects (e.g. TCP sockets or named pipes). + /// \param [in] length Number of bytes to write. + /// \param [out] actual Receives the number of bytes written if the + /// operation succeeds. Not valid if the operation fails. + /// \return Result of the operation. 
+ virtual std::error_condition write(void const *buffer, std::uint64_t offset, std::uint32_t length, std::uint32_t &actual) noexcept = 0; + + /// \brief Change the size of an open file + /// + /// \param [in] offset Desired size of the file. + /// \return Result of the operation. + virtual std::error_condition truncate(std::uint64_t offset) noexcept = 0; + + /// \brief Flush file buffers + /// + /// This flushes any data cached by the application, but does not + /// guarantee that all prior writes have reached persistent storage. + /// \return Result of the operation. + virtual std::error_condition flush() noexcept = 0; + + /// \brief Delete a file + /// + /// \param [in] filename Path to the file to delete. + /// \return Result of the operation. + static std::error_condition remove(std::string const &filename) noexcept; +}; + + +/// \brief Describe geometry of physical drive +/// +/// If the given path points to a physical drive, return the geometry of +/// that drive. +/// +/// \param [in] filename Pointer to a path which might describe a +/// physical drive. +/// \param [out] cylinders Pointer to a uint32_t to receive the number of +/// cylinders of the physical drive. +/// \param [out] heads Pointer to a uint32_t to receive the number of +/// heads per cylinder of the physical drive. +/// \param [out] sectors Pointer to a uint32_t to receive the number of +/// sectors per cylinder of the physical drive. +/// \param [out] bps Pointer to a uint32_t to receive the number of +/// bytes per sector of the physical drive. +/// \return true if the filename points to a physical drive and if the +/// values pointed to by cylinders, heads, sectors, and bps are valid; +/// false in any other case +bool osd_get_physical_drive_geometry(const char *filename, uint32_t *cylinders, uint32_t *heads, uint32_t *sectors, uint32_t *bps) noexcept; + + +/// \brief Is the given character legal for filenames? +/// +/// \param [in] uchar The character to check. 
+/// \return Whether this character is legal in a filename. +bool osd_is_valid_filename_char(char32_t uchar) noexcept; + + +/// \brief Is the given character legal for paths? +/// +/// \param [in] uchar The character to check. +/// \return Whether this character is legal in a file path. +bool osd_is_valid_filepath_char(char32_t uchar) noexcept; + + +/*************************************************************************** + DIRECTORY INTERFACES +***************************************************************************/ + +namespace osd +{ + // directory is an opaque type which represents an open directory + class directory + { + public: + typedef std::unique_ptr ptr; + + // osd::directory::entry contains basic information about a file when iterating through + // a directory + class entry + { + public: + enum class entry_type + { + NONE, + FILE, + DIR, + OTHER + }; + + const char * name; // name of the entry + entry_type type; // type of the entry + std::uint64_t size; // size of the entry + std::chrono::system_clock::time_point last_modified; // last modified time + }; + + /// \brief Open a directory for iteration. + /// + /// \param [in] dirname Path to the directory in question. + /// \return Upon success, a directory pointer which contains opaque + /// data necessary to traverse the directory; on failure, nullptr. + static ptr open(std::string const &dirname); + + /// \brief Close an open directory. + virtual ~directory() { } + + /// \brief Return information about the next entry in the directory. + /// + /// \return A constant pointer to an entry representing the current + /// item in the directory, or nullptr, indicating that no more + /// entries are present. + virtual const entry *read() = 0; + }; +}; + + +/// \brief Return a directory entry for a path. +/// +/// \param [in] path The path in question. +/// \return An allocated pointer to an osd::directory::entry representing +/// info on the path; even if the file does not exist. 
+std::unique_ptr osd_stat(std::string const &path); + + +/*************************************************************************** + PATH INTERFACES +***************************************************************************/ + +/// \brief Returns whether the specified path is absolute. +/// +/// \param [in] path The path in question. +/// \return true if the path is absolute, false otherwise. +bool osd_is_absolute_path(const std::string &path) noexcept; + + +/// \brief Retrieves the full path. +/// \param [in] path The path in question. +/// \param [out] dst Reference to receive new path. +/// \return File error. +std::error_condition osd_get_full_path(std::string &dst, std::string const &path) noexcept; + + +/// \brief Retrieves the volume name. +/// +/// \param [in] idx Index number of volume. +/// \return Volume name or empty string of out of range. +std::string osd_get_volume_name(int idx); + + +/// \brief Retrieves volume names. +/// +/// \return Names of all mounted volumes. +std::vector osd_get_volume_names(); + + +#endif // MAME_OSD_OSDFILE_H diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/osdsync.cpp b/waterbox/ares64/ares/thirdparty/mame/osd/osdsync.cpp new file mode 100644 index 0000000000..a81e6b06a1 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/osdsync.cpp @@ -0,0 +1,823 @@ +// license:BSD-3-Clause +// copyright-holders:Aaron Giles +//============================================================ +// +// osdsync.c - OSD core work item functions +// +//============================================================ +#if defined(OSD_WINDOWS) || defined(SDLMAME_WIN32) +// standard windows headers +#include +#include +#include +#include + +#ifdef __GNUC__ +#include +#endif +#endif +#include +#include +#include +#include +#include +// MAME headers +#include "osdcore.h" +#include "osdsync.h" + +#include "eminline.h" + +#if defined(SDLMAME_LINUX) || defined(SDLMAME_BSD) || defined(SDLMAME_HAIKU) || defined(SDLMAME_EMSCRIPTEN) || 
defined(SDLMAME_MACOSX) +#include +#endif + +#if defined(MAME_RDP) +#define osd_getenv getenv +#endif + +//============================================================ +// DEBUGGING +//============================================================ + +#define KEEP_STATISTICS (0) + +//============================================================ +// PARAMETERS +//============================================================ + +#define ENV_PROCESSORS "OSDPROCESSORS" +#define ENV_WORKQUEUEMAXTHREADS "OSDWORKQUEUEMAXTHREADS" + +#define SPIN_LOOP_TIME (osd_ticks_per_second() / 10000) + +//============================================================ +// MACROS +//============================================================ + +#if KEEP_STATISTICS +#define add_to_stat(v,x) do { (v) += (x); } while (0) +#define begin_timing(v) do { (v) -= get_profile_ticks(); } while (0) +#define end_timing(v) do { (v) += get_profile_ticks(); } while (0) +#else +#define add_to_stat(v,x) do { } while (0) +#define begin_timing(v) do { } while (0) +#define end_timing(v) do { } while (0) +#endif + +template +static void spin_while(const volatile _AtomType * volatile atom, const _MainType val, const osd_ticks_t timeout, const int invert = 0) +{ + osd_ticks_t stopspin = osd_ticks() + timeout; + + do { + int spin = 10000; + while (--spin) + { + if ((*atom != val) ^ invert) + return; + } + } while (((*atom == val) ^ invert) && osd_ticks() < stopspin); +} + +template +static void spin_while_not(const volatile _AtomType * volatile atom, const _MainType val, const osd_ticks_t timeout) +{ + spin_while<_AtomType, _MainType>(atom, val, timeout, 1); +} + +//============================================================ +// osd_num_processors +//============================================================ + +int osd_get_num_processors() +{ +#if defined(SDLMAME_EMSCRIPTEN) + // multithreading is not supported at this time + return 1; +#else + // max out at 4 for now since scaling above that seems to do poorly + 
return std::min(std::thread::hardware_concurrency(), 4U); +#endif +} + +//============================================================ +// TYPE DEFINITIONS +//============================================================ + +struct work_thread_info +{ + work_thread_info(uint32_t aid, osd_work_queue &aqueue) + : queue(aqueue) + , handle(nullptr) + , wakeevent(false, false) // auto-reset, not signalled + , active(0) + , id(aid) +#if KEEP_STATISTICS + , itemsdone(0) + , actruntime(0) + , runtime(0) + , spintime(0) + , waittime(0) +#endif + { + } + osd_work_queue & queue; // pointer back to the queue + std::thread * handle; // handle to the thread + osd_event wakeevent; // wake event for the thread + std::atomic active; // are we actively processing work? + uint32_t id; + +#if KEEP_STATISTICS + int32_t itemsdone; + osd_ticks_t actruntime; + osd_ticks_t runtime; + osd_ticks_t spintime; + osd_ticks_t waittime; +#endif +}; + + +struct osd_work_queue +{ + osd_work_queue() + : list(nullptr) + , tailptr(nullptr) + , free(nullptr) + , items(0) + , livethreads(0) + , waiting(0) + , exiting(0) + , threads(0) + , flags(0) + , doneevent(true, true) // manual reset, signalled +#if KEEP_STATISTICS + , itemsqueued(0) + , setevents(0) + , extraitems(0) + , spinloops(0) +#endif + { + } + + std::mutex lock; // lock for protecting the queue + std::atomic list; // list of items in the queue + osd_work_item ** volatile tailptr; // pointer to the tail pointer of work items in the queue + std::atomic free; // free list of work items + std::atomic items; // items in the queue + std::atomic livethreads; // number of live threads + std::atomic waiting; // is someone waiting on the queue to complete? + std::atomic exiting; // should the threads exit on their next opportunity? 
+ uint32_t threads; // number of threads in this queue + uint32_t flags; // creation flags + std::vector thread; // array of thread information + osd_event doneevent; // event signalled when work is complete + +#if KEEP_STATISTICS + std::atomic itemsqueued; // total items queued + std::atomic setevents; // number of times we called SetEvent + std::atomic extraitems; // how many extra items we got after the first in the queue loop + std::atomic spinloops; // how many times spinning bought us more items +#endif +}; + + +struct osd_work_item +{ + osd_work_item(osd_work_queue &aqueue) + : next(nullptr) + , queue(aqueue) + , callback(nullptr) + , param(nullptr) + , result(nullptr) + , event(nullptr) // manual reset, not signalled + , flags(0) + , done(false) + { + } + + osd_work_item * next; // pointer to next item + osd_work_queue & queue; // pointer back to the owning queue + osd_work_callback callback; // callback function + void * param; // callback parameter + void * result; // callback result + osd_event * event; // event signalled when complete + uint32_t flags; // creation flags + std::atomic done; // is the item done? 
+}; + +//============================================================ +// GLOBAL VARIABLES +//============================================================ + +int osd_num_processors = 0; + +//============================================================ +// FUNCTION PROTOTYPES +//============================================================ + +static int effective_num_processors(); +static void * worker_thread_entry(void *param); +static void worker_thread_process(osd_work_queue *queue, work_thread_info *thread); +static bool queue_has_list_items(osd_work_queue *queue); + +//============================================================ +// osd_thread_adjust_priority +//============================================================ + +int thread_adjust_priority(std::thread *thread, int adjust) +{ +#if defined(OSD_WINDOWS) || defined(SDLMAME_WIN32) + if (adjust) + SetThreadPriority((HANDLE)thread->native_handle(), THREAD_PRIORITY_ABOVE_NORMAL); + else + SetThreadPriority((HANDLE)thread->native_handle(), GetThreadPriority(GetCurrentThread())); +#endif +#if defined(SDLMAME_LINUX) || defined(SDLMAME_BSD) || defined(SDLMAME_HAIKU) || defined(SDLMAME_DARWIN) + struct sched_param sched; + int policy; + + if (pthread_getschedparam(thread->native_handle(), &policy, &sched) == 0) + { + sched.sched_priority += adjust; + if (pthread_setschedparam(thread->native_handle(), policy, &sched) == 0) + return true; + else + return false; + } +#endif + return true; +} + +//============================================================ +// osd_work_queue_alloc +//============================================================ + +osd_work_queue *osd_work_queue_alloc(int flags) +{ + int threadnum; + int numprocs = effective_num_processors(); + osd_work_queue *queue; + int osdthreadnum = 0; + int allocthreadnum; + const char *osdworkqueuemaxthreads = osd_getenv(ENV_WORKQUEUEMAXTHREADS); + + // allocate a new queue + queue = new osd_work_queue(); + + // initialize basic queue members + queue->tailptr = 
(osd_work_item **)&queue->list; + queue->flags = flags; + + // determine how many threads to create... + // on a single-CPU system, create 1 thread for I/O queues, and 0 threads for everything else + if (numprocs == 1) + threadnum = (flags & WORK_QUEUE_FLAG_IO) ? 1 : 0; + // on an n-CPU system, create n-1 threads for multi queues, and 1 thread for everything else + else + threadnum = (flags & WORK_QUEUE_FLAG_MULTI) ? (numprocs - 1) : 1; + + if (osdworkqueuemaxthreads != nullptr && sscanf(osdworkqueuemaxthreads, "%d", &osdthreadnum) == 1 && threadnum > osdthreadnum) + threadnum = osdthreadnum; + +#if defined(SDLMAME_EMSCRIPTEN) + // threads are not supported at all + threadnum = 0; +#endif + + // clamp to the maximum + queue->threads = std::min(threadnum, WORK_MAX_THREADS); + + // allocate memory for thread array (+1 to count the calling thread if WORK_QUEUE_FLAG_MULTI) + if (flags & WORK_QUEUE_FLAG_MULTI) + allocthreadnum = queue->threads + 1; + else + allocthreadnum = queue->threads; + +#if KEEP_STATISTICS + printf("osdprocs: %d effecprocs: %d threads: %d allocthreads: %d osdthreads: %d maxthreads: %d queuethreads: %d\n", osd_num_processors, numprocs, threadnum, allocthreadnum, osdthreadnum, WORK_MAX_THREADS, queue->threads); +#endif + + for (threadnum = 0; threadnum < allocthreadnum; threadnum++) + queue->thread.push_back(new work_thread_info(threadnum, *queue)); + + // iterate over threads + for (threadnum = 0; threadnum < queue->threads; threadnum++) + { + work_thread_info *thread = queue->thread[threadnum]; + + // create the thread + thread->handle = new std::thread(worker_thread_entry, thread); + if (thread->handle == nullptr) + goto error; + + // set its priority: I/O threads get high priority because they are assumed to be + // blocked most of the time; other threads just match the creator's priority + if (flags & WORK_QUEUE_FLAG_IO) + thread_adjust_priority(thread->handle, 1); + else + thread_adjust_priority(thread->handle, 0); + } + + // start a timer 
going for "waittime" on the main thread + if (flags & WORK_QUEUE_FLAG_MULTI) + { + begin_timing(queue->thread[queue->threads]->waittime); + } + return queue; + +error: + osd_work_queue_free(queue); + return nullptr; +} + + +//============================================================ +// osd_work_queue_items +//============================================================ + +int osd_work_queue_items(osd_work_queue *queue) +{ + // return the number of items currently in the queue + return queue->items; +} + + +//============================================================ +// osd_work_queue_wait +//============================================================ + +bool osd_work_queue_wait(osd_work_queue *queue, osd_ticks_t timeout) +{ + // if no threads, no waiting + if (queue->threads == 0) + return true; + + // if no items, we're done + if (queue->items == 0) + return true; + + // if this is a multi queue, help out rather than doing nothing + if (queue->flags & WORK_QUEUE_FLAG_MULTI) + { + work_thread_info *thread = queue->thread[queue->threads]; + + end_timing(thread->waittime); + + // process what we can as a worker thread + worker_thread_process(queue, thread); + + // if we're a high frequency queue, spin until done + if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ && queue->items != 0) + { + // spin until we're done + begin_timing(thread->spintime); + spin_while_not,int>(&queue->items, 0, timeout); + end_timing(thread->spintime); + + begin_timing(thread->waittime); + return (queue->items == 0); + } + begin_timing(thread->waittime); + } + + // reset our done event and double-check the items before waiting + queue->doneevent.reset(); + queue->waiting = true; + if (queue->items != 0) + queue->doneevent.wait(timeout); + queue->waiting = false; + + // return true if we actually hit 0 + return (queue->items == 0); +} + + +//============================================================ +// osd_work_queue_free +//============================================================ 
+ +void osd_work_queue_free(osd_work_queue *queue) +{ + // stop the timer for "waittime" on the main thread + if (queue->flags & WORK_QUEUE_FLAG_MULTI) + { + end_timing(queue->thread[queue->threads]->waittime); + } + + // signal all the threads to exit + queue->exiting = true; + for (int threadnum = 0; threadnum < queue->threads; threadnum++) + { + work_thread_info *thread = queue->thread[threadnum]; + thread->wakeevent.set(); + } + + // wait for all the threads to go away + for (int threadnum = 0; threadnum < queue->threads; threadnum++) + { + work_thread_info *thread = queue->thread[threadnum]; + + // block on the thread going away, then close the handle + if (thread->handle != nullptr) + { + thread->handle->join(); + delete thread->handle; + } + + } + +#if KEEP_STATISTICS + // output per-thread statistics + for (work_thread_info *thread : queue->thread) + { + osd_ticks_t total = thread->runtime + thread->waittime + thread->spintime; + printf("Thread %d: items=%9d run=%5.2f%% (%5.2f%%) spin=%5.2f%% wait/other=%5.2f%% total=%9d\n", + thread->id, thread->itemsdone, + (double)thread->runtime * 100.0 / (double)total, + (double)thread->actruntime * 100.0 / (double)total, + (double)thread->spintime * 100.0 / (double)total, + (double)thread->waittime * 100.0 / (double)total, + (uint32_t) total); + } +#endif + + // free the list + for (auto & th : queue->thread) + delete th; + queue->thread.clear(); + + // free all items in the free list + while (queue->free.load() != nullptr) + { + auto *item = (osd_work_item *)queue->free; + queue->free = item->next; + delete item->event; + delete item; + } + + // free all items in the active list + while (queue->list.load() != nullptr) + { + auto *item = (osd_work_item *)queue->list; + queue->list = item->next; + delete item->event; + delete item; + } + +#if KEEP_STATISTICS + printf("Items queued = %9d\n", queue->itemsqueued.load()); + printf("SetEvent calls = %9d\n", queue->setevents.load()); + printf("Extra items = %9d\n", 
queue->extraitems.load()); + printf("Spin loops = %9d\n", queue->spinloops.load()); +#endif + + // free the queue itself + delete queue; +} + + +//============================================================ +// osd_work_item_queue_multiple +//============================================================ + +osd_work_item *osd_work_item_queue_multiple(osd_work_queue *queue, osd_work_callback callback, int32_t numitems, void *parambase, int32_t paramstep, uint32_t flags) +{ + osd_work_item *itemlist = nullptr, *lastitem = nullptr; + osd_work_item **item_tailptr = &itemlist; + int itemnum; + + // loop over items, building up a local list of work + for (itemnum = 0; itemnum < numitems; itemnum++) + { + osd_work_item *item; + + // first allocate a new work item; try the free list first + { + std::lock_guard lock(queue->lock); + do + { + item = (osd_work_item *)queue->free; + } while (item != nullptr && !queue->free.compare_exchange_weak(item, item->next, std::memory_order_release, std::memory_order_relaxed)); + } + + // if nothing, allocate something new + if (item == nullptr) + { + // allocate the item + item = new osd_work_item(*queue); + if (item == nullptr) + return nullptr; + } + else + { + item->done = false; // needs to be set this way to prevent data race/usage of uninitialized memory on Linux + } + + // fill in the basics + item->next = nullptr; + item->callback = callback; + item->param = parambase; + item->result = nullptr; + item->flags = flags; + + // advance to the next + lastitem = item; + *item_tailptr = item; + item_tailptr = &item->next; + parambase = (uint8_t *)parambase + paramstep; + } + + // enqueue the whole thing within the critical section + { + std::lock_guard lock(queue->lock); + *queue->tailptr = itemlist; + queue->tailptr = item_tailptr; + } + + // increment the number of items in the queue + queue->items += numitems; + add_to_stat(queue->itemsqueued, numitems); + + // look for free threads to do the work + if (queue->livethreads < 
queue->threads) + { + int threadnum; + + // iterate over all the threads + for (threadnum = 0; threadnum < queue->threads; threadnum++) + { + work_thread_info *thread = queue->thread[threadnum]; + + // if this thread is not active, wake him up + if (!thread->active) + { + thread->wakeevent.set(); + add_to_stat(queue->setevents, 1); + + // for non-shared, the first one we find is good enough + if (--numitems == 0) + break; + } + } + } + + // if no threads, run the queue now on this thread + if (queue->threads == 0) + { + end_timing(queue->thread[0]->waittime); + worker_thread_process(queue, queue->thread[0]); + begin_timing(queue->thread[0]->waittime); + } + // only return the item if it won't get released automatically + return (flags & WORK_ITEM_FLAG_AUTO_RELEASE) ? nullptr : lastitem; +} + + +//============================================================ +// osd_work_item_wait +//============================================================ + +bool osd_work_item_wait(osd_work_item *item, osd_ticks_t timeout) +{ + // if we're done already, just return + if (item->done) + return true; + + // if we don't have an event, create one + if (item->event == nullptr) + { + std::lock_guard lock(item->queue.lock); + item->event = new osd_event(true, false); // manual reset, not signalled + } + else + item->event->reset(); + + // if we don't have an event, we need to spin (shouldn't ever really happen) + if (item->event == nullptr) + { + // TODO: do we need to measure the spin time here as well? and how can we do it? 
+ spin_while,int>(&item->done, 0, timeout); + } + + // otherwise, block on the event until done + else if (!item->done) + item->event->wait(timeout); + + // return true if the refcount actually hit 0 + return item->done; +} + + +//============================================================ +// osd_work_item_result +//============================================================ + +void *osd_work_item_result(osd_work_item *item) +{ + return item->result; +} + + +//============================================================ +// osd_work_item_release +//============================================================ + +void osd_work_item_release(osd_work_item *item) +{ + osd_work_item *next; + + // make sure we're done first + osd_work_item_wait(item, 100 * osd_ticks_per_second()); + + // add us to the free list on our queue + std::lock_guard lock(item->queue.lock); + do + { + next = (osd_work_item *) item->queue.free; + item->next = next; + } while (!item->queue.free.compare_exchange_weak(next, item, std::memory_order_release, std::memory_order_relaxed)); +} + + +//============================================================ +// effective_num_processors +//============================================================ + +static int effective_num_processors() +{ + int physprocs = osd_get_num_processors(); + + // osd_num_processors == 0 for 'auto' + if (osd_num_processors > 0) + { + return std::min(4 * physprocs, osd_num_processors); + } + else + { + int numprocs = 0; + + // if the OSDPROCESSORS environment variable is set, use that value if valid + // note that we permit more than the real number of processors for testing + const char *procsoverride = osd_getenv(ENV_PROCESSORS); + if (procsoverride != nullptr && sscanf(procsoverride, "%d", &numprocs) == 1 && numprocs > 0) + return std::min(4 * physprocs, numprocs); + + // otherwise, return the info from the system + return physprocs; + } +} + + +//============================================================ +// 
worker_thread_entry +//============================================================ + +static void *worker_thread_entry(void *param) +{ + auto *thread = (work_thread_info *)param; + osd_work_queue &queue = thread->queue; + + // loop until we exit + for ( ;; ) + { + // block waiting for work or exit + // bail on exit, and only wait if there are no pending items in queue + if (queue.exiting) + break; + + if (!queue_has_list_items(&queue)) + { + begin_timing(thread->waittime); + thread->wakeevent.wait( OSD_EVENT_WAIT_INFINITE); + end_timing(thread->waittime); + } + + if (queue.exiting) + break; + + // indicate that we are live + thread->active = true; + ++queue.livethreads; + + // process work items + for ( ;; ) + { + // process as much as we can + worker_thread_process(&queue, thread); + + // if we're a high frequency queue, spin for a while before giving up + if (queue.flags & WORK_QUEUE_FLAG_HIGH_FREQ && queue.list.load() == nullptr) + { + // spin for a while looking for more work + begin_timing(thread->spintime); + spin_while, osd_work_item *>(&queue.list, (osd_work_item *)nullptr, SPIN_LOOP_TIME); + end_timing(thread->spintime); + } + + // if nothing more, release the processor + if (!queue_has_list_items(&queue)) + break; + add_to_stat(queue.spinloops, 1); + } + + // decrement the live thread count + thread->active = false; + --queue.livethreads; + } + + return nullptr; +} + + +//============================================================ +// worker_thread_process +//============================================================ + +static void worker_thread_process(osd_work_queue *queue, work_thread_info *thread) +{ + int threadid = thread->id; + + begin_timing(thread->runtime); + + // loop until everything is processed + while (true) + { + osd_work_item *item = nullptr; + + bool end_loop = false; + + // use a critical section to synchronize the removal of items + { + std::lock_guard lock(queue->lock); + + if (queue->list.load() == nullptr) + { + end_loop = true; 
+ } + else + { + // pull the item from the queue + item = (osd_work_item *)queue->list; + if (item != nullptr) + { + queue->list = item->next; + if (queue->list.load() == nullptr) + queue->tailptr = (osd_work_item **)&queue->list; + } + } + } + + if (end_loop) + break; + + // process non-NULL items + if (item != nullptr) + { + // call the callback and stash the result + begin_timing(thread->actruntime); + item->result = (*item->callback)(item->param, threadid); + end_timing(thread->actruntime); + + // decrement the item count after we are done + --queue->items; + item->done = true; + add_to_stat(thread->itemsdone, 1); + + // if it's an auto-release item, release it + if (item->flags & WORK_ITEM_FLAG_AUTO_RELEASE) + osd_work_item_release(item); + + // set the result and signal the event + else + { + std::lock_guard lock(queue->lock); + + if (item->event != nullptr) + { + item->event->set(); + add_to_stat(item->queue.setevents, 1); + } + } + + // if we removed an item and there's still work to do, bump the stats + if (queue_has_list_items(queue)) + add_to_stat(queue->extraitems, 1); + } + } + + // we don't need to set the doneevent for multi queues because they spin + if (queue->waiting) + { + queue->doneevent.set(); + add_to_stat(queue->setevents, 1); + } + + end_timing(thread->runtime); +} + +bool queue_has_list_items(osd_work_queue *queue) +{ + std::lock_guard lock(queue->lock); + bool has_list_items = (queue->list.load() != nullptr); + return has_list_items; +} diff --git a/waterbox/ares64/ares/thirdparty/mame/osd/osdsync.h b/waterbox/ares64/ares/thirdparty/mame/osd/osdsync.h new file mode 100644 index 0000000000..3a286b60af --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/mame/osd/osdsync.h @@ -0,0 +1,167 @@ +// license:BSD-3-Clause +// copyright-holders:Olivier Galibert, R. 
Belmont +//============================================================ +// +// osdsync.h - Core synchronization functions +// +//============================================================ +#ifndef MAME_OSD_OSDSYNC_H +#define MAME_OSD_OSDSYNC_H + +#pragma once + +// C++ headers +#include +#include +#include + +#include "osdcomm.h" + +/*************************************************************************** + SYNCHRONIZATION INTERFACES - Events +***************************************************************************/ + +#define OSD_EVENT_WAIT_INFINITE (~(osd_ticks_t)0) + +/* osd_event is an opaque type which represents a setable/resettable event */ + +class osd_event +{ +public: + /*----------------------------------------------------------------------------- + constructor: allocate a new event + + Parameters: + + manualreset - boolean. If true, the event will be automatically set + to non-signalled after a thread successfully waited for + it. + initialstate - boolean. If true, the event is signalled initially. + + Return value: + + A pointer to the allocated event. + -----------------------------------------------------------------------------*/ + osd_event(int manualreset, int initialstate) + { + m_signalled = initialstate; + m_autoreset = !manualreset; + } + + ~osd_event() + { + } + + /*----------------------------------------------------------------------------- + wait: wait for an event to be signalled + If the event is in signalled state, the + function returns immediately. If not it will wait for the event + to become signalled. 
+ + Parameters: + + timeout - timeout in osd_ticks + + Return value: + + true: The event was signalled + false: A timeout occurred + -----------------------------------------------------------------------------*/ + + bool wait(osd_ticks_t timeout) + { + if (timeout == OSD_EVENT_WAIT_INFINITE) + timeout = osd_ticks_per_second() * (osd_ticks_t)10000; + + std::unique_lock lock(m_mutex); + if (!timeout) + { + if (!m_signalled) + { + return false; + } + } + else + { + if (!m_signalled) + { + uint64_t msec = timeout * 1000 / osd_ticks_per_second(); + + do { + if (m_cond.wait_for(lock, std::chrono::milliseconds(msec)) == std::cv_status::timeout) + { + if (!m_signalled) + { + return false; + } + else + break; + } else + break; + + } while (true); + } + } + + if (m_autoreset) + m_signalled = 0; + + return true; + } + + /*----------------------------------------------------------------------------- + osd_event_reset: reset an event to non-signalled state + + Parameters: + + None + + Return value: + + None + -----------------------------------------------------------------------------*/ + void reset() + { + m_mutex.lock(); + m_signalled = false; + m_mutex.unlock(); + } + + /*----------------------------------------------------------------------------- + osd_event_set: set an event to signalled state + + Parameters: + + None + + Return value: + + None + + Notes: + + All threads waiting for the event will be signalled. 
+ -----------------------------------------------------------------------------*/ + void set() + { + m_mutex.lock(); + if (m_signalled == false) + { + m_signalled = true; + if (m_autoreset) + m_cond.notify_one(); + else + m_cond.notify_all(); + } + m_mutex.unlock(); + } + +private: + std::mutex m_mutex; + std::condition_variable m_cond; + std::atomic m_autoreset; + std::atomic m_signalled; + +}; + +#endif // MAME_OSD_OSDSYNC_H diff --git a/waterbox/ares64/ares/thirdparty/sljit.h b/waterbox/ares64/ares/thirdparty/sljit.h new file mode 100644 index 0000000000..b65afa8c53 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit.h @@ -0,0 +1,2 @@ +#define SLJIT +#include "sljit/sljit_src/sljitLir.h" diff --git a/waterbox/ares64/ares/thirdparty/sljit/.gitignore b/waterbox/ares64/ares/thirdparty/sljit/.gitignore new file mode 100644 index 0000000000..83f5a0011a --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/.gitignore @@ -0,0 +1,2 @@ +bin +taring diff --git a/waterbox/ares64/ares/thirdparty/sljit/API_CHANGES b/waterbox/ares64/ares/thirdparty/sljit/API_CHANGES new file mode 100644 index 0000000000..f0a67d7b9d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/API_CHANGES @@ -0,0 +1,169 @@ +This file is the short summary of the API changes: + +27.05.2021 - Non-backward compatible + The comparison types with the 32 suffix are removed from the + project. The sljit_set_current_flags has a new flag which + must be set when the flags are set by a 32 bit operation. + +04.05.2021 - Non-backward compatible + The mul overflow comparison type is removed from the project + and the normal overflow type should be used instead. + +28.04.2021 - Non-backward compatible + The current_flags argument of sljit_set_current_flags must + provide information about the instructions which set the CPU + status flags. 
+ +16.08.2020 - Non-backward compatible + A second parameter has been added to sljit_create_compiler() + and sljit_free_code() to pass some data to the executable + allocator functions. + +24.01.2020 - Non-backward compatible + The SLJIT_MOV instructions does not support SLJIT_UNDEFINED + as destination. New prefetch instructions has been added + instead. + +20.01.2019 - Non-backward compatible + The check_sljit_emit_fast_return function is removed, and + this operation is available through check_sljit_emit_op_src. + +16.01.2019 - Backward compatible + A new opcode (SLJIT_ENDBR) is added to support + Intel Control-flow Enforcement Technology (CET). + +08.01.2018 - Non-backward compatible + Fields of sljit_stack are renamed to fit a + top-down stack better. + +02.01.2018 - Non-backward compatible + Immediate source argument has not been supported + for NOT, NEG, CLZ, and fast_return instructions + anymore. No CPU supports immedate arguments for + these opcodes. + +26.12.2017 - Non-backward compatible + The MOVU opcodes are removed because the emulation + is inefficient. The sljit_emit_mem() operation is + added instead. + +18.10.2017 - Non-backward compatible + The SLJIT_CALL0 - SLJIT_CALL3 jump types are + replaced by sljit_emit_call and sljit_emit_icall + function calls. These functions allows declaring + the argument types. + +06.05.2017 - Non-backward compatible + Src argument is removed from sljit_emit_op_flags. + +24.04.2017 - Non-backward compatible + The sljit_is_fpu_available function is replaced + by sljit_has_cpu_feature. + +20.04.2017 - Non-backward compatible + x86 specific cmov is changed to a general function + +27.03.2017 - Non-backward compatible + JIT stack is changed from bottom-up to top-town. + +15.01.2017 - Non-backward compatible + Move with update may modifiy flags, the base register + can only be used once and [reg+reg< 0 is not supported anymore. 
+ +12.01.2017 - Non-backward compatible + Introducing a new flag mechanism which provides better + compatibility with CPUs without flags. Only two flags + remain: zero and variable. The current type of the + variable flag is specified by the arithmetic operator. + The SLJIT_KEEP_FLAGS is removed. + +29.02.2016 - Non-backward compatible + Several types and instructions are renamed to improve + readability. In general byte, half, and int are renamed + to 8, 16, and 32. Floating point types are also renamed + from d and s to f64 and f32. + + [s|u]b -> [s|u]8 (8 bit values) + [s|u]h -> [s|u]16 (16 bit values) + [s|u]i -> [s|u]32 (32 bit values) + d -> f64 (64 bit floating point value) + s -> f32 (32 bit floating point value) + +18.05.2015 - Non-backward compatible + SLJIT_[I|]L[U|S]DIV is renamed to SLJIT_[I|][U|S]DIVMOD + +29.09.2014 - Non-backward compatible + The sljit_create_compiler, sljit_allocate_stack, and + sljit_free_stack functions have an allocator_data + argument now. + +19.09.2014 - Non-backward compatible + Using I, D, S prefixes in conditional and floating + point operations. And an L prefix to long multiplication + and division (op0 opcodes). + +11.08.2014 - Non-backward compatible + A currently unused options parameter is added to sljit_emit_enter + and sljit_set_context. + +06.07.2014 - Non-backward compatible + SCRATCH registers are renamed to Rx and SAVED registers + are renamed to Sx. See the explanation of these registers + in sljitLir.h. + +31.05.2014 - Non-backward compatible + SLJIT_TEMPORARY_EREGx registers were not renamed to + SLJIT_SCRATCH_EREGx when the change was done on 08.11.2012 + +05.03.2014 - Backward compatible + The sljit_set_target now supports those jumps, which + does not created with SLJIT_REWRITABLE_JUMP flag. + Reason: sljit_emit_ijump does not support conditional + jumps. + +03.03.2014 - Non-backward compatible + SLJIT_MOV_UI cannot be combined with SLJIT_INT_OP. 
+ Reason: SLJIT_INT_OP flag is not recommended to use + directly, and SLJIT_IMOV has no sign bit. + +29.01.2014 - Backward compatible + Bits assigned to SLJIT_MEM and SLJIT_IMM flags are changed. + Reason: the most common cases are fits into one byte now, + and more registers can be supported in the future. + +08.11.2012 - Non-backward compatible + SLJIT_TEMPORARY_REGx registers are renamed to SLJIT_SCRATCH_REGx. + +07.11.2012 - Non-backward compatible + sljit_emit_cond_value is renamed to sljit_emit_op_flags. An + extra source argument is added which will be used in the future. + +05.11.2012 - Backward compatible + sljit_emit_cond_value now supports SLJIT_AND and SLJIT_INT_OP + flags, which makes this function complete. + +01.11.2012 - Non-backward compatible + SLJIT_F* opcodes are renamed to SLJIT_*D to show that + they are double precision operators. Furthermore + SLJIT_*S single precision opcodes are added. + +01.11.2012 - Non-backward compatible + Register arguments of operations with SLJIT_INT_OP flag + must be computed by another operation with SLJIT_INT_OP flag. + The same way as SLJIT_SINGLE_OP flag works with floating point + numbers. See the description of SLJIT_INT_OP. + +01.11.2012 - Backward compatible + All operations whose support the SLJIT_INT_OP flag, have an + alternate name now, which includes the SLJIT_INT_OP. These + names starting with I. + +31.10.2012 - Non-backward compatible + Renaming sljit_w to sljit_sw, sljit_i to sljit_si, sljit_h + to sljit_sh, and sljit_b to sljit_sb. Reason: their sign + bit is part of the type now. + +20.10.2012 - Non-backward compatible + Renaming SLJIT_C_FLOAT_NAN to SLJIT_C_FLOAT_UNORDERED. + Reason: all architectures call these unordered comparions. 
diff --git a/waterbox/ares64/ares/thirdparty/sljit/INTERNAL_CHANGES b/waterbox/ares64/ares/thirdparty/sljit/INTERNAL_CHANGES new file mode 100644 index 0000000000..7897390cc0 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/INTERNAL_CHANGES @@ -0,0 +1,8 @@ +This file is the short summary of the internal changes: + +18.11.2012 + Switching from stdcall to cdecl on x86-32. Fastcall is still the default + on GCC and MSVC. Now Intel C compilers are supported. + +20.10.2012 + Supporting Sparc-32 CPUs. diff --git a/waterbox/ares64/ares/thirdparty/sljit/Makefile b/waterbox/ares64/ares/thirdparty/sljit/Makefile new file mode 100644 index 0000000000..0f8f89c1e4 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/Makefile @@ -0,0 +1,96 @@ +ifdef CROSS_COMPILER +CC = $(CROSS_COMPILER) +else +ifndef CC +# default compiler +CC = gcc +endif +endif + +ifndef EXTRA_CPPFLAGS +EXTRA_CPPFLAGS= +endif + +ifndef EXTRA_LDFLAGS +EXTRA_LDFLAGS= +endif + +CPPFLAGS = $(EXTRA_CPPFLAGS) -Isljit_src +CFLAGS += -O2 -Wall +REGEX_CFLAGS += $(CFLAGS) -fshort-wchar +LDFLAGS = $(EXTRA_LDFLAGS) + +BINDIR = bin +SRCDIR = sljit_src +TESTDIR = test_src +REGEXDIR = regex_src +EXAMPLEDIR = doc/tutorial + +TARGET = $(BINDIR)/sljit_test $(BINDIR)/regex_test +EXAMPLE_TARGET = $(BINDIR)/func_call $(BINDIR)/first_program $(BINDIR)/branch $(BINDIR)/loop $(BINDIR)/array_access $(BINDIR)/func_call $(BINDIR)/struct_access $(BINDIR)/temp_var $(BINDIR)/brainfuck + +SLJIT_HEADERS = $(SRCDIR)/sljitLir.h $(SRCDIR)/sljitConfig.h $(SRCDIR)/sljitConfigInternal.h + +SLJIT_LIR_FILES = $(SRCDIR)/sljitLir.c $(SRCDIR)/sljitUtils.c \ + $(SRCDIR)/sljitExecAllocator.c $(SRCDIR)/sljitProtExecAllocator.c $(SRCDIR)/sljitWXExecAllocator.c \ + $(SRCDIR)/sljitNativeARM_32.c $(SRCDIR)/sljitNativeARM_T2_32.c $(SRCDIR)/sljitNativeARM_64.c \ + $(SRCDIR)/sljitNativeMIPS_common.c $(SRCDIR)/sljitNativeMIPS_32.c $(SRCDIR)/sljitNativeMIPS_64.c \ + $(SRCDIR)/sljitNativePPC_common.c $(SRCDIR)/sljitNativePPC_32.c 
$(SRCDIR)/sljitNativePPC_64.c \ + $(SRCDIR)/sljitNativeSPARC_common.c $(SRCDIR)/sljitNativeSPARC_32.c \ + $(SRCDIR)/sljitNativeS390X.c \ + $(SRCDIR)/sljitNativeX86_common.c $(SRCDIR)/sljitNativeX86_32.c $(SRCDIR)/sljitNativeX86_64.c + +.PHONY: all clean examples + +all: $(TARGET) + +clean: + -$(RM) $(BINDIR)/*.o $(BINDIR)/sljit_test $(BINDIR)/regex_test $(EXAMPLE_TARGET) + +$(BINDIR)/.keep : + mkdir -p $(BINDIR) + @touch $@ + +$(BINDIR)/sljitLir.o : $(BINDIR)/.keep $(SLJIT_LIR_FILES) $(SLJIT_HEADERS) + $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(SRCDIR)/sljitLir.c + +$(BINDIR)/sljitMain.o : $(TESTDIR)/sljitMain.c $(BINDIR)/.keep $(SLJIT_HEADERS) + $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(TESTDIR)/sljitMain.c + +$(BINDIR)/regexMain.o : $(REGEXDIR)/regexMain.c $(BINDIR)/.keep $(SLJIT_HEADERS) + $(CC) $(CPPFLAGS) $(REGEX_CFLAGS) -c -o $@ $(REGEXDIR)/regexMain.c + +$(BINDIR)/regexJIT.o : $(REGEXDIR)/regexJIT.c $(BINDIR)/.keep $(SLJIT_HEADERS) $(REGEXDIR)/regexJIT.h + $(CC) $(CPPFLAGS) $(REGEX_CFLAGS) -c -o $@ $(REGEXDIR)/regexJIT.c + +$(BINDIR)/sljit_test: $(BINDIR)/.keep $(BINDIR)/sljitMain.o $(TESTDIR)/sljitTest.c $(SRCDIR)/sljitLir.c $(SLJIT_LIR_FILES) $(SLJIT_HEADERS) $(TESTDIR)/sljitConfigPre.h $(TESTDIR)/sljitConfigPost.h + $(CC) $(CPPFLAGS) -DSLJIT_HAVE_CONFIG_PRE=1 -I$(TESTDIR) $(CFLAGS) $(LDFLAGS) $(BINDIR)/sljitMain.o $(TESTDIR)/sljitTest.c $(SRCDIR)/sljitLir.c -o $@ -lm -lpthread + +$(BINDIR)/regex_test: $(BINDIR)/.keep $(BINDIR)/regexMain.o $(BINDIR)/regexJIT.o $(BINDIR)/sljitLir.o + $(CC) $(CFLAGS) $(LDFLAGS) $(BINDIR)/regexMain.o $(BINDIR)/regexJIT.o $(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +examples: $(EXAMPLE_TARGET) + +$(BINDIR)/first_program: $(EXAMPLEDIR)/first_program.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/first_program.c $(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +$(BINDIR)/branch: $(EXAMPLEDIR)/branch.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/branch.c 
$(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +$(BINDIR)/loop: $(EXAMPLEDIR)/loop.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/loop.c $(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +$(BINDIR)/array_access: $(EXAMPLEDIR)/array_access.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/array_access.c $(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +$(BINDIR)/func_call: $(EXAMPLEDIR)/func_call.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/func_call.c $(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +$(BINDIR)/struct_access: $(EXAMPLEDIR)/struct_access.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/struct_access.c $(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +$(BINDIR)/temp_var: $(EXAMPLEDIR)/temp_var.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/temp_var.c $(BINDIR)/sljitLir.o -o $@ -lm -lpthread + +$(BINDIR)/brainfuck: $(EXAMPLEDIR)/brainfuck.c $(BINDIR)/.keep $(BINDIR)/sljitLir.o + $(CC) $(CPPFLAGS) $(LDFLAGS) $(EXAMPLEDIR)/brainfuck.c $(BINDIR)/sljitLir.o -o $@ -lm -lpthread diff --git a/waterbox/ares64/ares/thirdparty/sljit/README b/waterbox/ares64/ares/thirdparty/sljit/README new file mode 100644 index 0000000000..1fc2337e41 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/README @@ -0,0 +1,40 @@ + + SLJIT - Stack Less JIT Compiler + +Purpose: + A simple, machine independent JIT compiler, which suitable for + translating interpreted byte code to machine code. The sljitLir.h + describes the LIR (low-level intermediate representation) of SLJIT. + +Compatible: + Any C (C++) compiler. At least I hope so. + +Using sljit: + Copy the content of sljit_src directory into your project source directory. + Add sljitLir.c source file to your build environment. All other files are + included by sljitLir.c (if required). Define the machine by SLJIT_CONFIG_* + selector. 
See sljitConfig.h for all possible values. For C++ compilers, + rename sljitLir.c to sljitLir.cpp. + +More info: + https://zherczeg.github.io/sljit/ + +Contact: + hzmester@freemail.hu + +Special thanks: + Alexander Nasonov + Carlo Marcelo Arenas Belón + Christian Persch + Daniel Richard G. + Giuseppe D'Angelo + H.J. Lu + James Cowgill + Jason Hood + Jiong Wang (TileGX support) + Marc Mutz + Martin Storsjö + Michael McConville + Walter Lee + Wen Xichang + YunQiang Su diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/overview.txt b/waterbox/ares64/ares/thirdparty/sljit/doc/overview.txt new file mode 100644 index 0000000000..7d79e7cad4 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/overview.txt @@ -0,0 +1,226 @@ +The following text is a brief overview of those key +principles which are useful to know when generating code +with SLJIT. Further details can be found in sljitLir.h. + +---------------------------------------------------------------- + What is SLJIT? +---------------------------------------------------------------- + +SLJIT is a platform independent assembler which + - provides access to common CPU features + - can be easily ported to wide-spread CPU + architectures (e.g. x86, ARM, POWER, MIPS, SPARC) + +The key challenge of this project is finding a common +subset of CPU features which + - covers traditional assembly level programming + - can be translated to machine code efficiently + +This aim is achieved by selecting those instructions / CPU +features which are either available on all platforms or +simulating them has a low performance overhead. + +For example, some SLJIT instructions support base register +pre-update when [base+offs] memory accessing mode is used. +Although this feature is only available on ARM and POWER +CPUs, the simulation overhead is low on other CPUs. 
+ +---------------------------------------------------------------- + The generic CPU model of SLJIT +---------------------------------------------------------------- + +The CPU has + - integer registers, which can store either an + int32_t (4 byte) or intptr_t (4 or 8 byte) value + - floating point registers, which can store either a + single (4 byte) or double (8 byte) precision value + - boolean status flags + +*** Integer registers: + +The most important rule is: when a source operand of +an instruction is a register, the data type of the +register must match the data type expected by an +instruction. + +For example, the following code snippet +is a valid instruction sequence: + + sljit_emit_op1(compiler, SLJIT_IMOV, + SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0); + // An int32_t value is loaded into SLJIT_R0 + sljit_emit_op1(compiler, SLJIT_INEG, + SLJIT_R0, 0, SLJIT_R0, 0); + // the int32_t value in SLJIT_R0 is negated + // and the type of the result is still int32_t + +The next code snippet is not allowed: + + sljit_emit_op1(compiler, SLJIT_MOV, + SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0); + // An intptr_t value is loaded into SLJIT_R0 + sljit_emit_op1(compiler, SLJIT_INEG, + SLJIT_R0, 0, SLJIT_R0, 0); + // The result of SLJIT_INEG instruction + // is undefined. Even crash is possible + // (e.g. on MIPS-64). + +However, it is always allowed to overwrite a +register regardless its previous value: + + sljit_emit_op1(compiler, SLJIT_MOV, + SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0); + // An intptr_t value is loaded into SLJIT_R0 + sljit_emit_op1(compiler, SLJIT_IMOV, + SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R2), 0); + // From now on SLJIT_R0 contains an int32_t + // value. The previous value is discarded. + +Type conversion instructions are provided to convert an +int32_t value to an intptr_t value and vice versa. In +certain architectures these conversions are nops (no +instructions are emitted). 
+ +Memory accessing: + +Registers arguments of SLJIT_MEM1 / SLJIT_MEM2 addressing +modes must contain intptr_t data. + +Signed / unsigned values: + +Most operations are executed in the same way regardless +the value is signed or unsigned. These operations have +only one instruction form (e.g. SLJIT_ADD / SLJIT_MUL). +Instructions where the result depends on the sign have +two forms (e.g. integer division, long multiply). + +*** Floating point registers + +Floating point registers can either contain a single +or double precision value. Similar to integer registers, +the data type of the value stored in a source register +must match the data type expected by the instruction. +Otherwise the result is undefined (even crash is possible). + +Rounding: + +Similar to standard C, floating point computation +results are rounded toward zero. + +*** Boolean status flags: + +Conditional branches usually depend on the value +of CPU status flags. These status flags are boolean +values and can be set by certain instructions. + +To achive maximum efficiency and portability, the +following rules were introduced: + - Most instructions can freely modify these status + flags except if SLJIT_KEEP_FLAGS is passed. + - The SLJIT_KEEP_FLAGS option may have a performance + overhead, so it should only be used when necessary. + - The SLJIT_SET_E, SLJIT_SET_U, etc. options can + force an instruction to correctly set the + specified status flags. However, all other + status flags are undefined. This rule must + always be kept in mind! + - Status flags cannot be controlled directly + (there are no set/clear/invert operations) + +The last two rules allows efficent mapping of status flags. +For example the arithmetic and multiply overflow flag is +mapped to the same overflow flag bit on x86. This is allowed, +since no instruction can set both of these flags. When +either of them is set by an instruction, the other can +have any value (this satisfies the "all other flags are +undefined" rule). 
Therefore mapping two SLJIT flags to the +same CPU flag is possible. Even though SLJIT supports +a dozen status flags, they can be efficiently mapped +to CPUs with only 4 status flags (e.g. ARM or SPARC). + +---------------------------------------------------------------- + Complex instructions +---------------------------------------------------------------- + +We noticed, that introducing complex instructions for common +tasks can improve performance. For example, compare and +branch instruction sequences can be optimized if certain +conditions apply, but these conditions depend on the target +CPU. SLJIT can do these optimizations, but it needs to +understand the "purpose" of the generated code. Static +instruction analysis has a large performance overhead +however, so we choose another approach: we introduced +complex instruction forms for certain non-atomic tasks. +SLJIT can optimize these "instructions" more efficiently +since the "purpose" is known to the compiler. These complex +instruction forms can often be assembled from other SLJIT +instructions, but we recommended to use them since the +compiler can optimize them on certain CPUs. + +---------------------------------------------------------------- + Generating functions +---------------------------------------------------------------- + +SLJIT is often used for generating function bodies which are +called from C. SLJIT provides two complex instructions for +generating function entry and return: sljit_emit_enter and +sljit_emit_return. The sljit_emit_enter also initializes the +"compiling context" which specify the current register mapping, +local space size, etc. configurations. The sljit_set_context +can also set this context without emitting any machine +instructions. + +This context is important since it affects the compiler, so +the first instruction after a compiler is created must be +either sljit_emit_enter or sljit_set_context. 
The context can +be changed by calling sljit_emit_enter or sljit_set_context +again. + +---------------------------------------------------------------- + All-in-one building +---------------------------------------------------------------- + +Instead of using a separate library, the whole SLJIT +compiler infrastructure can be directly included: + +#define SLJIT_CONFIG_STATIC 1 +#include "sljitLir.c" + +This approach is useful for single file compilers. + +Advantages: + - Everything provided by SLJIT is available + (no need to include anything else). + - Configuring SLJIT is easy + (e.g. redefining SLJIT_MALLOC / SLJIT_FREE). + - The SLJIT compiler API is hidden from the + world which improves securtity. + - The C compiler can optimize the SLJIT code + generator (e.g. removing unused functions). + +---------------------------------------------------------------- + Types and macros +---------------------------------------------------------------- + +The sljitConfig.h contains those defines, which controls +the compiler. The beginning of sljitConfigInternal.h +lists architecture specific types and macros provided +by SLJIT. Some of these macros: + +SLJIT_DEBUG : enabled by default + Enables assertions. Should be disabled in release mode. + +SLJIT_VERBOSE : enabled by default + When this macro is enabled, the sljit_compiler_verbose + function can be used to dump SLJIT instructions. + Otherwise this function is not available. Should be + disabled in release mode. + +SLJIT_SINGLE_THREADED : disabled by default + Single threaded programs can define this flag which + eliminates the pthread dependency. + +sljit_sw, sljit_uw, etc. : + It is recommended to use these types instead of long, + intptr_t, etc. Improves readability / portability of + the code. 
diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/99bottles.bf b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/99bottles.bf new file mode 100644 index 0000000000..4a4a46a0d8 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/99bottles.bf @@ -0,0 +1,247 @@ +########################## +### +### Severely updated version! +### (now says "1 bottle" and +### contains no extra "0" verse) +### +########################## +### 99 Bottles of Beer ### +### coded in Brainfuck ### +### with explanations ### +########################## +# +# This Bottles of Beer program +# was written by Andrew Paczkowski +# Coder Alias: thepacz +# three_halves_plus_one@yahoo.com +##### + +> 0 in the zeroth cell ++++++++>++++++++++[<+++++>-] 57 in the first cell or "9" ++++++++>++++++++++[<+++++>-] 57 in second cell or "9" +++++++++++ 10 in third cell +>+++++++++ 9 in fourth cell + +########################################## +### create ASCII chars in higher cells ### +########################################## + +>>++++++++[<++++>-] " " +>++++++++++++++[<+++++++>-] b ++>+++++++++++[<++++++++++>-] o +++>+++++++++++++++++++[<++++++>-] t +++>+++++++++++++++++++[<++++++>-] t +>++++++++++++[<+++++++++>-] l ++>++++++++++[<++++++++++>-] e ++>+++++++++++++++++++[<++++++>-] s +>++++++++[<++++>-] " " ++>+++++++++++[<++++++++++>-] o +++>++++++++++[<++++++++++>-] f +>++++++++[<++++>-] " " +>++++++++++++++[<+++++++>-] b ++>++++++++++[<++++++++++>-] e ++>++++++++++[<++++++++++>-] e +>+++++++++++++++++++[<++++++>-] r +>++++++++[<++++>-] " " ++>+++++++++++[<++++++++++>-] o +>+++++++++++[<++++++++++>-] n +>++++++++[<++++>-] " " +++>+++++++++++++++++++[<++++++>-] t +++++>++++++++++[<++++++++++>-] h ++>++++++++++[<++++++++++>-] e +>++++++++[<++++>-] " " +++>+++++++++++++[<+++++++++>-] w ++>++++++++++++[<++++++++>-] a +>++++++++++++[<+++++++++>-] l +>++++++++++++[<+++++++++>-] l +>+++++[<++>-] LF +++>+++++++++++++++++++[<++++++>-] t ++>++++++++++++[<++++++++>-] a 
++++>+++++++++++++[<++++++++>-] k ++>++++++++++[<++++++++++>-] e +>++++++++[<++++>-] " " ++>+++++++++++[<++++++++++>-] o +>+++++++++++[<++++++++++>-] n ++>++++++++++[<++++++++++>-] e +>++++++++[<++++>-] " " +>++++++++++[<++++++++++>-] d ++>+++++++++++[<++++++++++>-] o +++>+++++++++++++[<+++++++++>-] w +>+++++++++++[<++++++++++>-] n +>++++++++[<++++>-] " " ++>++++++++++++[<++++++++>-] a +>+++++++++++[<++++++++++>-] n +>++++++++++[<++++++++++>-] d +>++++++++[<++++>-] " " +++>+++++++++++[<++++++++++>-] p ++>++++++++++++[<++++++++>-] a ++>+++++++++++++++++++[<++++++>-] s ++>+++++++++++++++++++[<++++++>-] s +>++++++++[<++++>-] " " ++>+++++++++++++[<++++++++>-] i +++>+++++++++++++++++++[<++++++>-] t +>++++++++[<++++>-] " " ++>++++++++++++[<++++++++>-] a +>+++++++++++++++++++[<++++++>-] r ++>+++++++++++[<++++++++++>-] o +>+++++++++++++[<+++++++++>-] u +>+++++++++++[<++++++++++>-] n +>++++++++++[<++++++++++>-] d +>+++++[<++>-] LF ++++++++++++++ CR + +[<]>>>> go back to fourth cell + +################################# +### initiate the display loop ### +################################# + +[ loop + < back to cell 3 + [ loop + [>]<< go to last cell and back to LF + .. output 2 newlines + [<]> go to first cell + + ################################### + #### begin display of characters### + ################################### + # + #.>.>>>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> + #X X b o t t l e s o f b e e r + #.>.>.>.>.>.>.>.>.>.>.>. + #o n t h e w a l l N + #[<]> go to first cell + #.>.>>>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>>>>>>>>>>>>>.> + #X X b o t t l e s o f b e e r N + #.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> + #t a k e o n e d o w n a n d p a s s + #.>.>.>.>.>.>.>.>.>. + #i t a r o u n d N + ##### + + [<]>> go to cell 2 + - subtract 1 from cell 2 + < go to cell 1 + + ######################## + ### display last line ## + ######################## + # + #.>.>>>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> + #X X b o t t l e s o f b e e r + #.>.>.>.>.>.>.>.>.>.>. 
+ #o n t h e w a l l + ##### + + [<]>>>- go to cell 3/subtract 1 + ] end loop when cell 3 is 0 + ++++++++++ add 10 to cell 3 + <++++++++++ back to cell 2/add 10 + <- back to cell 1/subtract 1 + [>]<. go to last line/carriage return + [<]> go to first line + +######################## +### correct last line ## +######################## +# +#.>.>>>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> +#X X b o t t l e s o f b e e r +#.>.>.>.>.>.>.>.>.>.>. +#o n t h e w a l l +##### + + [<]>>>>- go to cell 4/subtract 1 +] end loop when cell 4 is 0 + +############################################################## +### By this point verses 9910 are displayed but to work ### +### with the lower numbered verses in a more readable way ### +### we initiate a new loop for verses 9{CODE} that will not ### +### use the fourth cell at all ### +############################################################## + ++ add 1 to cell four (to keep it nonzero) +<-- back to cell 3/subtract 2 + +[ loop + [>]<< go to last cell and back to LF + .. output 2 newlines + [<]> go to first cell + + ################################### + #### begin display of characters### + ################################### + # + #>.>>>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> + # X b o t t l e s o f b e e r + #.>.>.>.>.>.>.>.>.>.>.>. + #o n t h e w a l l N + #[<]> go to first cell + #>.>>>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>>>>>>>>>>>>>.> + # X b o t t l e s o f b e e r N + #.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> + #t a k e o n e d o w n a n d p a s s + #.>.>.>.>.>.>.>.>.>. + #i t a r o u n d N + ##### + + [<]>> go to cell 2 + - subtract 1 from cell 2 + + ######################## + ### display last line ## + ######################## + # + #.>>>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> + #X b o t t l e s o f b e e r + #.>.>.>.>.>.>.>.>.>.>. + #o n t h e w a l l + ##### + + [<]>>>- go to cell 3/subtract 1 +] end loop when cell 3 is 0 ++ add 1 to cell 3 to keep it nonzero + +[>]<. 
go to last line/carriage return +[<]> go to first line + +######################## +### correct last line ## +######################## +# +#>.>>>.>.>.>.>.>.>.>>.>.>.>.>.>.>.>.>.> +# X b o t t l e o f b e e r +#.>.>.>.>.>.>.>.>.>.>.<<<<. +#o n t h e w a l l +##### + +[>]<< go to last cell and back to LF +.. output 2 newlines +[<]> go to first line + +######################### +### the final verse ## +######################### +# +#>.>>>.>.>.>.>.>.>.>>.>.>.>.>.>.>.>.>.> +# X b o t t l e o f b e e r +#.>.>.>.>.>.>.>.>.>.>.>. +#o n t h e w a l l N +#[<]> go to first cell +#>.>>>.>.>.>.>.>.>.>>.>.>.>.>.>.>.>.>>>>>>>>>>>>>.> +# X b o t t l e o f b e e r N +#.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> +#t a k e o n e d o w n a n d p a s s +#.>.>.>.>.>.>.>.>.>. +#i t a r o u n d N +#[>]< go to last line +#<<<.<<.<<<. +# n o +#[<]>>>> go to fourth cell +#>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.>.> +# b o t t l e s o f b e e r +#.>.>.>.>.>.>.>.>.>.>.>. +#o n t h e w a l l N +#####fin## \ No newline at end of file diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/README b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/README new file mode 100644 index 0000000000..1279694a2e --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/README @@ -0,0 +1,3 @@ +These files were contributed by Wen Xichang. + +Copyright 2015 Wen Xichang (wenxichang@163.com). All rights reserved. 
diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/array_access.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/array_access.c new file mode 100644 index 0000000000..e819aaa913 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/array_access.c @@ -0,0 +1,71 @@ +#include "sljitLir.h" + +#include +#include + +typedef long (SLJIT_FUNC *func_arr_t)(long *arr, long narr); + +static long SLJIT_FUNC print_num(long a) +{ + printf("num = %ld\n", a); + return a + 1; +} + +/* + This example, we generate a function like this: + +long func(long *array, long narray) +{ + long i; + for (i = 0; i < narray; ++i) + print_num(array[i]); + return narray; +} + +*/ + +static int array_access(long *arr, long narr) +{ + void *code; + unsigned long len; + func_arr_t func; + + /* Create a SLJIT compiler */ + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + + sljit_emit_enter(C, 0, SLJIT_ARG1(SW), 1, 3, 0, 0, 0); + /* opt arg R S FR FS local_size */ + sljit_emit_op2(C, SLJIT_XOR, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_S2, 0); // S2 = 0 + sljit_emit_op1(C, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, narr); // S1 = narr + struct sljit_label *loopstart = sljit_emit_label(C); // loopstart: + struct sljit_jump *out = sljit_emit_cmp(C, SLJIT_GREATER_EQUAL, SLJIT_S2, 0, SLJIT_S1, 0); // S2 >= a --> jump out + + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_S2), SLJIT_WORD_SHIFT);// R0 = (long *)S0[S2]; + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0); + + sljit_emit_op2(C, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_IMM, 1); // S2 += 1 + sljit_set_label(sljit_emit_jump(C, SLJIT_JUMP), loopstart); // jump loopstart + sljit_set_label(out, sljit_emit_label(C)); // out: + sljit_emit_return(C, SLJIT_MOV, SLJIT_S1, 0); // return RET + + /* Generate machine code */ + code = sljit_generate_code(C); + len = sljit_get_generated_code_size(C); + + /* Execute code */ + func = 
(func_arr_t)code; + printf("func return %ld\n", func(arr, narr)); + + /* dump_code(code, len); */ + + /* Clean up */ + sljit_free_compiler(C); + sljit_free_code(code, NULL); + return 0; +} + +int main() +{ + long arr[8] = { 3, -10, 4, 6, 8, 12, 2000, 0 }; + return array_access(arr, 8); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/brainfuck.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/brainfuck.c new file mode 100644 index 0000000000..b4387cc193 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/brainfuck.c @@ -0,0 +1,260 @@ +/* + * Brainfuck interpreter with SLJIT + * + * Copyright 2015 Wen Xichang (wenxichang@163.com). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sljitLir.h" + +#include +#include + +#define BF_CELL_SIZE 3000 +#define BF_LOOP_LEVEL 256 + +static int readvalid(FILE *src) +{ + int chr; + + while ((chr = fgetc(src)) != EOF) { + switch (chr) { + case '+': + case '-': + case '>': + case '<': + case '.': + case ',': + case '[': + case ']': + return chr; + } + } + + return chr; +} + +/* reading same instruction, and count, for optimization */ +/* ++++ -> '+', 4 */ +static int gettoken(FILE *src, int *ntok) +{ + int chr = readvalid(src); + int chr2; + int cnt = 1; + + if (chr == EOF) + return EOF; + + if (chr == '.' 
|| chr == ',' || chr == '[' || chr == ']') { + *ntok = 1; + return chr; + } + + while ((chr2 = readvalid(src)) == chr) + cnt++; + + if (chr2 != EOF) + ungetc(chr2, src); + + *ntok = cnt; + return chr; +} + +/* maintaining loop matched [] */ +struct loop_node_st { + struct sljit_label *loop_start; + struct sljit_jump *loop_end; +}; + +/* stack of loops */ +static struct loop_node_st loop_stack[BF_LOOP_LEVEL]; +static int loop_sp; + +static int loop_push(struct sljit_label *loop_start, struct sljit_jump *loop_end) +{ + if (loop_sp >= BF_LOOP_LEVEL) + return -1; + + loop_stack[loop_sp].loop_start = loop_start; + loop_stack[loop_sp].loop_end = loop_end; + loop_sp++; + return 0; +} + +static int loop_pop(struct sljit_label **loop_start, struct sljit_jump **loop_end) +{ + if (loop_sp <= 0) + return -1; + + loop_sp--; + *loop_start = loop_stack[loop_sp].loop_start; + *loop_end = loop_stack[loop_sp].loop_end; + return 0; +} + +static void *SLJIT_FUNC my_alloc(long size, long n) +{ + return calloc(size, n); +} + +static void SLJIT_FUNC my_putchar(long c) +{ + putchar(c); +} + +static long SLJIT_FUNC my_getchar(void) +{ + return getchar(); +} + +static void SLJIT_FUNC my_free(void *mem) +{ + free(mem); +} + +#define loop_empty() (loop_sp == 0) + +/* compile bf source to a void func() */ +static void *compile(FILE *src, unsigned long *lcode) +{ + void *code = NULL; + int chr; + int nchr; + + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + struct sljit_jump *end; + struct sljit_label *loop_start; + struct sljit_jump *loop_end; + + int SP = SLJIT_S0; /* bf SP */ + int CELLS = SLJIT_S1; /* bf array */ + + sljit_emit_enter(C, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 2, 2, 0, 0, 0); /* opt arg R S FR FS local_size */ + + sljit_emit_op2(C, SLJIT_XOR, SP, 0, SP, 0, SP, 0); /* SP = 0 */ + + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, BF_CELL_SIZE); + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | 
SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_alloc));/* calloc(BF_CELL_SIZE, 1) => R0 */ + + end = sljit_emit_cmp(C, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); /* R0 == 0 --> jump end */ + + sljit_emit_op1(C, SLJIT_MOV, CELLS, 0, SLJIT_R0, 0); /* CELLS = R0 */ + + while ((chr = gettoken(src, &nchr)) != EOF) { + switch (chr) { + case '+': + case '-': + sljit_emit_op1(C, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM2(CELLS, SP), 0); /* R0 = CELLS[SP] */ + sljit_emit_op2(C, chr == '+' ? SLJIT_ADD : SLJIT_SUB, + SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, nchr); /* R0 ?= nchr */ + sljit_emit_op1(C, SLJIT_MOV_U8, SLJIT_MEM2(CELLS, SP), 0, SLJIT_R0, 0); /* CELLS[SP] = R0 */ + break; + case '>': + case '<': + sljit_emit_op2(C, chr == '>' ? SLJIT_ADD : SLJIT_SUB, + SP, 0, SP, 0, SLJIT_IMM, nchr); /* SP ?= nchr */ + break; + case '.': + sljit_emit_op1(C, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM2(CELLS, SP), 0); /* R0 = CELLS[SP] */ + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_putchar)); /* putchar(R0) */ + break; + case ',': + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_getchar)); /* R0 = getchar() */ + sljit_emit_op1(C, SLJIT_MOV_U8, SLJIT_MEM2(CELLS, SP), 0, SLJIT_R0, 0); /* CELLS[SP] = R0 */ + break; + case '[': + loop_start = sljit_emit_label(C); /* loop_start: */ + sljit_emit_op1(C, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM2(CELLS, SP), 0); /* R0 = CELLS[SP] */ + loop_end = sljit_emit_cmp(C, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); /* IF R0 == 0 goto loop_end */ + + if (loop_push(loop_start, loop_end)) { + fprintf(stderr, "Too many loop level\n"); + goto compile_failed; + } + break; + case ']': + if (loop_pop(&loop_start, &loop_end)) { + fprintf(stderr, "Unmatch loop ]\n"); + goto compile_failed; + } + + sljit_set_label(sljit_emit_jump(C, SLJIT_JUMP), loop_start); /* goto loop_start */ + sljit_set_label(loop_end, sljit_emit_label(C)); /* loop_end: */ + break; + } + } + + if (!loop_empty()) { + 
fprintf(stderr, "Unmatch loop [\n"); + goto compile_failed; + } + + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, CELLS, 0); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_free)); /* free(CELLS) */ + + sljit_set_label(end, sljit_emit_label(C)); + sljit_emit_return(C, SLJIT_UNUSED, 0, 0); + + code = sljit_generate_code(C); + if (lcode) + *lcode = sljit_get_generated_code_size(C); + +compile_failed: + sljit_free_compiler(C); + return code; +} + +/* function prototype of bf compiled code */ +typedef void (*bf_entry_t)(void); + +int main(int argc, char **argv) +{ + void *code; + bf_entry_t entry; + FILE *fp; + + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return -1; + } + + fp = fopen(argv[1], "rb"); + if (!fp) { + perror("open"); + return -1; + } + + code = compile(fp, NULL); + fclose(fp); + + if (!code) { + fprintf(stderr, "[Fatal]: Compile failed\n"); + return -1; + } + + entry = (bf_entry_t)code; + entry(); + + sljit_free_code(code, NULL); + return 0; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/branch.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/branch.c new file mode 100644 index 0000000000..379176061b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/branch.c @@ -0,0 +1,77 @@ +#include "sljitLir.h" + +#include +#include + +typedef long (SLJIT_FUNC *func3_t)(long a, long b, long c); + +/* + This example, we generate a function like this: + +long func(long a, long b, long c) +{ + if ((a & 1) == 0) + return c; + return b; +} + + */ +static int branch(long a, long b, long c) +{ + void *code; + unsigned long len; + func3_t func; + + struct sljit_jump *ret_c; + struct sljit_jump *out; + + /* Create a SLJIT compiler */ + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + + /* 3 arg, 1 temp reg, 3 save reg */ + sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 0); + + /* R0 = a & 1, S0 is argument a */ + sljit_emit_op2(C, 
SLJIT_AND, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + + /* if R0 == 0 then jump to ret_c, where is ret_c? we assign it later */ + ret_c = sljit_emit_cmp(C, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); + + /* R0 = b, S1 is argument b */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_S1, 0); + + /* jump to out */ + out = sljit_emit_jump(C, SLJIT_JUMP); + + /* here is the 'ret_c' should jump, we emit a label and set it to ret_c */ + sljit_set_label(ret_c, sljit_emit_label(C)); + + /* R0 = c, S2 is argument c */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_S2, 0); + + /* here is the 'out' should jump */ + sljit_set_label(out, sljit_emit_label(C)); + + /* end of function */ + sljit_emit_return(C, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + /* Generate machine code */ + code = sljit_generate_code(C); + len = sljit_get_generated_code_size(C); + + /* Execute code */ + func = (func3_t)code; + printf("func return %ld\n", func(a, b, c)); + + /* dump_code(code, len); */ + + /* Clean up */ + sljit_free_compiler(C); + sljit_free_code(code, NULL); + return 0; +} + +int main() +{ + return branch(4, 5, 6); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/first_program.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/first_program.c new file mode 100644 index 0000000000..c779241243 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/first_program.c @@ -0,0 +1,53 @@ +#include "sljitLir.h" + +#include +#include + +typedef long (SLJIT_FUNC *func3_t)(long a, long b, long c); + +static int add3(long a, long b, long c) +{ + void *code; + unsigned long len; + func3_t func; + + /* Create a SLJIT compiler */ + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + + /* Start a context(function entry), have 3 arguments, discuss later */ + sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 0); + + /* The first arguments of function is register SLJIT_S0, 2nd, SLJIT_S1, etc. 
*/ + /* R0 = first */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + + /* R0 = R0 + second */ + sljit_emit_op2(C, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S1, 0); + + /* R0 = R0 + third */ + sljit_emit_op2(C, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S2, 0); + + /* This statement mov R0 to RETURN REG and return */ + /* in fact, R0 is RETURN REG itself */ + sljit_emit_return(C, SLJIT_MOV, SLJIT_R0, 0); + + /* Generate machine code */ + code = sljit_generate_code(C); + len = sljit_get_generated_code_size(C); + + /* Execute code */ + func = (func3_t)code; + printf("func return %ld\n", func(a, b, c)); + + /* dump_code(code, len); */ + + /* Clean up */ + sljit_free_compiler(C); + sljit_free_code(code, NULL); + return 0; +} + +int main() +{ + return add3(4, 5, 6); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/func_call.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/func_call.c new file mode 100644 index 0000000000..3e5cf7c35d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/func_call.c @@ -0,0 +1,80 @@ +#include "sljitLir.h" + +#include +#include + +typedef long (SLJIT_FUNC *func3_t)(long a, long b, long c); + +static long SLJIT_FUNC print_num(long a) +{ + printf("a = %ld\n", a); + return a + 1; +} + +/* + This example, we generate a function like this: + +long func(long a, long b, long c) +{ + if ((a & 1) == 0) + return print_num(c); + return print_num(b); +} +*/ + +static int func_call(long a, long b, long c) +{ + void *code; + unsigned long len; + func3_t func; + + struct sljit_jump *out; + struct sljit_jump *print_c; + + /* Create a SLJIT compiler */ + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + + sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 0); + + /* a & 1 --> R0 */ + sljit_emit_op2(C, SLJIT_AND, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + /* R0 == 0 --> jump print_c */ + print_c = sljit_emit_cmp(C, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); + + 
/* R0 = S1; print_num(R0) */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); + + /* jump out */ + out = sljit_emit_jump(C, SLJIT_JUMP); + /* print_c: */ + sljit_set_label(print_c, sljit_emit_label(C)); + + /* R0 = c; print_num(R0); */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S2, 0); + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); + + /* out: */ + sljit_set_label(out, sljit_emit_label(C)); + sljit_emit_return(C, SLJIT_MOV, SLJIT_R0, 0); + + /* Generate machine code */ + code = sljit_generate_code(C); + len = sljit_get_generated_code_size(C); + + /* Execute code */ + func = (func3_t)code; + printf("func return %ld\n", func(a, b, c)); + + /* dump_code(code, len); */ + + /* Clean up */ + sljit_free_compiler(C); + sljit_free_code(code, NULL); + return 0; +} + +int main() +{ + return func_call(4, 5, 6); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/hello.bf b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/hello.bf new file mode 100644 index 0000000000..3c87e66675 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/hello.bf @@ -0,0 +1,21 @@ ++++++ +++++ initialize counter (cell #0) to 10\ +[ use loop to set the next four cells to 70/100/30/10\ + > +++++ ++ add 7 to cell #1\ + > +++++ +++++ add 10 to cell #2 \ + > +++ add 3 to cell #3\ + > + add 1 to cell #4\ + <<<< - decrement counter (cell #0)\ +]\ +> ++ . print 'H'\ +> + . print 'e'\ ++++++ ++ . print 'l'\ +. print 'l'\ ++++ . print 'o'\ +> ++ . print ' '\ +<< +++++ +++++ +++++ . print 'W'\ +> . print 'o'\ ++++ . print 'r'\ +----- - . print 'l'\ +----- --- . print 'd'\ +> + . print '!'\ +> . 
print '\n'\ \ No newline at end of file diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/loop.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/loop.c new file mode 100644 index 0000000000..9167a328e5 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/loop.c @@ -0,0 +1,76 @@ +#include "sljitLir.h" + +#include +#include + +typedef long (SLJIT_FUNC *func2_t)(long a, long b); + +/* + This example, we generate a function like this: + +long func(long a, long b) +{ + long i; + long ret = 0; + for (i = 0; i < a; ++i) { + ret += b; + } + return ret; +} +*/ + +static int loop(long a, long b) +{ + void *code; + unsigned long len; + func2_t func; + + struct sljit_label *loopstart; + struct sljit_jump *out; + + /* Create a SLJIT compiler */ + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + + /* 2 arg, 2 temp reg, 2 saved reg */ + sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW), 2, 2, 0, 0, 0); + + /* R0 = 0 */ + sljit_emit_op2(C, SLJIT_XOR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R1, 0); + /* RET = 0 */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); + /* loopstart: */ + loopstart = sljit_emit_label(C); + /* R1 >= a --> jump out */ + out = sljit_emit_cmp(C, SLJIT_GREATER_EQUAL, SLJIT_R1, 0, SLJIT_S0, 0); + /* RET += b */ + sljit_emit_op2(C, SLJIT_ADD, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0, SLJIT_S1, 0); + /* R1 += 1 */ + sljit_emit_op2(C, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + /* jump loopstart */ + sljit_set_label(sljit_emit_jump(C, SLJIT_JUMP), loopstart); + /* out: */ + sljit_set_label(out, sljit_emit_label(C)); + + /* return RET */ + sljit_emit_return(C, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + /* Generate machine code */ + code = sljit_generate_code(C); + len = sljit_get_generated_code_size(C); + + /* Execute code */ + func = (func2_t)code; + printf("func return %ld\n", func(a, b)); + + /* dump_code(code, len); */ + + /* Clean up */ + sljit_free_compiler(C); + sljit_free_code(code, 
NULL); + return 0; +} + +int main() +{ + return loop(4, 5); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/sljit_tutorial.html b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/sljit_tutorial.html new file mode 100644 index 0000000000..0fac1c1d5a --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/sljit_tutorial.html @@ -0,0 +1,584 @@ + + + + + + SLJIT tutorial + + + + + + +

    + + + + +
    + +
    +SourceForge.net Logo +
    +

    SLJIT tutorial

    + +

    Before started

    + +Download the tutorial sources
    +
    +SLJIT is a light-weight, platform-independent JIT compiler that is easy to +embed into your own project; as a result of its 'stack-less' design, SLJIT has +some limits on register usage.
    +
    +Here are some other JIT compilers I dug into recently, listed here in case you are interested:
    + +
      + Libjit/liblighning: - the backend of GNU.net
      + Libgccjit: - introduced in GCC5.0, its different from other JIT lib, this + one seems like constructing a C code, it use the backend of GCC.
      + AsmJIT: - branch from the famous V8 project (JavaScript engine in Chrome), + support only X86/X86_64.
      + DynASM: - used in LuaJIT.
      +
    + +
    +AsmJIT and DynASM work at the instruction level, so using them feels like coding in ASM; +SLJIT looks like ASM too, but it hides the details of the specific CPU, making it more +generic and portable. Libjit works at a higher layer, and libgccjit, as I mentioned, +really has you constructing C code.
    + +

    First program

    + +Usage of SLJIT: +
      +1. #include "sljitLir.h" in the head of your C/C++ program
      +2. Compile with sljit_src/sljitLir.c
      +
    + +All of the examples can be compiled like this: +
      +gcc -Wall -Ipath/to/sljit_src -DSLJIT_CONFIG_AUTO=1 \
      +
        xxx.c path/to/sljit_src/sljitLir.c -o program
      +
    + +OK, let's take a look at the first program. In this program we create a function that +returns the sum of 3 arguments.
    +
    +
    +
      +#include "sljitLir.h"
      +
      +#include <stdio.h>
      +#include <stdlib.h>
      +
      +typedef sljit_sw (*func3_t)(sljit_sw a, sljit_sw b, sljit_sw c);
      +
      +static int add3(sljit_sw a, sljit_sw b, sljit_sw c)
      +{
      +
        + void *code;
        + sljit_sw len;
        + func3_t func;
        +
        + /* Create a SLJIT compiler */
        + struct sljit_compiler *C = sljit_create_compiler();
        +
        + /* Start a context(function entry), have 3 arguments, discuss later */
        + sljit_emit_enter(C, 0, 3, 1, 3, 0, 0, 0);
        +
        + /* The first arguments of function is register SLJIT_S0, 2nd, SLJIT_S1, etc. */
        + /* R0 = first */
        + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0);
        +
        + /* R0 = R0 + second */
        + sljit_emit_op2(C, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S1, 0);
        +
        + /* R0 = R0 + third */
        + sljit_emit_op2(C, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S2, 0);
        +
        + /* This statement mov R0 to RETURN REG and return */
        + /* in fact, R0 is RETURN REG itself */
        + sljit_emit_return(C, SLJIT_MOV, SLJIT_R0, 0);
        +
        + /* Generate machine code */
        + code = sljit_generate_code(C);
        + len = sljit_get_generated_code_size(C);
        +
        + /* Execute code */
        + func = (func3_t)code;
        + printf("func return %ld\n", func(a, b, c));
        +
        + /* dump_code(code, len); */
        +
        + /* Clean up */
        + sljit_free_compiler(C);
        + sljit_free_code(code);
        + return 0;
        +
      +}
      +
      +int main()
      +{
      +
        + return add3(4, 5, 6);
        +
      +}
      +
    +
    + +
    +The function sljit_emit_enter creates a context, saves some registers to the stack, +and creates a call-frame; sljit_emit_return restores the saved registers and cleans up +the frame. SLJIT is designed to be embedded into other applications, so the code it generates +has to follow some basic rules.
    +
    +The standard called Application Binary Interface, or ABI for short, here is a +document for X86_64 CPU (ABI.pdf), +almost all Linux/Unix follow this standard. MS windows has its own, read this for more: +X86_calling_conventions
    +
    +When reading the doc of sljit_emit_enter, the parameters 'saveds' and 'scratches' +confused me. The fact is, the registers in a CPU have different roles in the ABI spec: +some of them are used to pass arguments, some of them are 'callee-saved', and some of them are +'temporaries'. Take X86_64 for example: RAX, R10 and R11 are temporaries, which means +they may be changed by a call instruction, while RBX and R12-R15 are callee-saved and +will keep their values across the call. The rule is, every function should save +those registers before using them.
    +
    +Fortunately, SLJIT have done the most for us, SLJIT_S[0-9] represent those 'safe' +registers, SLJIT_R[0-9] however, only for 'temporary used'.
    +
    +When a function start, SLJIT move the function arguments to S0, S1, S2 register, it +means function arguments are always 'safe' in the context, the limit of using stack for +storing arguments make SLJIT support only 3 arguments max.
    +
    +Sljit_emit_opX is easy to understand, in SLJIT a data value is represented by 2 +parameters, it can be a register, an In-memory data, or an immediate number.
    +
    + + + + + + + + +
    First parameter Second parameter Meaning
    SLJIT_R*, SLJIT_S* 0 Temp/saved registers
    SLJIT_IMM Number Immediate number
    SLJIT_MEM Address In-mem data with Absolute address
    SLJIT_MEM1(r) Offset In-mem data in [R + offset]
    SLJIT_MEM2(r1, r2) Shift(size) In-mem array, R1 as base address, R2 as index,
    + Shift as size(0 for bytes, 1 for shorts, 2 for
    + 4bytes, 3 for 8bytes)
    + +

    Branch

    +
    +
      +#include "sljitLir.h"
      +
      +#include <stdio.h>
      +#include <stdlib.h>
      +
      +typedef sljit_sw (*func3_t)(sljit_sw a, sljit_sw b, sljit_sw c);
      +
      +/*
      + This example, we generate a function like this:
      +
      +sljit_sw func(sljit_sw a, sljit_sw b, sljit_sw c)
      +{
      +
        + if ((a & 1) == 0)
        +
          + return c;
          +
        + return b;
        +
      +}
      +
      + */
      +static int branch(sljit_sw a, sljit_sw b, sljit_sw c)
      +{
      +
        + void *code;
        + sljit_uw len;
        + func3_t func;
        +
        + struct sljit_jump *ret_c;
        + struct sljit_jump *out;
        +
        + /* Create a SLJIT compiler */
        + struct sljit_compiler *C = sljit_create_compiler();
        +
        + /* 3 arg, 1 temp reg, 3 save reg */
        + sljit_emit_enter(C, 0, 3, 1, 3, 0, 0, 0);
        +
        + /* R0 = a & 1, S0 is argument a */
        + sljit_emit_op2(C, SLJIT_AND, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1);
        +
        + /* if R0 == 0 then jump to ret_c, where is ret_c? we assign it later */
        + ret_c = sljit_emit_cmp(C, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);
        +
        + /* R0 = b, S1 is argument b */
        + sljit_emit_op1(C, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_S1, 0);
        +
        + /* jump to out */
        + out = sljit_emit_jump(C, SLJIT_JUMP);
        +
        + /* here is the 'ret_c' should jump, we emit a label and set it to ret_c */
        + sljit_set_label(ret_c, sljit_emit_label(C));
        +
        + /* R0 = c, S2 is argument c */
        + sljit_emit_op1(C, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_S2, 0);
        +
        + /* here is the 'out' should jump */
        + sljit_set_label(out, sljit_emit_label(C));
        +
        + /* end of function */
        + sljit_emit_return(C, SLJIT_MOV, SLJIT_RETURN_REG, 0);
        +
        + /* Generate machine code */
        + code = sljit_generate_code(C);
        + len = sljit_get_generated_code_size(C);
        +
        + /* Execute code */
        + func = (func3_t)code;
        + printf("func return %ld\n", func(a, b, c));
        +
        + /* dump_code(code, len); */
        +
        + /* Clean up */
        + sljit_free_compiler(C);
        + sljit_free_code(code);
        + return 0;
        +
      +}
      +
      +int main()
      +{
      +
        + return branch(4, 5, 6);
        +
      +}
      +
    +
    + +The keys to implementing branches are 'struct sljit_jump' and 'struct sljit_label'. +The 'jump' contains a jump instruction, but it does not know where to jump unless +you set a label on it; the 'label' is a code address, just like a label in ASM +language.
    +
    +sljit_emit_cmp/sljit_emit_jump generate a conditional/unconditional jump, +take the statement
    +
      +ret_c = sljit_emit_cmp(C, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);
      +
    +For example, it create a jump instruction, the condition is R0 equals 0, and +the position of jumping will assign later with the sljit_set_label statement.
    +
    +In this example, it creates a branch like this:
    +
      +
        + R0 = a & 1;
        + if R0 == 0 then goto ret_c;
        + R0 = b;
        + goto out;
        +
      +ret_c:
      +
        + R0 = c;
        +
      +out:
      +
        + return R0;
        +
      +
    +
    +This is how high-level-language compiler handle branch.
    +
    + +

    Loop

    + +Loop example is similar with Branch. + +
    +
      +/* + This example, we generate a function like this:
      +
      +sljit_sw func(sljit_sw a, sljit_sw b)
      +{
      +
        + sljit_sw i;
        + sljit_sw ret = 0;
        + for (i = 0; i < a; ++i) {
        +
          + ret += b;
          +
        + }
        + return ret;
        +
      +}
      +*/
      +
      +
        + /* 2 arg, 2 temp reg, 2 saved reg */
        + sljit_emit_enter(C, 0, 2, 2, 2, 0, 0, 0);
        +
        + /* R0 = 0 */
        + sljit_emit_op2(C, SLJIT_XOR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R1, 0);
        + /* RET = 0 */
        + sljit_emit_op1(C, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
        + /* loopstart: */
        + loopstart = sljit_emit_label(C);
        + /* R1 >= a --> jump out */
        + out = sljit_emit_cmp(C, SLJIT_GREATER_EQUAL, SLJIT_R1, 0, SLJIT_S0, 0);
        + /* RET += b */
        + sljit_emit_op2(C, SLJIT_ADD, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0, SLJIT_S1, 0);
        + /* R1 += 1 */
        + sljit_emit_op2(C, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
        + /* jump loopstart */
        + sljit_set_label(sljit_emit_jump(C, SLJIT_JUMP), loopstart);
        + /* out: */
        + sljit_set_label(out, sljit_emit_label(C));
        +
        + /* return RET */
        + sljit_emit_return(C, SLJIT_MOV, SLJIT_RETURN_REG, 0);
        +
      +
    +
    + +After this example, you are ready to construct any program that contain complex branch +and loop.
    +
    +Here is an interesting fact: 'xor reg, reg' is better than 'mov reg, 0', as it saves 2 bytes +on X86 machines.
    +
    +I will give only the key code in the rest of this tutorial, the full source of each +chapter can be found in the attachment.
    + + +

    Call external function

    + +It's easy to call an external function in SLJIT, we use sljit_emit_ijump with SLJIT_CALL* +operation to do so.
    +
    +SLJIT_CALL[N] is used to call a function with N arguments. SLJIT has only SLJIT_CALL0, +CALL1, CALL2 and CALL3, which means you can call a function with 3 arguments at most (that +disappointed me — no chance to call fwrite in SLJIT); the arguments for the callee function +are passed in SLJIT_R0, R1 and R2. Keep in mind to preserve those 'temp registers'.
    +
    +Assume that we have an external function:
    +
      + sljit_sw print_num(sljit_sw a); +
    + +JIT code to call print_num(S1): + +
    +
      + /* R0 = S1; */
      + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0);
      + /* print_num(R0) */
      + sljit_emit_ijump(C, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num));
      +
    +
    +
    +This code call a imm-data(address of print_num), which is linked properly when the +program loaded. There no problem in 1-time compile and execute, but when you planning +to save to file and load/execute next time, that address may not correct as you expect, +in some platform that support PIC, the address of print_num may relocate to another +address in run-time. Check this out: +PIC
    +
    + +

    Structure access

    + +SLJIT use SLJIT_MEM1 to implement [Reg + offset] memory access.
    +
    +
      +struct point_st {
      +
        + sljit_sw x;
        + int y;
        + short z;
        + char d;
        + char e;
        +
      +};
      +
      +sljit_emit_op1(C, SLJIT_MOV_SI, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0),
      +
        +SLJIT_OFFSETOF(struct point_st, y));
        +
      +
    +
    + +In this case, SLJIT_S0 is the address of the point_st structure, offset of member 'y' +is determined in compile time, the important MOV operation always comes with a +'signed/size' postfix, like this one _SI means 'signed 32bits integer', the postfix +list:
    +
      + UB = unsigned byte (8 bit)
      + SB = signed byte (8 bit)
      + UH = unsigned half (16 bit)
      + SH = signed half (16 bit)
      + UI = unsigned int (32 bit)
      + SI = signed int (32 bit)
      + P = pointer (sljit_p) size
      +
    + +

    Array accessing

    + +SLJIT use SLJIT_MEM2 to access arrays, like this:
    + +
    +
      +sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_S2),
      +
        +SLJIT_WORD_SHIFT); +
      +
    +
    + +This statement generates a code like this:
    +
      +WORD S0[];
      +R0 = S0[S2]
      +
    +
    +The array S0 is declared to be WORD, which will be sizeof(sljit_sw) in length. +Sljit use a 'shift' for length representation: (0 for single byte, 1 for 2 +bytes, 2 for 4 bytes, 3 for 8bytes)
    +
    +The file array_access.c demonstrate a array-print example, should be easy +to understand.
    + +

    Local variables

    + +SLJIT provide SLJIT_MEM1(SLJIT_SP) to access the reserved space in +sljit_emit_enter's last parameter.
    +In this example we have to pass the address to print_arr, local variable +is the only choice.
    + +
    +
      + /* reserved space in stack for sljit_sw arr[3] */
      + sljit_emit_enter(C, 0, 3, 2, 3, 0, 0, 3 * sizeof(sljit_sw));
      + /* opt arg R S FR FS local_size */
      +
      + /* arr[0] = S0, SLJIT_SP is the init address of local var */
      + sljit_emit_op1(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_S0, 0);
      + /* arr[1] = S1 */
      + sljit_emit_op1(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 1 * sizeof(sljit_sw), SLJIT_S1, 0);
      + /* arr[2] = S2 */
      + sljit_emit_op1(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_sw), SLJIT_S2, 0);
      +
      + /* R0 = arr; in fact SLJIT_SP is the address of arr, but can't do so in SLJIT */
      + sljit_get_local_base(C, SLJIT_R0, 0, 0); /* get the address of local variables */
      + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); /* R1 = 3; */
      + sljit_emit_ijump(C, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(print_arr));
      + sljit_emit_return(C, SLJIT_MOV, SLJIT_R0, 0);
      +
    +
    +
    +SLJIT_SP can only be used in SLJIT_MEM1(SLJIT_SP). In this case, SP is the +address of 'arr', but we cannot assign it to Reg using SLJIT_MOV opr, +instead, we use sljit_get_local_base, which load the address and offset of +local variable to the target.
    + +

    Brainfuck compiler

    + +Ok, the basic usage of SLJIT ends here, with more detail, I suggest reading +sljitLir.h directly, having fun hacking the wonder of SLJIT!
    +
    +The brainfuck machine introduction can be found here: +Brainfuck
    +
    + +

    Extra

    + +1. Dump_code function
    +SLJIT didn't provide disassemble functional, this is a simple function to do this(X86 only)
    +
    + +
    +
      +static void dump_code(void *code, sljit_uw len)
      +{
      +
        + FILE *fp = fopen("/tmp/slj_dump", "wb");
        + if (!fp)
        +
          + return;
          +
        + fwrite(code, len, 1, fp);
        + fclose(fp);
        +
      +#if defined(SLJIT_CONFIG_X86_64)
      +
        + system("objdump -b binary -m l1om -D /tmp/slj_dump");
        +
      +#elif defined(SLJIT_CONFIG_X86_32)
      +
        + system("objdump -b binary -m i386 -D /tmp/slj_dump");
        +
      +#endif
      +} +
    +
    + +The branch example disassembling:
    +
    +0000000000000000 <.data>:
    +
      + + + + + + + + + + + + + + + + + + + + +
      0:53push %rbx
      1:41 57push %r15
      3:41 56push %r14
      5:48 8b dfmov %rdi,%rbx
      8:4c 8b femov %rsi,%r15
      b:4c 8b f2mov %rdx,%r14
      e:48 83 ec 10sub $0x10,%rsp
      12:48 89 d8mov %rbx,%rax
      15:48 83 e0 01and $0x1,%rax
      19:48 83 f8 00cmp $0x0,%rax
      1d:74 05je 0x24
      1f:4c 89 f8mov %r15,%rax
      22:eb 03jmp 0x27
      24:4c 89 f0mov %r14,%rax
      27:48 83 c4 10add $0x10,%rsp
      2b:41 5epop %r14
      2d:41 5fpop %r15
      2f:5bpop %rbx
      30:c3retq
      +
    +
    +with GCC -O2
    +0000000000000000 <func>:
    +
      + + + + + +
      0:48 89 d0mov %rdx,%rax
      3:83 e7 01and $0x1,%edi
      6:48 0f 45 c6cmovne %rsi,%rax
      a:c3retq
      +
    +
    +Err... Ok, the optimization here may be weak, or, optimization there is crazy... :-)
    + + +
    By wenxichang#163.com, 2015.5.10
    + +
    +
    + + + diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/struct_access.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/struct_access.c new file mode 100644 index 0000000000..1ebabfb7df --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/struct_access.c @@ -0,0 +1,83 @@ +#include "sljitLir.h" + +#include +#include + +struct point_st { + long x; + int y; + short z; + char d; +}; + +typedef long (SLJIT_FUNC *point_func_t)(struct point_st *point);; + +static long SLJIT_FUNC print_num(long a) +{ + printf("a = %ld\n", a); + return a + 1; +} + +/* + This example, we generate a function like this: + +long func(struct point_st *point) +{ + print_num(point->x); + print_num(point->y); + print_num(point->z); + print_num(point->d); + return point->x; +} +*/ + +static int struct_access() +{ + void *code; + unsigned long len; + point_func_t func; + + struct point_st point = { + -5, -20, 5, 'a' + }; + + /* Create a SLJIT compiler */ + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + + sljit_emit_enter(C, 0, SLJIT_ARG1(SW), 1, 1, 0, 0, 0); + /* opt arg R S FR FS local_size */ + + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); // S0->x --> R0 + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0); + + sljit_emit_op1(C, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, y)); // S0->y --> R0 + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0); + + sljit_emit_op1(C, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, z)); // S0->z --> R0 + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0); + + sljit_emit_op1(C, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, d)); // S0->z --> 
R0 + sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0); + + sljit_emit_return(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); // return S0->x + + /* Generate machine code */ + code = sljit_generate_code(C); + len = sljit_get_generated_code_size(C); + + /* Execute code */ + func = (point_func_t)code; + printf("func return %ld\n", func(&point)); + + /* dump_code(code, len); */ + + /* Clean up */ + sljit_free_compiler(C); + sljit_free_code(code, NULL); + return 0; +} + +int main() +{ + return struct_access(); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/temp_var.c b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/temp_var.c new file mode 100644 index 0000000000..da94729c72 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/doc/tutorial/temp_var.c @@ -0,0 +1,74 @@ +#include "sljitLir.h" + +#include +#include + +typedef long (SLJIT_FUNC *func3_t)(long a, long b, long c); + +static long SLJIT_FUNC print_arr(long *a, long n) +{ + long i; + long sum = 0; + for (i = 0; i < n; ++i) { + sum += a[i]; + printf("arr[%ld] = %ld\n", i, a[i]); + } + return sum; +} + +/* + This example, we generate a function like this: + +long func(long a, long b, long c) +{ + long arr[3] = { a, b, c }; + return print_arr(arr, 3); +} +*/ + +static int temp_var(long a, long b, long c) +{ + void *code; + unsigned long len; + func3_t func; + + /* Create a SLJIT compiler */ + struct sljit_compiler *C = sljit_create_compiler(NULL, NULL); + + /* reserved space in stack for long arr[3] */ + sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 3 * sizeof(long)); + /* opt arg R S FR FS local_size */ + + /* arr[0] = S0, SLJIT_SP is the init address of local var */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_S0, 0); + /* arr[1] = S1 */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 1 * sizeof(long), SLJIT_S1, 0); + /* arr[2] = S2 */ 
+ sljit_emit_op1(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(long), SLJIT_S2, 0); + + /* R0 = arr; in fact SLJIT_SP is the address of arr, but can't do so in SLJIT */ + sljit_get_local_base(C, SLJIT_R0, 0, 0); /* get the address of local variables */ + sljit_emit_op1(C, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); /* R1 = 3; */ + sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARG1(SW)|SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_arr)); + sljit_emit_return(C, SLJIT_MOV, SLJIT_R0, 0); + + /* Generate machine code */ + code = sljit_generate_code(C); + len = sljit_get_generated_code_size(C); + + /* Execute code */ + func = (func3_t)code; + printf("func return %ld\n", func(a, b, c)); + + /* dump_code(code, len); */ + + /* Clean up */ + sljit_free_compiler(C); + sljit_free_code(code, NULL); + return 0; +} + +int main() +{ + return temp_var(7, 8, 9); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.c b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.c new file mode 100644 index 0000000000..30bd0654b0 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.c @@ -0,0 +1,2590 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sljitLir.h" +#include "regexJIT.h" + +#include + +#ifdef REGEX_MATCH_VERBOSE +#include +#endif + +/* Extra, hidden flags: + {id!} where id > 0 found in the code. */ +#define REGEX_ID_CHECK 0x100 +/* When REGEX_NEWLINE && REGEX_MATCH_BEGIN defined, the pattern turn to a normal search, + which starts with [\r\n] character range. */ +#define REGEX_FAKE_MATCH_BEGIN 0x200 +/* When REGEX_NEWLINE && REGEX_MATCH_END defined, the pattern turn to a normal search, + which ends with [\r\n] character range. */ +#define REGEX_FAKE_MATCH_END 0x400 + +/* --------------------------------------------------------------------- */ +/* Structures for JIT-ed pattern matching */ +/* --------------------------------------------------------------------- */ + +struct regex_machine +{ + /* flags. */ + int flags; + /* Number of state descriptors for one term. */ + sljit_sw no_states; + /* Total size. 
*/ + sljit_sw size; + + union { + void *init_match; + sljit_sw (SLJIT_FUNC *call_init)(void *next, void* match); + } u; +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + struct sljit_function_context context; +#endif + + void *continue_match; + + /* Variable sized array to contain the handler addresses. */ + sljit_uw entry_addrs[1]; +}; + +struct regex_match +{ + /* Current and next state array. */ + sljit_sw *current; + sljit_sw *next; + /* Starting. */ + sljit_sw head; + /* String character index (ever increasing). */ + sljit_sw index; + /* Best match found so far (members in priority order). */ + sljit_sw best_begin; + sljit_sw best_end; + sljit_sw best_id; + /* Bool flags (encoded as word). */ + sljit_sw fast_quit; + sljit_sw fast_forward; + /* Machine. */ + struct regex_machine *machine; + + union { + void *continue_match; + void (SLJIT_FUNC *call_continue)(struct regex_match *match, const regex_char_t *input_string, int length); + } u; + + /* Variable sized array to contain the state arrays. */ + sljit_sw states[1]; +}; + +/* State vector + ITEM[0] - pointer to the address inside the machine code + ITEM[1] - next pointer + ITEM[2] - string started from (optional) + ITEM[3] - max ID (optional) */ + +/* Register allocation. */ +/* Current state array (loaded & stored: regex_match->current). */ +#define R_CURR_STATE SLJIT_S0 +/* Next state array (loaded & stored: regex_match->next). */ +#define R_NEXT_STATE SLJIT_S1 +/* Head (loaded & stored: regex_match->head). */ +#define R_NEXT_HEAD SLJIT_S2 +/* String fragment pointer. */ +#define R_STRING SLJIT_S3 +/* String fragment length. */ +#define R_LENGTH SLJIT_S4 +/* 'struct regex_match*' */ +#define R_REGEX_MATCH SLJIT_R0 +/* Current character. */ +#define R_CURR_CHAR SLJIT_R1 +/* Temporary register. */ +#define R_TEMP SLJIT_R2 +/* Caches the regex_match->best_begin. */ +#define R_BEST_BEGIN SLJIT_R3 +/* Current character index. 
*/ +#define R_CURR_INDEX SLJIT_R4 + +/* --------------------------------------------------------------------- */ +/* Stack management */ +/* --------------------------------------------------------------------- */ + +/* Try to allocate 2^n blocks. */ +#define STACK_FRAGMENT_SIZE (((64 * sizeof(struct stack_item)) - (sizeof(struct stack_fragment_data))) / (sizeof(struct stack_item))) + +struct stack_item { + int type; + int value; +}; + +struct stack_fragment_data { + struct stack_fragment *next; + struct stack_fragment *prev; +}; + +struct stack_fragment { + struct stack_fragment_data data; + struct stack_item items[STACK_FRAGMENT_SIZE]; +}; + +struct stack { + struct stack_fragment *first; + struct stack_fragment *last; + int index; + int count; +}; + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + +static void stack_check(struct stack *stack) +{ + struct stack_fragment *curr; + int found; + + if (!stack) + return; + + SLJIT_ASSERT(stack->index >= 0 && stack->index < STACK_FRAGMENT_SIZE); + + if (stack->first == NULL) { + SLJIT_ASSERT(stack->first == NULL && stack->last == NULL); + SLJIT_ASSERT(stack->index == STACK_FRAGMENT_SIZE - 1 && stack->count == 0); + return; + } + + found = 0; + if (stack->last == NULL) { + SLJIT_ASSERT(stack->index == STACK_FRAGMENT_SIZE - 1 && stack->count == 0); + found = 1; + } + else + SLJIT_ASSERT(stack->index >= 0 && stack->count >= 0); + + SLJIT_ASSERT(stack->first->data.prev == NULL); + curr = stack->first; + while (curr) { + if (curr == stack->last) + found = 1; + if (curr->data.next) + SLJIT_ASSERT(curr->data.next->data.prev == curr); + curr = curr->data.next; + } + SLJIT_ASSERT(found); +} + +#endif + +static void stack_init(struct stack *stack) +{ + stack->first = NULL; + stack->last = NULL; + stack->index = STACK_FRAGMENT_SIZE - 1; + stack->count = 0; +} + +static void stack_destroy(struct stack *stack) +{ + struct stack_fragment *curr = stack->first; + struct stack_fragment *prev; + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + 
stack_check(stack); +#endif + + while (curr) { + prev = curr; + curr = curr->data.next; + SLJIT_FREE(prev, NULL); + } +} + +static SLJIT_INLINE struct stack_item* stack_top(struct stack *stack) +{ + SLJIT_ASSERT(stack->last); + return stack->last->items + stack->index; +} + +static int stack_push(struct stack *stack, int type, int value) +{ + if (stack->last) { + stack->index++; + if (stack->index >= STACK_FRAGMENT_SIZE) { + stack->index = 0; + if (!stack->last->data.next) { + stack->last->data.next = (struct stack_fragment*)SLJIT_MALLOC(sizeof(struct stack_fragment), NULL); + if (!stack->last->data.next) + return 1; + stack->last->data.next->data.next = NULL; + stack->last->data.next->data.prev = stack->last; + } + stack->last = stack->last->data.next; + } + } + else if (!stack->first) { + stack->last = (struct stack_fragment*)SLJIT_MALLOC(sizeof(struct stack_fragment), NULL); + if (!stack->last) + return 1; + stack->last->data.prev = NULL; + stack->last->data.next = NULL; + stack->first = stack->last; + stack->index = 0; + } + else { + stack->last = stack->first; + stack->index = 0; + } + stack->last->items[stack->index].type = type; + stack->last->items[stack->index].value = value; + stack->count++; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + stack_check(stack); +#endif + return 0; +} + +static struct stack_item* stack_pop(struct stack *stack) +{ + struct stack_item *ret = stack_top(stack); + + if (stack->index > 0) + stack->index--; + else { + stack->last = stack->last->data.prev; + stack->index = STACK_FRAGMENT_SIZE - 1; + } + + stack->count--; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + stack_check(stack); +#endif + return ret; +} + +static SLJIT_INLINE void stack_clone(struct stack *src, struct stack *dst) +{ + *dst = *src; +} + +static int stack_push_copy(struct stack *stack, int items, int length) +{ + struct stack_fragment *frag1; + int ind1; + struct stack_fragment *frag2; + int ind2; + int counter; + + SLJIT_ASSERT(stack->count >= length && items <= 
length && items > 0); + + /* Allocate the necessary elements. */ + counter = items; + frag1 = stack->last; + ind1 = stack->index; + while (counter > 0) { + if (stack->index + counter >= STACK_FRAGMENT_SIZE) { + counter -= STACK_FRAGMENT_SIZE - stack->index - 1 + 1; + stack->index = 0; + if (!stack->last->data.next) { + stack->last->data.next = (struct stack_fragment*)SLJIT_MALLOC(sizeof(struct stack_fragment), NULL); + if (!stack->last->data.next) + return 1; + stack->last->data.next->data.next = NULL; + stack->last->data.next->data.prev = stack->last; + } + stack->last = stack->last->data.next; + } + else { + stack->index += counter; + counter = 0; + } + } + + frag2 = stack->last; + ind2 = stack->index; + while (length > 0) { + frag2->items[ind2--] = frag1->items[ind1--]; + if (ind1 < 0) { + ind1 = STACK_FRAGMENT_SIZE - 1; + frag1 = frag1->data.prev; + } + if (ind2 < 0) { + ind2 = STACK_FRAGMENT_SIZE - 1; + frag2 = frag2->data.prev; + } + length--; + } + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + stack_check(stack); +#endif + stack->count += items; + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Parser */ +/* --------------------------------------------------------------------- */ + +enum { + /* Common. */ + type_begin, + type_end, + type_char, + type_newline, + type_id, + type_rng_start, + type_rng_end, + type_rng_char, + type_rng_left, + type_rng_right, + + /* generator only. */ + type_branch, + type_jump, + + /* Parser only. */ + type_open_br, + type_close_br, + type_select, + type_asterisk, + type_plus_sign, + type_qestion_mark +}; + +struct compiler_common { + /* Temporary stacks. */ + struct stack stack; + struct stack depth; + /* REGEX_ flags. */ + int flags; + /* Encoded size of the dfa representation. */ + sljit_sw dfa_size; + /* Number of terms. */ + sljit_sw terms_size; + /* Number of state descriptors for one term (same as machine->no_states). 
*/ + sljit_sw no_states; + /* Number of type_rng_(char|left)-s in the longest character range. */ + sljit_sw longest_range_size; + + /* DFA linear representation (size: dfa_size). */ + struct stack_item *dfa_transitions; + /* Term id and search state pairs (size: dfa_size). */ + struct stack_item *search_states; + + /* sljit compiler */ + struct sljit_compiler *compiler; + /* Machine data, which must be kept for later use. */ + struct regex_machine *machine; + /* Temporary space for jumps (size: longest_range_size). */ + struct sljit_jump **range_jump_list; +}; + +static const regex_char_t* decode_number(const regex_char_t *regex_string, int length, int *result) +{ + int value = 0; + + SLJIT_ASSERT(length > 0); + if (*regex_string < '0' || *regex_string > '9') { + *result = -1; + return regex_string; + } + + while (length > 0 && *regex_string >= '0' && *regex_string <= '9') { + value = value * 10 + (*regex_string - '0'); + length--; + regex_string++; + } + + *result = value; + return regex_string; +} + +static int iterate(struct stack *stack, int min, int max) +{ + struct stack it; + struct stack_item *item; + int count = -1; + int len = 0; + int depth = 0; + + stack_clone(stack, &it); + + /* Calculate size. */ + while (count < 0) { + item = stack_pop(&it); + switch (item->type) { + case type_id: + case type_rng_end: + case type_rng_char: + case type_rng_left: + case type_rng_right: + case type_plus_sign: + case type_qestion_mark: + len++; + break; + + case type_asterisk: + len += 2; + break; + + case type_close_br: + depth++; + break; + + case type_open_br: + SLJIT_ASSERT(depth > 0); + depth--; + if (depth == 0) + count = it.count; + break; + + case type_select: + SLJIT_ASSERT(depth > 0); + len += 2; + break; + + default: + SLJIT_ASSERT(item->type != type_begin && item->type != type_end); + if (depth == 0) + count = it.count; + len++; + break; + } + } + + if (min == 0 && max == 0) { + /* {0,0} case, not {0,} case: delete subtree. 
*/ + stack_clone(&it, stack); + /* and put an empty bracket expression instead of it. */ + if (stack_push(stack, type_open_br, 0)) + return REGEX_MEMORY_ERROR; + if (stack_push(stack, type_close_br, 0)) + return REGEX_MEMORY_ERROR; + return len; + } + + count = stack->count - count; + + /* Put an open bracket before the sequence. */ + if (stack_push_copy(stack, 1, count)) + return -1; + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + SLJIT_ASSERT(stack_push(&it, type_open_br, 0) == 0); +#else + stack_push(&it, type_open_br, 0); +#endif + + /* Copy the data. */ + if (max > 0) { + len = len * (max - 1); + max -= min; + /* Insert ? operators. */ + len += max; + + if (min > 0) { + min--; + while (min > 0) { + if (stack_push_copy(stack, count, count)) + return -1; + min--; + } + if (max > 0) { + if (stack_push_copy(stack, count, count)) + return -1; + if (stack_push(stack, type_qestion_mark, 0)) + return REGEX_MEMORY_ERROR; + count++; + max--; + } + } + else { + SLJIT_ASSERT(max > 0); + max--; + count++; + if (stack_push(stack, type_qestion_mark, 0)) + return REGEX_MEMORY_ERROR; + } + + while (max > 0) { + if (stack_push_copy(stack, count, count)) + return -1; + max--; + } + } + else { + SLJIT_ASSERT(min > 0); + min--; + /* Insert + operator. */ + len = len * min + 1; + while (min > 0) { + if (stack_push_copy(stack, count, count)) + return -1; + min--; + } + + if (stack_push(stack, type_plus_sign, 0)) + return REGEX_MEMORY_ERROR; + } + + /* Close the opened bracket. */ + if (stack_push(stack, type_close_br, 0)) + return REGEX_MEMORY_ERROR; + + return len; +} + +static int parse_iterator(const regex_char_t *regex_string, int length, struct stack *stack, sljit_sw *dfa_size, int begin) +{ + /* We only know that *regex_string == { . */ + int val1, val2; + const regex_char_t *base_from = regex_string; + const regex_char_t *from; + + length--; + regex_string++; + + /* Decode left value. 
*/ + val2 = -1; + if (length == 0) + return -2; + if (*regex_string == ',') { + val1 = 0; + length--; + regex_string++; + } + else { + from = regex_string; + regex_string = decode_number(regex_string, length, &val1); + if (val1 < 0) + return -2; + length -= regex_string - from; + + if (length == 0) + return -2; + if (*regex_string == '}') { + val2 = val1; + if (val1 == 0) + val1 = -1; + } + else if (length >= 2 && *regex_string == '!' && regex_string[1] == '}') { + /* Non posix extension. */ + if (stack_push(stack, type_id, val1)) + return -1; + (*dfa_size)++; + return (regex_string - base_from) + 1; + } + else { + if (*regex_string != ',') + return -2; + length--; + regex_string++; + } + } + + if (begin) + return -2; + + /* Decode right value. */ + if (val2 == -1) { + if (length == 0) + return -2; + if (*regex_string == '}') + val2 = 0; + else { + from = regex_string; + regex_string = decode_number(regex_string, length, &val2); + length -= regex_string - from; + if (val2 < 0 || length == 0 || *regex_string != '}' || val2 < val1) + return -2; + if (val2 == 0) { + SLJIT_ASSERT(val1 == 0); + val1 = -1; + } + } + } + + /* Fast cases. */ + if (val1 > 1 || val2 > 1) { + val1 = iterate(stack, val1, val2); + if (val1 < 0) + return -1; + *dfa_size += val1; + } + else if (val1 == 0 && val2 == 0) { + if (stack_push(stack, type_asterisk, 0)) + return -1; + *dfa_size += 2; + } + else if (val1 == 1 && val2 == 0) { + if (stack_push(stack, type_plus_sign, 0)) + return -1; + (*dfa_size)++; + } + else if (val1 == 0 && val2 == 1) { + if (stack_push(stack, type_qestion_mark, 0)) + return -1; + (*dfa_size)++; + } + else if (val1 == -1) { + val1 = iterate(stack, 0, 0); + if (val1 < 0) + return -1; + *dfa_size -= val1; + SLJIT_ASSERT(*dfa_size >= 2); + } + else { + /* Ignore. 
*/ + SLJIT_ASSERT(val1 == 1 && val2 == 1); + } + return regex_string - base_from; +} + +static int parse_char_range(const regex_char_t *regex_string, int length, struct compiler_common *compiler_common) +{ + struct stack* stack = &compiler_common->stack; + const regex_char_t *base_from = regex_string; + regex_char_t left_char, right_char, tmp_char; + + length--; + regex_string++; + + if (length == 0) + return -2; + + if (*regex_string != '^') { + if (stack_push(stack, type_rng_start, 0)) + return -1; + } + else { + length--; + regex_string++; + + if (length == 0) + return -2; + + if (stack_push(stack, type_rng_start, 1)) + return -1; + } + /* For both the type_rng_start & type_rng_end. */ + compiler_common->dfa_size += 2; + + /* Range must be at least 1 character. */ + if (*regex_string == ']') { + length--; + regex_string++; + if (stack_push(stack, type_rng_char, ']')) + return -1; + compiler_common->dfa_size++; + } + + while (1) { + if (length == 0) + return -2; + + if (*regex_string == ']') + break; + + if (*regex_string != '\\') + left_char = *regex_string; + else { + regex_string++; + length--; + if (length == 0) + return -2; + left_char = *regex_string; + } + regex_string++; + length--; + + /* Is a range here? */ + if (length >= 3 && *regex_string == '-' && *(regex_string + 1) != ']') { + regex_string++; + length--; + + if (*regex_string != '\\') + right_char = *regex_string; + else { + regex_string++; + length--; + if (length == 0) + return -2; + right_char = *regex_string; + } + regex_string++; + length--; + + if (left_char > right_char) { + /* Swap if necessary. 
*/ + tmp_char = left_char; + left_char = right_char; + right_char = tmp_char; + } + + if (stack_push(stack, type_rng_left, left_char)) + return -1; + if (stack_push(stack, type_rng_right, right_char)) + return -1; + compiler_common->dfa_size += 2; + } + else { + if (stack_push(stack, type_rng_char, left_char)) + return -1; + compiler_common->dfa_size++; + } + } + + if (stack_push(stack, type_rng_end, 0)) + return -1; + return regex_string - base_from; +} + +static int parse(const regex_char_t *regex_string, int length, struct compiler_common *compiler_common) +{ + /* Depth of bracketed expressions. */ + int depth = 0; + /* Have we already found a term? '1' if not yet. */ + int begin = 1; + /* Cache stack pointer. */ + struct stack* stack = &compiler_common->stack; + int tmp; + + /* Type_begin and type_end. */ + compiler_common->dfa_size = 2; + stack_init(stack); + if (stack_push(stack, type_begin, 0)) + return REGEX_MEMORY_ERROR; + + if (length > 0 && *regex_string == '^') { + compiler_common->flags |= REGEX_MATCH_BEGIN; + length--; + regex_string++; + } + + if ((compiler_common->flags & (REGEX_MATCH_BEGIN | REGEX_NEWLINE)) == (REGEX_MATCH_BEGIN | REGEX_NEWLINE)) { + /* Replace REGEX_MATCH_BEGIN flag to REGEX_FAKE_MATCH_BEGIN */ + compiler_common->flags &= ~REGEX_MATCH_BEGIN; + compiler_common->flags |= REGEX_FAKE_MATCH_BEGIN; + /* and append a new-line search. */ + if (stack_push(stack, type_newline, 0)) + return REGEX_MEMORY_ERROR; + compiler_common->dfa_size++; + /* Begin intentionally kept as 1. */ + } + + while (length > 0) { + switch (*regex_string) { + case '\\' : + length--; + regex_string++; + if (length == 0) + return REGEX_INVALID_REGEX; + if (stack_push(stack, type_char, *regex_string)) + return REGEX_MEMORY_ERROR; + begin = 0; + compiler_common->dfa_size++; + break; + + case '.' 
: + if (stack_push(stack, type_rng_start, 1)) + return REGEX_MEMORY_ERROR; + if (compiler_common->flags & REGEX_NEWLINE) { + if (stack_push(stack, type_rng_char, '\n')) + return REGEX_MEMORY_ERROR; + if (stack_push(stack, type_rng_char, '\r')) + return REGEX_MEMORY_ERROR; + compiler_common->dfa_size += 2; + } + if (stack_push(stack, type_rng_end, 1)) + return REGEX_MEMORY_ERROR; + begin = 0; + compiler_common->dfa_size += 2; + break; + + case '(' : + depth++; + if (stack_push(stack, type_open_br, 0)) + return REGEX_MEMORY_ERROR; + begin = 1; + break; + + case ')' : + if (depth == 0) + return REGEX_INVALID_REGEX; + depth--; + if (stack_push(stack, type_close_br, 0)) + return REGEX_MEMORY_ERROR; + begin = 0; + break; + + case '|' : + if (stack_push(stack, type_select, 0)) + return REGEX_MEMORY_ERROR; + begin = 1; + compiler_common->dfa_size += 2; + break; + + case '*' : + if (begin) + return REGEX_INVALID_REGEX; + if (stack_push(stack, type_asterisk, 0)) + return REGEX_MEMORY_ERROR; + compiler_common->dfa_size += 2; + break; + + case '?' : + case '+' : + if (begin) + return REGEX_INVALID_REGEX; + if (stack_push(stack, (*regex_string == '+') ? type_plus_sign : type_qestion_mark, 0)) + return REGEX_MEMORY_ERROR; + compiler_common->dfa_size++; + break; + + case '{' : + tmp = parse_iterator(regex_string, length, stack, &compiler_common->dfa_size, begin); + + if (tmp >= 0) { + length -= tmp; + regex_string += tmp; + } + else if (tmp == -1) + return REGEX_MEMORY_ERROR; + else { + /* Not a valid range expression. 
*/ + SLJIT_ASSERT(tmp == -2); + if (stack_push(stack, type_char, '{')) + return REGEX_MEMORY_ERROR; + compiler_common->dfa_size++; + } + break; + + case '[' : + tmp = parse_char_range(regex_string, length, compiler_common); + if (tmp >= 0) { + length -= tmp; + regex_string += tmp; + } + else if (tmp == -1) + return REGEX_MEMORY_ERROR; + else { + SLJIT_ASSERT(tmp == -2); + return REGEX_INVALID_REGEX; + } + begin = 0; + break; + + default: + if (length == 1 && *regex_string == '$') { + compiler_common->flags |= REGEX_MATCH_END; + break; + } + if (stack_push(stack, type_char, *regex_string)) + return REGEX_MEMORY_ERROR; + begin = 0; + compiler_common->dfa_size++; + break; + } + length--; + regex_string++; + } + + if (depth != 0) + return REGEX_INVALID_REGEX; + + if ((compiler_common->flags & (REGEX_MATCH_END | REGEX_NEWLINE)) == (REGEX_MATCH_END | REGEX_NEWLINE)) { + /* Replace REGEX_MATCH_END flag to REGEX_FAKE_MATCH_END */ + compiler_common->flags &= ~REGEX_MATCH_END; + compiler_common->flags |= REGEX_FAKE_MATCH_END; + /* and append a new-line search. */ + if (stack_push(stack, type_newline, 1)) + return REGEX_MEMORY_ERROR; + compiler_common->dfa_size++; + /* Begin intentionally kept as 1. 
*/ + } + + if (stack_push(stack, type_end, 0)) + return REGEX_MEMORY_ERROR; + + return REGEX_NO_ERROR; +} + +/* --------------------------------------------------------------------- */ +/* Generating machine state transitions */ +/* --------------------------------------------------------------------- */ + +#define PUT_TRANSITION(typ, val) \ + do { \ + --transitions_ptr; \ + transitions_ptr->type = typ; \ + transitions_ptr->value = val; \ + } while (0) + +static struct stack_item* handle_iteratives(struct stack_item *transitions_ptr, struct stack_item *transitions, struct stack *depth) +{ + struct stack_item *item; + + while (1) { + item = stack_top(depth); + + switch (item->type) { + case type_asterisk: + SLJIT_ASSERT(transitions[item->value].type == type_branch); + transitions[item->value].value = transitions_ptr - transitions; + PUT_TRANSITION(type_branch, item->value + 1); + break; + + case type_plus_sign: + SLJIT_ASSERT(transitions[item->value].type == type_branch); + transitions[item->value].value = transitions_ptr - transitions; + break; + + case type_qestion_mark: + PUT_TRANSITION(type_branch, item->value); + break; + + default: + return transitions_ptr; + } + stack_pop(depth); + } +} + +static int generate_transitions(struct compiler_common *compiler_common) +{ + struct stack *stack = &compiler_common->stack; + struct stack *depth = &compiler_common->depth; + struct stack_item *transitions_ptr; + struct stack_item *item; + + stack_init(depth); + compiler_common->dfa_transitions = SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); + if (!compiler_common->dfa_transitions) + return REGEX_MEMORY_ERROR; + + /* Go through the items of the stack and generate the necessary branches and jumps (edges of DFA). 
*/ + transitions_ptr = compiler_common->dfa_transitions + compiler_common->dfa_size; + while (stack->count > 0) { + item = stack_pop(stack); + switch (item->type) { + case type_begin: + case type_open_br: + item = stack_pop(depth); + if (item->type == type_select) + PUT_TRANSITION(type_branch, item->value + 1); + else + SLJIT_ASSERT(item->type == type_close_br); + if (stack->count == 0) + PUT_TRANSITION(type_begin, 0); + else + transitions_ptr = handle_iteratives(transitions_ptr, compiler_common->dfa_transitions, depth); + break; + + case type_end: + case type_close_br: + if (item->type == type_end) + *--transitions_ptr = *item; + if (stack_push(depth, type_close_br, transitions_ptr - compiler_common->dfa_transitions)) + return REGEX_MEMORY_ERROR; + break; + + case type_select: + item = stack_top(depth); + if (item->type == type_select) { + SLJIT_ASSERT(compiler_common->dfa_transitions[item->value].type == type_jump); + PUT_TRANSITION(type_branch, item->value + 1); + PUT_TRANSITION(type_jump, item->value); + item->value = transitions_ptr - compiler_common->dfa_transitions; + } + else { + SLJIT_ASSERT(item->type == type_close_br); + item->type = type_select; + PUT_TRANSITION(type_jump, item->value); + item->value = transitions_ptr - compiler_common->dfa_transitions; + } + break; + + case type_asterisk: + case type_plus_sign: + case type_qestion_mark: + if (item->type != type_qestion_mark) + PUT_TRANSITION(type_branch, 0); + if (stack_push(depth, item->type, transitions_ptr - compiler_common->dfa_transitions)) + return REGEX_MEMORY_ERROR; + break; + + case type_char: + case type_newline: + case type_rng_start: + /* Requires handle_iteratives. 
*/ + *--transitions_ptr = *item; + transitions_ptr = handle_iteratives(transitions_ptr, compiler_common->dfa_transitions, depth); + break; + + default: + *--transitions_ptr = *item; + break; + } + } + + SLJIT_ASSERT(compiler_common->dfa_transitions == transitions_ptr); + SLJIT_ASSERT(depth->count == 0); + return REGEX_NO_ERROR; +} + +#undef PUT_TRANSITION + +#ifdef REGEX_MATCH_VERBOSE + +static void verbose_transitions(struct compiler_common *compiler_common) +{ + struct stack_item *transitions_ptr = compiler_common->dfa_transitions; + struct stack_item *transitions_end = transitions_ptr + compiler_common->dfa_size; + struct stack_item *search_states_ptr = compiler_common->search_states; + int pos; + + printf("-----------------\nTransitions\n-----------------\n"); + pos = 0; + while (transitions_ptr < transitions_end) { + printf("[%3d] ", pos++); + if (search_states_ptr->type >= 0) + printf("(%3d) ", search_states_ptr->type); + switch (transitions_ptr->type) { + case type_begin: + printf("type_begin\n"); + break; + + case type_end: + printf("type_end\n"); + break; + + case type_char: + if (transitions_ptr->value >= ' ') + printf("type_char '%c'\n", transitions_ptr->value); + else + printf("type_char 0x%x\n", transitions_ptr->value); + break; + + case type_newline: + printf("type_newline %s\n", transitions_ptr->value ? "(end)" : "(begin)"); + break; + + case type_id: + printf("type_id %d\n", transitions_ptr->value); + break; + + case type_rng_start: + printf("type_rng_start %s\n", transitions_ptr->value ? 
"(invert)" : "(normal)"); + break; + + case type_rng_end: + printf("type_rng_end\n"); + break; + + case type_rng_char: + if (transitions_ptr->value >= ' ') + printf("type_rng_char '%c'\n", transitions_ptr->value); + else + printf("type_rng_char 0x%x\n", transitions_ptr->value); + break; + + case type_rng_left: + if (transitions_ptr->value >= ' ') + printf("type_rng_left '%c'\n", transitions_ptr->value); + else + printf("type_rng_left 0x%x\n", transitions_ptr->value); + break; + + case type_rng_right: + if (transitions_ptr->value >= ' ') + printf("type_rng_right '%c'\n", transitions_ptr->value); + else + printf("type_rng_right 0x%x\n", transitions_ptr->value); + break; + + case type_branch: + printf("type_branch -> %d\n", transitions_ptr->value); + break; + + case type_jump: + printf("type_jump -> %d\n", transitions_ptr->value); + break; + + default: + printf("UNEXPECTED TYPE\n"); + break; + } + transitions_ptr++; + search_states_ptr++; + } + printf("flags: "); + if (!(compiler_common->flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END | REGEX_NEWLINE | REGEX_ID_CHECK | REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END))) + printf("none "); + if (compiler_common->flags & REGEX_MATCH_BEGIN) + printf("REGEX_MATCH_BEGIN "); + if (compiler_common->flags & REGEX_MATCH_END) + printf("REGEX_MATCH_END "); + if (compiler_common->flags & REGEX_NEWLINE) + printf("REGEX_NEWLINE "); + if (compiler_common->flags & REGEX_ID_CHECK) + printf("REGEX_ID_CHECK "); + if (compiler_common->flags & REGEX_FAKE_MATCH_BEGIN) + printf("REGEX_FAKE_MATCH_BEGIN "); + if (compiler_common->flags & REGEX_FAKE_MATCH_END) + printf("REGEX_FAKE_MATCH_END "); + if (compiler_common->longest_range_size > 0) + printf("(longest range: %ld) ", (long)compiler_common->longest_range_size); + printf("\n"); +} + +#endif + +/* --------------------------------------------------------------------- */ +/* Utilities */ +/* --------------------------------------------------------------------- */ + +static int 
generate_search_states(struct compiler_common *compiler_common) +{ + struct stack_item *transitions_ptr = compiler_common->dfa_transitions; + struct stack_item *transitions_end = transitions_ptr + compiler_common->dfa_size; + struct stack_item *search_states_ptr; + struct stack_item *rng_start = NULL; + + compiler_common->terms_size = !(compiler_common->flags & REGEX_FAKE_MATCH_END) ? 1 : 2; + compiler_common->longest_range_size = 0; + compiler_common->search_states = SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); + if (!compiler_common->search_states) + return REGEX_MEMORY_ERROR; + + search_states_ptr = compiler_common->search_states; + while (transitions_ptr < transitions_end) { + switch (transitions_ptr->type) { + case type_begin: + case type_end: + search_states_ptr->type = 0; + break; + + case type_char: + search_states_ptr->type = compiler_common->terms_size++; + break; + + case type_newline: + if (transitions_ptr->value) + search_states_ptr->type = 1; + else + search_states_ptr->type = compiler_common->terms_size++; + SLJIT_ASSERT(search_states_ptr->type == 1 || search_states_ptr->type == 2); + break; + + case type_id: + if (transitions_ptr->value > 0) + compiler_common->flags |= REGEX_ID_CHECK; + search_states_ptr->type = -1; + break; + + case type_rng_start: + search_states_ptr->type = compiler_common->terms_size; + rng_start = search_states_ptr; + break; + + case type_rng_end: + search_states_ptr->type = compiler_common->terms_size++; + /* Ok, this is a blunt over estimation :) */ + if (compiler_common->longest_range_size < search_states_ptr - rng_start) + compiler_common->longest_range_size = search_states_ptr - rng_start; + break; + + default: + search_states_ptr->type = -1; + break; + } + search_states_ptr->value = -1; + search_states_ptr++; + transitions_ptr++; + } + return REGEX_NO_ERROR; +} + +static int trace_transitions(int from, struct compiler_common *compiler_common) +{ + int id = 0; + struct stack *stack = 
&compiler_common->stack; + struct stack *depth = &compiler_common->depth; + struct stack_item *dfa_transitions = compiler_common->dfa_transitions; + struct stack_item *search_states = compiler_common->search_states; + + SLJIT_ASSERT(search_states[from].type >= 0); + + from++; + + /* Be prepared for any paths (loops, etc). */ + while (1) { + if (dfa_transitions[from].type == type_id) + if (id < dfa_transitions[from].value) + id = dfa_transitions[from].value; + + if (search_states[from].value < id) { + /* Forward step. */ + if (search_states[from].value == -1) + if (stack_push(stack, 0, from)) + return REGEX_MEMORY_ERROR; + search_states[from].value = id; + + if (dfa_transitions[from].type == type_branch) { + if (stack_push(depth, id, from)) + return REGEX_MEMORY_ERROR; + from++; + continue; + } + else if (dfa_transitions[from].type == type_jump) { + from = dfa_transitions[from].value; + continue; + } + else if (search_states[from].type < 0) { + from++; + continue; + } + } + + /* Back tracking. 
*/ + if (depth->count > 0) { + id = stack_top(depth)->type; + from = dfa_transitions[stack_pop(depth)->value].value; + continue; + } + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Code generator */ +/* --------------------------------------------------------------------- */ + +#define TERM_OFFSET_OF(index, offs) (((index) * no_states + (offs)) * sizeof(sljit_sw)) +#define TERM_REL_OFFSET_OF(base, offs) ((base) + ((offs) * sizeof(sljit_sw))) + +#define EMIT_OP1(type, arg1, arg2, arg3, arg4) \ + CHECK(sljit_emit_op1(compiler, type, arg1, arg2, arg3, arg4)) + +#define EMIT_OP2(type, arg1, arg2, arg3, arg4, arg5, arg6) \ + CHECK(sljit_emit_op2(compiler, type, arg1, arg2, arg3, arg4, arg5, arg6)) + +#define EMIT_LABEL(label) \ + label = sljit_emit_label(compiler); \ + CHECK(!label) + +#define EMIT_JUMP(jump, type) \ + jump = sljit_emit_jump(compiler, type); \ + CHECK(!jump) + +#define EMIT_CMP(jump, type, arg1, arg2, arg3, arg4) \ + jump = sljit_emit_cmp(compiler, type, arg1, arg2, arg3, arg4); \ + CHECK(!jump) + +/* CHECK depends on the use case. 
*/

/* On allocation/emission failure, bail out of the current helper with a
   memory error. Redefined below with different failure results. */
#define CHECK(exp) \
	if (SLJIT_UNLIKELY(exp)) \
		return REGEX_MEMORY_ERROR

/* Emits code that unconditionally inserts every term reachable from the
   start state into the state list addressed by `reg`, linking the entries
   through their "next" slot and finally storing the new list head in
   R_NEXT_HEAD. When `reg` is R_CURR_STATE and a fake match-begin newline
   was prepended, tracing starts from transition 1 instead of 0. */
static int compile_uncond_tran(struct compiler_common *compiler_common, int reg)
{
	struct sljit_compiler *compiler = compiler_common->compiler;
	struct stack *stack = &compiler_common->stack;
	struct stack_item *search_states = compiler_common->search_states;
	int flags = compiler_common->flags;
	sljit_sw no_states = compiler_common->no_states;
	sljit_uw head = 0;
	sljit_sw offset, value;

	if (reg != R_CURR_STATE || !(compiler_common->flags & REGEX_FAKE_MATCH_BEGIN)) {
		CHECK(trace_transitions(0, compiler_common));
	}
	else {
		CHECK(trace_transitions(1, compiler_common));
	}

	while (stack->count > 0) {
		value = stack_pop(stack)->value;
		if (search_states[value].type >= 0) {
			offset = TERM_OFFSET_OF(search_states[value].type, 0);
			/* Link the entry into the list and record it as the new head. */
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 1), SLJIT_IMM, head);
			if (offset > 0)
				head = offset;

			if (!(flags & REGEX_MATCH_BEGIN)) {
				/* Slot 2 holds the match-begin index (taken from R_TEMP);
				   slot 3 (when present) holds the id. */
				EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 2), R_TEMP, 0);
				if (flags & REGEX_ID_CHECK) {
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 3), SLJIT_IMM, search_states[value].value);
				}
			}
			else if (flags & REGEX_ID_CHECK) {
				/* Without begin tracking the id lives in slot 2. */
				EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 2), SLJIT_IMM, search_states[value].value);
			}
		}
		search_states[value].value = -1;
	}
	if (reg == R_NEXT_STATE) {
		EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_NEXT_HEAD, 0);
	}
	else if (flags & REGEX_FAKE_MATCH_BEGIN) {
		SLJIT_ASSERT(compiler_common->dfa_transitions[1].type == type_newline && !compiler_common->dfa_transitions[1].value);
		offset = TERM_OFFSET_OF(search_states[1].type, 0);

		SLJIT_ASSERT(!(flags & REGEX_MATCH_BEGIN));

		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 1), SLJIT_IMM, head);
		head = offset;

		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 2), SLJIT_IMM, 1);
		if (flags & REGEX_ID_CHECK) {
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 3), SLJIT_IMM, 0);
		}
	}
	EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, head);
	return REGEX_NO_ERROR;
}

/* Emits code that inserts the terms reachable from term `curr_index` into
   the next-state list, but conditionally: when a term is already present
   (its "next" slot is not -1), the emitted code keeps whichever entry is
   better — earlier begin index and, under REGEX_ID_CHECK, higher id. */
static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw curr_index)
{
	struct sljit_compiler *compiler = compiler_common->compiler;
	struct stack *stack = &compiler_common->stack;
	struct stack_item *search_states = compiler_common->search_states;
	sljit_sw offset;
	int flags;
	sljit_sw no_states;
	sljit_sw value;
	struct sljit_jump *jump1;
	struct sljit_jump *jump2;
	struct sljit_jump *jump3;
	struct sljit_jump *jump4;
	struct sljit_jump *jump5;
	struct sljit_label *label1;

	flags = compiler_common->flags;
	no_states = compiler_common->no_states;

	EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 0);
	if (!(flags & (REGEX_ID_CHECK | REGEX_MATCH_BEGIN))) {
		/* Load the begin index of the current term once; it is reused
		   for every inserted entry below. */
		EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 2));
	}

	while (stack->count > 0) {
		value = stack_pop(stack)->value;
		if (search_states[value].type >= 0) {
#ifdef REGEX_MATCH_VERBOSE
			if (flags & REGEX_MATCH_VERBOSE)
				printf("-> (%3d:%3d) ", search_states[value].type, search_states[value].value);
#endif
			offset = TERM_OFFSET_OF(search_states[value].type, 0);

			if (!(flags & REGEX_ID_CHECK)) {
				if (!(flags & REGEX_MATCH_BEGIN)) {
					/* Check whether item is inserted. */
					EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
					if (offset > 0) {
						EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
					}
					EMIT_JUMP(jump2, SLJIT_JUMP);

					/* Check whether old index <= index. */
					EMIT_LABEL(label1);
					sljit_set_label(jump1, label1);

					EMIT_CMP(jump1, SLJIT_LESS_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);

					EMIT_LABEL(label1);
					sljit_set_label(jump2, label1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);

					EMIT_LABEL(label1);
					sljit_set_label(jump1, label1);
				}
				else {
					/* Check whether item is inserted. */
					EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
					if (offset > 0) {
						EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
					}
					EMIT_LABEL(label1);
					sljit_set_label(jump1, label1);
				}
			}
			else {
				if (!(flags & REGEX_MATCH_BEGIN)) {
					EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 2));

					/* Check whether item is inserted. */
					EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
					if (offset > 0) {
						EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
					}
					EMIT_JUMP(jump2, SLJIT_JUMP);

					/* Check whether old index != index. */
					EMIT_LABEL(label1);
					sljit_set_label(jump1, label1);

					EMIT_OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);
					EMIT_JUMP(jump1, SLJIT_LESS);
					EMIT_JUMP(jump3, SLJIT_NOT_EQUAL); /* Greater. */

					/* Old index == index. */
					EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 3));
					if (search_states[value].value > 0) {
						EMIT_CMP(jump4, SLJIT_GREATER, R_TEMP, 0, SLJIT_IMM, search_states[value].value);

						EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, search_states[value].value);
						EMIT_LABEL(label1);
						sljit_set_label(jump4, label1);
					}

					EMIT_OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * sizeof(sljit_sw), R_TEMP, 0);
					EMIT_JUMP(jump4, SLJIT_GREATER_EQUAL);
					EMIT_JUMP(jump5, SLJIT_JUMP);

					/* Overwrite index & id. */
					EMIT_LABEL(label1);
					sljit_set_label(jump3, label1);
					sljit_set_label(jump2, label1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);

					EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 3));
					if (search_states[value].value > 0) {
						EMIT_CMP(jump3, SLJIT_GREATER, R_TEMP, 0, SLJIT_IMM, search_states[value].value);

						EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, search_states[value].value);
						EMIT_LABEL(label1);
						sljit_set_label(jump3, label1);
					}

					EMIT_LABEL(label1);
					sljit_set_label(jump5, label1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * sizeof(sljit_sw), R_TEMP, 0);

					/* Exit. */
					EMIT_LABEL(label1);
					sljit_set_label(jump1, label1);
					sljit_set_label(jump4, label1);
				}
				else {
					EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 2));

					if (search_states[value].value > 0) {
						EMIT_CMP(jump1, SLJIT_GREATER, R_TEMP, 0, SLJIT_IMM, search_states[value].value);

						EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, search_states[value].value);
						EMIT_LABEL(label1);
						sljit_set_label(jump1, label1);
					}

					/* Check whether item is inserted. */
					EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
					if (offset > 0) {
						EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
					}
					EMIT_JUMP(jump2, SLJIT_JUMP);

					/* Check whether old id >= id. */
					EMIT_LABEL(label1);
					sljit_set_label(jump1, label1);

					EMIT_CMP(jump1, SLJIT_GREATER_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);

					EMIT_LABEL(label1);
					sljit_set_label(jump2, label1);
					EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);

					EMIT_LABEL(label1);
					sljit_set_label(jump1, label1);
				}
			}
		}
		search_states[value].value = -1;
	}

#ifdef REGEX_MATCH_VERBOSE
	if (flags & REGEX_MATCH_VERBOSE)
		printf("\n");
#endif
	return REGEX_NO_ERROR;
}

/* Emits the code run when term 0 (match end) is reached: updates the stored
   best_begin/best_end/best_id if the new match is better, and — in the
   non-begin-anchored case — emits a loop that removes now-obsolete entries
   from the state list. Control continues at `end_check_label`. */
static int compile_end_check(struct compiler_common *compiler_common, struct sljit_label *end_check_label)
{
	struct sljit_compiler *compiler = compiler_common->compiler;
	struct sljit_jump *jump;
	struct sljit_jump *clear_states_jump;
	struct sljit_label *label;
	struct sljit_label *leave_label;
	struct sljit_label *begin_loop_label;

	/* Priority order: best_begin > best_end > best_id.
	   In other words:
	       if (new best_begin > old best_begin) do nothing
	       otherwise we know that new_end > old_end, since R_CURR_INDEX ever increasing
	       therefore we must overwrite all best_* variables (new_id also contains the highest id for this turn). */

	/* Both R_CURR_CHAR and R_BEST_BEGIN used as temporary registers. */

	if (!(compiler_common->flags & REGEX_MATCH_BEGIN)) {
		EMIT_OP1(SLJIT_MOV, R_CURR_CHAR, 0, SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 2));
		EMIT_CMP(jump, !(compiler_common->flags & REGEX_MATCH_NON_GREEDY) ? SLJIT_LESS : SLJIT_LESS_EQUAL, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), R_CURR_CHAR, 0);
		sljit_set_label(jump, end_check_label);

		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), R_CURR_CHAR, 0);
		if (!(compiler_common->flags & (REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END))) {
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0);
		}
		else {
			/* Fake begin/end characters are not part of the real input;
			   subtract them from the reported match end. */
			if ((compiler_common->flags & (REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END)) == (REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END)) {
				EMIT_OP2(SLJIT_SUB, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0, SLJIT_IMM, 2);
			}
			else {
				EMIT_OP2(SLJIT_SUB, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0, SLJIT_IMM, 1);
			}
		}
		if (compiler_common->flags & REGEX_ID_CHECK) {
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 3));
		}

		EMIT_CMP(clear_states_jump, SLJIT_LESS, R_CURR_CHAR, 0, R_BEST_BEGIN, 0);

		EMIT_LABEL(leave_label);
		EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, R_CURR_CHAR, 0);
		EMIT_JUMP(jump, SLJIT_JUMP);
		sljit_set_label(jump, end_check_label);

		/* A loop to clear all states, which are > (or >=) than R_CURR_CHAR. */
		EMIT_LABEL(label);
		sljit_set_label(clear_states_jump, label);

		EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_NEXT_HEAD, 0);
		EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, 0);

		/* Begin of the loop. */
		EMIT_LABEL(begin_loop_label);
		EMIT_CMP(jump, SLJIT_EQUAL, R_TEMP, 0, SLJIT_IMM, 0);
		sljit_set_label(jump, leave_label);

		EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, R_CURR_STATE, 0);
		EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, SLJIT_MEM1(R_TEMP), sizeof(sljit_sw));
		EMIT_CMP(clear_states_jump, !(compiler_common->flags & REGEX_MATCH_NON_GREEDY) ? SLJIT_GREATER : SLJIT_GREATER_EQUAL, SLJIT_MEM1(R_TEMP), 2 * sizeof(sljit_sw), R_CURR_CHAR, 0);

		/* Case 1: keep this case. */
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_TEMP), sizeof(sljit_sw), R_NEXT_HEAD, 0);
		EMIT_OP2(SLJIT_SUB, R_NEXT_HEAD, 0, R_TEMP, 0, R_CURR_STATE, 0);

		EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_BEST_BEGIN, 0);
		EMIT_JUMP(jump, SLJIT_JUMP);
		sljit_set_label(jump, begin_loop_label);

		/* Case 2: remove this case. */
		EMIT_LABEL(label);
		sljit_set_label(clear_states_jump, label);

		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_TEMP), sizeof(sljit_sw), SLJIT_IMM, -1);

		EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_BEST_BEGIN, 0);
		EMIT_JUMP(jump, SLJIT_JUMP);
		sljit_set_label(jump, begin_loop_label);
	}
	else {
		EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, SLJIT_IMM, 0);
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, 0);
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0);
		if (compiler_common->flags & REGEX_ID_CHECK) {
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 2));
		}
		EMIT_JUMP(jump, SLJIT_JUMP);
		sljit_set_label(jump, end_check_label);
	}
	return REGEX_NO_ERROR;
}

/* Emits the comparisons that exit the fast-forward scanning loop: for every
   term reachable from the start state, jump to `fast_forward_label` when
   R_CURR_CHAR could begin that term (matching char, range, or newline). */
static int compile_leave_fast_forward(struct compiler_common *compiler_common, struct sljit_label *fast_forward_label)
{
	struct sljit_compiler *compiler = compiler_common->compiler;
	struct stack *stack = &compiler_common->stack;
	struct stack_item *dfa_transitions = compiler_common->dfa_transitions;
	struct stack_item *search_states = compiler_common->search_states;
	int ind;
	struct sljit_jump *jump;
	int init_range = 1, prev_value = 0;

	while (stack->count > 0) {
		ind = stack_pop(stack)->value;
		search_states[ind].value = -1;
		if (search_states[ind].type >= 0) {
			if (dfa_transitions[ind].type == type_char) {
				EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, dfa_transitions[ind].value);
				sljit_set_label(jump, fast_forward_label);
			}
			else if (dfa_transitions[ind].type == type_rng_start) {
				SLJIT_ASSERT(!dfa_transitions[ind].value);
				ind++;
				while (dfa_transitions[ind].type != type_rng_end) {
					if (dfa_transitions[ind].type == type_rng_char) {
						EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, dfa_transitions[ind].value);
						sljit_set_label(jump, fast_forward_label);
					}
					else {
						SLJIT_ASSERT(dfa_transitions[ind].type == type_rng_left);
						if (init_range) {
							EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_CHAR, 0);
							init_range = 0;
						}
						if (dfa_transitions[ind].value != prev_value) {
							/* Best compatibility to all archs: bias R_TEMP by the
							   range's lower bound so one unsigned compare suffices. */
							prev_value -= dfa_transitions[ind].value;
							if (prev_value < 0) {
								EMIT_OP2(SLJIT_SUB, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, -prev_value);
							}
							else {
								EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, prev_value);
							}
							prev_value = dfa_transitions[ind].value;
						}
						EMIT_CMP(jump, SLJIT_LESS_EQUAL, R_TEMP, 0, SLJIT_IMM, dfa_transitions[ind + 1].value - dfa_transitions[ind].value);
						sljit_set_label(jump, fast_forward_label);
						ind++;
					}
					ind++;
				}
			}
			else {
				SLJIT_ASSERT(dfa_transitions[ind].type == type_newline);
				EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\n');
				sljit_set_label(jump, fast_forward_label);
				EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\r');
				sljit_set_label(jump, fast_forward_label);
			}
		}
	}
	return REGEX_NO_ERROR;
}

/* Emits a newline test for term `ind`: if R_CURR_CHAR is neither '\n' nor
   '\r', the generated code skips this term by clearing its "next" slot and
   jumping to the next entry in the state list. */
static int compile_newline_check(struct compiler_common *compiler_common, sljit_sw ind)
{
	struct sljit_compiler *compiler = compiler_common->compiler;
	struct sljit_jump *jump1;
	struct sljit_jump *jump2;
	struct sljit_label *label;
	sljit_sw no_states;
	sljit_sw offset;

	/* Check whether a new-line character is found. */
	EMIT_CMP(jump1, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\n');
	EMIT_CMP(jump2, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\r');

	no_states = compiler_common->no_states;
	offset = TERM_OFFSET_OF(compiler_common->search_states[ind].type, 1);
	EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), offset);
	EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), offset, SLJIT_IMM, -1);
	CHECK(sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0));

	EMIT_LABEL(label);
	sljit_set_label(jump1, label);
	sljit_set_label(jump2, label);
	return REGEX_NO_ERROR;
}

#undef CHECK

/* compile_range_check returns an index (0 means failure), so CHECK must
   fail with 0 here instead of REGEX_MEMORY_ERROR. */
#define CHECK(exp) \
	if (SLJIT_UNLIKELY(exp)) \
		return 0

/* Binds every pending jump in the NULL-terminated `range_jump_list`
   to `label`. */
static SLJIT_INLINE void range_set_label(struct sljit_jump **range_jump_list, struct sljit_label *label)
{
	while (*range_jump_list) {
		sljit_set_label(*range_jump_list, label);
		range_jump_list++;
	}
}

/* Emits the character-class test starting at transition `ind`
   (a type_rng_start). Collects one jump per member/sub-range into
   range_jump_list; for a non-inverted class the non-matching path skips the
   term. Returns the index of the closing type_rng_end, or 0 on failure. */
static sljit_sw compile_range_check(struct compiler_common *compiler_common, sljit_sw ind)
{
	struct sljit_compiler *compiler = compiler_common->compiler;
	struct stack_item *dfa_transitions = compiler_common->dfa_transitions;
	struct sljit_jump **range_jump_list = compiler_common->range_jump_list;
	int invert = dfa_transitions[ind].value;
	struct sljit_label *label;
	sljit_sw no_states;
	sljit_sw offset;
	int init_range = 1, prev_value = 0;

	ind++;

	while (dfa_transitions[ind].type != type_rng_end) {
		if (dfa_transitions[ind].type == type_rng_char) {
			EMIT_CMP(*range_jump_list, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, dfa_transitions[ind].value);
			range_jump_list++;
		}
		else {
			SLJIT_ASSERT(dfa_transitions[ind].type == type_rng_left);
			if (init_range) {
				EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_CHAR, 0);
				init_range = 0;
			}
			if (dfa_transitions[ind].value != prev_value) {
				/* Best compatibility to all archs: bias R_TEMP by the range's
				   lower bound so one unsigned compare covers the interval. */
				prev_value -= dfa_transitions[ind].value;
				if (prev_value < 0) {
					EMIT_OP2(SLJIT_SUB, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, -prev_value);
				}
				else {
					EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, prev_value);
				}
				prev_value = dfa_transitions[ind].value;
			}
			EMIT_CMP(*range_jump_list, SLJIT_LESS_EQUAL, R_TEMP, 0, SLJIT_IMM, dfa_transitions[ind + 1].value - dfa_transitions[ind].value);
			range_jump_list++;
			ind++;
		}
		ind++;
	}

	*range_jump_list = NULL;

	if (!invert) {
		no_states = compiler_common->no_states;
		offset = TERM_OFFSET_OF(compiler_common->search_states[ind].type, 1);
		EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), offset);
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), offset, SLJIT_IMM, -1);
		CHECK(sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0));

		EMIT_LABEL(label);
		range_set_label(compiler_common->range_jump_list, label);
		/* Clears the jump list. */
		*compiler_common->range_jump_list = NULL;
	}
	return ind;
}

#undef TERM_OFFSET_OF
#undef EMIT_OP1
#undef EMIT_OP2
#undef EMIT_LABEL
#undef EMIT_JUMP
#undef EMIT_CMP
#undef CHECK

/* --------------------------------------------------------------------- */
/*  Main compiler                                                        */
/* --------------------------------------------------------------------- */

/* Redefined for regex_compile: `compiler_common` is a local struct here
   (not a pointer), and CHECK must break out of the BEGIN_GUARD block. */
#define TERM_OFFSET_OF(ind, offs) (((ind) * compiler_common.no_states + (offs)) * sizeof(sljit_sw))

#define EMIT_OP1(type, arg1, arg2, arg3, arg4) \
	CHECK(sljit_emit_op1(compiler_common.compiler, type, arg1, arg2, arg3, arg4))

#define EMIT_OP2(type, arg1, arg2, arg3, arg4, arg5, arg6) \
	CHECK(sljit_emit_op2(compiler_common.compiler, type, arg1, arg2, arg3, arg4, arg5, arg6))

#define EMIT_LABEL(label) \
	label = sljit_emit_label(compiler_common.compiler); \
	CHECK(!label)

#define EMIT_JUMP(jump, type) \
	jump = sljit_emit_jump(compiler_common.compiler, type); \
	CHECK(!jump)

#define EMIT_CMP(jump, type, arg1, arg2, arg3, arg4) \
	jump = sljit_emit_cmp(compiler_common.compiler, type, arg1, arg2, arg3, arg4); \
	CHECK(!jump)

/* A do {} while(0) expression helps to avoid goto statements. */
#define BEGIN_GUARD \
	do {

#define END_GUARD \
	} while(0);

#define CHECK(exp) \
	if (SLJIT_UNLIKELY(exp)) \
		break;

/* Compiles `regex_string` (of `length` characters, honoring `re_flags`)
   into a JIT-ed matcher: parses the pattern, generates DFA transitions and
   search states, then emits the continue-match function, one entry per term,
   and an init stub. Returns the machine, or NULL with *error set. */
struct regex_machine* regex_compile(const regex_char_t *regex_string, int length, int re_flags, int *error)
{
	struct compiler_common compiler_common;
	sljit_sw ind;
	int error_code, done, suggest_fast_forward;
	/* ID of an empty match (-1 if not reachable). */
	int empty_match_id;

	struct sljit_jump *jump;
	struct sljit_jump *best_match_found_jump;
	struct sljit_jump *fast_forward_jump = NULL;
	struct sljit_jump *length_is_zero_jump;
	struct sljit_jump *end_check_jump = NULL;
	struct sljit_jump *best_match_check_jump = NULL;
	struct sljit_jump *non_greedy_end_jump = NULL;
	struct sljit_label *label;
	struct sljit_label *end_check_label = NULL;
	struct sljit_label *start_label;
	struct sljit_label *fast_forward_label;
	struct sljit_label *fast_forward_return_label;

	if (error)
		*error = REGEX_NO_ERROR;
#ifdef REGEX_MATCH_VERBOSE
	compiler_common.flags = re_flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END | REGEX_MATCH_NON_GREEDY | REGEX_NEWLINE | REGEX_MATCH_VERBOSE);
#else
	compiler_common.flags = re_flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END | REGEX_MATCH_NON_GREEDY | REGEX_NEWLINE);
#endif

	/* Step 1: parsing (Left->Right).
	   Syntax check and AST generator. */
	error_code = parse(regex_string, length, &compiler_common);
	if (error_code) {
		stack_destroy(&compiler_common.stack);
		if (error)
			*error = error_code;
		return NULL;
	}

	/* Step 2: generating branches (Right->Left). */
	error_code = generate_transitions(&compiler_common);
	stack_destroy(&compiler_common.stack);
	stack_destroy(&compiler_common.depth);
	if (error_code) {
		if (compiler_common.dfa_transitions)
			SLJIT_FREE(compiler_common.dfa_transitions, NULL);
		if (error)
			*error = error_code;
		return NULL;
	}

	/* Step 3: Generate necessary data for depth-first search (Left->Right). */
	error_code = generate_search_states(&compiler_common);
	if (error_code) {
		SLJIT_FREE(compiler_common.dfa_transitions, NULL);
		if (error)
			*error = error_code;
		return NULL;
	}

#ifdef REGEX_MATCH_VERBOSE
	if (compiler_common.flags & REGEX_MATCH_VERBOSE)
		verbose_transitions(&compiler_common);
#endif

	/* Step 4: Left->Right generate code. */
	stack_init(&compiler_common.stack);
	stack_init(&compiler_common.depth);
	done = 0;
	compiler_common.machine = NULL;
	compiler_common.compiler = NULL;
	compiler_common.range_jump_list = NULL;

	BEGIN_GUARD

	compiler_common.machine = (struct regex_machine*)SLJIT_MALLOC(sizeof(struct regex_machine) + (compiler_common.terms_size - 1) * sizeof(sljit_uw), NULL);
	CHECK(!compiler_common.machine);

	compiler_common.compiler = sljit_create_compiler(NULL, NULL);
	CHECK(!compiler_common.compiler);

	if (compiler_common.longest_range_size > 0) {
		compiler_common.range_jump_list = (struct sljit_jump**)SLJIT_MALLOC(sizeof(struct sljit_jump*) * compiler_common.longest_range_size, NULL);
		CHECK(!compiler_common.range_jump_list);
	}

	/* Per-term slot count depends on which of begin-index and id need to
	   be tracked (2 base slots + one for each). */
	if ((compiler_common.flags & REGEX_ID_CHECK) && !(compiler_common.flags & REGEX_MATCH_BEGIN))
		compiler_common.no_states = 4;
	else if (!(compiler_common.flags & REGEX_ID_CHECK) && (compiler_common.flags & REGEX_MATCH_BEGIN))
		compiler_common.no_states = 2;
	else
		compiler_common.no_states = 3;

	compiler_common.machine->flags = compiler_common.flags;
	compiler_common.machine->no_states = compiler_common.no_states;
	compiler_common.machine->size = compiler_common.machine->no_states * compiler_common.terms_size;

	/* Study the regular expression. */
	empty_match_id = -1;
	suggest_fast_forward = 1;
	if (!(compiler_common.flags & REGEX_FAKE_MATCH_BEGIN)) {
		CHECK(trace_transitions(0, &compiler_common));
		while (compiler_common.stack.count > 0) {
			ind = stack_pop(&compiler_common.stack)->value;
			if (compiler_common.search_states[ind].type == 0) {
				SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_end);
				suggest_fast_forward = 0;
				empty_match_id = compiler_common.search_states[ind].value;
			}
			else if (compiler_common.search_states[ind].type > 0) {
				SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type != type_end);
				if (compiler_common.dfa_transitions[ind].type == type_rng_start && compiler_common.dfa_transitions[ind].value)
					suggest_fast_forward = 0;
			}
			compiler_common.search_states[ind].value = -1;
		}
	}
	else {
		SLJIT_ASSERT(compiler_common.dfa_transitions[1].type == type_newline);
		CHECK(trace_transitions(1, &compiler_common));
		while (compiler_common.stack.count > 0) {
			ind = stack_pop(&compiler_common.stack)->value;
			if (compiler_common.search_states[ind].type == 0) {
				SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_end);
				suggest_fast_forward = 0;
				empty_match_id = compiler_common.search_states[ind].value;
			}
			compiler_common.search_states[ind].value = -1;
		}
	}

	/* Step 4.1: Generate entry. */
	CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 5, 5, 0, 0, 0));

	/* Copy arguments to their place. */
	EMIT_OP1(SLJIT_MOV, R_REGEX_MATCH, 0, SLJIT_S0, 0);
	EMIT_OP1(SLJIT_MOV, R_STRING, 0, SLJIT_S1, 0);
	/* Later the length is decremented before the first character load,
	   so it is biased by one here. */
	EMIT_OP2(SLJIT_ADD, R_LENGTH, 0, SLJIT_S2, 0, SLJIT_IMM, 1);

	/* Init global registers. */
	EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current));
	EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next));
	EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head));
	EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin));
	EMIT_OP1(SLJIT_MOV, R_CURR_INDEX, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, index));

	/* Check whether the best match has already found in a previous frame. */
	EMIT_CMP(jump, SLJIT_EQUAL, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_quit), SLJIT_IMM, 0);
	EMIT_JUMP(best_match_found_jump, SLJIT_JUMP);

#ifdef REGEX_MATCH_VERBOSE
	if (compiler_common.flags & REGEX_MATCH_VERBOSE)
		printf("\n-----------------\nTrace\n-----------------\n");
#endif

	/* Step 4.2: Generate code for state 0. */
	EMIT_LABEL(label);
	sljit_emit_op0(compiler_common.compiler, SLJIT_ENDBR);
	compiler_common.machine->entry_addrs[0] = (sljit_uw)label;

	/* Swapping current and next. */
	EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_STATE, 0);
	EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, R_NEXT_STATE, 0);
	EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, R_TEMP, 0);

	/* Checking whether the best case needs to be updated. */
	if (!(compiler_common.flags & REGEX_MATCH_END)) {
		EMIT_CMP(end_check_jump, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 1), SLJIT_IMM, -1);
		EMIT_LABEL(end_check_label);
	}
	EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), TERM_REL_OFFSET_OF(0, 1), SLJIT_IMM, -1);
	EMIT_OP2(SLJIT_ADD, R_CURR_INDEX, 0, R_CURR_INDEX, 0, SLJIT_IMM, 1);

	/* Checking whether best case has already found. */
	if (!(compiler_common.flags & REGEX_MATCH_END) || (compiler_common.flags & REGEX_MATCH_BEGIN)) {
		if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) {
			/* We can bail out if no more active states remain and R_BEST_BEGIN != -1. */
			EMIT_CMP(best_match_check_jump, SLJIT_NOT_EQUAL, R_BEST_BEGIN, 0, SLJIT_IMM, -1);
		}
		else {
			/* We can bail out if no more active states remain (regardless of R_BEST_BEGIN). */
			EMIT_CMP(best_match_check_jump, SLJIT_EQUAL, R_NEXT_HEAD, 0, SLJIT_IMM, 0);
		}
	}

	EMIT_LABEL(start_label);
	sljit_set_label(jump, start_label);

	if (!(compiler_common.flags & REGEX_MATCH_BEGIN) && suggest_fast_forward) {
		EMIT_CMP(fast_forward_jump, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 0);
	}

	/* Loading the next character. */
	EMIT_OP2(SLJIT_SUB | SLJIT_SET_Z, R_LENGTH, 0, R_LENGTH, 0, SLJIT_IMM, 1);
	EMIT_JUMP(length_is_zero_jump, SLJIT_EQUAL);

	EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_STRING, 0);
#ifdef REGEX_USE_8BIT_CHARS
	EMIT_OP1(SLJIT_MOV_U8, R_CURR_CHAR, 0, SLJIT_MEM1(R_TEMP), 0);
	EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, 1);
#else
	EMIT_OP1(SLJIT_MOV_UH, R_CURR_CHAR, 0, SLJIT_MEM1(R_TEMP), 0);
	EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, 2);
#endif
	EMIT_OP1(SLJIT_MOV, R_STRING, 0, R_TEMP, 0);

#ifdef REGEX_MATCH_VERBOSE
	if (compiler_common.flags & REGEX_MATCH_VERBOSE) {
		printf("(%3d): ", 0);
		CHECK(trace_transitions(0, &compiler_common));
		while (compiler_common.stack.count > 0) {
			ind = stack_pop(&compiler_common.stack)->value;
			if (compiler_common.search_states[ind].type >= 0)
				printf("-> (%3d:%3d) ", compiler_common.search_states[ind].type, compiler_common.search_states[ind].value);
			compiler_common.search_states[ind].value = -1;
		}
		printf("\n");
	}
#endif

	EMIT_LABEL(fast_forward_return_label);
	if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) {
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 1);
		if (!(compiler_common.flags & REGEX_MATCH_END)) {
			EMIT_CMP(jump, SLJIT_NOT_EQUAL, R_BEST_BEGIN, 0, SLJIT_IMM, -1);
		}

		EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_INDEX, 0);
		CHECK(compile_uncond_tran(&compiler_common, R_NEXT_STATE));
		/* And branching to the first state. */
		CHECK(sljit_emit_ijump(compiler_common.compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0));

		if (!(compiler_common.flags & REGEX_MATCH_END)) {
			EMIT_LABEL(label);
			sljit_set_label(jump, label);
		}
	}
	/* This is the case where we only have to reset the R_NEXT_HEAD. */
	EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_NEXT_HEAD, 0);
	EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, 0);
	CHECK(sljit_emit_ijump(compiler_common.compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0));

	/* Fast-forward loop. */
	if (fast_forward_jump) {
		/* Quit from fast-forward loop. */
		EMIT_LABEL(fast_forward_label);
		EMIT_OP2(SLJIT_SUB, R_TEMP, 0, R_NEXT_HEAD, 0, SLJIT_IMM, 1);
		EMIT_OP1(SLJIT_MOV, R_LENGTH, 0, R_NEXT_STATE, 0);
		EMIT_OP1(SLJIT_MOV, R_STRING, 0, R_CURR_STATE, 0);
		EMIT_OP1(SLJIT_MOV, R_CURR_INDEX, 0, R_NEXT_HEAD, 0);
		EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next));
		EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current));
		EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head));

		/* Update the start field of the locations. */
		CHECK(trace_transitions(0, &compiler_common));
		while (compiler_common.stack.count > 0) {
			ind = stack_pop(&compiler_common.stack)->value;
			if (compiler_common.search_states[ind].type >= 0) {
				EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(compiler_common.search_states[ind].type, 2), R_TEMP, 0);
			}
			compiler_common.search_states[ind].value = -1;
		}
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 0);
		EMIT_JUMP(jump, SLJIT_JUMP);
		sljit_set_label(jump, fast_forward_return_label);

		/* Start fast-forward. */
		EMIT_LABEL(label);
		sljit_set_label(fast_forward_jump, label);

		/* Moving everything to registers: the state registers are repurposed
		   as the scan cursor/counter while fast-forwarding. */
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next), R_NEXT_STATE, 0);
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current), R_CURR_STATE, 0);
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head), R_NEXT_HEAD, 0);
		EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, R_LENGTH, 0);
		EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, R_STRING, 0);
		EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, R_CURR_INDEX, 0);

		/* Fast forward mainloop. */
		EMIT_LABEL(label);
		EMIT_OP2(SLJIT_SUB | SLJIT_SET_Z, R_NEXT_STATE, 0, R_NEXT_STATE, 0, SLJIT_IMM, 1);
		EMIT_JUMP(fast_forward_jump, SLJIT_EQUAL);

#ifdef REGEX_USE_8BIT_CHARS
		EMIT_OP1(SLJIT_MOV_U8, R_CURR_CHAR, 0, SLJIT_MEM1(R_CURR_STATE), 0);
		EMIT_OP2(SLJIT_ADD, R_CURR_STATE, 0, R_CURR_STATE, 0, SLJIT_IMM, 1);
#else
		EMIT_OP1(SLJIT_MOV_UH, R_CURR_CHAR, 0, SLJIT_MEM1(R_CURR_STATE), 0);
		EMIT_OP2(SLJIT_ADD, R_CURR_STATE, 0, R_CURR_STATE, 0, SLJIT_IMM, 2);
#endif

		CHECK(trace_transitions(0, &compiler_common));
		CHECK(compile_leave_fast_forward(&compiler_common, fast_forward_label));

		EMIT_OP2(SLJIT_ADD, R_NEXT_HEAD, 0, R_NEXT_HEAD, 0, SLJIT_IMM, 1);
		EMIT_JUMP(jump, SLJIT_JUMP);
		sljit_set_label(jump, label);

		/* String is finished. */
		EMIT_LABEL(label);
		sljit_set_label(fast_forward_jump, label);
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, index), R_NEXT_HEAD, 0);
		EMIT_JUMP(fast_forward_jump, SLJIT_JUMP);
	}

	/* End check. */
	if (end_check_jump) {
		EMIT_LABEL(label);
		sljit_set_label(end_check_jump, label);

		if (!(compiler_common.flags & REGEX_MATCH_NON_GREEDY) || !(compiler_common.flags & REGEX_MATCH_BEGIN)) {
			CHECK(compile_end_check(&compiler_common, end_check_label));
		}
		else {
			/* Since we leave, we do not need to update the R_BEST_BEGIN. */
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, 0);
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0);
			if (compiler_common.flags & REGEX_ID_CHECK) {
				EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 2));
			}
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_quit), SLJIT_IMM, 1);
			EMIT_JUMP(non_greedy_end_jump, SLJIT_JUMP);
		}
	}

	/* Finish check. */
	if (best_match_check_jump) {
		EMIT_LABEL(label);
		sljit_set_label(best_match_check_jump, label);

		if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) {
			EMIT_CMP(jump, SLJIT_NOT_EQUAL, R_NEXT_HEAD, 0, SLJIT_IMM, 0);
			sljit_set_label(jump, start_label);
		}
		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_quit), SLJIT_IMM, 1);
	}

	/* Leaving matching and storing the necessary values. */
	EMIT_LABEL(label);
	sljit_set_label(length_is_zero_jump, label);
	if (non_greedy_end_jump)
		sljit_set_label(non_greedy_end_jump, label);

	EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, index), R_CURR_INDEX, 0);
	EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head), R_NEXT_HEAD, 0);
	EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next), R_NEXT_STATE, 0);
	EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current), R_CURR_STATE, 0);

	/* Exit from JIT. */
	EMIT_LABEL(label);
	sljit_set_label(best_match_found_jump, label);
	if (fast_forward_jump)
		sljit_set_label(fast_forward_jump, label);
	CHECK(sljit_emit_return(compiler_common.compiler, SLJIT_UNUSED, 0, 0));

	/* One code block per term; its label address is stashed in entry_addrs
	   and resolved to a real address after code generation below. */
	for (ind = 1; ind < compiler_common.dfa_size - 1; ind++) {
		if (compiler_common.search_states[ind].type >= 0) {
			SLJIT_ASSERT(compiler_common.search_states[ind].type < compiler_common.terms_size);
			EMIT_LABEL(label);
			sljit_emit_op0(compiler_common.compiler, SLJIT_ENDBR);
			compiler_common.machine->entry_addrs[compiler_common.search_states[ind].type] = (sljit_uw)label;

			if (compiler_common.dfa_transitions[ind].type == type_char) {
				EMIT_CMP(jump, SLJIT_NOT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, compiler_common.dfa_transitions[ind].value);
			}
			else if (compiler_common.dfa_transitions[ind].type == type_rng_start) {
				ind = compile_range_check(&compiler_common, ind);
				CHECK(!ind);
			}
			else {
				SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_newline);
				CHECK(compile_newline_check(&compiler_common, ind));
			}

			CHECK(trace_transitions(ind, &compiler_common));
#ifdef REGEX_MATCH_VERBOSE
			if (compiler_common.flags & REGEX_MATCH_VERBOSE)
				printf("(%3d): ", compiler_common.search_states[ind].type);
#endif
			CHECK(compile_cond_tran(&compiler_common, compiler_common.search_states[ind].type));

			if (compiler_common.dfa_transitions[ind].type == type_char) {
				EMIT_LABEL(label);
				sljit_set_label(jump, label);
			}
			else if (compiler_common.dfa_transitions[ind].type == type_rng_end) {
				EMIT_LABEL(label);
				range_set_label(compiler_common.range_jump_list, label);
			}
			else {
				SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_newline);
			}

			/* Branch to the next item in the list. */
			EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(compiler_common.search_states[ind].type, 1));
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(compiler_common.search_states[ind].type, 1), SLJIT_IMM, -1);
			CHECK(sljit_emit_ijump(compiler_common.compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0));
		}
	}

	if (ind == compiler_common.dfa_size - 1) {
		/* Generate an init stub function. */
		EMIT_LABEL(label);
		CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 3, 0, 0, 0));

		if (empty_match_id == -1) {
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, -1);
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_IMM, 0);
		}
		else {
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, 0);
			EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_IMM, empty_match_id);
		}

		EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, index), SLJIT_IMM, !(compiler_common.flags & REGEX_FAKE_MATCH_BEGIN) ? 1 : 2);

		if (!(compiler_common.flags & REGEX_MATCH_NON_GREEDY) || empty_match_id == -1) {
			/* The else is a really rare event, so we still generate an empty function instead of a runtime pointer check. */
			SLJIT_ASSERT(R_CURR_STATE == SLJIT_S0);
			if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) {
				/* R_CURR_INDEX (put to R_TEMP) is zero. */
				EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, 0);
			}
			CHECK(compile_uncond_tran(&compiler_common, R_CURR_STATE));
		}
		else {
			EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, 0);
		}
		CHECK(sljit_emit_return(compiler_common.compiler, SLJIT_MOV, R_NEXT_HEAD, 0));

		compiler_common.machine->continue_match = sljit_generate_code(compiler_common.compiler);
#ifndef SLJIT_INDIRECT_CALL
		compiler_common.machine->u.init_match = (void*)(sljit_sw)sljit_get_label_addr(label);
#else
		sljit_set_function_context(&compiler_common.machine->u.init_match, &compiler_common.machine->context, sljit_get_label_addr(label), regex_compile);
#endif
#ifdef REGEX_MATCH_VERBOSE
		if (compiler_common.flags & REGEX_MATCH_VERBOSE)
			printf("Continue match: %p Init match: %p\n\n", compiler_common.machine->continue_match, compiler_common.machine->u.init_match);
#endif
		if (compiler_common.machine->continue_match) {
			/* Resolve the stashed label pointers to generated-code addresses. */
			for (ind = 0; ind < compiler_common.terms_size; ++ind)
				compiler_common.machine->entry_addrs[ind] = sljit_get_label_addr((struct sljit_label*)compiler_common.machine->entry_addrs[ind]);
			done = 1;
		}
	}
	END_GUARD

	/* Common cleanup path for both success and failure; `done` decides
	   whether the machine is returned or freed. */
	stack_destroy(&compiler_common.stack);
	stack_destroy(&compiler_common.depth);
	SLJIT_FREE(compiler_common.dfa_transitions, NULL);
	SLJIT_FREE(compiler_common.search_states, NULL);
	if (compiler_common.range_jump_list)
		SLJIT_FREE(compiler_common.range_jump_list, NULL);
	if (compiler_common.compiler)
		sljit_free_compiler(compiler_common.compiler);
	if (done)
		return compiler_common.machine;

	if (compiler_common.machine) {
		SLJIT_FREE(compiler_common.machine, NULL);
	}
	if (error)
		*error = REGEX_MEMORY_ERROR;
	return NULL;
}

#undef TERM_OFFSET_OF
#undef EMIT_OP1
#undef EMIT_OP2
#undef EMIT_LABEL
#undef EMIT_JUMP
#undef EMIT_CMP
#undef BEGIN_GUARD
#undef END_GUARD
#undef CHECK

/* Releases the generated code and the machine itself. */
void regex_free_machine(struct regex_machine *machine)
{
	sljit_free_code(machine->continue_match, NULL);
	SLJIT_FREE(machine, NULL);
}

const
char* regex_get_platform_name(void) +{ + return sljit_get_platform_name(); +} + +/* --------------------------------------------------------------------- */ +/* Mathching utilities */ +/* --------------------------------------------------------------------- */ + +struct regex_match* regex_begin_match(struct regex_machine *machine) +{ + sljit_sw *ptr1; + sljit_sw *ptr2; + sljit_sw *end; + sljit_sw *entry_addrs; + + struct regex_match *match = (struct regex_match*)SLJIT_MALLOC(sizeof(struct regex_match) + (machine->size * 2 - 1) * sizeof(sljit_sw), NULL); + if (!match) + return NULL; + + ptr1 = match->states; + ptr2 = match->states + machine->size; + end = ptr2; + entry_addrs = (sljit_sw*)machine->entry_addrs; + + match->current = ptr1; + match->next = ptr2; + match->head = 0; + match->machine = machine; + + /* Init machine states. */ + switch (machine->no_states) { + case 2: + while (ptr1 < end) { + *ptr1++ = *entry_addrs; + *ptr2++ = *entry_addrs++; + *ptr1++ = -1; + *ptr2++ = -1; + } + break; + + case 3: + while (ptr1 < end) { + *ptr1++ = *entry_addrs; + *ptr2++ = *entry_addrs++; + *ptr1++ = -1; + *ptr2++ = -1; + *ptr1++ = 0; + *ptr2++ = 0; + } + break; + + case 4: + while (ptr1 < end) { + *ptr1++ = *entry_addrs; + *ptr2++ = *entry_addrs++; + *ptr1++ = -1; + *ptr2++ = -1; + *ptr1++ = 0; + *ptr2++ = 0; + *ptr1++ = 0; + *ptr2++ = 0; + } + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + + SLJIT_ASSERT(ptr1 == end); + + match->u.continue_match = machine->continue_match; + + regex_reset_match(match); + return match; +} + +void regex_reset_match(struct regex_match *match) +{ + struct regex_machine *machine = match->machine; + sljit_sw current, ind; + sljit_sw *current_ptr; + + match->best_end = 0; + match->fast_quit = 0; + match->fast_forward = 0; + + if (match->head != 0) { + /* Clear the current state. 
*/ + current = match->head; + current_ptr = match->current; + do { + ind = (current / sizeof(sljit_sw)) + 1; + current = current_ptr[ind]; + current_ptr[ind] = -1; + } while (current != 0); + } + match->head = machine->u.call_init(match->current, match); +} + +void regex_free_match(struct regex_match *match) +{ + SLJIT_FREE(match, NULL); +} + +void regex_continue_match(struct regex_match *match, const regex_char_t *input_string, int length) +{ + match->u.call_continue(match, input_string, length); +} + +int regex_get_result(struct regex_match *match, int *end, int *id) +{ + int flags = match->machine->flags; + sljit_sw no_states; + + *end = match->best_end; + *id = match->best_id; + if (!(flags & (REGEX_MATCH_END | REGEX_FAKE_MATCH_END))) + return match->best_begin; + + if (flags & REGEX_FAKE_MATCH_END) { + SLJIT_ASSERT(!(flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END))); + if (match->best_begin != -1) + return match->best_begin; + + no_states = match->machine->no_states; + if (match->current[no_states + 1] == -1) + return -1; + if (flags & REGEX_ID_CHECK) + *id = match->current[no_states + 3]; + if (!(flags & REGEX_FAKE_MATCH_BEGIN)) + *end = match->index - 1; + else + *end = match->index - 2; + return match->current[no_states + 2]; + } + else { + /* Check the status of the last code. */ + if (!(flags & REGEX_MATCH_BEGIN)) { + /* No shortcut in this case. */ + if (!(flags & REGEX_ID_CHECK)) { + if (match->current[1] == -1) + return -1; + *end = match->index - 1; + return match->current[2]; + } + + if (match->current[1] == -1) + return -1; + *end = match->index - 1; + *id = match->current[3]; + return match->current[2]; + } + + /* Shortcut is possible in this case. 
*/ + if (!(flags & REGEX_ID_CHECK)) { + if (match->current[1] == -1 || match->head == -1) + return -1; + *end = match->index - 1; + return 0; + } + + if (match->current[1] == -1 || match->head == -1) + return -1; + *end = match->index - 1; + *id = match->current[2]; + return 0; + } +} + +int regex_is_match_finished(struct regex_match *match) +{ + return match->fast_quit; +} + +#ifdef REGEX_MATCH_VERBOSE +void regex_continue_match_debug(struct regex_match *match, const regex_char_t *input_string, int length) +{ + sljit_sw *ptr; + sljit_sw *end; + sljit_sw count; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_sw current; +#endif + sljit_sw no_states = match->machine->no_states; + sljit_sw len = match->machine->size; + + while (length > 0) { + match->u.call_continue(match, input_string, 1); + + if (match->fast_forward) { + if (match->machine->flags & REGEX_MATCH_VERBOSE) + printf("fast forward\n"); + } + + /* Verbose (first). */ + if (match->machine->flags & REGEX_MATCH_VERBOSE) { + ptr = match->current; + end = ptr + len; + count = 0; + printf("'%c' (%3ld->%3ld [%3ld]) ", *input_string, (long)match->best_begin, (long)match->best_end, (long)match->best_id); + while (ptr < end) { + printf("[%3ld:", (long)count++); + switch (no_states) { + case 2: + if (ptr[1] != -1) + printf("+] "); + else + printf(" ] "); + break; + + case 3: + if (ptr[1] != -1) + printf("+,%3ld] ", (long)ptr[2]); + else + printf(" ,XXX] "); + break; + + case 4: + if (ptr[1] != -1) + printf("+,%3ld,%3ld] ", (long)ptr[2], (long)ptr[3]); + else + printf(" ,XXX,XXX] "); + break; + } + ptr += no_states; + } + printf("\n"); + } + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + /* Sanity check (later). */ + ptr = match->next; + end = ptr + len; + while (ptr < end) { + SLJIT_ASSERT(ptr[1] == -1); + ptr += no_states; + } + + /* Check number of active elements. 
*/ + ptr = match->current + no_states; + end = ptr + len - no_states; + count = 0; + while (ptr < end) { + if (ptr[1] != -1) + count++; + ptr += no_states; + } + + /* Check chain list. */ + current = match->head; + ptr = match->current; + while (current != 0) { + SLJIT_ASSERT(current >= 0 && current < len * sizeof(sljit_sw)); + SLJIT_ASSERT((current % (no_states * sizeof(sljit_sw))) == 0); + SLJIT_ASSERT(count > 0); + current = ptr[(current / sizeof(sljit_sw)) + 1]; + count--; + } + SLJIT_ASSERT(count == 0); +#endif + + if (match->fast_quit) { + /* the machine has stopped working. */ + if (match->machine->flags & REGEX_MATCH_VERBOSE) + printf("Best match has found\n"); + break; + } + + input_string++; + length--; + } +} +#endif diff --git a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.h b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.h new file mode 100644 index 0000000000..95c55ff05b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexJIT.h @@ -0,0 +1,106 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _REGEX_JIT_H_ +#define _REGEX_JIT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Character type config. */ +#define REGEX_USE_8BIT_CHARS + +#ifdef REGEX_USE_8BIT_CHARS +typedef char regex_char_t; +#else +typedef wchar_t regex_char_t; +#endif + +/* Error codes. */ +#define REGEX_NO_ERROR 0 +#define REGEX_MEMORY_ERROR 1 +#define REGEX_INVALID_REGEX 2 + +/* Note: large, nested {a,b} iterations can blow up the memory consumption + a{n,m} is replaced by aa...aaa?a?a?a?a? (n >= 0, m > 0) + \__n__/\____m___/ + a{n,} is replaced by aa...aaa+ (n > 0) + \_n-1_/ +*/ + +/* The value returned by regex_compile. Can be used for multiple matching. */ +struct regex_machine; + +/* A matching state. */ +struct regex_match; + +/* Note: REGEX_MATCH_BEGIN and REGEX_MATCH_END does not change the parsing + (Hence ^ and $ are parsed normally). + Force matching to start from begining of the string (same as ^). */ +#define REGEX_MATCH_BEGIN 0x01 +/* Force matching to continue until the last character (same as $). */ +#define REGEX_MATCH_END 0x02 +/* Changes . to [^\r\n] + Note: [...] and [^...] are NOT affected at all (as other regex engines do). */ +#define REGEX_NEWLINE 0x04 +/* Non greedy matching. In case of Thompson (non-recursive) algorithm, + it (usually) does not have a significant speed gain. */ +#define REGEX_MATCH_NON_GREEDY 0x08 +/* Verbose. This define can be commented out, which disables all verbose features. 
*/ +#define REGEX_MATCH_VERBOSE 0x10 + +/* If error occures the function returns NULL, and the error code returned in error variable. + You can pass NULL to error if you don't care about the error code. + The re_flags argument contains the default REGEX_MATCH flags. See above. */ +struct regex_machine* regex_compile(const regex_char_t *regex_string, int length, int re_flags, int *error); +void regex_free_machine(struct regex_machine *machine); + +/* Create and init match structure for a given machine. */ +struct regex_match* regex_begin_match(struct regex_machine *machine); +void regex_reset_match(struct regex_match *match); +void regex_free_match(struct regex_match *match); + +/* Pattern matching. + regex_continue_match does not support REGEX_MATCH_VERBOSE flag. */ +void regex_continue_match(struct regex_match *match, const regex_char_t *input_string, int length); +int regex_get_result(struct regex_match *match, int *end, int *id); +/* Returns true, if the best match has already found. */ +int regex_is_match_finished(struct regex_match *match); + +/* Only exists if VERBOSE is defined in regexJIT.c + Do both sanity check and verbose. + (The latter only if REGEX_MATCH_VERBOSE was passed to regex_compile) */ +void regex_continue_match_debug(struct regex_match *match, const regex_char_t *input_string, int length); + +/* Misc. */ +const char* regex_get_platform_name(void); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexMain.c b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexMain.c new file mode 100644 index 0000000000..4e3aac0fdd --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/regex_src/regexMain.c @@ -0,0 +1,335 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Must be the first one. Must not depend on any other include. 
*/ +#include "sljitLir.h" +#include "regexJIT.h" + +#include + +#if defined _WIN32 || defined _WIN64 +#define COLOR_RED +#define COLOR_GREEN +#define COLOR_ARCH +#define COLOR_DEFAULT +#else +#define COLOR_RED "\33[31m" +#define COLOR_GREEN "\33[32m" +#define COLOR_ARCH "\33[33m" +#define COLOR_DEFAULT "\33[0m" +#endif + +#ifdef REGEX_USE_8BIT_CHARS +#define S(str) str +#else +#define S(str) L##str +#endif + +#ifdef REGEX_MATCH_VERBOSE +void verbose_test(regex_char_t *pattern, regex_char_t *string) +{ + int error; + regex_char_t *ptr; + struct regex_machine* machine; + struct regex_match* match; + int begin, end, id; + + ptr = pattern; + while (*ptr) + ptr++; + + printf("Start test '%s' matches to '%s'\n", pattern, string); + machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error); + + if (error) { + printf("WARNING: Error %d\n", error); + return; + } + if (!machine) { + printf("ERROR: machine must be exists. Report this bug, please\n"); + return; + } + + match = regex_begin_match(machine); + if (!match) { + printf("WARNING: Not enough memory for matching\n"); + regex_free_machine(machine); + return; + } + + ptr = string; + while (*ptr) + ptr++; + + regex_continue_match_debug(match, string, ptr - string); + + begin = regex_get_result(match, &end, &id); + printf("Math returns: %3d->%3d [%3d]\n", begin, end, id); + + regex_free_match(match); + regex_free_machine(machine); +} +#endif + +struct test_case { + int begin; /* Expected begin. */ + int end; /* Expected end. */ + int id; /* Expected id. */ + int finished; /* -1 : don't care, 0 : false, 1 : true. */ + int flags; /* REGEX_MATCH_* */ + const regex_char_t *pattern; /* NULL : use the previous pattern. */ + const regex_char_t *string; /* NULL : end of tests. 
*/ +}; + +void run_tests(struct test_case* test, int verbose, int silent) +{ + int error; + const regex_char_t *ptr; + struct regex_machine* machine = NULL; + struct regex_match* match; + int begin, end, id, finished; + int success = 0, fail = 0; + + if (!verbose && !silent) + printf("Pass -v to enable verbose, -s to disable this hint.\n\n"); + + for ( ; test->string ; test++) { + if (verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + fail++; + + if (test->pattern) { + if (machine) + regex_free_machine(machine); + + ptr = test->pattern; + while (*ptr) + ptr++; + + machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error); + + if (error) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + printf("ABORT: Error %d\n", error); + return; + } + if (!machine) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + printf("ABORT: machine must be exists. Report this bug, please\n"); + return; + } + } + else if (test->flags != 0) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + printf("ABORT: flag must be 0 if no pattern\n"); + return; + } + + ptr = test->string; + while (*ptr) + ptr++; + + match = regex_begin_match(machine); +#ifdef REGEX_MATCH_VERBOSE + if (!match) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + printf("ABORT: Not enough memory for matching\n"); + regex_free_machine(machine); + return; + } + regex_continue_match_debug(match, test->string, ptr - test->string); + begin = regex_get_result(match, &end, &id); + finished = regex_is_match_finished(match); + + if (begin != test->begin || end != test->end || id != test->id) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? 
test->pattern : "[[REUSE]]", test->string); + printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); + continue; + } + if (test->finished != -1 && test->finished != !!finished) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + printf("FAIL A: finish check\n"); + continue; + } +#endif + + regex_reset_match(match); + regex_continue_match(match, test->string, ptr - test->string); + begin = regex_get_result(match, &end, &id); + finished = regex_is_match_finished(match); + regex_free_match(match); + + if (begin != test->begin || end != test->end || id != test->id) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); + continue; + } + if (test->finished != -1 && test->finished != !!finished) { + if (!verbose) + printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); + printf("FAIL B: finish check\n"); + continue; + } + + if (verbose) + printf("SUCCESS\n"); + fail--; + success++; + } + if (machine) + regex_free_machine(machine); + + printf("REGEX tests: "); + if (fail == 0) + printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); + else + printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail)); + printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name()); +} + +/* Testing. 
*/ + +static struct test_case tests[] = { +{ 3, 7, 0, -1, 0, + S("text"), S("is textile") }, +{ 0, 10, 0, -1, 0, + S("^(ab|c)*?d+(es)?"), S("abccabddeses") }, +{ -1, 0, 0, 1, 0, + S("^a+"), S("saaaa") }, +{ 3, 6, 0, 0, 0, + S("(a+|b+)$"), S("saabbb") }, +{ 1, 6, 0, 0, 0, + S("(a+|b+){,2}$"), S("saabbb") }, +{ 1, 6, 0, 1, 0, + S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") }, +{ 1, 6, 0, 1, 0, + S("(abc(aa)?|(cab+){2})"), S("cabcaa") }, +{ -1, 0, 0, 1, 0, + S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") }, +{ 0, 3, 1, -1, 0, + S("^(ab{001!})?c"), S("abcde") }, +{ 1, 15, 2, -1, 0, + S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") }, +{ 2, 9, 0, -1, 0, + NULL, S("cacaadaadaa") }, +{ -1, 0, 0, -1, REGEX_MATCH_BEGIN, + S("(((ab?c|d{1})))"), S("ad") }, +{ 0, 9, 3, -1, REGEX_MATCH_BEGIN, + S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") }, +{ 1, 6, 0, 0, REGEX_MATCH_END, + S("(a+(bb|cc?)?){4,}"), S("maaaac") }, +{ 3, 12, 1, 0, REGEX_MATCH_END, + S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") }, +{ 1, 2, 3, -1, 0, + S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") }, +{ 1, 4, 2, 1, 0, + NULL, S("sxxaxxxaccacca") }, +{ 0, 2, 1, 1, 0, + NULL, S("ccdcdcdddddcdccccd") }, +{ 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY, + S("^a+a+a+"), S("aaaaaa") }, +{ 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY, + S("a+a+a+"), S("bbaaaaaa") }, +{ 1, 4, 0, 1, 0, + S("baa|a+"), S("sbaaaaaa") }, +{ 0, 6, 0, 1, 0, + S("baaa|baa|sbaaaa"), S("sbaaaaa") }, +{ 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY, + S("baaa|baa"), S("xbaaa") }, +{ 0, 0, 3, 1, 0, + S("{3!}"), S("xx") }, +{ 0, 0, 1, 1, 0, + S("{1!}(a{2!})*"), S("xx") }, +{ 0, 2, 2, 0, 0, + NULL, S("aa") }, +{ 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY, + S("{1!}(a{2!})*"), S("aaxx") }, +{ 4, 12, 0, 1, 0, + S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") }, +{ 3, 7, 1, 1, 0, + S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") }, +{ 0, 8, 3, 0, 0, + S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") }, +{ 0, 9, 0, 0, 0, + NULL, S("x-y[-][]x") }, +{ 2, 8, 0, 1, 0, + 
S("<(/{1!})?[^>]+>"), S(" ") }, +{ 2, 9, 1, 1, 0, + NULL, S(" ") }, +{ 2, 9, 0, 1, 0, + S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") }, +{ 1, 4, 0, 1, 0, + S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") }, +{ 4, 11, 0, 0, 0, + NULL, S("ssaymmaa_ccl") }, +{ 3, 6, 0, 1, REGEX_NEWLINE, + S(".a[^k]"), S("\na\nxa\ns") }, +{ 0, 2, 0, 1, REGEX_NEWLINE, + S("^a+"), S("aa\n") }, +{ 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */, + NULL, S("\naaa\n") }, +{ 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */, + NULL, S("\n\na\n") }, +{ 0, 2, 0, 1, REGEX_NEWLINE, + S("a+$"), S("aa\n") }, +{ 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */, + NULL, S("aaa") }, +{ 2, 4, 1, 1, REGEX_NEWLINE, + S("^a(a{1!})*$"), S("\n\naa\n\n") }, +{ 0, 1, 0, 0, 0 /* REGEX_NEWLINE */, + NULL, S("a") }, +{ -1, 0, 0, -1, 0 /* REGEX_NEWLINE */, + NULL, S("ab\nba") }, +{ -1, 0, 0, 0, 0, + NULL, NULL } +}; + +int main(int argc, char* argv[]) +{ + int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); + +/* verbose_test("a((b)((c|d))|)c|"); */ +/* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */ +/* verbose_test("{3!}({3})({0!}){,"); */ +/* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */ +/* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */ +/* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */ + + run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's'); + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + sljit_free_unused_memory_exec(); +#endif /* !SLJIT_CONFIG_UNSUPPORTED */ + + return 0; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfig.h b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfig.h new file mode 100644 index 0000000000..1c821d287d --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfig.h @@ -0,0 +1,172 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_H_ +#define SLJIT_CONFIG_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + This file contains the basic configuration options for the SLJIT compiler + and their default values. These options can be overridden in the + sljitConfigPre.h header file when SLJIT_HAVE_CONFIG_PRE is set to a + non-zero value. +*/ + +/* --------------------------------------------------------------------- */ +/* Architecture */ +/* --------------------------------------------------------------------- */ + +/* Architecture selection. 
*/ +/* #define SLJIT_CONFIG_X86_32 1 */ +/* #define SLJIT_CONFIG_X86_64 1 */ +/* #define SLJIT_CONFIG_ARM_V5 1 */ +/* #define SLJIT_CONFIG_ARM_V7 1 */ +/* #define SLJIT_CONFIG_ARM_THUMB2 1 */ +/* #define SLJIT_CONFIG_ARM_64 1 */ +/* #define SLJIT_CONFIG_PPC_32 1 */ +/* #define SLJIT_CONFIG_PPC_64 1 */ +/* #define SLJIT_CONFIG_MIPS_32 1 */ +/* #define SLJIT_CONFIG_MIPS_64 1 */ +/* #define SLJIT_CONFIG_SPARC_32 1 */ +/* #define SLJIT_CONFIG_S390X 1 */ + +/* #define SLJIT_CONFIG_AUTO 1 */ +/* #define SLJIT_CONFIG_UNSUPPORTED 1 */ + +/* --------------------------------------------------------------------- */ +/* Utilities */ +/* --------------------------------------------------------------------- */ + +/* Implements a stack like data structure (by using mmap / VirtualAlloc */ +/* or a custom allocator). */ +#ifndef SLJIT_UTIL_STACK +/* Enabled by default */ +#define SLJIT_UTIL_STACK 1 +#endif + +/* Uses user provided allocator to allocate the stack (see SLJIT_UTIL_STACK) */ +#ifndef SLJIT_UTIL_SIMPLE_STACK_ALLOCATION +/* Disabled by default */ +#define SLJIT_UTIL_SIMPLE_STACK_ALLOCATION 0 +#endif + +/* Single threaded application. Does not require any locks. */ +#ifndef SLJIT_SINGLE_THREADED +/* Disabled by default. */ +#define SLJIT_SINGLE_THREADED 0 +#endif + +/* --------------------------------------------------------------------- */ +/* Configuration */ +/* --------------------------------------------------------------------- */ + +/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should + define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL. */ +#ifndef SLJIT_STD_MACROS_DEFINED +/* Disabled by default. */ +#define SLJIT_STD_MACROS_DEFINED 0 +#endif + +/* Executable code allocation: + If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should + define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */ +#ifndef SLJIT_EXECUTABLE_ALLOCATOR +/* Enabled by default. 
*/ +#define SLJIT_EXECUTABLE_ALLOCATOR 1 + +/* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses + an allocator which does not set writable and executable + permission flags at the same time. + Instead, it creates a shared memory segment (usually backed by a file) + and maps it twice, with different permissions, depending on the use + case. + The trade-off is increased use of virtual memory, incompatibility with + fork(), and some possible additional security risks by the use of + publicly accessible files for the generated code. */ +#ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR +/* Disabled by default. */ +#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0 +#endif + +/* When SLJIT_WX_EXECUTABLE_ALLOCATOR is enabled SLJIT uses an + allocator which does not set writable and executable permission + flags at the same time. + Instead, it creates a new independent map on each invocation and + switches permissions at the underlying pages as needed. + The trade-off is increased memory use and degraded performance. */ +#ifndef SLJIT_WX_EXECUTABLE_ALLOCATOR +/* Disabled by default. */ +#define SLJIT_WX_EXECUTABLE_ALLOCATOR 0 +#endif + +#endif /* !SLJIT_EXECUTABLE_ALLOCATOR */ + +/* Force cdecl calling convention even if a better calling + convention (e.g. fastcall) is supported by the C compiler. + If this option is disabled (this is the default), functions + called from JIT should be defined with SLJIT_FUNC attribute. + Standard C functions can still be called by using the + SLJIT_CALL_CDECL jump type. */ +#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION +/* Disabled by default */ +#define SLJIT_USE_CDECL_CALLING_CONVENTION 0 +#endif + +/* Return with error when an invalid argument is passed. */ +#ifndef SLJIT_ARGUMENT_CHECKS +/* Disabled by default */ +#define SLJIT_ARGUMENT_CHECKS 0 +#endif + +/* Debug checks (assertions, etc.). */ +#ifndef SLJIT_DEBUG +/* Enabled by default */ +#define SLJIT_DEBUG 1 +#endif + +/* Verbose operations. 
*/ +#ifndef SLJIT_VERBOSE +/* Enabled by default */ +#define SLJIT_VERBOSE 1 +#endif + +/* + SLJIT_IS_FPU_AVAILABLE + The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE. + zero value - FPU is NOT present. + nonzero value - FPU is present. +*/ + +/* For further configurations, see the beginning of sljitConfigInternal.h */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_CONFIG_H_ */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigInternal.h b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigInternal.h new file mode 100644 index 0000000000..025111abcc --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitConfigInternal.h @@ -0,0 +1,839 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_INTERNAL_H_ +#define SLJIT_CONFIG_INTERNAL_H_ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE))) +#include +#endif + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG \ + && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS))) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + SLJIT defines the following architecture dependent types and macros: + + Types: + sljit_s8, sljit_u8 : signed and unsigned 8 bit integer type + sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type + sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_p : unsgined pointer value (usually the same as sljit_uw, but + some 64 bit ABIs may use 32 bit pointers) + sljit_f32 : 32 bit single precision floating point value + sljit_f64 : 64 bit double precision floating point value + + Macros for feature detection (boolean): + SLJIT_32BIT_ARCHITECTURE : 32 bit architecture + SLJIT_64BIT_ARCHITECTURE : 64 bit architecture + SLJIT_LITTLE_ENDIAN : little endian architecture + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) 
+ SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + + Constants: + SLJIT_NUMBER_OF_REGISTERS : number of available registers + SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers + SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers + SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index + SLJIT_F32_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_F64_SHIFT : the shift required to apply when accessing + a double precision floating point array by index + SLJIT_PREF_SHIFT_REG : x86 systems prefers ecx for shifting by register + the scratch register index of ecx is stored in this variable + SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + + Other macros: + SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper) +*/ + +/*****************/ +/* Sanity check. 
*/ +/*****************/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 +#error "Multiple architectures are selected" +#endif + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + && !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + && !(defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \ + && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) +#if defined SLJIT_CONFIG_AUTO && !SLJIT_CONFIG_AUTO +#error "An architecture must be selected" +#else /* SLJIT_CONFIG_AUTO */ +#define SLJIT_CONFIG_AUTO 1 +#endif /* !SLJIT_CONFIG_AUTO */ 
+#endif /* !SLJIT_CONFIG */ + +/********************************************************/ +/* Automatic CPU detection (requires compiler support). */ +/********************************************************/ + +#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) + +#ifndef _WIN32 + +#if defined(__i386__) || defined(__i386) +#define SLJIT_CONFIG_X86_32 1 +#elif defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(__arm__) || defined(__ARM__) +#ifdef __thumb2__ +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) +#define SLJIT_CONFIG_ARM_V7 1 +#else +#define SLJIT_CONFIG_ARM_V5 1 +#endif +#elif defined (__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) +#define SLJIT_CONFIG_PPC_64 1 +#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) +#define SLJIT_CONFIG_PPC_32 1 +#elif defined(__mips__) && !defined(_LP64) +#define SLJIT_CONFIG_MIPS_32 1 +#elif defined(__mips64) +#define SLJIT_CONFIG_MIPS_64 1 +#elif defined(__sparc__) || defined(__sparc) +#define SLJIT_CONFIG_SPARC_32 1 +#elif defined(__s390x__) +#define SLJIT_CONFIG_S390X 1 +#else +/* Unsupported architecture */ +#define SLJIT_CONFIG_UNSUPPORTED 1 +#endif + +#else /* _WIN32 */ + +#if defined(_M_X64) || defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__) +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif (defined(_M_ARM) && _M_ARM >= 7) +#define SLJIT_CONFIG_ARM_V7 1 +#elif defined(_ARM_) +#define SLJIT_CONFIG_ARM_V5 1 +#elif defined(_M_ARM64) || defined(__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#else +#define SLJIT_CONFIG_X86_32 1 +#endif + +#endif /* !_WIN32 */ +#endif /* SLJIT_CONFIG_AUTO */ + +#if (defined SLJIT_CONFIG_UNSUPPORTED && 
SLJIT_CONFIG_UNSUPPORTED) +#undef SLJIT_EXECUTABLE_ALLOCATOR +#endif + +/******************************/ +/* CPU family type detection. */ +/******************************/ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CONFIG_ARM_32 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#define SLJIT_CONFIG_X86 1 +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_CONFIG_ARM 1 +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_CONFIG_PPC 1 +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SLJIT_CONFIG_MIPS 1 +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) +#define SLJIT_CONFIG_SPARC 1 +#endif + +/***********************************************************/ +/* Intel Control-flow Enforcement Technology (CET) spport. */ +/***********************************************************/ + +#ifdef SLJIT_CONFIG_X86 + +#if defined(__CET__) && !(defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) +#define SLJIT_CONFIG_X86_CET 1 +#endif + +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined(__GNUC__) +#include +#endif + +#endif /* SLJIT_CONFIG_X86 */ + +/**********************************/ +/* External function definitions. */ +/**********************************/ + +/* General macros: + Note: SLJIT is designed to be independent from them as possible. 
+ + In release mode (SLJIT_DEBUG is not defined) only the following + external functions are needed: +*/ + +#ifndef SLJIT_MALLOC +#define SLJIT_MALLOC(size, allocator_data) malloc(size) +#endif + +#ifndef SLJIT_FREE +#define SLJIT_FREE(ptr, allocator_data) free(ptr) +#endif + +#ifndef SLJIT_MEMCPY +#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) +#endif + +#ifndef SLJIT_MEMMOVE +#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) +#endif + +#ifndef SLJIT_ZEROMEM +#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) +#endif + +/***************************/ +/* Compiler helper macros. */ +/***************************/ + +#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) + +#if defined(__GNUC__) && (__GNUC__ >= 3) +#define SLJIT_LIKELY(x) __builtin_expect((x), 1) +#define SLJIT_UNLIKELY(x) __builtin_expect((x), 0) +#else +#define SLJIT_LIKELY(x) (x) +#define SLJIT_UNLIKELY(x) (x) +#endif + +#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */ + +#ifndef SLJIT_INLINE +/* Inline functions. Some old compilers do not support them. */ +#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510 +#define SLJIT_INLINE +#else +#define SLJIT_INLINE __inline +#endif +#endif /* !SLJIT_INLINE */ + +#ifndef SLJIT_NOINLINE +/* Not inline functions. */ +#if defined(__GNUC__) +#define SLJIT_NOINLINE __attribute__ ((noinline)) +#else +#define SLJIT_NOINLINE +#endif +#endif /* !SLJIT_INLINE */ + +#ifndef SLJIT_UNUSED_ARG +/* Unused arguments. */ +#define SLJIT_UNUSED_ARG(arg) (void)arg +#endif + +/*********************************/ +/* Type of public API functions. */ +/*********************************/ + +#ifndef SLJIT_API_FUNC_ATTRIBUTE +#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) +/* Static ABI functions. For all-in-one programs. */ + +#if defined(__GNUC__) +/* Disable unused warnings in gcc. 
*/ +#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused)) +#else +#define SLJIT_API_FUNC_ATTRIBUTE static +#endif + +#else +#define SLJIT_API_FUNC_ATTRIBUTE +#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */ +#endif /* defined SLJIT_API_FUNC_ATTRIBUTE */ + +/****************************/ +/* Instruction cache flush. */ +/****************************/ + +#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) +#if __has_builtin(__builtin___clear_cache) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) + +#endif /* __has_builtin(__builtin___clear_cache) */ +#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ + +#ifndef SLJIT_CACHE_FLUSH + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +/* Not required to implement on archs with unified caches. */ +#define SLJIT_CACHE_FLUSH(from, to) + +#elif defined __APPLE__ + +/* Supported by all macs since Mac OS 10.5. + However, it does not work on non-jailbroken iOS devices, + although the compilation is successful. */ + +#define SLJIT_CACHE_FLUSH(from, to) \ + sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + ppc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) + +#elif defined __ANDROID__ + +/* Android lacks __clear_cache; instead, cacheflush should be used. */ + +#define SLJIT_CACHE_FLUSH(from, to) \ + cacheflush((long)(from), (long)(to), 0) + +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + +/* The __clear_cache() implementation of GCC is a dummy function on Sparc. 
*/ +#define SLJIT_CACHE_FLUSH(from, to) \ + sparc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + +#elif defined _WIN32 + +#define SLJIT_CACHE_FLUSH(from, to) \ + FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from)) + +#else + +/* Calls __ARM_NR_cacheflush on ARM-Linux. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + __clear_cache((char*)(from), (char*)(to)) + +#endif + +#endif /* !SLJIT_CACHE_FLUSH */ + +/******************************************************/ +/* Integer and floating point type definitions. */ +/******************************************************/ + +/* 8 bit byte type. */ +typedef unsigned char sljit_u8; +typedef signed char sljit_s8; + +/* 16 bit half-word type. */ +typedef unsigned short int sljit_u16; +typedef signed short int sljit_s16; + +/* 32 bit integer type. */ +typedef unsigned int sljit_u32; +typedef signed int sljit_s32; + +/* Machine word type. Enough for storing a pointer. + 32 bit for 32 bit machines. + 64 bit for 64 bit machines. */ +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +/* Just to have something. 
*/ +#define SLJIT_WORD_SHIFT 0 +typedef unsigned long int sljit_uw; +typedef long int sljit_sw; +#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_32BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 2 +typedef unsigned int sljit_uw; +typedef int sljit_sw; +#else +#define SLJIT_64BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 3 +#ifdef _WIN32 +#ifdef __GNUC__ +/* These types do not require windows.h */ +typedef unsigned long long sljit_uw; +typedef long long sljit_sw; +#else +typedef unsigned __int64 sljit_uw; +typedef __int64 sljit_sw; +#endif +#else /* !_WIN32 */ +typedef unsigned long int sljit_uw; +typedef long int sljit_sw; +#endif /* _WIN32 */ +#endif + +typedef sljit_uw sljit_p; + +/* Floating point types. */ +typedef float sljit_f32; +typedef double sljit_f64; + +/* Shift for pointer sized data. */ +#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT + +/* Shift for double precision sized data. */ +#define SLJIT_F32_SHIFT 2 +#define SLJIT_F64_SHIFT 3 + +#ifndef SLJIT_W + +/* Defining long constants. */ +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SLJIT_W(w) (w##l) +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#ifdef _WIN64 +#define SLJIT_W(w) (w##ll) +#else /* !windows */ +#define SLJIT_W(w) (w##l) +#endif /* windows */ +#else /* 32 bit */ +#define SLJIT_W(w) (w) +#endif /* unknown */ + +#endif /* !SLJIT_W */ + +/*************************/ +/* Endianness detection. */ +/*************************/ + +#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) + +/* These macros are mostly useful for the applications. 
*/ +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + +#ifdef __LITTLE_ENDIAN__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + +#ifdef __MIPSEL__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#ifndef SLJIT_MIPS_REV + +/* Auto detecting mips revision. */ +#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6) +#define SLJIT_MIPS_REV 6 +#elif (defined __mips_isa_rev && __mips_isa_rev >= 1) \ + || (defined __clang__ && defined _MIPS_ARCH_OCTEON) \ + || (defined __clang__ && defined _MIPS_ARCH_P5600) +/* clang either forgets to define (clang-7) __mips_isa_rev at all + * or sets it to zero (clang-8,-9) for -march=octeon (MIPS64 R2+) + * and -march=p5600 (MIPS32 R5). + * It also sets the __mips macro to 64 or 32 for -mipsN when N <= 5 + * (should be set to N exactly) so we cannot rely on this too. + */ +#define SLJIT_MIPS_REV 1 +#endif + +#endif /* !SLJIT_MIPS_REV */ + +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +#define SLJIT_BIG_ENDIAN 1 + +#else +#define SLJIT_LITTLE_ENDIAN 1 +#endif + +#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */ + +/* Sanity check. 
*/ +#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#ifndef SLJIT_UNALIGNED + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_UNALIGNED 1 +#endif + +#endif /* !SLJIT_UNALIGNED */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +/* Auto detect SSE2 support using CPUID. + On 64 bit x86 cpus, sse2 must be present. */ +#define SLJIT_DETECT_SSE2 1 +#endif + +/*****************************************************************************************/ +/* Calling convention of functions generated by SLJIT or called from the generated code. 
*/ +/*****************************************************************************************/ + +#ifndef SLJIT_FUNC + +#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION) \ + || !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#define SLJIT_FUNC + +#elif defined(__GNUC__) && !defined(__APPLE__) + +#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +#define SLJIT_FUNC __attribute__ ((fastcall)) +#define SLJIT_X86_32_FASTCALL 1 +#else +#define SLJIT_FUNC +#endif /* gcc >= 3.4 */ + +#elif defined(_MSC_VER) + +#define SLJIT_FUNC __fastcall +#define SLJIT_X86_32_FASTCALL 1 + +#elif defined(__BORLANDC__) + +#define SLJIT_FUNC __msfastcall +#define SLJIT_X86_32_FASTCALL 1 + +#else /* Unknown compiler. */ + +/* The cdecl calling convention is usually the x86 default. */ +#define SLJIT_FUNC + +#endif /* SLJIT_USE_CDECL_CALLING_CONVENTION */ + +#endif /* !SLJIT_FUNC */ + +#ifndef SLJIT_INDIRECT_CALL +#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \ + || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX) +/* It seems certain ppc compilers use an indirect addressing for functions + which makes things complicated. */ +#define SLJIT_INDIRECT_CALL 1 +#endif +#endif /* SLJIT_INDIRECT_CALL */ + +/* The offset which needs to be substracted from the return address to +determine the next executed instruction after return. */ +#ifndef SLJIT_RETURN_ADDRESS_OFFSET +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define SLJIT_RETURN_ADDRESS_OFFSET 8 +#else +#define SLJIT_RETURN_ADDRESS_OFFSET 0 +#endif +#endif /* SLJIT_RETURN_ADDRESS_OFFSET */ + +/***************************************************/ +/* Functions of the built-in executable allocator. 
*/ +/***************************************************/ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#define SLJIT_BUILTIN_MALLOC_EXEC(size, exec_allocator_data) sljit_malloc_exec(size) +#define SLJIT_BUILTIN_FREE_EXEC(ptr, exec_allocator_data) sljit_free_exec(ptr) + +#ifndef SLJIT_MALLOC_EXEC +#define SLJIT_MALLOC_EXEC(size, exec_allocator_data) SLJIT_BUILTIN_MALLOC_EXEC((size), (exec_allocator_data)) +#endif /* SLJIT_MALLOC_EXEC */ + +#ifndef SLJIT_FREE_EXEC +#define SLJIT_FREE_EXEC(ptr, exec_allocator_data) SLJIT_BUILTIN_FREE_EXEC((ptr), (exec_allocator_data)) +#endif /* SLJIT_FREE_EXEC */ + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); +#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr) +#else +#define SLJIT_EXEC_OFFSET(ptr) 0 +#endif + +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +/**********************************************/ +/* Registers and locals offset determination. 
*/ +/**********************************************/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9 +#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 + +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define SLJIT_NUMBER_OF_REGISTERS 13 +#ifndef _WIN64 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#else /* _WIN64 */ +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) +#endif /* !_WIN64 */ +#define SLJIT_PREF_SHIFT_REG SLJIT_R3 + +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +#define SLJIT_NUMBER_OF_REGISTERS 26 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw)) +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +/* Add +1 for double alignment. 
*/ +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw)) +#endif /* SLJIT_CONFIG_PPC_64 || _AIX */ + +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +#define SLJIT_NUMBER_OF_REGISTERS 21 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE 0 +#endif + +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) + +#define SLJIT_NUMBER_OF_REGISTERS 18 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +/* saved registers (16), return struct pointer (1), space for 6 argument words (1), + 4th double arg (2), double alignment (1). */ +#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * sizeof(sljit_sw)) +#endif + +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +/* + * https://refspecs.linuxbase.org/ELF/zSeries/lzsabi0_zSeries.html#STACKFRAME + * + * 160 + * .. FR6 + * .. FR4 + * .. FR2 + * 128 FR0 + * 120 R15 (used for SP) + * 112 R14 + * 104 R13 + * 96 R12 + * .. + * 48 R6 + * .. 
+ * 16 R2 + * 8 RESERVED + * 0 SP + */ +#define SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE 160 + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + +#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define SLJIT_NUMBER_OF_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#endif + +#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE) + +#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ + (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) + +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1 +#else +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#endif + +#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ + (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) + +/********************************/ +/* CPU status flags management. */ +/********************************/ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_HAS_STATUS_FLAGS_STATE 1 +#endif + +/*************************************/ +/* Debug and verbose related macros. */ +/*************************************/ + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + +#if !defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) + +/* SLJIT_HALT_PROCESS must halt the process. */ +#ifndef SLJIT_HALT_PROCESS +#define SLJIT_HALT_PROCESS() \ + abort(); +#endif /* !SLJIT_HALT_PROCESS */ + +#endif /* !SLJIT_ASSERT || !SLJIT_UNREACHABLE */ + +/* Feel free to redefine these two macros. 
*/ +#ifndef SLJIT_ASSERT + +#define SLJIT_ASSERT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) { \ + printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } \ + } while (0) + +#endif /* !SLJIT_ASSERT */ + +#ifndef SLJIT_UNREACHABLE + +#define SLJIT_UNREACHABLE() \ + do { \ + printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } while (0) + +#endif /* !SLJIT_UNREACHABLE */ + +#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +/* Forcing empty, but valid statements. */ +#undef SLJIT_ASSERT +#undef SLJIT_UNREACHABLE + +#define SLJIT_ASSERT(x) \ + do { } while (0) +#define SLJIT_UNREACHABLE() \ + do { } while (0) + +#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +#ifndef SLJIT_COMPILE_ASSERT + +#define SLJIT_COMPILE_ASSERT(x, description) \ + switch(0) { case 0: case ((x) ? 1 : 0): break; } + +#endif /* !SLJIT_COMPILE_ASSERT */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_CONFIG_INTERNAL_H_ */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitExecAllocator.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitExecAllocator.c new file mode 100644 index 0000000000..fce584b5c2 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitExecAllocator.c @@ -0,0 +1,411 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple executable memory allocator + + It is assumed, that executable code blocks are usually medium (or sometimes + large) memory blocks, and the allocator is not too frequently called (less + optimized than other allocators). Thus, using it as a generic allocator is + not suggested. + + How does it work: + Memory is allocated in continuous memory areas called chunks by alloc_chunk() + Chunk format: + [ block ][ block ] ... [ block ][ block terminator ] + + All blocks and the block terminator is started with block_header. The block + header contains the size of the previous and the next block. These sizes + can also contain special values. + Block size: + 0 - The block is a free_block, with a different size member. + 1 - The block is a block terminator. + n - The block is used at the moment, and the value contains its size. + Previous block size: + 0 - This is the first block of the memory chunk. + n - The size of the previous block. + + Using these size values we can go forward or backward on the block chain. + The unused blocks are stored in a chain list pointed by free_blocks. 
This + list is useful if we need to find a suitable memory area when the allocator + is called. + + When a block is freed, the new free block is connected to its adjacent free + blocks if possible. + + [ free block ][ used block ][ free block ] + and "used block" is freed, the three blocks are connected together: + [ one big free block ] +*/ + +/* --------------------------------------------------------------------- */ +/* System (OS) functions */ +/* --------------------------------------------------------------------- */ + +/* 64 KByte. */ +#define CHUNK_SIZE 0x10000 + +/* + alloc_chunk / free_chunk : + * allocate executable system memory chunks + * the size is always divisible by CHUNK_SIZE + SLJIT_ALLOCATOR_LOCK / SLJIT_ALLOCATOR_UNLOCK : + * provided as part of sljitUtils + * only the allocator requires this lock, sljit is fully thread safe + as it only uses local variables +*/ + +#ifdef _WIN32 +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + SLJIT_UNUSED_ARG(size); + VirtualFree(chunk, 0, MEM_RELEASE); +} + +#else /* POSIX */ + +#if defined(__APPLE__) && defined(MAP_JIT) +/* + On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a + version where it's OK to have more than one JIT block or where MAP_JIT is + required. + On non-macOS systems, returns MAP_JIT if it is defined. 
+*/ +#include +#if TARGET_OS_OSX +#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86 +#ifdef MAP_ANON +#include +#include + +#define SLJIT_MAP_JIT (get_map_jit_flag()) + +static SLJIT_INLINE int get_map_jit_flag() +{ + sljit_sw page_size; + void *ptr; + struct utsname name; + static int map_jit_flag = -1; + + if (map_jit_flag < 0) { + map_jit_flag = 0; + uname(&name); + + /* Kernel version for 10.14.0 (Mojave) or later */ + if (atoi(name.release) >= 18) { + page_size = get_page_alignment() + 1; + /* Only use MAP_JIT if a hardened runtime is used */ + ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANON, -1, 0); + + if (ptr != MAP_FAILED) + munmap(ptr, page_size); + else + map_jit_flag = MAP_JIT; + } + } + return map_jit_flag; +} +#endif /* MAP_ANON */ +#else /* !SLJIT_CONFIG_X86 */ +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#error "Unsupported architecture" +#endif /* SLJIT_CONFIG_ARM */ +#include +#include + +#define SLJIT_MAP_JIT (MAP_JIT) +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + apple_update_wx_flags(enable_exec) + +static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) +{ +#if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000 + pthread_jit_write_protect_np(enable_exec); +#else +#error "Must target Big Sur or newer" +#endif /* BigSur */ +} +#endif /* SLJIT_CONFIG_X86 */ +#else /* !TARGET_OS_OSX */ +#define SLJIT_MAP_JIT (MAP_JIT) +#endif /* TARGET_OS_OSX */ +#endif /* __APPLE__ && MAP_JIT */ +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif /* !SLJIT_UPDATE_WX_FLAGS */ +#ifndef SLJIT_MAP_JIT +#define SLJIT_MAP_JIT (0) +#endif /* !SLJIT_MAP_JIT */ + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + void *retval; + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE; + int fd = -1; + +#ifdef PROT_MAX + prot |= PROT_MAX(prot); +#endif + +#ifdef MAP_ANON + flags |= MAP_ANON | SLJIT_MAP_JIT; +#else /* !MAP_ANON */ + if 
(SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) + return NULL; + + fd = dev_zero; +#endif /* MAP_ANON */ + + retval = mmap(NULL, size, prot, flags, fd, 0); + if (retval == MAP_FAILED) + return NULL; + +#ifdef __FreeBSD__ + /* HardenedBSD's mmap lies, so check permissions again */ + if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { + munmap(retval, size); + return NULL; + } +#endif /* FreeBSD */ + + SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); + + return retval; +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + munmap(chunk, size); +} + +#endif /* windows */ + +/* --------------------------------------------------------------------- */ +/* Common functions */ +/* --------------------------------------------------------------------- */ + +#define CHUNK_MASK (~(CHUNK_SIZE - 1)) + +struct block_header { + sljit_uw size; + sljit_uw prev_size; +}; + +struct free_block { + struct block_header header; + struct free_block *next; + struct free_block *prev; + sljit_uw size; +}; + +#define AS_BLOCK_HEADER(base, offset) \ + ((struct block_header*)(((sljit_u8*)base) + offset)) +#define AS_FREE_BLOCK(base, offset) \ + ((struct free_block*)(((sljit_u8*)base) + offset)) +#define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header))) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) + +static struct free_block* free_blocks; +static sljit_uw allocated_size; +static sljit_uw total_size; + +static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) +{ + free_block->header.size = 0; + free_block->size = size; + + free_block->next = free_blocks; + free_block->prev = NULL; + if (free_blocks) + free_blocks->prev = free_block; + free_blocks = free_block; +} + +static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) +{ + if (free_block->next) + free_block->next->prev = free_block->prev; + + if (free_block->prev) + 
free_block->prev->next = free_block->next; + else { + SLJIT_ASSERT(free_blocks == free_block); + free_blocks = free_block->next; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + struct block_header *header; + struct block_header *next_header; + struct free_block *free_block; + sljit_uw chunk_size; + + SLJIT_ALLOCATOR_LOCK(); + if (size < (64 - sizeof(struct block_header))) + size = (64 - sizeof(struct block_header)); + size = ALIGN_SIZE(size); + + free_block = free_blocks; + while (free_block) { + if (free_block->size >= size) { + chunk_size = free_block->size; + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); + if (chunk_size > size + 64) { + /* We just cut a block from the end of the free block. */ + chunk_size -= size; + free_block->size = chunk_size; + header = AS_BLOCK_HEADER(free_block, chunk_size); + header->prev_size = chunk_size; + AS_BLOCK_HEADER(header, size)->prev_size = size; + } + else { + sljit_remove_free_block(free_block); + header = (struct block_header*)free_block; + size = chunk_size; + } + allocated_size += size; + header->size = size; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); + } + free_block = free_block->next; + } + + chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK; + header = (struct block_header*)alloc_chunk(chunk_size); + if (!header) { + SLJIT_ALLOCATOR_UNLOCK(); + return NULL; + } + + chunk_size -= sizeof(struct block_header); + total_size += chunk_size; + + header->prev_size = 0; + if (chunk_size > size + 64) { + /* Cut the allocated space into a free and a used block. */ + allocated_size += size; + header->size = size; + chunk_size -= size; + + free_block = AS_FREE_BLOCK(header, size); + free_block->header.prev_size = size; + sljit_insert_free_block(free_block, chunk_size); + next_header = AS_BLOCK_HEADER(free_block, chunk_size); + } + else { + /* All space belongs to this allocation. 
*/ + allocated_size += chunk_size; + header->size = chunk_size; + next_header = AS_BLOCK_HEADER(header, chunk_size); + } + next_header->size = 1; + next_header->prev_size = chunk_size; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + struct block_header *header; + struct free_block* free_block; + + SLJIT_ALLOCATOR_LOCK(); + header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); + allocated_size -= header->size; + + /* Connecting free blocks together if possible. */ + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); + + /* If header->prev_size == 0, free_block will equal to header. + In this case, free_block->header.size will be > 0. */ + free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size); + if (SLJIT_UNLIKELY(!free_block->header.size)) { + free_block->size += header->size; + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + else { + free_block = (struct free_block*)header; + sljit_insert_free_block(free_block, header->size); + } + + header = AS_BLOCK_HEADER(free_block, free_block->size); + if (SLJIT_UNLIKELY(!header->size)) { + free_block->size += ((struct free_block*)header)->size; + sljit_remove_free_block((struct free_block*)header); + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + + /* The whole chunk is free. */ + if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + /* If this block is freed, we still have (allocated_size / 2) free space. 
*/ + if (total_size - free_block->size > (allocated_size * 3 / 2)) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + sizeof(struct block_header)); + } + } + + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); + SLJIT_ALLOCATOR_UNLOCK(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + struct free_block* free_block; + struct free_block* next_free_block; + + SLJIT_ALLOCATOR_LOCK(); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); + + free_block = free_blocks; + while (free_block) { + next_free_block = free_block->next; + if (!free_block->header.prev_size && + AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + sizeof(struct block_header)); + } + free_block = next_free_block; + } + + SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); + SLJIT_ALLOCATOR_UNLOCK(); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.c new file mode 100644 index 0000000000..a24a99ab87 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.c @@ -0,0 +1,2708 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sljitLir.h" + +#ifdef _WIN32 + +#include + +#endif /* _WIN32 */ + +#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) + +/* These libraries are needed for the macros below. 
*/ +#include +#include + +#endif /* SLJIT_STD_MACROS_DEFINED */ + +#define CHECK_ERROR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return compiler->error; \ + } while (0) + +#define CHECK_ERROR_PTR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return NULL; \ + } while (0) + +#define FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return compiler->error; \ + } while (0) + +#define PTR_FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return NULL; \ + } while (0) + +#define FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return SLJIT_ERR_ALLOC_FAILED; \ + } \ + } while (0) + +#define PTR_FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#define PTR_FAIL_WITH_EXEC_IF(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define VARIABLE_FLAG_SHIFT (10) +#define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT) +#define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) + +#define GET_OPCODE(op) \ + ((op) & ~(SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#define HAS_FLAGS(op) \ + ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#define GET_ALL_FLAGS(op) \ + ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define TYPE_CAST_NEEDED(op) \ + ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) +#else +#define TYPE_CAST_NEEDED(op) \ + ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) +#endif + +#define BUF_SIZE 4096 + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define ABUF_SIZE 2048 +#else +#define ABUF_SIZE 4096 +#endif + +/* Parameter parsing. 
*/ +#define REG_MASK 0x3f +#define OFFS_REG(reg) (((reg) >> 8) & REG_MASK) +#define OFFS_REG_MASK (REG_MASK << 8) +#define TO_OFFS_REG(reg) ((reg) << 8) +/* When reg cannot be unused. */ +#define FAST_IS_REG(reg) ((reg) <= REG_MASK) +/* When reg can be unused. */ +#define SLOW_IS_REG(reg) ((reg) > 0 && (reg) <= REG_MASK) + +/* Mask for argument types. */ +#define SLJIT_DEF_MASK ((1 << SLJIT_DEF_SHIFT) - 1) + +/* Jump flags. */ +#define JUMP_LABEL 0x1 +#define JUMP_ADDR 0x2 +/* SLJIT_REWRITABLE_JUMP is 0x1000. */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# define PATCH_MB 0x4 +# define PATCH_MW 0x8 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +# define PATCH_MD 0x10 +#endif +# define TYPE_SHIFT 13 +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define IS_BL 0x4 +# define PATCH_B 0x8 +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +# define CPOOL_SIZE 512 +#endif + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# define IS_COND 0x04 +# define IS_BL 0x08 + /* conditional + imm8 */ +# define PATCH_TYPE1 0x10 + /* conditional + imm20 */ +# define PATCH_TYPE2 0x20 + /* IT + imm24 */ +# define PATCH_TYPE3 0x30 + /* imm11 */ +# define PATCH_TYPE4 0x40 + /* imm24 */ +# define PATCH_TYPE5 0x50 + /* BL + imm24 */ +# define PATCH_BL 0x60 + /* 0xf00 cc code for branches */ +#endif + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +# define IS_COND 0x004 +# define IS_CBZ 0x008 +# define IS_BL 0x010 +# define PATCH_B 0x020 +# define PATCH_COND 0x040 +# define PATCH_ABS48 0x080 +# define PATCH_ABS64 0x100 +#endif + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# define IS_COND 0x004 +# define IS_CALL 0x008 +# define PATCH_B 0x010 +# define PATCH_ABS_B 0x020 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +# define PATCH_ABS32 0x040 +# define PATCH_ABS48 0x080 +#endif +# define REMOVE_COND 0x100 +#endif + +#if 
(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# define IS_MOVABLE 0x004 +# define IS_JAL 0x008 +# define IS_CALL 0x010 +# define IS_BIT26_COND 0x020 +# define IS_BIT16_COND 0x040 +# define IS_BIT23_COND 0x080 + +# define IS_COND (IS_BIT26_COND | IS_BIT16_COND | IS_BIT23_COND) + +# define PATCH_B 0x100 +# define PATCH_J 0x200 + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +# define PATCH_ABS32 0x400 +# define PATCH_ABS48 0x800 +#endif + + /* instruction types */ +# define MOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* no destination (i.e: store) */ +# define UNMOVABLE_INS 32 + /* FPU status register */ +# define FCSR_FCC 33 +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +# define IS_MOVABLE 0x04 +# define IS_COND 0x08 +# define IS_CALL 0x10 + +# define PATCH_B 0x20 +# define PATCH_CALL 0x40 + + /* instruction types */ +# define MOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* no destination (i.e: store) */ +# define UNMOVABLE_INS 32 + +# define DST_INS_MASK 0xff + + /* ICC_SET is the same as SET_FLAGS. */ +# define ICC_IS_SET (1 << 23) +# define FCC_IS_SET (1 << 24) +#endif + +/* Stack management. */ + +#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ + (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ + (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ + extra) * sizeof(sljit_sw)) + +#define ADJUST_LOCAL_OFFSET(p, i) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + (i) += SLJIT_LOCALS_OFFSET; + +#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ + +/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. 
*/ +#include "sljitUtils.c" + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#include "sljitProtExecAllocator.c" +#elif (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR) +#include "sljitWXExecAllocator.c" +#else +#include "sljitExecAllocator.c" +#endif + +#endif + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset)) +#else +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr)) +#endif + +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif + +/* Argument checking features. */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +/* Returns with error when an invalid argument is passed. */ + +#define CHECK_ARGUMENT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) \ + return 1; \ + } while (0) + +#define CHECK_RETURN_TYPE sljit_s32 +#define CHECK_RETURN_OK return 0 + +#define CHECK(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return SLJIT_ERR_BAD_ARGUMENT; \ + } \ + } while (0) + +#define CHECK_PTR(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return NULL; \ + } \ + } while (0) + +#define CHECK_REG_INDEX(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + return -2; \ + } \ + } while (0) + +#elif (defined SLJIT_DEBUG && SLJIT_DEBUG) + +/* Assertion failure occures if an invalid argument is passed. 
*/ +#undef SLJIT_ARGUMENT_CHECKS +#define SLJIT_ARGUMENT_CHECKS 1 + +#define CHECK_ARGUMENT(x) SLJIT_ASSERT(x) +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#elif (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +/* Arguments are not checked. */ +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#else + +/* Arguments are not checked. */ +#define CHECK(x) +#define CHECK_PTR(x) +#define CHECK_REG_INDEX(x) + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +/* --------------------------------------------------------------------- */ +/* Public functions */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_NEEDS_COMPILER_INIT 1 +static sljit_s32 compiler_initialized = 0; +/* A thread safe initialization. */ +static void init_compiler(void); +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data) +{ + struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler), allocator_data); + if (!compiler) + return NULL; + SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler)); + + SLJIT_COMPILE_ASSERT( + sizeof(sljit_s8) == 1 && sizeof(sljit_u8) == 1 + && sizeof(sljit_s16) == 2 && sizeof(sljit_u16) == 2 + && sizeof(sljit_s32) == 4 && sizeof(sljit_u32) == 4 + && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8) + && sizeof(sljit_p) <= sizeof(sljit_sw) + && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) + && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), + invalid_integer_types); + SLJIT_COMPILE_ASSERT(SLJIT_I32_OP == SLJIT_F32_OP, + int_op_and_single_op_must_be_the_same); + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP, + rewritable_jump_and_single_op_must_not_be_the_same); + 
SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1), + conditional_flags_must_be_even_numbers); + + /* Only the non-zero members must be set. */ + compiler->error = SLJIT_SUCCESS; + + compiler->allocator_data = allocator_data; + compiler->exec_allocator_data = exec_allocator_data; + compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, allocator_data); + + if (!compiler->buf || !compiler->abuf) { + if (compiler->buf) + SLJIT_FREE(compiler->buf, allocator_data); + if (compiler->abuf) + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + + compiler->buf->next = NULL; + compiler->buf->used_size = 0; + compiler->abuf->next = NULL; + compiler->abuf->used_size = 0; + + compiler->scratches = -1; + compiler->saveds = -1; + compiler->fscratches = -1; + compiler->fsaveds = -1; + compiler->local_size = -1; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->args = -1; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + + CPOOL_SIZE * sizeof(sljit_u8), allocator_data); + if (!compiler->cpool) { + SLJIT_FREE(compiler->buf, allocator_data); + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + compiler->cpool_unique = (sljit_u8*)(compiler->cpool + CPOOL_SIZE); + compiler->cpool_diff = 0xffffffff; +#endif + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + compiler->delay_slot = UNMOVABLE_INS; +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + compiler->delay_slot = UNMOVABLE_INS; +#endif + +#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) + if (!compiler_initialized) { + init_compiler(); + compiler_initialized = 1; + } +#endif + + return compiler; 
+} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *curr; + void *allocator_data = compiler->allocator_data; + SLJIT_UNUSED_ARG(allocator_data); + + buf = compiler->buf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + + buf = compiler->abuf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_FREE(compiler->cpool, allocator_data); +#endif + SLJIT_FREE(compiler, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + if (compiler->error == SLJIT_SUCCESS) + compiler->error = SLJIT_ERR_ALLOC_FAILED; +} + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(exec_allocator_data); + + /* Remove thumb mode flag. */ + SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1), exec_allocator_data); +} +#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(exec_allocator_data); + + /* Resolve indirection. 
*/ + code = (void*)(*(sljit_uw*)code); + SLJIT_FREE_EXEC(code, exec_allocator_data); +} +#else +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(exec_allocator_data); + + SLJIT_FREE_EXEC(code, exec_allocator_data); +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { + jump->flags &= ~JUMP_ADDR; + jump->flags |= JUMP_LABEL; + jump->u.label = label; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + if (SLJIT_LIKELY(!!jump)) { + jump->flags &= ~JUMP_LABEL; + jump->flags |= JUMP_ADDR; + jump->u.target = target; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label) +{ + if (SLJIT_LIKELY(!!put_label)) + put_label->label = label; +} + +#define SLJIT_CURRENT_FLAGS_ALL \ + (SLJIT_CURRENT_FLAGS_I32_OP | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(current_flags); + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = current_flags; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; + if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { + compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z)); + } +#endif +} + +/* --------------------------------------------------------------------- */ +/* Private functions */ +/* --------------------------------------------------------------------- */ + +static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_u8 *ret; + struct sljit_memory_fragment *new_frag; 
+ + SLJIT_ASSERT(size <= 256); + if (compiler->buf->used_size + size <= (BUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->buf->memory + compiler->buf->used_size; + compiler->buf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->buf; + compiler->buf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_u8 *ret; + struct sljit_memory_fragment *new_frag; + + SLJIT_ASSERT(size <= 256); + if (compiler->abuf->used_size + size <= (ABUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->abuf->memory + compiler->abuf->used_size; + compiler->abuf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->abuf; + compiler->abuf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) +{ + CHECK_ERROR_PTR(); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + if (size <= 0 || size > 128) + return NULL; + size = (size + 7) & ~7; +#else + if (size <= 0 || size > 64) + return NULL; + size = (size + 3) & ~3; +#endif + return ensure_abuf(compiler, size); +} + +static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf = compiler->buf; + struct sljit_memory_fragment *prev = NULL; + struct sljit_memory_fragment *tmp; + + do { + tmp = buf->next; + buf->next = prev; + prev = buf; + buf = tmp; + } while (buf != NULL); + + compiler->buf = prev; +} + +static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types) +{ + sljit_s32 arg_count = 0; + 
+ arg_types >>= SLJIT_DEF_SHIFT; + while (arg_types) { + arg_count++; + arg_types >>= SLJIT_DEF_SHIFT; + } + + return arg_count; +} + + +/* Only used in RISC architectures where the instruction size is constant */ +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +static SLJIT_INLINE sljit_uw compute_next_addr(struct sljit_label *label, struct sljit_jump *jump, + struct sljit_const *const_, struct sljit_put_label *put_label) +{ + sljit_uw result = ~(sljit_uw)0; + + if (label) + result = label->size; + + if (jump && jump->addr < result) + result = jump->addr; + + if (const_ && const_->addr < result) + result = const_->addr; + + if (put_label && put_label->addr < result) + result = put_label->addr; + + return result; +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X */ + +static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void 
set_label(struct sljit_label *label, struct sljit_compiler *compiler) +{ + label->next = NULL; + label->size = compiler->size; + if (compiler->last_label) + compiler->last_label->next = label; + else + compiler->labels = label; + compiler->last_label = label; +} + +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_s32 flags) +{ + jump->next = NULL; + jump->flags = flags; + if (compiler->last_jump) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler) +{ + const_->next = NULL; + const_->addr = compiler->size; + if (compiler->last_const) + compiler->last_const->next = const_; + else + compiler->consts = const_; + compiler->last_const = const_; +} + +static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset) +{ + put_label->next = NULL; + put_label->label = NULL; + put_label->addr = compiler->size - offset; + put_label->flags = 0; + if (compiler->last_put_label) + compiler->last_put_label->next = put_label; + else + compiler->put_labels = put_label; + compiler->last_put_label = put_label; +} + +#define ADDRESSING_DEPENDS_ON(exp, reg) \ + (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +#define FUNCTION_CHECK_IS_REG(r) \ + (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \ + || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#define FUNCTION_CHECK_IS_FREG(fr) \ + (((fr) >= SLJIT_FR0 && (fr) < (SLJIT_FR0 + compiler->fscratches)) \ + || ((fr) > (SLJIT_FS0 - compiler->fsaveds) && (fr) <= SLJIT_FS0)) + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8) +#else +#define CHECK_IF_VIRTUAL_REGISTER(p) 0 +#endif + +static 
sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (!(p & SLJIT_MEM)) + return 0; + + if (!((p & REG_MASK) == SLJIT_UNUSED || FUNCTION_CHECK_IS_REG(p & REG_MASK))) + return 0; + + if (CHECK_IF_VIRTUAL_REGISTER(p & REG_MASK)) + return 0; + + if (p & OFFS_REG_MASK) { + if ((p & REG_MASK) == SLJIT_UNUSED) + return 0; + + if (!(FUNCTION_CHECK_IS_REG(OFFS_REG(p)))) + return 0; + + if (CHECK_IF_VIRTUAL_REGISTER(OFFS_REG(p))) + return 0; + + if ((i & ~0x3) != 0) + return 0; + } + + return (p & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK)) == 0; +} + +#define FUNCTION_CHECK_SRC_MEM(p, i) \ + CHECK_ARGUMENT(function_check_src_mem(compiler, p, i)); + +static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (FUNCTION_CHECK_IS_REG(p)) + return (i == 0); + + if (p == SLJIT_IMM) + return 1; + + if (p == SLJIT_MEM1(SLJIT_SP)) + return (i >= 0 && i < compiler->logical_local_size); + + return function_check_src_mem(compiler, p, i); +} + +#define FUNCTION_CHECK_SRC(p, i) \ + CHECK_ARGUMENT(function_check_src(compiler, p, i)); + +static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 unused) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED)) + return (i == 0); + + if (p == SLJIT_MEM1(SLJIT_SP)) + return (i >= 0 && i < compiler->logical_local_size); + + return function_check_src_mem(compiler, p, i); +} + +#define FUNCTION_CHECK_DST(p, i, unused) \ + CHECK_ARGUMENT(function_check_dst(compiler, p, i, unused)); + +static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (FUNCTION_CHECK_IS_FREG(p)) + 
return (i == 0); + + if (p == SLJIT_MEM1(SLJIT_SP)) + return (i >= 0 && i < compiler->logical_local_size); + + return function_check_src_mem(compiler, p, i); +} + +#define FUNCTION_FCHECK(p, i) \ + CHECK_ARGUMENT(function_fcheck(compiler, p, i)); + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + compiler->verbose = verbose; +} + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#ifdef _WIN64 +# define SLJIT_PRINT_D "I64" +#else +# define SLJIT_PRINT_D "l" +#endif +#else +# define SLJIT_PRINT_D "" +#endif + +static void sljit_verbose_reg(struct sljit_compiler *compiler, sljit_s32 r) +{ + if (r < (SLJIT_R0 + compiler->scratches)) + fprintf(compiler->verbose, "r%d", r - SLJIT_R0); + else if (r != SLJIT_SP) + fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - r); + else + fprintf(compiler->verbose, "sp"); +} + +static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r) +{ + if (r < (SLJIT_FR0 + compiler->fscratches)) + fprintf(compiler->verbose, "fr%d", r - SLJIT_FR0); + else + fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - r); +} + +static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if ((p) & SLJIT_IMM) + fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); + else if ((p) & SLJIT_MEM) { + if ((p) & REG_MASK) { + fputc('[', compiler->verbose); + sljit_verbose_reg(compiler, (p) & REG_MASK); + if ((p) & OFFS_REG_MASK) { + fprintf(compiler->verbose, " + "); + sljit_verbose_reg(compiler, OFFS_REG(p)); + if (i) + fprintf(compiler->verbose, " * %d", 1 << (i)); + } + else if (i) + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); + fputc(']', compiler->verbose); + } + else + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); + } else if (p) + sljit_verbose_reg(compiler, p); + else + 
fprintf(compiler->verbose, "unused"); +} + +static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if ((p) & SLJIT_MEM) { + if ((p) & REG_MASK) { + fputc('[', compiler->verbose); + sljit_verbose_reg(compiler, (p) & REG_MASK); + if ((p) & OFFS_REG_MASK) { + fprintf(compiler->verbose, " + "); + sljit_verbose_reg(compiler, OFFS_REG(p)); + if (i) + fprintf(compiler->verbose, "%d", 1 << (i)); + } + else if (i) + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); + fputc(']', compiler->verbose); + } + else + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); + } + else + sljit_verbose_freg(compiler, p); +} + +static const char* op0_names[] = { + (char*)"breakpoint", (char*)"nop", (char*)"lmul.uw", (char*)"lmul.sw", + (char*)"divmod.u", (char*)"divmod.s", (char*)"div.u", (char*)"div.s", + (char*)"endbr", (char*)"skip_frames_before_return" +}; + +static const char* op1_names[] = { + (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", + (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", + (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", + (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", + (char*)"not", (char*)"neg", (char*)"clz", +}; + +static const char* op2_names[] = { + (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", + (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", + (char*)"shl", (char*)"lshr", (char*)"ashr", +}; + +static const char* op_src_names[] = { + (char*)"fast_return", (char*)"skip_frames_before_fast_return", + (char*)"prefetch_l1", (char*)"prefetch_l2", + (char*)"prefetch_l3", (char*)"prefetch_once", +}; + +static const char* fop1_names[] = { + (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv", + (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg", + (char*)"abs", +}; + +static const char* fop2_names[] = { + (char*)"add", (char*)"sub", (char*)"mul", (char*)"div" +}; + +#define JUMP_POSTFIX(type) \ + ((type & 0xff) <= SLJIT_NOT_OVERFLOW ? 
((type & SLJIT_I32_OP) ? "32" : "") \ + : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_F32_OP) ? ".f32" : ".f64") : "")) + +static char* jump_names[] = { + (char*)"equal", (char*)"not_equal", + (char*)"less", (char*)"greater_equal", + (char*)"greater", (char*)"less_equal", + (char*)"sig_less", (char*)"sig_greater_equal", + (char*)"sig_greater", (char*)"sig_less_equal", + (char*)"overflow", (char*)"not_overflow", + (char*)"carry", (char*)"", + (char*)"equal", (char*)"not_equal", + (char*)"less", (char*)"greater_equal", + (char*)"greater", (char*)"less_equal", + (char*)"unordered", (char*)"ordered", + (char*)"jump", (char*)"fast_call", + (char*)"call", (char*)"call.cdecl" +}; + +static char* call_arg_names[] = { + (char*)"void", (char*)"sw", (char*)"uw", (char*)"s32", (char*)"u32", (char*)"f32", (char*)"f64" +}; + +#endif /* SLJIT_VERBOSE */ + +/* --------------------------------------------------------------------- */ +/* Arch dependent */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + struct sljit_jump *jump; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->size > 0); + jump = compiler->jumps; + while (jump) { + /* All jumps have target. 
*/ + CHECK_ARGUMENT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); + jump = jump->next; + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 types, arg_count, curr_type; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + CHECK_ARGUMENT((arg_types & SLJIT_DEF_MASK) == 0); + + types = (arg_types >> SLJIT_DEF_SHIFT); + arg_count = 0; + while (types != 0 && arg_count < 3) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); + arg_count++; + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(arg_count <= saveds && types == 0); + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " enter options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? 
"f64_align" : ""); + + arg_types >>= SLJIT_DEF_SHIFT; + while (arg_types) { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } + + fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 types, arg_count, curr_type; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + + types = (arg_types >> SLJIT_DEF_SHIFT); + arg_count = 0; + while (types != 0 && arg_count < 3) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); + arg_count++; + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(arg_count <= saveds && types == 0); + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " set_context options:%s args[", (options & 
SLJIT_F64_ALIGNMENT) ? "f64_align" : ""); + + arg_types >>= SLJIT_DEF_SHIFT; + while (arg_types) { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } + + fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->scratches >= 0); + if (op != SLJIT_UNUSED) { + CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_P); + FUNCTION_CHECK_SRC(src, srcw); + } + else + CHECK_ARGUMENT(src == 0 && srcw == 0); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (op == SLJIT_UNUSED) + fprintf(compiler->verbose, " return\n"); + else { + fprintf(compiler->verbose, " return%s ", op1_names[op - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw, 0); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fast_enter "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op >= 
SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) + || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW) + || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); + CHECK_ARGUMENT(GET_OPCODE(op) < SLJIT_LMUL_UW || GET_OPCODE(op) >= SLJIT_ENDBR || compiler->scratches >= 2); + if ((GET_OPCODE(op) >= SLJIT_LMUL_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN) + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + { + fprintf(compiler->verbose, " %s", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); + if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) { + fprintf(compiler->verbose, (op & SLJIT_I32_OP) ? "32" : "w"); + } + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); + + switch (GET_OPCODE(op)) { + case SLJIT_NOT: + /* Only SLJIT_I32_OP and SLJIT_SET_Z are allowed. */ + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + break; + case SLJIT_NEG: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_P: + /* Nothing allowed */ + CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + default: + /* Only SLJIT_I32_OP is allowed. 
*/ + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + } + + FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op)); + FUNCTION_CHECK_SRC(src, srcw); + + if (GET_OPCODE(op) >= SLJIT_NOT) { + CHECK_ARGUMENT(src != SLJIT_IMM); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) <= SLJIT_MOV_P) + { + fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_I32_OP) ? "" : "32", + (op != SLJIT_MOV32) ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ""); + } + else + { + fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", + !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); + } + + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR); + + switch (GET_OPCODE(op)) { + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + break; + case SLJIT_MUL: + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_ADD: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == 
GET_FLAG_TYPE(SLJIT_SET_CARRY) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_SUB: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_OVERFLOW) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + break; + case SLJIT_ADDC: + case SLJIT_SUBC: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((op & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP)); + break; + default: + SLJIT_UNREACHABLE(); + break; + } + + FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op)); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", + !(op & VARIABLE_FLAG_MASK) ? 
"" : jump_names[GET_FLAG_TYPE(op)]); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(op >= SLJIT_FAST_RETURN && op <= SLJIT_PREFETCH_ONCE); + FUNCTION_CHECK_SRC(src, srcw); + + if (op == SLJIT_FAST_RETURN || op == SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN) + { + CHECK_ARGUMENT(src != SLJIT_IMM); + compiler->last_flags = 0; + } + else if (op >= SLJIT_PREFETCH_L1 && op <= SLJIT_PREFETCH_ONCE) + { + CHECK_ARGUMENT(src & SLJIT_MEM); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s ", op_src_names[op - SLJIT_OP_SRC_BASE]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 reg) +{ + SLJIT_UNUSED_ARG(reg); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_s32 reg) +{ + SLJIT_UNUSED_ARG(reg); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + int i; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined 
SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(instruction); + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + CHECK_ARGUMENT(size > 0 && size < 16); +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + CHECK_ARGUMENT((size == 2 && (((sljit_sw)instruction) & 0x1) == 0) + || (size == 4 && (((sljit_sw)instruction) & 0x3) == 0)); +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + CHECK_ARGUMENT(size == 2 || size == 4 || size == 6); +#else + CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0); +#endif + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " op_custom"); + for (i = 0; i < size; i++) + fprintf(compiler->verbose, " 0x%x", ((sljit_u8*)instruction)[i]); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONV_F64_FROM_F32 - SLJIT_FOP1_BASE], + (op & SLJIT_F32_OP) ? ".f32.from.f64" : ".f64.from.f32"); + else + fprintf(compiler->verbose, " %s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_F32_OP) ? 
".f32" : ".f64"); + + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); +#endif + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64); + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_EQUAL_F64 && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_F64)); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? 
".f32" : ".f64"); + if (op & VARIABLE_FLAG_MASK) { + fprintf(compiler->verbose, ".%s_f", jump_names[GET_FLAG_TYPE(op)]); + } + fprintf(compiler->verbose, " "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src, srcw); + FUNCTION_CHECK_DST(dst, dstw, 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? ".s32" : ".sw", + (op & SLJIT_F32_OP) ? 
".f32" : ".f64"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_CHECK_SRC(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_F32_OP) ? ".f32" : ".f64", + (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? 
".s32" : ".sw"); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, "label:\n"); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + 
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); + CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP)); + + if ((type & 0xff) < SLJIT_JUMP) { + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " jump%s %s%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff], JUMP_POSTFIX(type)); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 i, types, curr_type, scratches, fscratches; + + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + + types = arg_types; + scratches = 0; + fscratches = 0; + for (i = 0; i < 5; i++) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64); + if (i > 0) { + if (curr_type == 0) { + break; + } + if (curr_type >= SLJIT_ARG_TYPE_F32) + fscratches++; + else + scratches++; + } else { + if (curr_type >= SLJIT_ARG_TYPE_F32) { + CHECK_ARGUMENT(compiler->fscratches > 0); + } else if (curr_type >= SLJIT_ARG_TYPE_SW) { + CHECK_ARGUMENT(compiler->scratches > 0); + } + } + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(compiler->scratches >= scratches); + 
CHECK_ARGUMENT(compiler->fscratches >= fscratches); + CHECK_ARGUMENT(types == 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "]\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff], (type & SLJIT_I32_OP) ? 
"32" : ""); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_F32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fcmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff], (type & SLJIT_F32_OP) ? 
".f32" : ".f64"); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_FAST_CALL); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 i, types, curr_type, scratches, fscratches; + + CHECK_ARGUMENT(type == SLJIT_CALL || type == SLJIT_CALL_CDECL); + FUNCTION_CHECK_SRC(src, srcw); + + types = arg_types; + scratches = 0; + fscratches = 0; + for (i = 0; i < 5; i++) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64); + if (i > 0) { + if (curr_type == 0) { + break; + } + if (curr_type >= SLJIT_ARG_TYPE_F32) + fscratches++; + else + scratches++; + } else { + if (curr_type >= SLJIT_ARG_TYPE_F32) { + CHECK_ARGUMENT(compiler->fscratches > 0); + } else if (curr_type >= SLJIT_ARG_TYPE_SW) { + CHECK_ARGUMENT(compiler->scratches > 0); + } + } + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(compiler->scratches >= scratches); + CHECK_ARGUMENT(compiler->fscratches >= fscratches); + CHECK_ARGUMENT(types == 0); +#endif +#if (defined SLJIT_VERBOSE && 
SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "], "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); + CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); + CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 + || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); + + FUNCTION_CHECK_DST(dst, dstw, 0); + + if (GET_OPCODE(op) >= SLJIT_ADD) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " flags%s %s%s, ", + !(op & SLJIT_SET_Z) ? "" : ".z", + GET_OPCODE(op) < SLJIT_OP2_BASE ? 
"mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : "")); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type)); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); + + CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); + if (src != SLJIT_IMM) { + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src)); + CHECK_ARGUMENT(srcw == 0); + } + + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cmov%s %s%s, ", + !(dst_reg & SLJIT_I32_OP) ? 
"" : "32", + jump_names[type & 0xff], JUMP_POSTFIX(type)); + sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); + CHECK_ARGUMENT(!(type & SLJIT_I32_OP) || ((type & 0xff) != SLJIT_MOV && (type & 0xff) != SLJIT_MOV_U32 && (type & 0xff) != SLJIT_MOV_P)); + CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + + FUNCTION_CHECK_SRC_MEM(mem, memw); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); + + CHECK_ARGUMENT((mem & REG_MASK) != SLJIT_UNUSED && (mem & REG_MASK) != reg); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) { + if (sljit_emit_mem(compiler, type | SLJIT_MEM_SUPP, reg, mem, memw) == SLJIT_ERR_UNSUPPORTED) + fprintf(compiler->verbose, " //"); + + fprintf(compiler->verbose, " mem%s.%s%s%s ", + !(type & SLJIT_I32_OP) ? "" : "32", + (type & SLJIT_MEM_STORE) ? "st" : "ld", + op1_names[(type & 0xff) - SLJIT_OP1_BASE], + (type & SLJIT_MEM_PRE) ? 
".pre" : ".post"); + sljit_verbose_reg(compiler, reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); + CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + + FUNCTION_CHECK_SRC_MEM(mem, memw); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) { + if (sljit_emit_fmem(compiler, type | SLJIT_MEM_SUPP, freg, mem, memw) == SLJIT_ERR_UNSUPPORTED) + fprintf(compiler->verbose, " //"); + + fprintf(compiler->verbose, " fmem.%s%s%s ", + (type & SLJIT_MEM_STORE) ? "st" : "ld", + !(type & SLJIT_I32_OP) ? ".f64" : ".f32", + (type & SLJIT_MEM_PRE) ? ".pre" : ".post"); + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + /* Any offset is allowed. 
*/ + SLJIT_UNUSED_ARG(offset); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw, 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " local_base "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + SLJIT_UNUSED_ARG(init_value); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw, 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " const "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw, 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " put_label "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ + +#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1), \ + invalid_float_opcodes); \ + if (GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CMP_F64) { \ + if (GET_OPCODE(op) == SLJIT_CMP_F64) { \ + CHECK(check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + 
ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ + } \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONV_S32_FROM_F64) { \ + CHECK(check_sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); + +static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + /* Return if don't need to do anything. */ + if (op == SLJIT_UNUSED) + return SLJIT_SUCCESS; + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. 
*/ + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#else + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + compiler->skip_checks = 1; +#endif + return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); +} + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)) + +static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + struct sljit_label *label; + struct sljit_jump *jump; + sljit_s32 op = (dst_reg & SLJIT_I32_OP) ? 
SLJIT_MOV32 : SLJIT_MOV; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + jump = sljit_emit_jump(compiler, type ^ 0x1); + FAIL_IF(!jump); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_I32_OP, 0, src, srcw)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + label = sljit_emit_label(compiler); + FAIL_IF(!label); + sljit_set_label(jump, label); + return SLJIT_SUCCESS; +} + +#endif + +/* CPU description section */ + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 32bit (" +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 64bit (" +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_CPUINFO_PART2 "little endian + " +#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) +#define SLJIT_CPUINFO_PART2 "big endian + " +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) +#define SLJIT_CPUINFO_PART3 "unaligned)" +#else +#define SLJIT_CPUINFO_PART3 "aligned)" +#endif + +#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# include "sljitNativeX86_common.c" +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +# include "sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# include "sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# include "sljitNativeARM_T2_32.c" +#elif (defined SLJIT_CONFIG_ARM_64 && 
SLJIT_CONFIG_ARM_64) +# include "sljitNativeARM_64.c" +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# include "sljitNativePPC_common.c" +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# include "sljitNativeMIPS_common.c" +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) +# include "sljitNativeSPARC_common.c" +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +# include "sljitNativeS390X.c" +#endif + +#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* Default compare for most architectures. */ + sljit_s32 flags, tmp_src, condition; + sljit_sw tmp_srcw; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + + condition = type & 0xff; +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) { + if ((src1 & SLJIT_IMM) && !src1w) { + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + src2w = 0; + } + if ((src2 & SLJIT_IMM) && !src2w) + return emit_cmp_to0(compiler, type, src1, src1w); + } +#endif + + if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) { + /* Immediate is preferred as second argument by most architectures. 
*/ + switch (condition) { + case SLJIT_LESS: + condition = SLJIT_GREATER; + break; + case SLJIT_GREATER_EQUAL: + condition = SLJIT_LESS_EQUAL; + break; + case SLJIT_GREATER: + condition = SLJIT_LESS; + break; + case SLJIT_LESS_EQUAL: + condition = SLJIT_GREATER_EQUAL; + break; + case SLJIT_SIG_LESS: + condition = SLJIT_SIG_GREATER; + break; + case SLJIT_SIG_GREATER_EQUAL: + condition = SLJIT_SIG_LESS_EQUAL; + break; + case SLJIT_SIG_GREATER: + condition = SLJIT_SIG_LESS; + break; + case SLJIT_SIG_LESS_EQUAL: + condition = SLJIT_SIG_GREATER_EQUAL; + break; + } + + type = condition | (type & (SLJIT_I32_OP | SLJIT_REWRITABLE_JUMP)); + tmp_src = src1; + src1 = src2; + src2 = tmp_src; + tmp_srcw = src1w; + src1w = src2w; + src2w = tmp_srcw; + } + + if (condition <= SLJIT_NOT_ZERO) + flags = SLJIT_SET_Z; + else + flags = condition << VARIABLE_FLAG_SHIFT; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_I32_OP), + SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); +} + +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_I32_OP), src1, src1w, src2, src2w); + +#if (defined SLJIT_VERBOSE && 
SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_jump(compiler, type); +} + +#if !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(reg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif + +#if !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + if (offset != 0) + return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, 
SLJIT_SP, 0); +} + +#endif + +#else /* SLJIT_CONFIG_UNSUPPORTED */ + +/* Empty function bodies for those machines, which are not (yet) supported. */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "unsupported"; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + SLJIT_UNUSED_ARG(exec_allocator_data); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(size); + SLJIT_UNREACHABLE(); + return NULL; +} + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(verbose); + SLJIT_UNREACHABLE(); +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + SLJIT_UNUSED_ARG(feature_type); + SLJIT_UNREACHABLE(); + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(code); + SLJIT_UNUSED_ARG(exec_allocator_data); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + 
SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNUSED_ARG(scratches); + SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); + SLJIT_UNUSED_ARG(local_size); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNUSED_ARG(scratches); + SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); + SLJIT_UNUSED_ARG(local_size); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + SLJIT_UNREACHABLE(); + return reg; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(instruction); + SLJIT_UNUSED_ARG(size); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(current_flags); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + 
SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(label); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(target); + SLJIT_UNREACHABLE(); +} + 
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label) +{ + SLJIT_UNUSED_ARG(put_label); + SLJIT_UNUSED_ARG(label); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(type); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(dst_reg); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(reg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 
type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(offset); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw initval) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(initval); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_target); + SLJIT_UNUSED_ARG(executable_offset); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_constant); + SLJIT_UNUSED_ARG(executable_offset); + SLJIT_UNREACHABLE(); +} + +#endif /* !SLJIT_CONFIG_UNSUPPORTED */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.h b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.h new file mode 100644 index 0000000000..0eb62fc21b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitLir.h @@ -0,0 +1,1545 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_LIR_H_ +#define SLJIT_LIR_H_ + +/* + ------------------------------------------------------------------------ + Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) + ------------------------------------------------------------------------ + + Short description + Advantages: + - The execution can be continued from any LIR instruction. In other + words, it is possible to jump to any label from anywhere, even from + a code fragment, which is compiled later, if both compiled code + shares the same context. 
See sljit_emit_enter for more details + - Supports self modifying code: target of (conditional) jump and call + instructions and some constant values can be dynamically modified + during runtime + - although it is not suggested to do it frequently + - can be used for inline caching: save an important value once + in the instruction stream + - since this feature limits the optimization possibilities, a + special flag must be passed at compile time when these + instructions are emitted + - A fixed stack space can be allocated for local variables + - The compiler is thread-safe + - The compiler is highly configurable through preprocessor macros. + You can disable unneeded features (multithreading in single + threaded applications), and you can use your own system functions + (including memory allocators). See sljitConfig.h + Disadvantages: + - No automatic register allocation, and temporary results are + not stored on the stack. (hence the name comes) + In practice: + - This approach is very effective for interpreters + - One of the saved registers typically points to a stack interface + - It can jump to any exception handler anytime (even if it belongs + to another function) + - Hot paths can be modified during runtime reflecting the changes + of the fastest execution path of the dynamic language + - SLJIT supports complex memory addressing modes + - mainly position and context independent code (except some cases) + + For valgrind users: + - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code" +*/ + +#if (defined SLJIT_HAVE_CONFIG_PRE && SLJIT_HAVE_CONFIG_PRE) +#include "sljitConfigPre.h" +#endif /* SLJIT_HAVE_CONFIG_PRE */ + +#include "sljitConfig.h" + +/* The following header file defines useful macros for fine tuning +sljit based code generators. 
They are listed in the beginning +of sljitConfigInternal.h */ + +#include "sljitConfigInternal.h" + +#if (defined SLJIT_HAVE_CONFIG_POST && SLJIT_HAVE_CONFIG_POST) +#include "sljitConfigPost.h" +#endif /* SLJIT_HAVE_CONFIG_POST */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* --------------------------------------------------------------------- */ +/* Error codes */ +/* --------------------------------------------------------------------- */ + +/* Indicates no error. */ +#define SLJIT_SUCCESS 0 +/* After the call of sljit_generate_code(), the error code of the compiler + is set to this value to avoid future sljit calls (in debug mode at least). + The complier should be freed after sljit_generate_code(). */ +#define SLJIT_ERR_COMPILED 1 +/* Cannot allocate non executable memory. */ +#define SLJIT_ERR_ALLOC_FAILED 2 +/* Cannot allocate executable memory. + Only for sljit_generate_code() */ +#define SLJIT_ERR_EX_ALLOC_FAILED 3 +/* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */ +#define SLJIT_ERR_UNSUPPORTED 4 +/* An ivalid argument is passed to any SLJIT function. */ +#define SLJIT_ERR_BAD_ARGUMENT 5 +/* Dynamic code modification is not enabled. */ +#define SLJIT_ERR_DYN_CODE_MOD 6 + +/* --------------------------------------------------------------------- */ +/* Registers */ +/* --------------------------------------------------------------------- */ + +/* + Scratch (R) registers: registers whose may not preserve their values + across function calls. + + Saved (S) registers: registers whose preserve their values across + function calls. + + The scratch and saved register sets are overlap. The last scratch register + is the first saved register, the one before the last is the second saved + register, and so on. 
+ + If an architecture provides two scratch and three saved registers, + its scratch and saved register sets are the following: + + R0 | | R0 is always a scratch register + R1 | | R1 is always a scratch register + [R2] | S2 | R2 and S2 represent the same physical register + [R3] | S1 | R3 and S1 represent the same physical register + [R4] | S0 | R4 and S0 represent the same physical register + + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. + + Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers + are virtual on x86-32. See below. + + The purpose of this definition is convenience: saved registers can + be used as extra scratch registers. For example four registers can + be specified as scratch registers and the fifth one as saved register + on the CPU above and any user code which requires four scratch + registers can run unmodified. The SLJIT compiler automatically saves + the content of the two extra scratch register on the stack. Scratch + registers can also be preserved by saving their value on the stack + but this needs to be done manually. + + Note: To emphasize that registers assigned to R2-R4 are saved + registers, they are enclosed by square brackets. + + Note: sljit_emit_enter and sljit_set_context defines whether a register + is S or R register. E.g: when 3 scratches and 1 saved is mapped + by sljit_emit_enter, the allowed register set will be: R0-R2 and + S0. Although S2 is mapped to the same position as R2, it does not + available in the current configuration. Furthermore the S1 register + is not available at all. +*/ + +/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 + or sljit_emit_op2 operations the result is discarded. Some status + flags must be set when the destination is SLJIT_UNUSED, because the + operation would have no effect otherwise. 
Other SLJIT operations do + not support SLJIT_UNUSED as a destination operand. */ +#define SLJIT_UNUSED 0 + +/* Scratch registers. */ +#define SLJIT_R0 1 +#define SLJIT_R1 2 +#define SLJIT_R2 3 +/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_R3 4 +#define SLJIT_R4 5 +#define SLJIT_R5 6 +#define SLJIT_R6 7 +#define SLJIT_R7 8 +#define SLJIT_R8 9 +#define SLJIT_R9 10 +/* All R registers provided by the architecture can be accessed by SLJIT_R(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ +#define SLJIT_R(i) (1 + (i)) + +/* Saved registers. */ +#define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) +#define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) +#define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) +/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) +#define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) +#define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) +#define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) +#define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) +#define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) +#define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) +/* All S registers provided by the architecture can be accessed by SLJIT_S(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ +#define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) + +/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. 
*/ +#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) + +/* The SLJIT_SP provides direct access to the linear stack space allocated by + sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). + The immediate offset is extended by the relative stack offset automatically. + The sljit_get_local_base can be used to obtain the absolute offset. */ +#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) + +/* Return with machine word. */ + +#define SLJIT_RETURN_REG SLJIT_R0 + +/* --------------------------------------------------------------------- */ +/* Floating point registers */ +/* --------------------------------------------------------------------- */ + +/* Each floating point register can store a 32 or a 64 bit precision + value. The FR and FS register sets are overlap in the same way as R + and S register sets. See above. */ + +/* Note: SLJIT_UNUSED as destination is not valid for floating point + operations, since they cannot be used for setting flags. */ + +/* Floating point scratch registers. */ +#define SLJIT_FR0 1 +#define SLJIT_FR1 2 +#define SLJIT_FR2 3 +#define SLJIT_FR3 4 +#define SLJIT_FR4 5 +#define SLJIT_FR5 6 +/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ +#define SLJIT_FR(i) (1 + (i)) + +/* Floating point saved registers. */ +#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) +#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) +#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) +#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) +#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +/* All S registers provided by the architecture can be accessed by SLJIT_FS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. 
*/ +#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) + +/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) + +/* --------------------------------------------------------------------- */ +/* Argument type definitions */ +/* --------------------------------------------------------------------- */ + +/* Argument type definitions. + Used by SLJIT_[DEF_]ARGx and SLJIT_[DEF]_RET macros. */ + +#define SLJIT_ARG_TYPE_VOID 0 +#define SLJIT_ARG_TYPE_SW 1 +#define SLJIT_ARG_TYPE_UW 2 +#define SLJIT_ARG_TYPE_S32 3 +#define SLJIT_ARG_TYPE_U32 4 +#define SLJIT_ARG_TYPE_F32 5 +#define SLJIT_ARG_TYPE_F64 6 + +/* The following argument type definitions are used by sljit_emit_enter, + sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. + The following return type definitions are used by sljit_emit_call + and sljit_emit_icall functions. + + When a function is called, the first integer argument must be placed + in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first + floating point argument must be placed in SLJIT_FR0, the second in + SLJIT_FR1, and so on. 
+ + Example function definition: + sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a, + sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); + + Argument type definition: + SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32) + | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64) + | SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F32) + + Short form of argument type definition: + SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64) + | SLJIT_ARG3(S32) | SLJIT_ARG4(F32) + + Argument passing: + arg_a must be placed in SLJIT_R0 + arg_c must be placed in SLJIT_R1 + arg_b must be placed in SLJIT_FR0 + arg_d must be placed in SLJIT_FR1 + +Note: + The SLJIT_ARG_TYPE_VOID type is only supported by + SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the + default value when SLJIT_DEF_RET is not specified. */ +#define SLJIT_DEF_SHIFT 4 +#define SLJIT_DEF_RET(type) (type) +#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT) +#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT)) +#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT)) +#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT)) + +/* Short form of the macros above. + + For example the following definition: + SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32) + + can be shortened to: + SLJIT_RET(SW) | SLJIT_ARG1(F32) + +Note: + The VOID type is only supported by SLJIT_RET, and + VOID is also the default value when SLJIT_RET is + not specified. 
*/ +#define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type) + +/* --------------------------------------------------------------------- */ +/* Main structures and functions */ +/* --------------------------------------------------------------------- */ + +/* + The following structures are private, and can be changed in the + future. Keeping them here allows code inlining. +*/ + +struct sljit_memory_fragment { + struct sljit_memory_fragment *next; + sljit_uw used_size; + /* Must be aligned to sljit_sw. */ + sljit_u8 memory[1]; +}; + +struct sljit_label { + struct sljit_label *next; + sljit_uw addr; + /* The maximum size difference. */ + sljit_uw size; +}; + +struct sljit_jump { + struct sljit_jump *next; + sljit_uw addr; + sljit_uw flags; + union { + sljit_uw target; + struct sljit_label *label; + } u; +}; + +struct sljit_put_label { + struct sljit_put_label *next; + struct sljit_label *label; + sljit_uw addr; + sljit_uw flags; +}; + +struct sljit_const { + struct sljit_const *next; + sljit_uw addr; +}; + +struct sljit_compiler { + sljit_s32 error; + sljit_s32 options; + + struct sljit_label *labels; + struct sljit_jump *jumps; + struct sljit_put_label *put_labels; + struct sljit_const *consts; + struct sljit_label *last_label; + struct sljit_jump *last_jump; + struct sljit_const *last_const; + struct sljit_put_label *last_put_label; + + void *allocator_data; + void *exec_allocator_data; + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *abuf; + + /* Used scratch registers. */ + sljit_s32 scratches; + /* Used saved registers. */ + sljit_s32 saveds; + /* Used float scratch registers. */ + sljit_s32 fscratches; + /* Used float saved registers. 
*/ + sljit_s32 fsaveds; + /* Local stack size. */ + sljit_s32 local_size; + /* Code size. */ + sljit_uw size; + /* Relative offset of the executable mapping from the writable mapping. */ + sljit_uw executable_offset; + /* Executable size for statistical purposes. */ + sljit_uw executable_size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 args; + sljit_s32 locals_offset; + sljit_s32 saveds_offset; + sljit_s32 stack_tmp_size; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 mode32; +#ifdef _WIN64 + sljit_s32 locals_offset; +#endif +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Constant pool handling. */ + sljit_uw *cpool; + sljit_u8 *cpool_unique; + sljit_uw cpool_diff; + sljit_uw cpool_fill; + /* Other members. */ + /* Contains pointer, "ldr pc, [...]" pairs. */ + sljit_uw patches; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + /* Temporary fields. */ + sljit_uw shift_imm; +#endif + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + sljit_sw imm; +#endif + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + sljit_s32 delay_slot; + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + sljit_s32 delay_slot; + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + /* Need to allocate register save area to make calls. */ + sljit_s32 mode; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + FILE* verbose; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + /* Flags specified by the last arithmetic instruction. + It contains the type of the variable flag. 
*/ + sljit_s32 last_flags; + /* Local size passed to the functions. */ + sljit_s32 logical_local_size; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + /* Trust arguments when the API function is called. */ + sljit_s32 skip_checks; +#endif +}; + +/* --------------------------------------------------------------------- */ +/* Main functions */ +/* --------------------------------------------------------------------- */ + +/* Creates an sljit compiler. The allocator_data is required by some + custom memory managers. This pointer is passed to SLJIT_MALLOC + and SLJIT_FREE macros. Most allocators (including the default + one) ignores this value, and it is recommended to pass NULL + as a dummy value for allocator_data. The exec_allocator_data + has the same purpose but this one is passed to SLJIT_MALLOC_EXEC / + SLJIT_MALLOC_FREE functions. + + Returns NULL if failed. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data); + +/* Frees everything except the compiled machine code. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); + +/* Returns the current error code. If an error is occurred, future sljit + calls which uses the same compiler argument returns early with the same + error code. Thus there is no need for checking the error after every + call, it is enough to do it before the code is compiled. Removing + these checks increases the performance of the compiling process. */ +static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } + +/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except + if an error was detected before. After the error code is set + the compiler behaves as if the allocation failure happened + during an sljit function call. 
This can greatly simplify error + checking, since only the compiler status needs to be checked + after the compilation. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler); + +/* + Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, + and <= 128 bytes on 64 bit architectures. The memory area is owned by the + compiler, and freed by sljit_free_compiler. The returned pointer is + sizeof(sljit_sw) aligned. Excellent for allocating small blocks during + the compiling, and no need to worry about freeing them. The size is + enough to contain at most 16 pointers. If the size is outside of the range, + the function will return with NULL. However, this return value does not + indicate that there is no more memory (does not set the current error code + of the compiler to out-of-memory status). +*/ +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +/* Passing NULL disables verbose. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); +#endif + +/* + Create executable code from the sljit instruction stream. This is the final step + of the code generation so no more instructions can be added after this call. +*/ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); + +/* Free executable code. */ + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data); + +/* + When the protected executable allocator is used the JIT code is mapped + twice. The first mapping has read/write and the second mapping has read/exec + permissions. This function returns with the relative offset of the executable + mapping using the writable mapping as the base after the machine code is + successfully generated. 
The returned value is always 0 for the normal executable + allocator, since it uses only one mapping with read/write/exec permissions. + Dynamic code modifications requires this value. + + Before a successful code generation, this function returns with 0. +*/ +static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; } + +/* + The executable memory consumption of the generated code can be retrieved by + this function. The returned value can be used for statistical purposes. + + Before a successful code generation, this function returns with 0. +*/ +static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; } + +/* Returns with non-zero if the feature or limitation type passed as its + argument is present on the current CPU. + + Some features (e.g. floating point operations) require hardware (CPU) + support while others (e.g. move with update) are emulated if not available. + However even if a feature is emulated, specialized code paths can be faster + than the emulation. Some limitations are emulated as well so their general + case is supported but it has extra performance costs. */ + +/* [Not emulated] Floating-point support is available. */ +#define SLJIT_HAS_FPU 0 +/* [Limitation] Some registers are virtual registers. */ +#define SLJIT_HAS_VIRTUAL_REGISTERS 1 +/* [Emulated] Has zero register (setting a memory location to zero is efficient). */ +#define SLJIT_HAS_ZERO_REGISTER 2 +/* [Emulated] Count leading zero is supported. */ +#define SLJIT_HAS_CLZ 3 +/* [Emulated] Conditional move is supported. */ +#define SLJIT_HAS_CMOV 4 +/* [Emulated] Conditional move is supported. */ +#define SLJIT_HAS_PREFETCH 5 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +/* [Not emulated] SSE2 support is available on x86. 
*/ +#define SLJIT_HAS_SSE2 100 +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); + +/* Instruction generation. Returns with any error code. If there is no + error, they return with SLJIT_SUCCESS. */ + +/* + The executable code is a function from the viewpoint of the C + language. The function calls must obey to the ABI (Application + Binary Interface) of the platform, which specify the purpose of + machine registers and stack handling among other things. The + sljit_emit_enter function emits the necessary instructions for + setting up a new context for the executable code and moves function + arguments to the saved registers. Furthermore the options argument + can be used to pass configuration options to the compiler. The + available options are listed before sljit_emit_enter. + + The function argument list is the combination of SLJIT_ARGx + (SLJIT_DEF_ARG1) macros. Currently maximum 3 SW / UW + (SLJIT_ARG_TYPE_SW / LJIT_ARG_TYPE_UW) arguments are supported. + The first argument goes to SLJIT_S0, the second goes to SLJIT_S1 + and so on. The register set used by the function must be declared + as well. The number of scratch and saved registers used by the + function must be passed to sljit_emit_enter. Only R registers + between R0 and "scratches" argument can be used later. E.g. if + "scratches" is set to 2, the scratch register set will be limited + to SLJIT_R0 and SLJIT_R1. The S registers and the floating point + registers ("fscratches" and "fsaveds") are specified in a similar + manner. The sljit_emit_enter is also capable of allocating a stack + space for local variables. The "local_size" argument contains the + size in bytes of this local area and its staring address is stored + in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and + SLJIT_SP + local_size (exclusive) can be modified freely until + the function returns. The stack space is not initialized. 
+ + Note: the following conditions must met: + 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS + scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + + Note: every call of sljit_emit_enter and sljit_set_context + overwrites the previous context. +*/ + +/* The absolute address returned by sljit_get_local_base with +offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */ +#define SLJIT_F64_ALIGNMENT 0x00000001 + +/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ +#define SLJIT_MAX_LOCAL_SIZE 65536 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); + +/* The machine code has a context (which contains the local stack space size, + number of used registers, etc.) which initialized by sljit_emit_enter. Several + functions (like sljit_emit_return) requres this context to be able to generate + the appropriate code. However, some code fragments (like inline cache) may have + no normal entry point so their context is unknown for the compiler. Their context + can be provided to the compiler by the sljit_set_context function. + + Note: every call of sljit_emit_enter and sljit_set_context overwrites + the previous context. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); + +/* Return from machine code. The op argument can be SLJIT_UNUSED which means the + function does not return with anything or any opcode between SLJIT_MOV and + SLJIT_MOV_P (see sljit_emit_op1). 
As for src and srcw they must be 0 if op + is SLJIT_UNUSED, otherwise see below the description about source and + destination arguments. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw); + +/* Generating entry and exit points for fast call functions (see SLJIT_FAST_CALL). + Both sljit_emit_fast_enter and SLJIT_FAST_RETURN operations preserve the + values of all registers and stack frame. The return address is stored in the + dst argument of sljit_emit_fast_enter, and this return address can be passed + to SLJIT_FAST_RETURN to continue the execution after the fast call. + + Fast calls are cheap operations (usually only a single call instruction is + emitted) but they do not preserve any registers. However the callee function + can freely use / update any registers and stack values which can be + efficiently exploited by various optimizations. Registers can be saved + manually by the callee function if needed. + + Although returning to different address by SLJIT_FAST_RETURN is possible, + this address usually cannot be predicted by the return address predictor of + modern CPUs which may reduce performance. Furthermore certain security + enhancement technologies such as Intel Control-flow Enforcement Technology + (CET) may disallow returning to a different address. + + Flags: - (does not modify flags). 
*/ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); + +/* + Source and destination operands for arithmetical instructions + imm - a simple immediate value (cannot be used as a destination) + reg - any of the registers (immediate argument must be 0) + [imm] - absolute immediate memory address + [reg+imm] - indirect memory address + [reg+(reg<addr; } +static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } +static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } + +/* Only the address and executable offset are required to perform dynamic + code modifications. See sljit_get_executable_offset function. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset); + +/* --------------------------------------------------------------------- */ +/* Miscellaneous utility functions */ +/* --------------------------------------------------------------------- */ + +#define SLJIT_MAJOR_VERSION 0 +#define SLJIT_MINOR_VERSION 94 + +/* Get the human readable name of the platform. Can be useful on platforms + like ARM, where ARM and Thumb2 functions can be mixed, and + it is useful to know the type of the code generator. */ +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void); + +/* Portable helper function to get an offset of a member. */ +#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10) + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +/* The sljit_stack structure and its manipulation functions provides + an implementation for a top-down stack. 
The stack top is stored + in the end field of the sljit_stack structure and the stack goes + down to the min_start field, so the memory region reserved for + this stack is between min_start (inclusive) and end (exclusive) + fields. However the application can only use the region between + start (inclusive) and end (exclusive) fields. The sljit_stack_resize + function can be used to extend this region up to min_start. + + This feature uses the "address space reserve" feature of modern + operating systems. Instead of allocating a large memory block + applications can allocate a small memory region and extend it + later without moving the content of the memory area. Therefore + after a successful resize by sljit_stack_resize all pointers into + this region are still valid. + + Note: + this structure may not be supported by all operating systems. + end and max_limit fields are aligned to PAGE_SIZE bytes (usually + 4 Kbyte or more). + stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more. */ + +struct sljit_stack { + /* User data, anything can be stored here. + Initialized to the same value as the end field. */ + sljit_u8 *top; +/* These members are read only. */ + /* End address of the stack */ + sljit_u8 *end; + /* Current start address of the stack. */ + sljit_u8 *start; + /* Lowest start address of the stack. */ + sljit_u8 *min_start; +}; + +/* Allocates a new stack. Returns NULL if unsuccessful. + Note: see sljit_create_compiler for the explanation of allocator_data. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data); + +/* Can be used to increase (extend) or decrease (shrink) the stack + memory area. Returns with new_start if successful and NULL otherwise. + It always fails if new_start is less than min_start or greater or equal + than end fields. 
The fields of the stack are not changed if the returned + value is NULL (the current memory content is never lost). */ +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start); + +#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ + +#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + +/* Get the entry address of a given function. */ +#define SLJIT_FUNC_OFFSET(func_name) ((sljit_sw)func_name) + +#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +/* All JIT related code should be placed in the same context (library, binary, etc.). */ + +#define SLJIT_FUNC_OFFSET(func_name) (*(sljit_sw*)(void*)func_name) + +/* For powerpc64, the function pointers point to a context descriptor. */ +struct sljit_function_context { + sljit_sw addr; + sljit_sw r2; + sljit_sw r11; +}; + +/* Fill the context arguments using the addr and the function. + If func_ptr is NULL, it will not be set to the address of context + If addr is NULL, the function address also comes from the func pointer. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func); + +#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +/* Free unused executable memory. The allocator keeps some free memory + around to reduce the number of OS executable memory allocations. + This improves performance since these calls are costly. However + it is sometimes desired to free all unused memory regions, e.g. + before the application terminates. 
*/ +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#endif + +/* --------------------------------------------------------------------- */ +/* CPU specific functions */ +/* --------------------------------------------------------------------- */ + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index ( >=0 ) of any SLJIT_R, + SLJIT_S and SLJIT_SP registers. + + Note: it returns with -1 for virtual registers (only on x86-32). */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg); + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index of any SLJIT_FLOAT register. + + Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg); + +/* Any instruction can be inserted into the instruction stream by + sljit_emit_op_custom. It has a similar purpose as inline assembly. + The size parameter must match to the instruction size of the target + architecture: + + x86: 0 < size <= 15. The instruction argument can be byte aligned. + Thumb2: if size == 2, the instruction argument must be 2 byte aligned. + if size == 4, the instruction argument must be 4 byte aligned. + Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size); + +/* Flags were set by a 32 bit operation. */ +#define SLJIT_CURRENT_FLAGS_I32_OP SLJIT_I32_OP + +/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */ +#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01 + +/* Flags were set by a SUB with unused destination. + Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */ +#define SLJIT_CURRENT_FLAGS_COMPARE 0x02 + +/* Define the currently available CPU status flags. 
It is usually used after + an sljit_emit_label or sljit_emit_op_custom operations to define which CPU + status flags are available. + + The current_flags must be a valid combination of SLJIT_SET_* and + SLJIT_CURRENT_FLAGS_* constants. */ + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, + sljit_s32 current_flags); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_LIR_H_ */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_32.c new file mode 100644 index 0000000000..74cf55fcd2 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_32.c @@ -0,0 +1,2833 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef __SOFTFP__ +#define ARM_ABI_INFO " ABI:softfp" +#else +#define ARM_ABI_INFO " ABI:hardfp" +#endif + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO; +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO; +#else +#error "Internal error: Unknown ARM architecture" +#endif +} + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +/* In ARM instruction words. + Cache lines are usually 32 byte aligned. */ +#define CONST_POOL_ALIGNMENT 8 +#define CONST_POOL_EMPTY 0xffffffff + +#define ALIGN_INSTRUCTION(ptr) \ + (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1)) +#define MAX_DIFFERENCE(max_diff) \ + (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) + +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
*/ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7 +}; + +#define RM(rm) (reg_map[rm]) +#define RD(rd) (reg_map[rd] << 12) +#define RN(rn) (reg_map[rn] << 16) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +/* The instruction includes the AL condition. + INST_NAME - CONDITIONAL remove this flag. */ +#define COND_MASK 0xf0000000 +#define CONDITIONAL 0xe0000000 +#define PUSH_POOL 0xff000000 + +#define ADC 0xe0a00000 +#define ADD 0xe0800000 +#define AND 0xe0000000 +#define B 0xea000000 +#define BIC 0xe1c00000 +#define BL 0xeb000000 +#define BLX 0xe12fff30 +#define BX 0xe12fff10 +#define CLZ 0xe16f0f10 +#define CMN 0xe1600000 +#define CMP 0xe1400000 +#define BKPT 0xe1200070 +#define EOR 0xe0200000 +#define MOV 0xe1a00000 +#define MUL 0xe0000090 +#define MVN 0xe1e00000 +#define NOP 0xe1a00000 +#define ORR 0xe1800000 +#define PUSH 0xe92d0000 +#define POP 0xe8bd0000 +#define RSB 0xe0600000 +#define RSC 0xe0e00000 +#define SBC 0xe0c00000 +#define SMULL 0xe0c00090 +#define SUB 0xe0400000 +#define UMULL 0xe0800090 +#define VABS_F32 0xeeb00ac0 +#define VADD_F32 0xee300a00 +#define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 +#define VDIV_F32 0xee800a00 +#define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 +#define VMOV2 0xec400a10 +#define VMRS 0xeef1fa10 +#define VMUL_F32 0xee200a00 +#define VNEG_F32 0xeeb10a40 +#define VSTR_F32 0xed000a00 +#define VSUB_F32 0xee300a40 + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +/* Arm v7 specific instructions. 
*/ +#define MOVW 0xe3000000 +#define MOVT 0xe3400000 +#define SXTB 0xe6af0070 +#define SXTH 0xe6bf0070 +#define UXTB 0xe6ef0070 +#define UXTH 0xe6ff0070 +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +static sljit_s32 push_cpool(struct sljit_compiler *compiler) +{ + /* Pushing the constant pool into the instruction stream. */ + sljit_uw* inst; + sljit_uw* cpool_ptr; + sljit_uw* cpool_end; + sljit_s32 i; + + /* The label could point the address after the constant pool. */ + if (compiler->last_label && compiler->last_label->size == compiler->size) + compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1; + + SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE); + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0xff000000 | compiler->cpool_fill; + + for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) { + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0; + } + + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + while (cpool_ptr < cpool_end) { + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = *cpool_ptr++; + } + compiler->cpool_diff = CONST_POOL_EMPTY; + compiler->cpool_fill = 0; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) +{ + sljit_uw* ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +{ + sljit_uw* ptr; + sljit_uw cpool_index = CPOOL_SIZE; + sljit_uw* cpool_ptr; + sljit_uw* 
cpool_end; + sljit_u8* cpool_unique_ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + else if (compiler->cpool_fill > 0) { + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + cpool_unique_ptr = compiler->cpool_unique; + do { + if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { + cpool_index = cpool_ptr - compiler->cpool; + break; + } + cpool_ptr++; + cpool_unique_ptr++; + } while (cpool_ptr < cpool_end); + } + + if (cpool_index == CPOOL_SIZE) { + /* Must allocate a new entry in the literal pool. */ + if (compiler->cpool_fill < CPOOL_SIZE) { + cpool_index = compiler->cpool_fill; + compiler->cpool_fill++; + } + else { + FAIL_IF(push_cpool(compiler)); + cpool_index = 0; + compiler->cpool_fill = 1; + } + } + + SLJIT_ASSERT((inst & 0xfff) == 0); + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst | cpool_index; + + compiler->cpool[cpool_index] = literal; + compiler->cpool_unique[cpool_index] = 0; + if (compiler->cpool_diff == CONST_POOL_EMPTY) + compiler->cpool_diff = compiler->size; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +{ + sljit_uw* ptr; + if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE)) + FAIL_IF(push_cpool(compiler)); + + SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0); + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst | compiler->cpool_fill; + + compiler->cpool[compiler->cpool_fill] = literal; + compiler->cpool_unique[compiler->cpool_fill] = 1; + compiler->cpool_fill++; + if (compiler->cpool_diff == CONST_POOL_EMPTY) + compiler->cpool_diff = 
compiler->size; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler) +{ + /* Place for at least two instruction (doesn't matter whether the first has a literal). */ + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088))) + return push_cpool(compiler); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler) +{ + /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ + SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + return push_inst(compiler, BLX | RM(TMP_REG1)); +} + +static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size) +{ + sljit_uw diff; + sljit_uw ind; + sljit_uw counter = 0; + sljit_uw* clear_const_pool = const_pool; + sljit_uw* clear_const_pool_end = const_pool + cpool_size; + + SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT); + /* Set unused flag for all literals in the constant pool. + I.e.: unused literals can belong to branches, which can be encoded as B or BL. + We can "compress" the constant pool by discarding these literals. */ + while (clear_const_pool < clear_const_pool_end) + *clear_const_pool++ = (sljit_uw)(-1); + + while (last_pc_patch < code_ptr) { + /* Data transfer instruction with Rn == r15. */ + if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { + diff = const_pool - last_pc_patch; + ind = (*last_pc_patch) & 0xfff; + + /* Must be a load instruction with immediate offset. 
*/ + SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20))); + if ((sljit_s32)const_pool[ind] < 0) { + const_pool[ind] = counter; + ind = counter; + counter++; + } + else + ind = const_pool[ind]; + + SLJIT_ASSERT(diff >= 1); + if (diff >= 2 || ind > 0) { + diff = (diff + ind - 2) << 2; + SLJIT_ASSERT(diff <= 0xfff); + *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; + } + else + *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; + } + last_pc_patch++; + } + return counter; +} + +/* In some rare ocasions we may need future patches. The probability is close to 0 in practice. */ +struct future_patch { + struct future_patch* next; + sljit_s32 index; + sljit_s32 value; +}; + +static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) +{ + sljit_s32 value; + struct future_patch *curr_patch, *prev_patch; + + SLJIT_UNUSED_ARG(compiler); + + /* Using the values generated by patch_pc_relative_loads. 
*/ + if (!*first_patch) + value = (sljit_s32)cpool_start_address[cpool_current_index]; + else { + curr_patch = *first_patch; + prev_patch = NULL; + while (1) { + if (!curr_patch) { + value = (sljit_s32)cpool_start_address[cpool_current_index]; + break; + } + if ((sljit_uw)curr_patch->index == cpool_current_index) { + value = curr_patch->value; + if (prev_patch) + prev_patch->next = curr_patch->next; + else + *first_patch = curr_patch->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + break; + } + prev_patch = curr_patch; + curr_patch = curr_patch->next; + } + } + + if (value >= 0) { + if ((sljit_uw)value > cpool_current_index) { + curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); + if (!curr_patch) { + while (*first_patch) { + curr_patch = *first_patch; + *first_patch = (*first_patch)->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + } + return SLJIT_ERR_ALLOC_FAILED; + } + curr_patch->next = *first_patch; + curr_patch->index = value; + curr_patch->value = cpool_start_address[value]; + *first_patch = curr_patch; + } + cpool_start_address[value] = *buf_ptr; + } + return SLJIT_SUCCESS; +} + +#else + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) +{ + sljit_uw* ptr; + + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); +} + +#endif + +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset) +{ + sljit_sw diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + +#if (defined SLJIT_CONFIG_ARM_V5 && 
SLJIT_CONFIG_ARM_V5) + if (jump->flags & IS_BL) + code_ptr--; + + if (jump->flags & JUMP_ADDR) + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); + } + + /* Branch to Thumb code has not been optimized yet. */ + if (diff & 0x3) + return 0; + + if (jump->flags & IS_BL) { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } + } + else { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); + jump->flags |= PATCH_B; + } + } +#else + if (jump->flags & JUMP_ADDR) + diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset); + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); + } + + /* Branch to Thumb code has not been optimized yet. */ + if (diff & 0x3) + return 0; + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + code_ptr -= 2; + *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } +#endif + return 0; +} + +static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) +{ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw *ptr = (sljit_uw *)jump_ptr; + sljit_uw *inst = (sljit_uw *)ptr[0]; + sljit_uw mov_pc = ptr[1]; + sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC); + sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2); + + SLJIT_UNUSED_ARG(executable_offset); + + if (diff <= 0x7fffff && diff >= -0x800000) { + /* Turn to branch. 
*/ + if (!bl) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); + inst[1] = NOP; + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } else { + /* Get the position of the constant. */ + if (mov_pc & (1 << 23)) + ptr = inst + ((mov_pc & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != mov_pc) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0); + } + inst[0] = mov_pc; + if (!bl) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + inst[1] = BLX | RM(TMP_REG1); + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + } + + *ptr = new_addr; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + } + } +#else + sljit_uw *inst = (sljit_uw*)jump_ptr; + + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); + + if (flush_cache) { + 
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif +} + +static sljit_uw get_imm(sljit_uw imm); + +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) +{ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw *ptr = (sljit_uw*)addr; + sljit_uw *inst = (sljit_uw*)ptr[0]; + sljit_uw ldr_literal = ptr[1]; + sljit_uw src2; + + SLJIT_UNUSED_ARG(executable_offset); + + src2 = get_imm(new_constant); + if (src2) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + src2 = get_imm(~new_constant); + if (src2) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + if (ldr_literal & (1 << 23)) + ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != ldr_literal) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = ldr_literal; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + } + + *ptr = new_constant; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + } +#else + sljit_uw *inst = (sljit_uw*)addr; + + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT((inst[0] & 0xfff00000) == 
MOVW && (inst[1] & 0xfff00000) == MOVT); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_uw *code; + sljit_uw *code_ptr; + sljit_uw *buf_ptr; + sljit_uw *buf_end; + sljit_uw size; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_sw addr; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw cpool_size; + sljit_uw cpool_skip_alignment; + sljit_uw cpool_current_index; + sljit_uw *cpool_start_address; + sljit_uw *last_pc_patch; + struct future_patch *first_patch; +#endif + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* Second code generation pass. 
*/ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + size = compiler->size + (compiler->patches << 1); + if (compiler->cpool_fill > 0) + size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; +#else + size = compiler->size; +#endif + code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + cpool_size = 0; + cpool_skip_alignment = 0; + cpool_current_index = 0; + cpool_start_address = NULL; + first_patch = NULL; + last_pc_patch = code; +#endif + + code_ptr = code; + word_count = 0; + next_addr = 1; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + if (label && label->size == 0) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + label = label->next; + } + + do { + buf_ptr = (sljit_uw*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + word_count++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (cpool_size > 0) { + if (cpool_skip_alignment > 0) { + buf_ptr++; + cpool_skip_alignment--; + } + else { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code, compiler->exec_allocator_data); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + if (++cpool_current_index >= cpool_size) { + SLJIT_ASSERT(!first_patch); + cpool_size = 0; + if (label && label->size == word_count) { + /* Points after the current instruction. 
*/ + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + + next_addr = compute_next_addr(label, jump, const_, put_label); + } + } + } + } + else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { +#endif + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. */ + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) + code_ptr--; + jump->addr = (sljit_uw)code_ptr; +#else + jump->addr = (sljit_uw)(code_ptr - 2); + if (detect_jump_type(jump, code_ptr, code, executable_offset)) + code_ptr -= 2; +#endif + jump = jump->next; + } + if (label && label->size == word_count) { + /* code_ptr can be affected above. */ + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); + label->size = (code_ptr + 1) - code; + label = label->next; + } + if (const_ && const_->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_->addr = (sljit_uw)code_ptr; +#else + const_->addr = (sljit_uw)(code_ptr - 1); +#endif + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + } + else { + /* Fortunately, no need to shift. 
*/ + cpool_size = *buf_ptr++ & ~PUSH_POOL; + SLJIT_ASSERT(cpool_size > 0); + cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); + if (cpool_current_index > 0) { + /* Unconditional branch. */ + *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = cpool_start_address + cpool_current_index; + } + cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; + cpool_current_index = 0; + last_pc_patch = code_ptr; + } +#endif + } while (buf_ptr < buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_ASSERT(cpool_size == 0); + if (compiler->cpool_fill > 0) { + cpool_start_address = ALIGN_INSTRUCTION(code_ptr); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); + if (cpool_current_index > 0) + code_ptr = cpool_start_address + cpool_current_index; + + buf_ptr = compiler->cpool; + buf_end = buf_ptr + compiler->cpool_fill; + cpool_current_index = 0; + while (buf_ptr < buf_end) { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code, compiler->exec_allocator_data); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + cpool_current_index++; + } + SLJIT_ASSERT(!first_patch); + } +#endif + + jump = compiler->jumps; + while (jump) { + buf_ptr = (sljit_uw *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + if (!(jump->flags & JUMP_ADDR)) { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000); + *buf_ptr |= 
(((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff; + } + else { + SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000); + *buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff; + } + } + else if (jump->flags & SLJIT_REWRITABLE_JUMP) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + jump->addr = (sljit_uw)code_ptr; + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + code_ptr += 2; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); +#endif + } + else { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (jump->flags & IS_BL) + buf_ptr--; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); +#endif + } + jump = jump->next; + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_ = compiler->consts; + while (const_) { + buf_ptr = (sljit_uw*)const_->addr; + const_->addr = (sljit_uw)code_ptr; + + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + /* Set the value again (can be a simple constant). 
*/ + inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); + code_ptr += 2; + + const_ = const_->next; + } +#endif + + put_label = compiler->put_labels; + while (put_label) { + addr = put_label->label->addr; + buf_ptr = (sljit_uw*)put_label->addr; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000); + buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr; +#else + SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT); + buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff); + buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff); +#endif + put_label = put_label->next; + } + + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size); + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); + + code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + case SLJIT_HAS_PREFETCH: +#endif + return 1; + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x01 +#define HALF_SIZE 0x02 +#define PRELOAD 0x03 +#define SIGNED 0x04 +#define LOAD_DATA 0x08 + +/* Flag bits for emit_op. 
*/ +#define ALLOW_IMM 0x10 +#define ALLOW_INV_IMM 0x20 +#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) + +/* s/l - store/load (1 bit) + u/s - signed/unsigned (1 bit) + w/b/h/N - word/byte/half/NOT allowed (2 bit) + Storing signed and unsigned values are the same operations. */ + +static const sljit_uw data_transfer_insts[16] = { +/* s u w */ 0xe5000000 /* str */, +/* s u b */ 0xe5400000 /* strb */, +/* s u h */ 0xe10000b0 /* strh */, +/* s u N */ 0x00000000 /* not allowed */, +/* s s w */ 0xe5000000 /* str */, +/* s s b */ 0xe5400000 /* strb */, +/* s s h */ 0xe10000b0 /* strh */, +/* s s N */ 0x00000000 /* not allowed */, + +/* l u w */ 0xe5100000 /* ldr */, +/* l u b */ 0xe5500000 /* ldrb */, +/* l u h */ 0xe11000b0 /* ldrh */, +/* l u p */ 0xf5500000 /* preload */, +/* l s w */ 0xe5100000 /* ldr */, +/* l s b */ 0xe11000d0 /* ldrsb */, +/* l s h */ 0xe11000f0 /* ldrsh */, +/* l s N */ 0x00000000 /* not allowed */, +}; + +#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \ + (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (arg)) + +/* Normal ldr/str instruction. 
+ Type2: ldrsb, ldrh, ldrsh */ +#define IS_TYPE1_TRANSFER(type) \ + (data_transfer_insts[(type) & 0xf] & 0x04000000) +#define TYPE2_TRANSFER_IMM(imm) \ + (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args, size, i, tmp; + sljit_uw push; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Push saved registers, temporary registers + stmdb sp!, {..., lr} */ + push = PUSH | (1 << 14); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF(push_inst(compiler, push)); + + /* Stack must be aligned to 8 bytes: */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + local_size = ((size + local_size + 7) & ~7) - size; + compiler->local_size = local_size; + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + + args = get_arg_count(arg_types); + + if (args >= 1) + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0))); + if (args >= 2) + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S1) | RM(SLJIT_R1))); + if (args >= 3) + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2))); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((size + local_size + 7) & ~7) - size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 i, tmp; + sljit_uw pop; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + if (compiler->local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); + + /* Push saved registers, 
temporary registers + ldmia sp!, {..., pc} */ + pop = POP | (1 << 15); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; + + return push_inst(compiler, pop); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* flags: */ + /* Arguments are swapped. */ +#define ARGS_SWAPPED 0x01 + /* Inverted immediate. */ +#define INV_IMM 0x02 + /* Source and destination is register. */ +#define MOVE_REG_CONV 0x04 + /* Unused return value. */ +#define UNUSED_RETURN 0x08 +/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ +#define SET_FLAGS (1 << 20) +/* dst: reg + src1: reg + src2: reg or imm (if allowed) + SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ +#define SRC2_IMM (1 << 25) + +#define EMIT_SHIFT_INS_AND_RETURN(opcode) \ + SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ + if (compiler->shift_imm != 0x20) { \ + SLJIT_ASSERT(src1 == TMP_REG1); \ + SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \ + \ + if (compiler->shift_imm != 0) \ + return push_inst(compiler, MOV | (flags & SET_FLAGS) | \ + RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \ + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \ + } \ + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \ + (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? 
src2 : src1)); + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) +{ + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (dst != src2) { + if (src2 & SRC2_IMM) { + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return push_inst(compiler, MOV | RD(dst) | RM(src2)); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (flags & MOVE_REG_CONV) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (op == SLJIT_MOV_U8) + return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff); + FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2))); + return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)); +#else + return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (flags & MOVE_REG_CONV) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2))); + return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)); +#else + return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + if (src2 & SRC2_IMM) { + return push_inst(compiler, ((flags & INV_IMM) ? 
MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2); + } + return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); + return SLJIT_SUCCESS; + + case SLJIT_ADD: + SLJIT_ASSERT(!(flags & INV_IMM)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_ADDC: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_SUB: + SLJIT_ASSERT(!(flags & INV_IMM)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_SUBC: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? 
src2 : RM(src2))); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + compiler->status_flags_state = 0; + + if (!HAS_FLAGS(op)) + return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); + + FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1])); + + /* cmp TMP_REG1, dst asr #31. */ + return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); + + case SLJIT_AND: + return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_OR: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_XOR: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_SHL: + EMIT_SHIFT_INS_AND_RETURN(0); + + case SLJIT_LSHR: + EMIT_SHIFT_INS_AND_RETURN(1); + + case SLJIT_ASHR: + EMIT_SHIFT_INS_AND_RETURN(2); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +#undef EMIT_SHIFT_INS_AND_RETURN + +/* Tests whether the immediate can be stored in the 12 bit imm field. + Returns with 0 if not possible. 
*/ +static sljit_uw get_imm(sljit_uw imm) +{ + sljit_s32 rol; + + if (imm <= 0xff) + return SRC2_IMM | imm; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol = 8; + } + else { + imm = (imm << 24) | (imm >> 8); + rol = 0; + } + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + return SRC2_IMM | (imm >> 24) | (rol << 8); + else + return 0; +} + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive) +{ + sljit_uw mask; + sljit_uw imm1; + sljit_uw imm2; + sljit_s32 rol; + + /* Step1: Search a zero byte (8 continous zero bit). */ + mask = 0xff000000; + rol = 8; + while(1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = 4 + (rol >> 1); + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) { + /* rol by 8. */ + imm = (imm << 8) | (imm >> 24); + mask = 0xff00; + rol = 24; + while (1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = (rol >> 1) - 8; + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) + return 0; + } + break; + } + } + + /* The low 8 bit must be zero. 
*/ + SLJIT_ASSERT(!(imm & 0xff)); + + if (!(imm & 0xff000000)) { + imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); + imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); + } + else if (imm & 0xc0000000) { + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + else { + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + + FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1)); + FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2)); + return 1; +} +#endif + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm) +{ + sljit_uw tmp; + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + if (!(imm & ~0xffff)) + return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); +#endif + + /* Create imm by 1 inst. */ + tmp = get_imm(imm); + if (tmp) + return push_inst(compiler, MOV | RD(reg) | tmp); + + tmp = get_imm(~imm); + if (tmp) + return push_inst(compiler, MVN | RD(reg) | tmp); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Create imm by 2 inst. */ + FAIL_IF(generate_int(compiler, reg, imm, 1)); + FAIL_IF(generate_int(compiler, reg, ~imm, 0)); + + /* Load integer. 
*/ + return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm); +#else + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); + if (imm <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); +#endif +} + +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_uw imm, offset_reg; + sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags); + + SLJIT_ASSERT (arg & SLJIT_MEM); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); + + if ((arg & REG_MASK) == SLJIT_UNUSED) { + if (is_type1_transfer) { + FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff)); + argw &= 0xfff; + } + else { + FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff)); + argw &= 0xff; + } + + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, + is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw))); + } + + if (arg & OFFS_REG_MASK) { + offset_reg = OFFS_REG(arg); + arg &= REG_MASK; + argw &= 0x3; + + if (argw != 0 && !is_type1_transfer) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7))); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); + } + + /* Bit 25: RM is offset. */ + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, + RM(offset_reg) | (is_type1_transfer ? 
(1 << 25) : 0) | (argw << 7))); + } + + arg &= REG_MASK; + + if (is_type1_transfer) { + if (argw > 0xfff) { + imm = get_imm(argw & ~0xfff); + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); + argw = argw & 0xfff; + arg = tmp_reg; + } + } + else if (argw < -0xfff) { + imm = get_imm(-argw & ~0xfff); + if (imm) { + FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); + argw = -(-argw & 0xfff); + arg = tmp_reg; + } + } + + if (argw >= 0 && argw <= 0xfff) + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw)); + + if (argw < 0 && argw >= -0xfff) + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, -argw)); + } + else { + if (argw > 0xff) { + imm = get_imm(argw & ~0xff); + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); + argw = argw & 0xff; + arg = tmp_reg; + } + } + else if (argw < -0xff) { + imm = get_imm(-argw & ~0xff); + if (imm) { + FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); + argw = -(-argw & 0xff); + arg = tmp_reg; + } + } + + if (argw >= 0 && argw <= 0xff) + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, TYPE2_TRANSFER_IMM(argw))); + + if (argw < 0 && argw >= -0xff) { + argw = -argw; + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, TYPE2_TRANSFER_IMM(argw))); + } + } + + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, + RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0))); +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* src1 is reg or TMP_REG1 + src2 is reg, TMP_REG2, or imm + result goes to TMP_REG2, so put result can use TMP_REG1. */ + + /* We prefers register and simple consts. 
*/ + sljit_s32 dst_reg; + sljit_s32 src1_reg; + sljit_s32 src2_reg; + sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + + /* Destination check. */ + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) + flags |= UNUSED_RETURN; + + SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); + + src2_reg = 0; + + do { + if (!(inp_flags & ALLOW_IMM)) + break; + + if (src2 & SLJIT_IMM) { + src2_reg = get_imm(src2w); + if (src2_reg) + break; + if (inp_flags & ALLOW_INV_IMM) { + src2_reg = get_imm(~src2w); + if (src2_reg) { + flags |= INV_IMM; + break; + } + } + if (GET_OPCODE(op) == SLJIT_ADD) { + src2_reg = get_imm(-src2w); + if (src2_reg) { + op = SLJIT_SUB | GET_ALL_FLAGS(op); + break; + } + } + if (GET_OPCODE(op) == SLJIT_SUB) { + src2_reg = get_imm(-src2w); + if (src2_reg) { + op = SLJIT_ADD | GET_ALL_FLAGS(op); + break; + } + } + } + + if (src1 & SLJIT_IMM) { + src2_reg = get_imm(src1w); + if (src2_reg) { + flags |= ARGS_SWAPPED; + src1 = src2; + src1w = src2w; + break; + } + if (inp_flags & ALLOW_INV_IMM) { + src2_reg = get_imm(~src1w); + if (src2_reg) { + flags |= ARGS_SWAPPED | INV_IMM; + src1 = src2; + src1w = src2w; + break; + } + } + if (GET_OPCODE(op) == SLJIT_ADD) { + src2_reg = get_imm(-src1w); + if (src2_reg) { + /* Note: add is commutative operation. */ + src1 = src2; + src1w = src2w; + op = SLJIT_SUB | GET_ALL_FLAGS(op); + break; + } + } + } + } while(0); + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_reg = src1; + else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_reg = TMP_REG1; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_reg = TMP_REG1; + } + + /* Destination. */ + dst_reg = SLOW_IS_REG(dst) ? 
dst : TMP_REG2; + + if (op <= SLJIT_MOV_P) { + if (dst & SLJIT_MEM) { + if (inp_flags & BYTE_SIZE) + inp_flags &= ~SIGNED; + + if (FAST_IS_REG(src2)) + return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2); + } + + if (FAST_IS_REG(src2) && dst_reg != TMP_REG2) + flags |= MOVE_REG_CONV; + } + + /* Source 2. */ + if (src2_reg == 0) { + src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2; + + if (FAST_IS_REG(src2)) + src2_reg = src2; + else if (src2 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); + else + FAIL_IF(load_immediate(compiler, src2_reg, src2w)); + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg)); + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1); +} + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator); +extern int __aeabi_idivmod(int numerator, int denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_sw saved_reg_list[3]; + sljit_sw saved_reg_count; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + FAIL_IF(push_inst(compiler, BKPT)); + break; + case SLJIT_NOP: + FAIL_IF(push_inst(compiler, NOP)); + break; + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + return push_inst(compiler, (op == SLJIT_LMUL_UW ? 
UMULL : SMULL) + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) + | reg_map[SLJIT_R1]); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 3; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_DIV_UW) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */)); + } + } + +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); + } + return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 
16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); + } + return SLJIT_SUCCESS; + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); + + case SLJIT_CLZ: + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_SUB: + case SLJIT_SUBC: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + if (src2 & SLJIT_IMM) { + compiler->shift_imm = src2w & 0x1f; + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w); + } + else { + compiler->shift_imm = 0x20; + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + } + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, 
srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1)); + + return push_inst(compiler, BX | RM(TMP_REG2)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + SLJIT_ASSERT(src & SLJIT_MEM); + return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1); +#else /* !SLJIT_CONFIG_ARM_V7 */ + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_ARM_V7 */ + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return (freg_map[reg] << 1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_uw*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + + +#define FPU_LOAD (1 << 20) +#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ + ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs)) +#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ + ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16)) + +static sljit_s32 emit_fop_mem(struct sljit_compiler 
*compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_uw imm; + sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); + + SLJIT_ASSERT(arg & SLJIT_MEM); + arg &= ~SLJIT_MEM; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); + arg = TMP_REG2; + argw = 0; + } + + /* Fast loads and stores. */ + if (arg) { + if (!(argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); + + imm = get_imm(argw & ~0x3fc); + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + } + imm = get_imm(-argw & ~0x3fc); + if (imm) { + argw = -argw; + FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + } + } + + if (arg) { + FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); + } + else + FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + op ^= SLJIT_F32_OP; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, VMOV | (1 << 20) 
| RD(dst) | (freg_map[TMP_FREG1] << 16)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + op ^= SLJIT_F32_OP; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16))); + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + op ^= SLJIT_F32_OP; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0))); + return push_inst(compiler, VMRS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); + 
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + op ^= SLJIT_F32_OP; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op ^= SLJIT_F32_OP; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + break; + } + + if (dst_r == TMP_FREG1) + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw)); + + return SLJIT_SUCCESS; +} + +#undef FPU_LOAD +#undef EMIT_FPU_DATA_TRANSFER + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2)); + + /* Memory. 
*/ + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_EQUAL_F64: + return 0x00000000; + + case SLJIT_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: + return 0x10000000; + + case SLJIT_LESS: + case SLJIT_LESS_F64: + return 0x30000000; + + case SLJIT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: + return 0x20000000; + + case SLJIT_GREATER: + case SLJIT_GREATER_F64: + return 0x80000000; + + case SLJIT_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: + return 0x90000000; + + case SLJIT_SIG_LESS: + return 0xb0000000; + + case SLJIT_SIG_GREATER_EQUAL: + return 0xa0000000; + + case SLJIT_SIG_GREATER: + return 0xc0000000; + + case SLJIT_SIG_LESS_EQUAL: + return 0xd0000000; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x10000000; + + case SLJIT_UNORDERED_F64: + return 0x60000000; + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x00000000; + + case SLJIT_ORDERED_F64: + return 0x70000000; + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL); + return 0xe0000000; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler 
*compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_FAST_CALL) + PTR_FAIL_IF(prepare_blx(compiler)); + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->addr = compiler->size; + compiler->patches++; + } + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_blx(compiler)); + } + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + jump->addr = compiler->size; +#else + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? 
BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type))); + jump->addr = compiler->size; +#endif + return jump; +} + +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 stack_offset = 0; + sljit_s32 arg_count = 0; + sljit_s32 word_arg_offset = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 src_offset = 4 * sizeof(sljit_sw); + sljit_u8 offsets[4]; + + if (src && FAST_IS_REG(*src)) + src_offset = reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f32); + arg_count++; + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f64); + arg_count++; + float_arg_count++; + break; + default: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_sw); + arg_count++; + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_offset > 16) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7))); + + /* Process arguments in reversed direction. 
*/ + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + case SLJIT_ARG_TYPE_F64: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT((stack_offset & 0x7) == 0); + + if (stack_offset < 16) { + if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + default: + arg_count--; + word_arg_offset -= sizeof(sljit_sw); + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT(stack_offset >= word_arg_offset); + + if (stack_offset != word_arg_offset) { + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + else if (src_offset == word_arg_offset) { + *src = 1 + (stack_offset >> 2); + src_offset = stack_offset; + } + FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2))); + } else + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16))); + } + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 
softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 stack_size = 0; + + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + if (stack_size & 0x7) + stack_size += sizeof(sljit_sw); + stack_size += sizeof(sljit_f64); + break; + default: + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_size <= 16) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7)); +} + +#else /* !__SOFTFP__ */ + +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 remap = 0; + sljit_u32 offset = 0; + sljit_u32 new_offset, mask; + + /* Remove return value. */ + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { + new_offset = 0; + mask = 1; + + while (remap & mask) { + new_offset++; + mask <<= 1; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 
0x400000 : 0))); + + offset += 2; + } + else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { + new_offset = 0; + mask = 3; + + while (remap & mask) { + new_offset += 2; + mask <<= 2; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0))); + + offset += 2; + } + arg_types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif /* __SOFTFP__ */ + +#undef EMIT_FPU_OPERATION + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; +#else /* !__SOFTFP__ */ + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +#endif /* __SOFTFP__ */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + if (!(src & SLJIT_IMM)) { + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); + return push_inst(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RM(src)); + } + + SLJIT_ASSERT(src & SLJIT_MEM); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = srcw; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_FAST_CALL) + FAIL_IF(prepare_blx(compiler)); + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); + if (type >= SLJIT_FAST_CALL) + FAIL_IF(emit_blx(compiler)); +#else + FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); +#endif + jump->addr = compiler->size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#ifdef __SOFTFP__ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + return softfloat_post_call_with_args(compiler, arg_types); +#else /* !__SOFTFP__ */ + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + 
compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +#endif /* __SOFTFP__ */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op); + sljit_uw cc, ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + cc = get_cc(compiler, type & 0xff); + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0)); + FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; + } + + ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR)); + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2)); + + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); + + if (op == SLJIT_AND) + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (flags & SLJIT_SET_Z) + return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_uw cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + dst_reg &= ~SLJIT_I32_OP; + + cc = get_cc(compiler, type & 0xff); + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + tmp = get_imm(srcw); + if (tmp) + return push_inst(compiler, ((MOV | RD(dst_reg) 
| tmp) & ~COND_MASK) | cc); + + tmp = get_imm(~srcw); + if (tmp) + return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + tmp = (sljit_uw) srcw; + FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); + if (tmp <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); +#else + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; +#endif + } + + return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_uw is_type1_transfer, inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + is_type1_transfer = 1; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + break; + case SLJIT_MOV_S8: + if (!(type & SLJIT_MEM_STORE)) + is_type1_transfer = 0; + flags = BYTE_SIZE | SIGNED; + break; + case SLJIT_MOV_U16: + is_type1_transfer = 0; + flags = HALF_SIZE; + break; + case SLJIT_MOV_S16: + is_type1_transfer = 0; + flags = HALF_SIZE | SIGNED; + break; + default: + SLJIT_UNREACHABLE(); + flags = WORD_SIZE; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags)); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (!is_type1_transfer && memw != 0) + return SLJIT_ERR_UNSUPPORTED; + } + else { + if (is_type1_transfer) { + if (memw > 4095 || memw < -4095) + return SLJIT_ERR_UNSUPPORTED; + } + else { + if (memw > 255 || memw < -255) + return SLJIT_ERR_UNSUPPORTED; + } + } + + if (type & 
SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | (memw << 7)); + + if (is_type1_transfer) + inst |= (1 << 25); + + if (type & SLJIT_MEM_PRE) + inst |= (1 << 21); + else + inst ^= (1 << 24); + + return push_inst(compiler, inst); + } + + inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0); + + if (type & SLJIT_MEM_PRE) + inst |= (1 << 21); + else + inst ^= (1 << 24); + + if (is_type1_transfer) { + if (memw >= 0) + inst |= (1 << 23); + else + memw = -memw; + + return push_inst(compiler, inst | memw); + } + + if (memw >= 0) + inst |= (1 << 23); + else + memw = -memw; + + return push_inst(compiler, inst | TYPE2_TRANSFER_IMM(memw)); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + dst_r = SLOW_IS_REG(dst) ? 
dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value)); + compiler->patches++; +#else + PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); +#endif + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); + compiler->patches++; +#else + PTR_FAIL_IF(emit_imm(compiler, dst_r, 0)); +#endif + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + inline_set_jump_addr(addr, executable_offset, new_target, 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + inline_set_const(addr, executable_offset, new_constant, 1); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_64.c 
b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_64.c new file mode 100644 index 0000000000..3f0f5fcc30 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_64.c @@ -0,0 +1,2057 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "ARM-64" SLJIT_CPUINFO; +} + +/* Length of an instruction word */ +typedef sljit_u32 sljit_ins; + +#define TMP_ZERO (0) + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +/* r18 - platform register, currently not used */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { + 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7 +}; + +#define W_OP (1u << 31) +#define RD(rd) (reg_map[rd]) +#define RT(rt) (reg_map[rt]) +#define RN(rn) (reg_map[rn] << 5) +#define RT2(rt2) (reg_map[rt2] << 10) +#define RM(rm) (reg_map[rm] << 16) +#define VD(vd) (freg_map[vd]) +#define VT(vt) (freg_map[vt]) +#define VN(vn) (freg_map[vn] << 5) +#define VM(vm) (freg_map[vm] << 16) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define ADC 0x9a000000 +#define ADD 0x8b000000 +#define ADDE 0x8b200000 +#define ADDI 0x91000000 +#define AND 0x8a000000 +#define ANDI 0x92000000 +#define ASRV 0x9ac02800 +#define B 0x14000000 +#define B_CC 0x54000000 +#define BL 0x94000000 +#define BLR 0xd63f0000 +#define BR 0xd61f0000 +#define BRK 0xd4200000 +#define CBZ 0xb4000000 +#define CLZ 0xdac01000 +#define CSEL 0x9a800000 +#define CSINC 0x9a800400 +#define EOR 0xca000000 +#define EORI 0xd2000000 +#define FABS 0x1e60c000 +#define FADD 0x1e602800 +#define FCMP 0x1e602000 +#define FCVT 0x1e224000 +#define FCVTZS 0x9e780000 +#define FDIV 0x1e601800 
+#define FMOV 0x1e604000 +#define FMUL 0x1e600800 +#define FNEG 0x1e614000 +#define FSUB 0x1e603800 +#define LDRI 0xf9400000 +#define LDP 0xa9400000 +#define LDP_PRE 0xa9c00000 +#define LDR_PRE 0xf8400c00 +#define LSLV 0x9ac02000 +#define LSRV 0x9ac02400 +#define MADD 0x9b000000 +#define MOVK 0xf2800000 +#define MOVN 0x92800000 +#define MOVZ 0xd2800000 +#define NOP 0xd503201f +#define ORN 0xaa200000 +#define ORR 0xaa000000 +#define ORRI 0xb2000000 +#define RET 0xd65f0000 +#define SBC 0xda000000 +#define SBFM 0x93000000 +#define SCVTF 0x9e620000 +#define SDIV 0x9ac00c00 +#define SMADDL 0x9b200000 +#define SMULH 0x9b403c00 +#define STP 0xa9000000 +#define STP_PRE 0xa9800000 +#define STRB 0x38206800 +#define STRBI 0x39000000 +#define STRI 0xf9000000 +#define STR_FI 0x3d000000 +#define STR_FR 0x3c206800 +#define STUR_FI 0x3c000000 +#define STURBI 0x38000000 +#define SUB 0xcb000000 +#define SUBI 0xd1000000 +#define SUBS 0xeb000000 +#define UBFM 0xd3000000 +#define UDIV 0x9ac00800 +#define UMULH 0x9bc03c00 + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. 
*/ +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); +} + +static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->flags |= PATCH_ABS64; + return 0; + } + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; + + if (jump->flags & IS_COND) { + diff += sizeof(sljit_ins); + if (diff <= 0xfffff && diff >= -0x100000) { + code_ptr[-5] ^= (jump->flags & IS_CBZ) ? 
(0x1 << 24) : 0x1; + jump->addr -= sizeof(sljit_ins); + jump->flags |= PATCH_COND; + return 5; + } + diff -= sizeof(sljit_ins); + } + + if (diff <= 0x7ffffff && diff >= -0x8000000) { + jump->flags |= PATCH_B; + return 4; + } + + if (target_addr < 0x100000000l) { + if (jump->flags & IS_COND) + code_ptr[-5] -= (2 << 5); + code_ptr[-2] = code_ptr[0]; + return 2; + } + + if (target_addr < 0x1000000000000l) { + if (jump->flags & IS_COND) + code_ptr[-5] -= (1 << 5); + jump->flags |= PATCH_ABS48; + code_ptr[-1] = code_ptr[0]; + return 1; + } + + jump->flags |= PATCH_ABS64; + return 0; +} + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label < 0x100000000l) { + put_label->flags = 0; + return 2; + } + + if (max_label < 0x1000000000000l) { + put_label->flags = 1; + return 1; + } + + put_label->flags = 2; + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + sljit_s32 dst; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + 
SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { + jump->addr = (sljit_uw)(code_ptr - 4); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)(code_ptr - 3); + code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? 
BL : B) | (addr & 0x3ffffff); + if (jump->flags & IS_COND) + buf_ptr[-1] -= (4 << 5); + break; + } + if (jump->flags & PATCH_COND) { + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); + break; + } + + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); + + dst = buf_ptr[0] & 0x1f; + buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); + buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); + if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) + buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); + if (jump->flags & PATCH_ABS64) + buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + addr = put_label->label->addr; + buf_ptr = (sljit_ins *)put_label->addr; + + buf_ptr[0] |= (addr & 0xffff) << 5; + buf_ptr[1] |= ((addr >> 16) & 0xffff) << 5; + + if (put_label->flags >= 1) + buf_ptr[2] |= ((addr >> 32) & 0xffff) << 5; + + if (put_label->flags >= 2) + buf_ptr[3] |= ((addr >> 48) & 0xffff) << 5; + + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* 
Available by default. */ + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + return 1; + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + +#define COUNT_TRAILING_ZERO(value, result) \ + result = 0; \ + if (!(value & 0xffffffff)) { \ + result += 32; \ + value >>= 32; \ + } \ + if (!(value & 0xffff)) { \ + result += 16; \ + value >>= 16; \ + } \ + if (!(value & 0xff)) { \ + result += 8; \ + value >>= 8; \ + } \ + if (!(value & 0xf)) { \ + result += 4; \ + value >>= 4; \ + } \ + if (!(value & 0x3)) { \ + result += 2; \ + value >>= 2; \ + } \ + if (!(value & 0x1)) { \ + result += 1; \ + value >>= 1; \ + } + +#define LOGICAL_IMM_CHECK 0x100 + +static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) +{ + sljit_s32 negated, ones, right; + sljit_uw mask, uimm; + sljit_ins ins; + + if (len & LOGICAL_IMM_CHECK) { + len &= ~LOGICAL_IMM_CHECK; + if (len == 32 && (imm == 0 || imm == -1)) + return 0; + if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1)) + return 0; + } + + SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) + || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1)); + + uimm = (sljit_uw)imm; + while (1) { + if (len <= 0) { + SLJIT_UNREACHABLE(); + return 0; + } + + mask = ((sljit_uw)1 << len) - 1; + if ((uimm & mask) != ((uimm >> len) & mask)) + break; + len >>= 1; + } + + len <<= 1; + + negated = 0; + if (uimm & 0x1) { + negated = 1; + uimm = ~uimm; + } + + if (len < 64) + uimm &= ((sljit_uw)1 << len) - 1; + + /* Unsigned right shift. */ + COUNT_TRAILING_ZERO(uimm, right); + + /* Signed shift. We also know that the highest bit is set. 
*/ + imm = (sljit_sw)~uimm; + SLJIT_ASSERT(imm < 0); + + COUNT_TRAILING_ZERO(imm, ones); + + if (~imm) + return 0; + + if (len == 64) + ins = 1 << 22; + else + ins = (0x3f - ((len << 1) - 1)) << 10; + + if (negated) + return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16); + + return ins | ((ones - 1) << 10) | ((len - right) << 16); +} + +#undef COUNT_TRAILING_ZERO + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) +{ + sljit_uw imm = (sljit_uw)simm; + sljit_s32 i, zeros, ones, first; + sljit_ins bitmask; + + /* Handling simple immediates first. */ + if (imm <= 0xffff) + return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); + + if (simm < 0 && simm >= -0x10000) + return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); + + if (imm <= 0xffffffffl) { + if ((imm & 0xffff) == 0) + return push_inst(compiler, MOVZ | RD(dst) | ((imm >> 16) << 5) | (1 << 21)); + if ((imm & 0xffff0000l) == 0xffff0000) + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); + if ((imm & 0xffff) == 0xffff) + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + + bitmask = logical_imm(simm, 16); + if (bitmask != 0) + return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); + + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + } + + bitmask = logical_imm(simm, 32); + if (bitmask != 0) + return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask); + + if (simm < 0 && simm >= -0x100000000l) { + if ((imm & 0xffff) == 0xffff) + return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + } + + /* A large 
amount of number can be constructed from ORR and MOVx, but computing them is costly. */ + + zeros = 0; + ones = 0; + for (i = 4; i > 0; i--) { + if ((simm & 0xffff) == 0) + zeros++; + if ((simm & 0xffff) == 0xffff) + ones++; + simm >>= 16; + } + + simm = (sljit_sw)imm; + first = 1; + if (ones > zeros) { + simm = ~simm; + for (i = 0; i < 4; i++) { + if (!(simm & 0xffff)) { + simm >>= 16; + continue; + } + if (first) { + first = 0; + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + } + else + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); + simm >>= 16; + } + return SLJIT_SUCCESS; + } + + for (i = 0; i < 4; i++) { + if (!(simm & 0xffff)) { + simm >>= 16; + continue; + } + if (first) { + first = 0; + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + } + else + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + simm >>= 16; + } + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +#define INT_OP 0x0040000 +#define SET_FLAGS 0x0080000 +#define UNUSED_RETURN 0x0100000 + +#define CHECK_FLAGS(flag_bits) \ + if (flags & SET_FLAGS) { \ + inv_bits |= flag_bits; \ + if (flags & UNUSED_RETURN) \ + dst = TMP_ZERO; \ + } + +static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2) +{ + /* dst must be register, TMP_REG1 + arg1 must be register, TMP_REG1, imm + arg2 must be register, TMP_REG2, imm */ + sljit_ins inv_bits = (flags & INT_OP) ? W_OP : 0; + sljit_ins inst_bits; + sljit_s32 op = (flags & 0xffff); + sljit_s32 reg; + sljit_sw imm, nimm; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates. 
*/ + flags &= ~ARG1_IMM; + if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (flags & ARG2_IMM) ? arg1 : arg2; + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (op) { + case SLJIT_MUL: + case SLJIT_NEG: + case SLJIT_CLZ: + case SLJIT_ADDC: + case SLJIT_SUBC: + /* No form with immediate operand (except imm 0, which + is represented by a ZERO register). */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); + return load_immediate(compiler, dst, imm); + case SLJIT_NOT: + SLJIT_ASSERT(flags & ARG2_IMM); + FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); + goto set_flags; + case SLJIT_SUB: + if (flags & ARG1_IMM) + break; + imm = -imm; + /* Fall through. */ + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (imm == 0) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, ((op == SLJIT_ADD ? 
ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); + } + if (imm > 0 && imm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10)); + } + nimm = -imm; + if (nimm > 0 && nimm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10)); + } + if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)); + } + if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)); + } + if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10)); + } + if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10)); + } + break; + case SLJIT_AND: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); + if (!inst_bits) + break; + CHECK_FLAGS(3 << 29); + return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits); + case SLJIT_OR: + case SLJIT_XOR: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 
16 : 32)); + if (!inst_bits) + break; + if (op == SLJIT_OR) + inst_bits |= ORRI; + else + inst_bits |= EORI; + FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg))); + goto set_flags; + case SLJIT_SHL: + if (flags & ARG1_IMM) + break; + if (flags & INT_OP) { + imm &= 0x1f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10))); + } + else { + imm &= 0x3f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10))); + } + goto set_flags; + case SLJIT_LSHR: + case SLJIT_ASHR: + if (flags & ARG1_IMM) + break; + if (op == SLJIT_ASHR) + inv_bits |= 1 << 30; + if (flags & INT_OP) { + imm &= 0x1f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10))); + } + else { + imm &= 0x3f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10))); + } + goto set_flags; + default: + SLJIT_UNREACHABLE(); + break; + } + + if (flags & ARG2_IMM) { + if (arg2 == 0) + arg2 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); + arg2 = TMP_REG2; + } + } + else { + if (arg1 == 0) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + } + + /* Both arguments are registers. 
*/ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_MOV_U8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_S8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (!(flags & INT_OP)) + inv_bits |= 1 << 22; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_U16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV_S16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (!(flags & INT_OP)) + inv_bits |= 1 << 22; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV_U32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if ((flags & INT_OP) && dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_MOV_S32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if ((flags & INT_OP) && dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); + case SLJIT_NOT: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); + break; /* Set flags. 
*/ + case SLJIT_NEG: + SLJIT_ASSERT(arg1 == TMP_REG1); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (flags & SET_FLAGS) + inv_bits |= 1 << 29; + return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); + case SLJIT_ADD: + CHECK_FLAGS(1 << 29); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_ADDC: + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUB: + CHECK_FLAGS(1 << 29); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUBC: + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_MUL: + compiler->status_flags_state = 0; + if (!(flags & SET_FLAGS)) + return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); + if (flags & INT_OP) { + FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); + } + FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2))); + FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); + case SLJIT_AND: + CHECK_FLAGS(3 << 29); + return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_OR: + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. 
*/ + case SLJIT_XOR: + FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_SHL: + FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_LSHR: + FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_ASHR: + FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + +set_flags: + if (flags & SET_FLAGS) + return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO)); + return SLJIT_SUCCESS; +} + +#define STORE 0x10 +#define SIGNED 0x20 + +#define BYTE_SIZE 0x0 +#define HALF_SIZE 0x1 +#define INT_SIZE 0x2 +#define WORD_SIZE 0x3 + +#define MEM_SIZE_SHIFT(flags) ((flags) & 0x3) + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_u32 shift = MEM_SIZE_SHIFT(flags); + sljit_u32 type = (shift << 30); + + if (!(flags & STORE)) + type |= (flags & SIGNED) ? 0x00800000 : 0x00400000; + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (argw == 0 || argw == shift) + return push_inst(compiler, STRB | type | RT(reg) + | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? 
(1 << 12) : 0));

		/* Scale not representable: compute the address in tmp_reg. */
		FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10)));
		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg));
	}

	arg &= REG_MASK;

	if (arg == SLJIT_UNUSED) {
		/* Absolute address: load the aligned base, encode the rest
		   as a scaled unsigned offset. */
		FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift)));

		argw = (argw >> shift) & 0xfff;

		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10));
	}

	if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
		/* Aligned non-negative offset: scaled 12-bit immediate form. */
		if ((argw >> shift) <= 0xfff) {
			return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | (argw << (10 - shift)));
		}

		if (argw <= 0xffffff) {
			/* ADDI with LSL #12 covers the upper bits of the offset. */
			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((argw >> 12) << 10)));

			argw = ((argw & 0xfff) >> shift);
			return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10));
		}
	}

	/* Unscaled signed 9-bit offset (STUR family). */
	if (argw <= 255 && argw >= -256)
		return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12));

	/* Last resort: materialize the offset and use the register form. */
	FAIL_IF(load_immediate(compiler, tmp_reg, argw));

	return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
}

/* --------------------------------------------------------------------- */
/*  Entry, exit                                                          */
/* --------------------------------------------------------------------- */

/* Function prologue: stores FP/LR with a pre-indexed STP, saves the
   callee-saved registers in pairs, sets up TMP_FP and copies the first
   word arguments into the saved registers. On Windows the stack is
   grown page by page (stack probing) before SP is moved. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args, i, tmp, offs, prev, saved_regs_size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Keep the register save area 16-byte aligned. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
	if (saved_regs_size & 0x8)
		saved_regs_size += sizeof(sljit_sw);

	local_size = (local_size + 15) & ~0xf;
	compiler->local_size = local_size + saved_regs_size;

	FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
		| RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));

#ifdef _WIN32
	/* TMP_REG1 walks down the pages to be probed below. */
	if (local_size >= 4096)
		FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
	else if (local_size > 256)
		FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
#endif

	/* Save registers pairwise with STP; a leftover single register is
	   stored with STRI after the loops. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	prev = -1;
	offs = 2 << 15;
	for (i = SLJIT_S0; i >= tmp; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	if (prev != -1)
		FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));


	FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));

	/* Move incoming argument registers into the saved registers. */
	args = get_arg_count(arg_types);

	if (args >= 1)
		FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));

#ifdef _WIN32
	if (local_size >= 4096) {
		if (local_size < 4 * 4096) {
			/* No need for a loop. */
			if (local_size >= 2 * 4096) {
				FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
				FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
				local_size -= 4096;
			}

			if (local_size >= 2 * 4096) {
				FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
				FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
				local_size -= 4096;
			}

			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
			local_size -= 4096;
		}
		else {
			/* Probe loop: TMP_REG2 counts pages, the backwards B_CC
			   (-3 instructions) repeats the touch-and-decrement. */
			FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
			FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
			FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));

			local_size &= 0xfff;
		}

		if (local_size > 256) {
			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
		}
		else if (local_size > 0)
			FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));

		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
	}
	else if (local_size > 256) {
		FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
	}
	else if (local_size > 0)
		FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));

#else /* !_WIN32 */

	/* The local_size does not include saved registers size. */
	if (local_size > 0xfff) {
		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
		local_size &= 0xfff;
	}
	if (local_size != 0)
		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));

#endif /* _WIN32 */

	return SLJIT_SUCCESS;
}

/* Records the frame layout (same computation as sljit_emit_enter)
   without emitting any code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_regs_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
	if (saved_regs_size & 0x8)
		saved_regs_size += sizeof(sljit_sw);

	compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
	return SLJIT_SUCCESS;
}

/* Function epilogue: moves the return value if needed, frees the local
   area, restores FP/LR and the saved register pairs, then returns. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 local_size;
	sljit_s32 i, tmp, offs, prev, saved_regs_size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
	if (saved_regs_size & 0x8)
		saved_regs_size += sizeof(sljit_sw);

	local_size = compiler->local_size - saved_regs_size;

	/* Load LR as early as possible. */
	if (local_size == 0)
		FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
	else if (local_size < 63 * sizeof(sljit_sw)) {
		/* Small frame: LDP with pre-index both pops the locals and
		   restores FP/LR in one instruction. */
		FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
			| RN(SLJIT_SP) | (local_size << (15 - 3))));
	}
	else {
		if (local_size > 0xfff) {
			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
			local_size &= 0xfff;
		}
		if (local_size)
			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));

		FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
	}

	/* Restore registers pairwise, mirroring sljit_emit_enter. */
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	prev = -1;
	offs = 2 << 15;
	for (i = SLJIT_S0; i >= tmp; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	if (prev != -1)
		FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));

	/* These two can be executed in parallel. */
	FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
	return push_inst(compiler, RET | RN(TMP_LR));
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* Zero-operand opcodes: breakpoint, nop, and the fixed-register
   multiply/divide helpers operating on R0/R1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_ins inv_bits = (op & SLJIT_I32_OP) ?
W_OP : 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst(compiler, BRK);
	case SLJIT_NOP:
		return push_inst(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* R0 is copied first because MADD overwrites it; the high half
		   of the product goes to R1 via UMULH/SMULH. */
		FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
		FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Quotient in R0; remainder computed as R0_old - q * R1 in R1. */
		FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
		FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
		return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this target. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}

/* Unary operations (moves with size conversion, NOT, NEG, CLZ) with any
   combination of register/immediate/memory operands. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags, mem_flags;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		/* Both operands are registers. */
		if (dst_r != TMP_REG1 && FAST_IS_REG(src))
			return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);

		/* Pick the memory access size and pre-truncate immediates to
		   the semantics of the requested move. */
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
			mem_flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			mem_flags = BYTE_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			mem_flags = BYTE_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			mem_flags = HALF_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			mem_flags = HALF_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		case SLJIT_MOV_U32:
			mem_flags = INT_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u32)srcw;
			break;
		case SLJIT_MOV_S32:
			mem_flags = INT_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s32)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			mem_flags = 0;
			break;
		}

		if (src & SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
		else if (!(src & SLJIT_MEM))
			dst_r = src;
		else
			FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG1));

		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
	mem_flags = WORD_SIZE;

	if (op_flags & SLJIT_I32_OP) {
		flags |= INT_OP;
		mem_flags = INT_SIZE;
	}

	if (dst == SLJIT_UNUSED)
		flags |= UNUSED_RETURN;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2));
		src = TMP_REG2;
	}

	/* NOTE(review): return value intentionally not FAIL_IF-checked here;
	   presumably relies on sljit's sticky compiler->error — confirm. */
	emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
	return SLJIT_SUCCESS;
}

/* Binary operations; loads memory operands into temporaries, forwards
   immediates via the ARG1_IMM/ARG2_IMM flags, stores the result. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags, mem_flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
		return SLJIT_SUCCESS;

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
	mem_flags = WORD_SIZE;

	if (op & SLJIT_I32_OP) {
		flags |= INT_OP;
		mem_flags = INT_SIZE;
	}

	if (dst == SLJIT_UNUSED)
		flags |= UNUSED_RETURN;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, src1, src1w, TMP_REG1));
		src1 = TMP_REG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src2, src2w, TMP_REG2));
		src2 = TMP_REG2;
	}

	/* For register operands pass the register number in the *w slot. */
	if (src1 & SLJIT_IMM)
		flags |= ARG1_IMM;
	else
		src1w = src1;

	if (src2 & SLJIT_IMM)
		flags |= ARG2_IMM;
	else
		src2w = src2;

	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
	return SLJIT_SUCCESS;
}

/* Source-only operations: fast return through LR and prefetch hints. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));

		return push_inst(compiler, RET | RN(TMP_LR));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);

		/* The reg_map[op] should provide the appropriate constant. */
		if (op == SLJIT_PREFETCH_L1)
			op = 1;
		else if (op == SLJIT_PREFETCH_L2)
			op = 3;
		else if (op == SLJIT_PREFETCH_L3)
			op = 5;
		else
			op = 2;

		/* Signed word sized load is the prefetch instruction. */
		return emit_op_mem(compiler, WORD_SIZE | SIGNED, op, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}

/* Returns the hardware number of an sljit integer register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}

/* Returns the hardware number of an sljit floating point register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return freg_map[reg];
}

/* Emits one raw, caller-provided machine instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

/* Floating point counterpart of emit_op_mem: one FP load or store for
   the sljit addressing mode in arg/argw, using TMP_REG1 as the address
   scratch register. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_u32 shift = MEM_SIZE_SHIFT(flags);
	sljit_ins type = (shift << 30);

	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (!(flags & STORE))
		type |= 0x00400000;

	if (arg & OFFS_REG_MASK) {
		/* Base + index register; the scale must be 0 or the access size. */
		argw &= 3;
		if (argw == 0 || argw == shift)
			return push_inst(compiler, STR_FR | type | VT(reg)
				| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ?
(1 << 12) : 0));

		/* Scale not representable: compute the address in TMP_REG1. */
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10)));
		return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1));
	}

	arg &= REG_MASK;

	if (arg == SLJIT_UNUSED) {
		/* Absolute address: load the aligned base, encode the rest
		   as a scaled unsigned offset. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift)));

		argw = (argw >> shift) & 0xfff;

		return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10));
	}

	if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
		/* Aligned non-negative offset: scaled 12-bit immediate form. */
		if ((argw >> shift) <= 0xfff)
			return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | (argw << (10 - shift)));

		if (argw <= 0xffffff) {
			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | ((argw >> 12) << 10)));

			argw = ((argw & 0xfff) >> shift);
			return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10));
		}
	}

	/* Unscaled signed 9-bit offset (STUR family). */
	if (argw <= 255 && argw >= -256)
		return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));

	FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
	return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1));
}

/* float/double -> integer conversion (FCVTZS, round toward zero). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;

	if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
		inv_bits |= W_OP;

	if (src & SLJIT_MEM) {
		/* NOTE(review): emit_fop_mem result deliberately unchecked;
		   presumably the sticky compiler->error covers it — confirm. */
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw, TMP_REG2);
	return SLJIT_SUCCESS;
}

/* integer -> float/double conversion (SCVTF). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
		inv_bits |= W_OP;

	if (src & SLJIT_MEM) {
		emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
		src = TMP_REG1;
	} else if (src & SLJIT_IMM) {
/* NOTE(review): an x86-64 guard inside the ARM64 backend — looks like a
   shared-code artifact; confirm against upstream sljit. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}

/* Floating point compare: loads memory operands and emits FCMP. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
	sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;

	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
}

/* Unary floating point operations: move, negate, abs and precision
   conversion; dispatches compares/conversions via the checks macro. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
	sljit_ins inv_bits;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		/* For F64<-F32 the source has the opposite size (one xor'd bit). */
		emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw);
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
			else
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}

/* Binary floating point operations: add, sub, mul, div. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
	sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
		break;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	/* dst is memory, so dst_r == TMP_FREG1 here. */
	return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
}

/* --------------------------------------------------------------------- */
/*  Other instructions                                                   */
/* --------------------------------------------------------------------- */

/* Stores the return address (LR) into dst on entering a fast call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (FAST_IS_REG(dst))
		return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));

	/* Memory. */
	return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1);
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

/* Maps an sljit condition type to an ARM64 condition code. The overflow
   cases intentionally fall through to the unordered/ordered cases when
   the last flag-setting operation was an ADD/SUB. */
static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_EQUAL_F64:
		return 0x1;

	case SLJIT_NOT_EQUAL:
	case SLJIT_NOT_EQUAL_F64:
		return 0x0;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x2;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x3;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x9;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x8;

	case SLJIT_SIG_LESS:
		return 0xa;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0xb;

	case SLJIT_SIG_GREATER:
		return 0xd;

	case SLJIT_SIG_LESS_EQUAL:
		return 0xc;

	case SLJIT_OVERFLOW:
		if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB))
			return 0x0;
		/* Intentional fall through. */

	case SLJIT_UNORDERED_F64:
		return 0x7;

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB))
			return 0x1;
		/* Intentional fall through. */

	case SLJIT_ORDERED_F64:
		return 0x6;

	default:
		SLJIT_UNREACHABLE();
		return 0xe;
	}
}

/* Creates (or reuses) a label at the current code position. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}

/* Emits a (conditional) jump or call. The target is a patchable 64-bit
   constant loaded into TMP_REG1, followed by BR/BLR; conditional jumps
   first emit a B_CC that skips the whole sequence (6 instructions). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type < SLJIT_JUMP) {
		jump->flags |= IS_COND;
		PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type)));
	}
	else if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;

	PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
	jump->addr = compiler->size;
	PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));

	return jump;
}

/* Calls are emitted as jumps; arg_types needs no extra handling here. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
}

/* Compare-to-zero jump helper (CBZ/CBNZ based). */
static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;
	sljit_ins inv_bits = (type & SLJIT_I32_OP) ? W_OP : 0;

	SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
	ADJUST_LOCAL_OFFSET(src, srcw);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	jump->flags |= IS_CBZ | IS_COND;

	if (src & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ?
INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + else if (src & SLJIT_IMM) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + SLJIT_ASSERT(FAST_IS_REG(src)); + + if ((type & 0xff) == SLJIT_EQUAL) + inv_bits |= 1 << 24; + + PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); + PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (!(src & SLJIT_IMM)) { + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = srcw; + + FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? 
BLR : BR) | RN(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_r, src_r, flags, mem_flags; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + cc = get_cc(compiler, type & 0xff); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (GET_OPCODE(op) < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); + + if (dst_r == TMP_REG1) { + mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE; + return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2); + } + + return SLJIT_SUCCESS; + } + + flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + mem_flags = WORD_SIZE; + + if (op & SLJIT_I32_OP) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + src_r = dst; + + if (dst & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG1)); + src_r = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + if (dst_reg & SLJIT_I32_OP) + srcw = (sljit_s32)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + srcw = 0; + } + + cc = get_cc(compiler, type & 0xff); + dst_reg &= ~SLJIT_I32_OP; + + return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 sign = 0, inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_P: + inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S8: + sign = 1; + case SLJIT_MOV_U8: + inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S16: + sign = 1; + case SLJIT_MOV_U16: + inst = STURBI | 
(MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S32: + sign = 1; + case SLJIT_MOV_U32: + inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400; + break; + default: + SLJIT_UNREACHABLE(); + inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + inst |= sign ? 0x00800000 : 0x00400000; + + if (type & SLJIT_MEM_PRE) + inst |= 0x800; + + return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + inst = STUR_FI | 0x80000400; + + if (!(type & SLJIT_F32_OP)) + inst |= 0x40000000; + + if (!(type & SLJIT_MEM_STORE)) + inst |= 0x00400000; + + if (type & SLJIT_MEM_PRE) + inst |= 0x800; + + return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + sljit_s32 dst_reg; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + + SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0); + + dst_reg = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if (offset <= 0xffffff && offset >= -0xffffff) { + ins = ADDI; + if (offset < 0) { + offset = -offset; + ins = SUBI; + } + + if (offset <= 0xfff) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10))); + else { + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); + + offset &= 0xfff; + if (offset != 0) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10))); + } + } + else { + FAIL_IF(load_immediate (compiler, dst_reg, offset)); + /* Add extended register form. */ + FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg))); + } + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, 0)); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 1); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins* inst = (sljit_ins*)addr; + sljit_s32 dst; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); + + dst = inst[0] & 0x1f; + SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); + inst[0] = MOVZ | dst | ((new_target & 0xffff) << 5); + inst[1] = MOVK | dst | (((new_target >> 16) & 0xffff) << 5) | (1 << 21); + inst[2] = MOVK | dst | (((new_target >> 32) & 0xffff) << 5) | (2 << 21); + inst[3] = MOVK | dst | ((new_target >> 48) << 5) | (3 << 21); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_T2_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_T2_32.c new file mode 100644 index 0000000000..e35dbe99b3 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeARM_T2_32.c @@ -0,0 +1,2392 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#ifdef __SOFTFP__ + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp"; +#else + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp"; +#endif +} + +/* Length of an instruction word. */ +typedef sljit_u32 sljit_ins; + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
*/ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7 +}; + +#define COPY_BITS(src, from, to, bits) \ + ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) + +/* Thumb16 encodings. */ +#define RD3(rd) (reg_map[rd]) +#define RN3(rn) (reg_map[rn] << 3) +#define RM3(rm) (reg_map[rm] << 6) +#define RDN3(rdn) (reg_map[rdn] << 8) +#define IMM3(imm) (imm << 6) +#define IMM8(imm) (imm) + +/* Thumb16 helpers. */ +#define SET_REGS44(rd, rn) \ + ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4)) +#define IS_2_LO_REGS(reg1, reg2) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) +#define IS_3_LO_REGS(reg1, reg2, reg3) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) + +/* Thumb32 encodings. */ +#define RD4(rd) (reg_map[rd] << 8) +#define RN4(rn) (reg_map[rn] << 16) +#define RM4(rm) (reg_map[rm]) +#define RT4(rt) (reg_map[rt] << 12) +#define DD4(dd) (freg_map[dd] << 12) +#define DN4(dn) (freg_map[dn] << 16) +#define DM4(dm) (freg_map[dm]) +#define IMM5(imm) \ + (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) +#define IMM12(imm) \ + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +/* dot '.' changed to _ + I immediate form (possibly followed by number of immediate bits). 
*/ +#define ADCI 0xf1400000 +#define ADCS 0x4140 +#define ADC_W 0xeb400000 +#define ADD 0x4400 +#define ADDS 0x1800 +#define ADDSI3 0x1c00 +#define ADDSI8 0x3000 +#define ADD_W 0xeb000000 +#define ADDWI 0xf2000000 +#define ADD_SP 0xb000 +#define ADD_W 0xeb000000 +#define ADD_WI 0xf1000000 +#define ANDI 0xf0000000 +#define ANDS 0x4000 +#define AND_W 0xea000000 +#define ASRS 0x4100 +#define ASRSI 0x1000 +#define ASR_W 0xfa40f000 +#define ASR_WI 0xea4f0020 +#define BCC 0xd000 +#define BICI 0xf0200000 +#define BKPT 0xbe00 +#define BLX 0x4780 +#define BX 0x4700 +#define CLZ 0xfab0f080 +#define CMNI_W 0xf1100f00 +#define CMP 0x4280 +#define CMPI 0x2800 +#define CMPI_W 0xf1b00f00 +#define CMP_X 0x4500 +#define CMP_W 0xebb00f00 +#define EORI 0xf0800000 +#define EORS 0x4040 +#define EOR_W 0xea800000 +#define IT 0xbf00 +#define LDRI 0xf8500800 +#define LSLS 0x4080 +#define LSLSI 0x0000 +#define LSL_W 0xfa00f000 +#define LSL_WI 0xea4f0000 +#define LSRS 0x40c0 +#define LSRSI 0x0800 +#define LSR_W 0xfa20f000 +#define LSR_WI 0xea4f0010 +#define MOV 0x4600 +#define MOVS 0x0000 +#define MOVSI 0x2000 +#define MOVT 0xf2c00000 +#define MOVW 0xf2400000 +#define MOV_W 0xea4f0000 +#define MOV_WI 0xf04f0000 +#define MUL 0xfb00f000 +#define MVNS 0x43c0 +#define MVN_W 0xea6f0000 +#define MVN_WI 0xf06f0000 +#define NOP 0xbf00 +#define ORNI 0xf0600000 +#define ORRI 0xf0400000 +#define ORRS 0x4300 +#define ORR_W 0xea400000 +#define POP 0xbc00 +#define POP_W 0xe8bd0000 +#define PUSH 0xb400 +#define PUSH_W 0xe92d0000 +#define RSB_WI 0xf1c00000 +#define RSBSI 0x4240 +#define SBCI 0xf1600000 +#define SBCS 0x4180 +#define SBC_W 0xeb600000 +#define SDIV 0xfb90f0f0 +#define SMULL 0xfb800000 +#define STR_SP 0x9000 +#define SUBS 0x1a00 +#define SUBSI3 0x1e00 +#define SUBSI8 0x3800 +#define SUB_W 0xeba00000 +#define SUBWI 0xf2a00000 +#define SUB_SP 0xb080 +#define SUB_WI 0xf1a00000 +#define SXTB 0xb240 +#define SXTB_W 0xfa4ff080 +#define SXTH 0xb200 +#define SXTH_W 0xfa0ff080 +#define TST 0x4200 
+#define UDIV 0xfbb0f0f0 +#define UMULL 0xfba00000 +#define UXTB 0xb2c0 +#define UXTB_W 0xfa5ff080 +#define UXTH 0xb280 +#define UXTH_W 0xfa1ff080 +#define VABS_F32 0xeeb00ac0 +#define VADD_F32 0xee300a00 +#define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 +#define VDIV_F32 0xee800a00 +#define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 +#define VMOV2 0xec400a10 +#define VMRS 0xeef1fa10 +#define VMUL_F32 0xee200a00 +#define VNEG_F32 0xeeb10a40 +#define VSTR_F32 0xed000a00 +#define VSUB_F32 0xee300a40 + +static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_u16 *ptr; + SLJIT_ASSERT(!(inst & 0xffff0000)); + + ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16)); + FAIL_IF(!ptr); + *ptr = inst; + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr++ = inst >> 16; + *ptr = inst; + compiler->size += 2; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst) + | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); +} + +static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) +{ + sljit_s32 dst = inst[1] & 0x0f00; + SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); + inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); + inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); + inst[2] = (MOVT >> 16) | 
COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1); + inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); +} + +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) +{ + sljit_sw diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + + if (jump->flags & JUMP_ADDR) { + /* Branch to ARM code is not optimized yet. */ + if (!(jump->u.target & 0x1)) + return 0; + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1; + } + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1; + } + + if (jump->flags & IS_COND) { + SLJIT_ASSERT(!(jump->flags & IS_BL)); + if (diff <= 127 && diff >= -128) { + jump->flags |= PATCH_TYPE1; + return 5; + } + if (diff <= 524287 && diff >= -524288) { + jump->flags |= PATCH_TYPE2; + return 4; + } + /* +1 comes from the prefix IT instruction. */ + diff--; + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_TYPE3; + return 3; + } + } + else if (jump->flags & IS_BL) { + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_BL; + return 3; + } + } + else { + if (diff <= 1023 && diff >= -1024) { + jump->flags |= PATCH_TYPE4; + return 4; + } + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_TYPE5; + return 3; + } + } + + return 0; +} + +static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_s32 type = (jump->flags >> 4) & 0xf; + sljit_sw diff; + sljit_u16 *jump_inst; + sljit_s32 s, j1, j2; + + if (SLJIT_UNLIKELY(type == 0)) { + modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target); + return; + } + + if (jump->flags & JUMP_ADDR) { + SLJIT_ASSERT(jump->u.target & 0x1); + diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + } + else { + SLJIT_ASSERT(jump->u.label->addr & 0x1); + diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + } + jump_inst = (sljit_u16*)jump->addr; + + switch (type) { + case 1: + /* Encoding T1 of 'B' instruction */ + SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); + jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); + return; + case 2: + /* Encoding T3 of 'B' instruction */ + SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); + jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); + jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); + return; + case 3: + SLJIT_ASSERT(jump->flags & IS_COND); + *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; + diff--; + type = 5; + break; + case 4: + /* Encoding T2 of 'B' instruction */ + SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); + jump_inst[0] = 0xe000 | (diff & 0x7ff); + return; + } + + SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608); + + /* Really complex instruction form for branches. */ + s = (diff >> 23) & 0x1; + j1 = (~(diff >> 22) ^ s) & 0x1; + j2 = (~(diff >> 21) ^ s) & 0x1; + jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); + jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); + + /* The others have a common form. 
*/ + if (type == 5) /* Encoding T4 of 'B' instruction */ + jump_inst[1] |= 0x9000; + else if (type == 6) /* Encoding T1 of 'BL' instruction */ + jump_inst[1] |= 0xd000; + else + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_u16 *code; + sljit_u16 *code_ptr; + sljit_u16 *buf_ptr; + sljit_u16 *buf_end; + sljit_uw half_count; + sljit_uw next_addr; + sljit_sw executable_offset; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + half_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_u16*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 1); + do { + *code_ptr = *buf_ptr++; + if (next_addr == half_count) { + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); + SLJIT_ASSERT(!put_label || put_label->addr >= half_count); + + /* These structures are ordered by their address. */ + if (label && label->size == half_count) { + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == half_count) { + jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 
10 : 8); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == half_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == half_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr ++; + half_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == half_count) { + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + set_jump_instruction(jump, executable_offset); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr); + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + + /* Set thumb mode flag. */ + return (void*)((sljit_uw)code | 0x1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. 
*/ + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + return 1; + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + +#define INVALID_IMM 0x80000000 +static sljit_uw get_imm(sljit_uw imm) +{ + /* Thumb immediate form. */ + sljit_s32 counter; + + if (imm <= 0xff) + return imm; + + if ((imm & 0xffff) == (imm >> 16)) { + /* Some special cases. */ + if (!(imm & 0xff00)) + return (1 << 12) | (imm & 0xff); + if (!(imm & 0xff)) + return (2 << 12) | ((imm >> 8) & 0xff); + if ((imm & 0xff00) == ((imm & 0xff) << 8)) + return (3 << 12) | (imm & 0xff); + } + + /* Assembly optimization: count leading zeroes? */ + counter = 8; + if (!(imm & 0xffff0000)) { + counter += 16; + imm <<= 16; + } + if (!(imm & 0xff000000)) { + counter += 8; + imm <<= 8; + } + if (!(imm & 0xf0000000)) { + counter += 4; + imm <<= 4; + } + if (!(imm & 0xc0000000)) { + counter += 2; + imm <<= 2; + } + if (!(imm & 0x80000000)) { + counter += 1; + imm <<= 1; + } + /* Since imm >= 128, this must be true. */ + SLJIT_ASSERT(counter <= 31); + + if (imm & 0x00ffffff) + return INVALID_IMM; /* Cannot be encoded. */ + + return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1); +} + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + sljit_uw tmp; + + /* MOVS cannot be used since it destroy flags. */ + + if (imm >= 0x10000) { + tmp = get_imm(imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MOV_WI | RD4(dst) | tmp); + tmp = get_imm(~imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MVN_WI | RD4(dst) | tmp); + } + + /* set low 16 bits, set hi 16 bits to 0. 
*/ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + + /* set hi 16 bit if needed. */ + if (imm >= 0x10000) + return push_inst32(compiler, MOVT | RD4(dst) + | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ +#define SET_FLAGS 0x0100000 +#define UNUSED_RETURN 0x0200000 + +static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) +{ + /* dst must be register, TMP_REG1 + arg1 must be register, imm + arg2 must be register, imm */ + sljit_s32 reg; + sljit_uw imm, nimm; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates, no temporaries are used. */ + flags &= ~ARG1_IMM; + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (flags & ARG2_IMM) ? arg1 : arg2; + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (flags & 0xffff) { + case SLJIT_CLZ: + case SLJIT_MUL: + /* No form with immediate operand. */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2); + return load_immediate(compiler, dst, imm); + case SLJIT_NOT: + if (!(flags & SET_FLAGS)) + return load_immediate(compiler, dst, ~imm); + /* Since the flags should be set, we just fallback to the register mode. + Although some clever things could be done here, "NOT IMM" does not worth the efforts. 
*/ + break; + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + nimm = -(sljit_sw)imm; + if (IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (nimm <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(nimm) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst)); + if (nimm <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(nimm) | RDN3(dst)); + } + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (nimm <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm)); + } + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + nimm = get_imm(-(sljit_sw)imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + break; + case SLJIT_ADDC: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SUB: + /* SUB operation can be replaced by ADD because of the negative carry flag. 
*/ + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (flags & ARG1_IMM) { + if (imm == 0 && IS_2_LO_REGS(reg, dst)) + return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + } + if (flags & UNUSED_RETURN) { + if (imm <= 0xff && reg_map[reg] <= 7) + return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, CMPI_W | RN4(reg) | nimm); + nimm = get_imm(-(sljit_sw)imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, CMNI_W | RN4(reg) | nimm); + } + nimm = -(sljit_sw)imm; + if (IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (nimm <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(nimm) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst)); + if (nimm <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst)); + } + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (nimm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm)); + } + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + nimm = get_imm(-(sljit_sw)imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + break; + case SLJIT_SUBC: + if (flags & ARG1_IMM) + break; + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_AND: + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | 
RD4(dst) | RN4(reg) | nimm); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_OR: + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_XOR: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + if (flags & ARG1_IMM) + break; + imm &= 0x1f; + if (imm == 0) { + if (!(flags & SET_FLAGS)) + return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); + } + switch (flags & 0xffff) { + case SLJIT_SHL: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_LSHR: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + default: /* SLJIT_ASHR */ + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + } + default: + SLJIT_UNREACHABLE(); + break; + } + + if (flags & ARG2_IMM) { + imm = arg2; + arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, arg2, imm)); + } + else { + imm = arg1; + arg1 = (arg2 == TMP_REG1) ? 
TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, arg1, imm)); + } + + SLJIT_ASSERT(arg1 != arg2); + } + + /* Both arguments are registers. */ + switch (flags & 0xffff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (dst == arg2) + return SLJIT_SUCCESS; + return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); + case SLJIT_MOV_U8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_S8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_U16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_S16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_NOT: + SLJIT_ASSERT(arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG2); + FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); + return SLJIT_SUCCESS; + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); + if (dst == arg1 && !(flags & SET_FLAGS)) + return 
push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); + return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ADDC: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (flags & UNUSED_RETURN) { + if (IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); + return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2)); + } + if (IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); + return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUBC: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MUL: + compiler->status_flags_state = 0; + if (!(flags & SET_FLAGS)) + return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); + SLJIT_ASSERT(dst != TMP_REG2); + FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); + /* cmp TMP_REG2, dst asr #31. 
*/ + return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); + case SLJIT_AND: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); + if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); + return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_OR: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_XOR: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SHL: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_LSHR: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ASHR: + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +#define STORE 0x01 +#define SIGNED 0x02 + +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x04 +#define HALF_SIZE 0x08 +#define PRELOAD 0x0c + +#define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE))) +#define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift))) + +/* + 1st letter: + w = word + b = byte + h = half + + 2nd letter: + s = signed + u = unsigned + + 3rd letter: + l = load + s = store +*/ + +static 
const sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};

/* 16-bit encodings with a 5-bit immediate offset; 0 marks combinations
   that have no 16-bit immediate form (signed loads). Indexed by the same
   size/sign/direction flags as sljit_mem16. */
static const sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};

#define MEM_IMM8 0xc00
#define MEM_IMM12 0x800000
/* 32-bit (wide) load/store encodings; entry 12 (PRELOAD >> 2 == 3 slots
   past the half group) is the pld base opcode. */
static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,

/* p u l */ 0xf8100000 /* pld */,
};

/* Helper function. Dst should be reg + value, using at most 1 instruction; flags are not set.
*/
static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
{
	if (value >= 0) {
		if (value <= 0xfff)
			return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
		value = get_imm(value);
		if (value != INVALID_IMM)
			return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value);
	}
	else {
		value = -value;
		if (value <= 0xfff)
			return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
		value = get_imm(value);
		if (value != INVALID_IMM)
			return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value);
	}
	/* No single-instruction encoding exists for this delta. */
	return SLJIT_ERR_UNSUPPORTED;
}

/* Emit a load/store of 'reg' to/from the address described by arg/argw
   (SLJIT_MEM form), using tmp_reg as scratch for addresses that do not
   fit an addressing-mode immediate. flags selects size/sign/direction. */
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_s32 other_r;
	sljit_uw tmp;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
	arg &= ~SLJIT_MEM;

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: build the page base, index with the low 12 bits. */
		tmp = get_imm(argw & ~0xfff);
		if (tmp != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | tmp));
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
		}

		FAIL_IF(load_immediate(compiler, tmp_reg, argw));
		if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
	}

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Base + (index << shift) form; shift is at most 3. */
		argw &= 0x3;
		other_r = OFFS_REG(arg);
		arg &= 0xf;

		if (!argw && IS_3_LO_REGS(reg, arg, other_r))
			return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4));
	}

	if (argw > 0xfff) {
		/* Fold the out-of-range part of the offset into the base register. */
		tmp = get_imm(argw & ~0xfff);
		if (tmp != INVALID_IMM) {
			push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | tmp);
			arg = tmp_reg;
			argw = argw & 0xfff;
		}
	}
	else if (argw < -0xff) {
		tmp = get_imm(-argw & ~0xff);
		if (tmp != INVALID_IMM) {
			push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | tmp);
			arg = tmp_reg;
			argw = -(-argw & 0xff);
		}
	}

	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
		/* tmp is the access-size shift; 3 means "no 16-bit form fits". */
		tmp = 3;
		if (IS_WORD_SIZE(flags)) {
			if (OFFSET_CHECK(0x1f, 2))
				tmp = 2;
		}
		else if (flags & BYTE_SIZE)
		{
			if (OFFSET_CHECK(0x1f, 0))
				tmp = 0;
		}
		else {
			SLJIT_ASSERT(flags & HALF_SIZE);
			if (OFFSET_CHECK(0x1f, 1))
				tmp = 1;
		}

		if (tmp < 3)
			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp)));
	}
	else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && OFFSET_CHECK(0xff, 2) && reg_map[reg] <= 7) {
		/* SP based immediate. */
		return push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2));
	}

	if (argw >= 0 && argw <= 0xfff)
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw);
	else if (argw < 0 && argw >= -0xff)
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw);

	SLJIT_ASSERT(arg != tmp_reg);

	/* Fall back to register-offset addressing with the full offset in tmp_reg. */
	FAIL_IF(load_immediate(compiler, tmp_reg, argw));
	if (IS_3_LO_REGS(reg, arg, tmp_reg))
		return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
	return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
}

/* --------------------------------------------------------------------- */
/*  Entry, exit                                                          */
/* --------------------------------------------------------------------- */

/* Function prologue: push saved registers, allocate (8-byte aligned)
   local space, move incoming arguments into saved registers. On Windows,
   allocations of 4096+ bytes probe the stack one page at a time. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args, size, i, tmp;
	sljit_ins push = 0;
#ifdef _WIN32
	sljit_uw imm;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		push |= 1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		push |= 1 << reg_map[i];

	/* LR is pushed together with the register list (bit 14 / bit 8). */
	FAIL_IF((push & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | push)
		: push_inst16(compiler, PUSH | (1 << 8) | push));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	local_size = ((size + local_size + 7) & ~7) - size;
	compiler->local_size = local_size;

#ifdef _WIN32
	if (local_size >= 256) {
		if (local_size > 4096)
			imm = get_imm(4096);
		else
			imm = get_imm(local_size & ~0xff);

		SLJIT_ASSERT(imm != INVALID_IMM);
		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
	}
#else
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
	}
#endif

	args = get_arg_count(arg_types);

	if (args >= 1)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));

#ifdef _WIN32
	if (local_size >= 256) {
		if (local_size > 4096) {
			imm = get_imm(4096);
			SLJIT_ASSERT(imm != INVALID_IMM);

			if (local_size < 4 * 4096) {
				/* Unrolled page probes (at most 3 pages). */
				if (local_size > 2 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
					local_size -= 4096;
				}

				if (local_size > 2 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
					local_size -= 4096;
				}

				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
				local_size -= 4096;

				SLJIT_ASSERT(local_size > 0);
			}
			else {
				/* Probe loop: R3 counts whole pages, BCC branches back. */
				FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
				SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
				FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
				FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));

				local_size &= 0xfff;

				if (local_size != 0)
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
			}

			if (local_size >= 256) {
				imm = get_imm(local_size & ~0xff);
				SLJIT_ASSERT(imm != INVALID_IMM);

				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
			}
		}

		local_size &= 0xff;
		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));

		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
	}
	else if (local_size > 0)
		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
#endif

	return SLJIT_SUCCESS;
}

/* Record frame layout without emitting code (used when re-entering a
   previously compiled context). Mirrors the size math of sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((size + local_size + 7) & ~7) - size;
	return SLJIT_SUCCESS;
}

/* Function epilogue: move the return value, free local space, pop saved
   registers and PC (bit 15 / bit 8 of the pop list). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp;
	sljit_ins pop = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	if (compiler->local_size > 0) {
		if (compiler->local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size));
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		pop |= 1 << reg_map[i];

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		pop |= 1 << reg_map[i];

	return (pop & 0xff00)
		? push_inst32(compiler, POP_W | (1 << 15) | pop)
		: push_inst16(compiler, POP | (1 << 8) | pop);
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)

/* Software division fallbacks used when the CPU lacks UDIV/SDIV. */
#ifdef __cplusplus
extern "C" {
#endif

#ifdef _WIN32
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif

#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */

/* Zero-operand operations: breakpoint, nop, long multiply, divide/modulo
   (hardware UDIV/SDIV when available, otherwise runtime helper calls with
   caller-visible scratch registers saved around the call). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
	sljit_sw saved_reg_list[3];
	sljit_sw saved_reg_count;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| (reg_map[SLJIT_R1] << 8)
			| (reg_map[SLJIT_R0] << 12)
			| (reg_map[SLJIT_R0] << 16)
			| reg_map[SLJIT_R1]);
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Remainder = dividend - quotient * divisor. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Preserve live scratch registers that the helper call clobbers. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#ifdef _WIN32
		/* __rt_*div takes (denominator, numerator): swap R0/R1 first. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
#elif defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}

/* Single-operand operations: moves (with size/sign conversion), NOT, CLZ;
   NEG is lowered to "0 - src" through sljit_emit_op2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			flags = 0;
			break;
		}

		if (src & SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw));
		else if (src & SLJIT_MEM) {
			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
		} else {
			if (dst_r != TMP_REG1)
				return emit_op_imm(compiler, op, dst_r, TMP_REG2, src);
			/* Register-to-memory store: reuse the source register directly. */
			dst_r = src;
		}

		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;

		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	}

	if (op == SLJIT_NEG) {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
	}

	flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	return SLJIT_SUCCESS;
}

/* Two-operand operations: load memory/immediate operands, dispatch to
   emit_op_imm, store the result if the destination is memory. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_reg, flags, src2_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
		return SLJIT_SUCCESS;

	dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	if (src1 & SLJIT_IMM)
		flags |= ARG1_IMM;
	else if (src1 & SLJIT_MEM) {
		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
		src1w = TMP_REG1;
	}
	else
		src1w = src1;

	if (src2 & SLJIT_IMM)
		flags |= ARG2_IMM;
	else if (src2 & SLJIT_MEM) {
		/* Avoid clobbering src1 if it was just loaded into TMP_REG1. */
		src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
		emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg);
		src2w = src2_reg;
	}
	else
		src2w = src2;

	if (dst == SLJIT_UNUSED)
		flags |= UNUSED_RETURN;

	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, src1w, src2w);

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2);
}

/* Source-only operations: fast return (branch through LR) and prefetch
   hints (all prefetch levels map to a single pld form here). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));

		return push_inst16(compiler, BX | RN3(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	/* Doubled: VFP single registers are pairs within doubles. */
	return (freg_map[reg] << 1);
}

/* Emit a raw 16-bit or 32-bit instruction supplied by the caller. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	if (size == 2)
		return push_inst16(compiler, *(sljit_u16*)instruction);
	return push_inst32(compiler, *(sljit_ins*)instruction);
}

/*
--------------------------------------------------------------------- */
/* Floating point operators */
/* --------------------------------------------------------------------- */

#define FPU_LOAD (1 << 20)

/* Emit a VFP load/store (VLDR/VSTR, selected by FPU_LOAD in flags) of
   float register 'reg' to/from arg/argw, using TMP_REG1 as an address
   scratch when the offset does not fit the 8-bit word-scaled form. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Compute base + (index << shift) first: VLDR/VSTR have no register offset form. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG1;
		argw = 0;
	}

	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		/* 0x800000 is the U (add) bit; offsets are word scaled. */
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
	}

	if (arg & REG_MASK) {
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
		}
		imm = get_imm(argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
		imm = get_imm(-argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
	}

	/* Last resort: materialize the full address in TMP_REG1. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
}

/* Convert a float/double to a signed word (truncating VCVT), storing the
   result in a register or memory destination. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* The F32 bit is inverted: opcode names below are the F32 variants. */
	op ^= SLJIT_F32_OP;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src)));

	if (FAST_IS_REG(dst))
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}

/* Convert a signed word (register, memory or immediate) to float/double. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	op ^= SLJIT_F32_OP;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}

/* Compare two float operands (VCMP) and copy the VFP status flags into
   the ARM flags (VMRS) for the following conditional branch/set. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_F32_OP;

	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_F32_OP) | DD4(src1) | DM4(src2)));
	return push_inst32(compiler, VMRS);
}

/* Single-operand floating point operations (continues past this view). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ?
dst : TMP_FREG1; + + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_F32_OP; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op ^= SLJIT_F32_OP; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); +} + +#undef FPU_LOAD + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(dst)) + return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2)); + + /* Memory. 
*/ + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_EQUAL_F64: + return 0x0; + + case SLJIT_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: + return 0x1; + + case SLJIT_LESS: + case SLJIT_LESS_F64: + return 0x3; + + case SLJIT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: + return 0x2; + + case SLJIT_GREATER: + case SLJIT_GREATER_F64: + return 0x8; + + case SLJIT_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: + return 0x9; + + case SLJIT_SIG_LESS: + return 0xb; + + case SLJIT_SIG_GREATER_EQUAL: + return 0xa; + + case SLJIT_SIG_GREATER: + return 0xc; + + case SLJIT_SIG_LESS_EQUAL: + return 0xd; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x1; + + case SLJIT_UNORDERED_F64: + return 0x6; + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x0; + + case SLJIT_ORDERED_F64: + return 0x7; + + default: /* SLJIT_JUMP */ + SLJIT_UNREACHABLE(); + return 0xe; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins cc; + + CHECK_ERROR_PTR(); + 
CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + cc = get_cc(compiler, type); + jump->flags |= cc << 8; + PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + } + + jump->addr = compiler->size; + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); + else { + jump->flags |= IS_BL; + PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); + } + + return jump; +} + +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 stack_offset = 0; + sljit_s32 arg_count = 0; + sljit_s32 word_arg_offset = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 src_offset = 4 * sizeof(sljit_sw); + sljit_u8 offsets[4]; + + if (src && FAST_IS_REG(*src)) + src_offset = reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f32); + arg_count++; + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f64); + arg_count++; + float_arg_count++; + break; + default: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_sw); + arg_count++; + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_offset > 16) + FAIL_IF(push_inst16(compiler, SUB_SP | (((stack_offset - 16) + 0x7) & ~0x7) >> 2)); + + 
SLJIT_ASSERT(reg_map[TMP_REG1] == 12); + + /* Process arguments in reversed direction. */ + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + case SLJIT_ARG_TYPE_F64: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT((stack_offset & 0x7) == 0); + + if (stack_offset < 16) { + if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + default: + arg_count--; + word_arg_offset -= sizeof(sljit_sw); + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT(stack_offset >= word_arg_offset); + + if (stack_offset != word_arg_offset) { + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + else if (src_offset == word_arg_offset) { + *src = 1 + (stack_offset >> 2); + src_offset = stack_offset; + } + FAIL_IF(push_inst16(compiler, MOV | (stack_offset >> 2) | (word_arg_offset << 1))); + } else + FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((stack_offset - 16) >> 2))); + } + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + 
+ return SLJIT_SUCCESS; +} + +static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 stack_size = 0; + + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + if (stack_size & 0x7) + stack_size += sizeof(sljit_sw); + stack_size += sizeof(sljit_f64); + break; + default: + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_size <= 16) + return SLJIT_SUCCESS; + + return push_inst16(compiler, ADD_SP | ((((stack_size - 16) + 0x7) & ~0x7) >> 2)); +} + +#else + +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 remap = 0; + sljit_u32 offset = 0; + sljit_u32 new_offset, mask; + + /* Remove return value. */ + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { + new_offset = 0; + mask = 1; + + while (remap & mask) { + new_offset++; + mask <<= 1; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | DD4((new_offset >> 1) + 1) + | ((new_offset & 0x1) ? 
0x400000 : 0) | DM4((offset >> 1) + 1))); + + offset += 2; + } + else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { + new_offset = 0; + mask = 3; + + while (remap & mask) { + new_offset += 2; + mask <<= 2; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_F32_OP | DD4((new_offset >> 1) + 1) | DM4((offset >> 1) + 1))); + + offset += 2; + } + arg_types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; +#else + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + if (!(src & SLJIT_IMM)) { + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); + return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); + } + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG1, src, srcw, TMP_REG1)); + if (type >= SLJIT_FAST_CALL) + return push_inst16(compiler, BLX | RN3(TMP_REG1)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = srcw; + + FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#ifdef __SOFTFP__ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + return softfloat_post_call_with_args(compiler, arg_types); +#else /* !__SOFTFP__ */ + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +#endif /* __SOFTFP__ */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + cc = 
get_cc(compiler, type & 0xff); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + if (reg_map[dst_r] > 7) { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0)); + } else { + /* The movsi (immediate) instruction does not set flags in IT block. */ + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1)); + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0)); + } + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2); + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (op == SLJIT_AND) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0)); + } + else { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1)); + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (!(flags & SLJIT_SET_Z)) + return SLJIT_SUCCESS; + + /* The condition must always be set, even if the ORR/EORI is not executed above. 
*/ + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_uw cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + dst_reg &= ~SLJIT_I32_OP; + + cc = get_cc(compiler, type & 0xff); + + if (!(src & SLJIT_IMM)) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src)); + } + + tmp = (sljit_uw) srcw; + + if (tmp < 0x10000) { + /* set low 16 bits, set hi 16 bits to 0. */ + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); + } + + tmp = get_imm(srcw); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp); + } + + tmp = get_imm(~srcw); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp); + } + + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4)); + + tmp = (sljit_uw) srcw; + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst_reg) + | COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || 
memw < -255)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + break; + case SLJIT_MOV_S8: + flags = BYTE_SIZE | SIGNED; + break; + case SLJIT_MOV_U16: + flags = HALF_SIZE; + break; + case SLJIT_MOV_S16: + flags = HALF_SIZE | SIGNED; + break; + default: + SLJIT_UNREACHABLE(); + flags = WORD_SIZE; + break; + } + + if (type & SLJIT_MEM_STORE) + flags |= STORE; + + inst = sljit_mem32[flags] | 0x900; + + if (type & SLJIT_MEM_PRE) + inst |= 0x400; + + if (memw >= 0) + inst |= 0x200; + else + memw = -memw; + + return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | memw); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_u16 *inst = (sljit_u16*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); + modify_imm32_const(inst, new_target); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); + inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_32.c new file mode 100644 index 0000000000..a90345f1f8 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_32.c @@ -0,0 +1,671 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 32-bit arch dependent functions. */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) +{ + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? 
push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; +} + +#define EMIT_LOGICAL(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#define EMIT_SHIFT(op_imm, op_v) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \ + } + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, is_handled; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SEB | T(src2) | D(dst), 
DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); +#else /* SLJIT_MIPS_REV < 1 */ + if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { + FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); + return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); + } + 
/* Nearly all instructions are unmovable in the following sequence. */ + FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst))); + /* Loop for searching the highest bit. */ + FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst))); + FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); +#endif /* SLJIT_MIPS_REV >= 1 */ + return SLJIT_SUCCESS; + + case SLJIT_ADD: + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + if (is_overflow || is_carry) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + else { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + } + } + /* dst may be the same as src1 or src2. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + /* dst may be the same as src1 or src2. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* a + b >= a | b (otherwise, the carry should be set to 1). */ + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (!is_overflow) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); + + case SLJIT_ADDC: + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + else { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + } + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); + } else { + if (is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | 
DA(EQUAL_FLAG), EQUAL_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); + } + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + if (!is_carry) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + /* Set carry flag. */ + return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == 
SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + /* dst may be the same as src1 or src2. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + /* dst may be the same as src1 or src2. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + /* dst may be the same as src1 or src2. 
*/ + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS; + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); + return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + EMIT_SHIFT(SLL, SLLV); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(SRL, SRLV); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(SRA, SRAV); + return SLJIT_SUCCESS; + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 
emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +{ + sljit_s32 stack_offset = 0; + sljit_s32 arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 arg_count_save, types_save; + sljit_ins prev_ins = NOP; + sljit_ins ins = NOP; + sljit_u8 offsets[4]; + + SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + offsets[arg_count] = (sljit_u8)stack_offset; + + if (word_arg_count == 0 && arg_count <= 1) + offsets[arg_count] = 254 + arg_count; + + stack_offset += sizeof(sljit_f32); + arg_count++; + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + 
offsets[arg_count] = (sljit_u8)stack_offset; + + if (word_arg_count == 0 && arg_count <= 1) + offsets[arg_count] = 254 + arg_count; + + stack_offset += sizeof(sljit_f64); + arg_count++; + float_arg_count++; + break; + default: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_sw); + arg_count++; + word_arg_count++; + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ + if (stack_offset > 16) + FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + + types_save = types; + arg_count_save = arg_count; + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + if (offsets[arg_count] < 254) + ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F64: + arg_count--; + if (offsets[arg_count] < 254) + ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); + float_arg_count--; + break; + default: + if (offsets[arg_count - 1] >= 16) + ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsets[arg_count - 1]); + else if (arg_count != word_arg_count) + ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2)); + else if (arg_count == 1) + ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4); + + arg_count--; + word_arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_DEF_SHIFT; + } + + types = types_save; + arg_count = arg_count_save; + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + if (offsets[arg_count] == 254) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + else if (offsets[arg_count] < 16) + ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); + break; + case 
SLJIT_ARG_TYPE_F64: + arg_count--; + if (offsets[arg_count] == 254) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + else if (offsets[arg_count] < 16) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); + ins = LW | S(SLJIT_SP) | TA(5 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count] + sizeof(sljit_sw)); + } + break; + default: + arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_DEF_SHIFT; + } + + *ins_ptr = prev_ins; + + return SLJIT_SUCCESS; +} + +static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 stack_offset = 0; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_offset += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + stack_offset += sizeof(sljit_f64); + break; + default: + stack_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. 
*/ + if (stack_offset > 16) + return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(16), DR(SLJIT_SP)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_ins ins; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); + + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + } + + FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + /* Register input. 
*/ + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + return post_call_with_args(compiler, arg_types); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_64.c new file mode 100644 index 0000000000..1f22e49ed9 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_64.c @@ -0,0 +1,672 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 64-bit arch dependent functions. 
*/ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) +{ + sljit_s32 shift = 32; + sljit_s32 shift2; + sljit_s32 inv = 0; + sljit_ins ins; + sljit_uw uimm; + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; + } + + /* Zero extended number. */ + uimm = imm; + if (imm < 0) { + uimm = ~imm; + inv = 1; + } + + while (!(uimm & 0xff00000000000000l)) { + shift -= 8; + uimm <<= 8; + } + + if (!(uimm & 0xf000000000000000l)) { + shift -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift -= 2; + uimm <<= 2; + } + + if ((sljit_sw)uimm < 0) { + uimm >>= 1; + shift += 1; + } + SLJIT_ASSERT(((uimm & 0xc000000000000000l) == 0x4000000000000000l) && (shift > 0) && (shift <= 32)); + + if (inv) + uimm = ~uimm; + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + if (uimm & 0x0000ffff00000000l) + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 32), dst_ar)); + + imm &= (1l << shift) - 1; + if (!(imm & ~0xffff)) { + ins = (shift == 32) ? DSLL32 : DSLL; + if (shift < 32) + ins |= SH_IMM(shift); + FAIL_IF(push_inst(compiler, ins | TA(dst_ar) | DA(dst_ar), dst_ar)); + return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); + } + + /* Double shifts needs to be performed. 
*/ + uimm <<= 32; + shift2 = shift - 16; + + while (!(uimm & 0xf000000000000000l)) { + shift2 -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift2 -= 2; + uimm <<= 2; + } + + if (!(uimm & 0x8000000000000000l)) { + shift2--; + uimm <<= 1; + } + + SLJIT_ASSERT((uimm & 0x8000000000000000l) && (shift2 > 0) && (shift2 <= 16)); + + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift - shift2), dst_ar)); + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift2), dst_ar)); + + imm &= (1l << shift2) - 1; + return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); +} + +#define SELECT_OP(a, b) \ + (!(op & SLJIT_I32_OP) ? a : b) + +#define EMIT_LOGICAL(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ + if (flags & SRC2_IMM) { \ + if (src2 >= 32) { \ + SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \ + ins = op_dimm32; \ + src2 -= 32; \ + } \ + else \ + ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ + } \ + else { \ + ins = (op & SLJIT_I32_OP) ? 
op_v : op_dv; \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \ + } + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_ins ins; + sljit_s32 is_overflow, is_carry, is_handled; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) { + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) { + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U32: + SLJIT_ASSERT(!(op & SLJIT_I32_OP)); + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + + case SLJIT_MOV_S32: + 
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst))); +#else /* SLJIT_MIPS_REV < 1 */ + if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); + return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); + } + /* Nearly all instructions are unmovable in the following sequence. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); + /* Loop for searching the highest bit. 
*/ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); + FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); +#endif /* SLJIT_MIPS_REV >= 1 */ + return SLJIT_SUCCESS; + + case SLJIT_ADD: + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + if (is_overflow || is_carry) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + } + } + /* dst may be the same as src1 or src2. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + /* dst may be the same as src1 or src2. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* a + b >= a | b (otherwise, the carry should be set to 1). */ + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (!is_overflow) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); + + case SLJIT_ADDC: + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + } + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } else { + if (is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + /* dst may be the same as src1 or src2. 
*/ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + if (!is_carry) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + /* Set carry flag. */ + return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == 
SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + /* dst may be the same as src1 or src2. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + /* dst may be the same as src1 or src2. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + /* dst may be the same as src1 or src2. 
*/ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS; + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_I32_OP) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); + FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 6 */ + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, 
SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV); + return SLJIT_SUCCESS; + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 32), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 16), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 
6); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_ins prev_ins = NOP; + sljit_ins ins = NOP; + + SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count++; + float_arg_count++; + break; + default: + arg_count++; + word_arg_count++; + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F64: + if (arg_count != float_arg_count) + ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; + default: + if (arg_count != word_arg_count) + ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count); + else if (arg_count == 1) + ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4); + arg_count--; + word_arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_DEF_SHIFT; + } + + *ins_ptr = prev_ins; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler 
*compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_ins ins; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); + + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + } + + FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + /* Register input. 
*/ + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_common.c new file mode 100644 index 0000000000..fd747695a7 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeMIPS_common.c @@ -0,0 +1,2309 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Latest MIPS architecture. 
*/ + +#ifndef __mips_hard_float +/* Disable automatic detection, covers both -msoft-float and -mno-float */ +#undef SLJIT_IS_FPU_AVAILABLE +#define SLJIT_IS_FPU_AVAILABLE 0 +#endif + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R6" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R6" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R1" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R1" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#else /* SLJIT_MIPS_REV < 1 */ + return "MIPS III" SLJIT_CPUINFO; +#endif /* SLJIT_MIPS_REV >= 6 */ +} + +/* Length of an instruction word + Both for mips-32 and mips-64 */ +typedef sljit_u32 sljit_ins; + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) + +/* For position independent code, t9 must contain the function address. */ +#define PIC_ADDR_REG TMP_REG2 + +/* Floating point status register. */ +#define FCSR_REG 31 +/* Return address register. */ +#define RETURN_ADDR_REG 31 + +/* Flags are kept in volatile registers. 
*/ +#define EQUAL_FLAG 3 +#define OTHER_FLAG 1 + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) +#define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31 +}; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { + 0, 0, 14, 2, 4, 6, 8, 12, 10, 16 +}; + +#else + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { + 0, 0, 13, 14, 15, 16, 17, 12, 18, 10 +}; + +#endif + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define S(s) (reg_map[s] << 21) +#define T(t) (reg_map[t] << 16) +#define D(d) (reg_map[d] << 11) +#define FT(t) (freg_map[t] << 16) +#define FS(s) (freg_map[s] << 11) +#define FD(d) (freg_map[d] << 6) +/* Absolute registers. 
*/ +#define SA(s) ((s) << 21) +#define TA(t) ((t) << 16) +#define DA(d) ((d) << 11) +#define IMM(imm) ((imm) & 0xffff) +#define SH_IMM(imm) ((imm) << 6) + +#define DR(dr) (reg_map[dr]) +#define FR(dr) (freg_map[dr]) +#define HI(opcode) ((opcode) << 26) +#define LO(opcode) (opcode) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +/* CMP.cond.fmt */ +/* S = (20 << 21) D = (21 << 21) */ +#define CMP_FMT_S (20 << 21) +#endif /* SLJIT_MIPS_REV >= 6 */ +/* S = (16 << 21) D = (17 << 21) */ +#define FMT_S (16 << 21) +#define FMT_D (17 << 21) + +#define ABS_S (HI(17) | FMT_S | LO(5)) +#define ADD_S (HI(17) | FMT_S | LO(0)) +#define ADDIU (HI(9)) +#define ADDU (HI(0) | LO(33)) +#define AND (HI(0) | LO(36)) +#define ANDI (HI(12)) +#define B (HI(4)) +#define BAL (HI(1) | (17 << 16)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define BC1EQZ (HI(17) | (9 << 21) | FT(TMP_FREG3)) +#define BC1NEZ (HI(17) | (13 << 21) | FT(TMP_FREG3)) +#else /* SLJIT_MIPS_REV < 6 */ +#define BC1F (HI(17) | (8 << 21)) +#define BC1T (HI(17) | (8 << 21) | (1 << 16)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define BEQ (HI(4)) +#define BGEZ (HI(1) | (1 << 16)) +#define BGTZ (HI(7)) +#define BLEZ (HI(6)) +#define BLTZ (HI(1) | (0 << 16)) +#define BNE (HI(5)) +#define BREAK (HI(0) | LO(13)) +#define CFC1 (HI(17) | (2 << 21)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define C_UEQ_S (HI(17) | CMP_FMT_S | LO(3)) +#define C_ULE_S (HI(17) | CMP_FMT_S | LO(7)) +#define C_ULT_S (HI(17) | CMP_FMT_S | LO(5)) +#define C_UN_S (HI(17) | CMP_FMT_S | LO(1)) +#define C_FD (FD(TMP_FREG3)) +#else /* SLJIT_MIPS_REV < 6 */ +#define C_UEQ_S (HI(17) | FMT_S | LO(51)) +#define C_ULE_S (HI(17) | FMT_S | LO(55)) +#define C_ULT_S (HI(17) | FMT_S | LO(53)) +#define C_UN_S (HI(17) | FMT_S | LO(49)) +#define C_FD (0) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define CVT_S_S (HI(17) | FMT_S | LO(32)) +#define DADDIU (HI(25)) +#define DADDU (HI(0) | LO(45)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define 
DDIV (HI(0) | (2 << 6) | LO(30)) +#define DDIVU (HI(0) | (2 << 6) | LO(31)) +#define DMOD (HI(0) | (3 << 6) | LO(30)) +#define DMODU (HI(0) | (3 << 6) | LO(31)) +#define DIV (HI(0) | (2 << 6) | LO(26)) +#define DIVU (HI(0) | (2 << 6) | LO(27)) +#define DMUH (HI(0) | (3 << 6) | LO(28)) +#define DMUHU (HI(0) | (3 << 6) | LO(29)) +#define DMUL (HI(0) | (2 << 6) | LO(28)) +#define DMULU (HI(0) | (2 << 6) | LO(29)) +#else /* SLJIT_MIPS_REV < 6 */ +#define DDIV (HI(0) | LO(30)) +#define DDIVU (HI(0) | LO(31)) +#define DIV (HI(0) | LO(26)) +#define DIVU (HI(0) | LO(27)) +#define DMULT (HI(0) | LO(28)) +#define DMULTU (HI(0) | LO(29)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define DIV_S (HI(17) | FMT_S | LO(3)) +#define DSLL (HI(0) | LO(56)) +#define DSLL32 (HI(0) | LO(60)) +#define DSLLV (HI(0) | LO(20)) +#define DSRA (HI(0) | LO(59)) +#define DSRA32 (HI(0) | LO(63)) +#define DSRAV (HI(0) | LO(23)) +#define DSRL (HI(0) | LO(58)) +#define DSRL32 (HI(0) | LO(62)) +#define DSRLV (HI(0) | LO(22)) +#define DSUBU (HI(0) | LO(47)) +#define J (HI(2)) +#define JAL (HI(3)) +#define JALR (HI(0) | LO(9)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define JR (HI(0) | LO(9)) +#else /* SLJIT_MIPS_REV < 6 */ +#define JR (HI(0) | LO(8)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define LD (HI(55)) +#define LUI (HI(15)) +#define LW (HI(35)) +#define MFC1 (HI(17)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define MOD (HI(0) | (3 << 6) | LO(26)) +#define MODU (HI(0) | (3 << 6) | LO(27)) +#else /* SLJIT_MIPS_REV < 6 */ +#define MFHI (HI(0) | LO(16)) +#define MFLO (HI(0) | LO(18)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define MOV_S (HI(17) | FMT_S | LO(6)) +#define MTC1 (HI(17) | (4 << 21)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define MUH (HI(0) | (3 << 6) | LO(24)) +#define MUHU (HI(0) | (3 << 6) | LO(25)) +#define MUL (HI(0) | (2 << 6) | LO(24)) +#define MULU (HI(0) | (2 << 6) | LO(25)) +#else /* SLJIT_MIPS_REV < 6 */ +#define MULT (HI(0) | LO(24)) +#define MULTU 
(HI(0) | LO(25)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define MUL_S (HI(17) | FMT_S | LO(2)) +#define NEG_S (HI(17) | FMT_S | LO(7)) +#define NOP (HI(0) | LO(0)) +#define NOR (HI(0) | LO(39)) +#define OR (HI(0) | LO(37)) +#define ORI (HI(13)) +#define SD (HI(63)) +#define SDC1 (HI(61)) +#define SLT (HI(0) | LO(42)) +#define SLTI (HI(10)) +#define SLTIU (HI(11)) +#define SLTU (HI(0) | LO(43)) +#define SLL (HI(0) | LO(0)) +#define SLLV (HI(0) | LO(4)) +#define SRL (HI(0) | LO(2)) +#define SRLV (HI(0) | LO(6)) +#define SRA (HI(0) | LO(3)) +#define SRAV (HI(0) | LO(7)) +#define SUB_S (HI(17) | FMT_S | LO(1)) +#define SUBU (HI(0) | LO(35)) +#define SW (HI(43)) +#define SWC1 (HI(57)) +#define TRUNC_W_S (HI(17) | FMT_S | LO(13)) +#define XOR (HI(0) | LO(38)) +#define XORI (HI(14)) + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#define CLZ (HI(28) | LO(32)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define DCLZ (LO(18)) +#else /* SLJIT_MIPS_REV < 6 */ +#define DCLZ (HI(28) | LO(36)) +#define MOVF (HI(0) | (0 << 16) | LO(1)) +#define MOVN (HI(0) | LO(11)) +#define MOVT (HI(0) | (1 << 16) | LO(1)) +#define MOVZ (HI(0) | LO(10)) +#define MUL (HI(28) | LO(2)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define PREF (HI(51)) +#define PREFX (HI(19) | LO(15)) +#define SEB (HI(31) | (16 << 6) | LO(32)) +#define SEH (HI(31) | (24 << 6) | LO(32)) +#endif /* SLJIT_MIPS_REV >= 1 */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ADDU_W ADDU +#define ADDIU_W ADDIU +#define SLL_W SLL +#define SUBU_W SUBU +#else +#define ADDU_W DADDU +#define ADDIU_W DADDIU +#define SLL_W DSLL +#define SUBU_W DSUBU +#endif + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. 
*/ +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS + || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags) +{ + if (flags & IS_BIT26_COND) + return (1 << 26); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + if (flags & IS_BIT23_COND) + return (1 << 23); +#endif /* SLJIT_MIPS_REV >= 6 */ + return (1 << 16); +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + return code_ptr; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return code_ptr; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + inst = (sljit_ins *)jump->addr; + if (jump->flags & IS_COND) + inst--; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (jump->flags & IS_CALL) + goto keep_address; +#endif + + /* B instructions. */ + if (jump->flags & IS_MOVABLE) { + diff = ((sljit_sw)target_addr - (sljit_sw)inst - executable_offset) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? 
BAL : B; + jump->addr -= sizeof(sljit_ins); + return inst; + } + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = saved_inst ^ invert_branch(jump->flags); + jump->addr -= 2 * sizeof(sljit_ins); + return inst; + } + } + else { + diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = (jump->flags & IS_JAL) ? BAL : B; + inst[1] = NOP; + return inst + 1; + } + inst[0] = inst[0] ^ invert_branch(jump->flags); + inst[1] = NOP; + jump->addr -= sizeof(sljit_ins); + return inst + 1; + } + } + + if (jump->flags & IS_COND) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~0xfffffff)) { + jump->flags |= PATCH_J; + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = (saved_inst & 0xffff0000) | 3; + inst[1] = J; + inst[2] = NOP; + return inst + 2; + } + else if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (inst[0] & 0xffff0000) | 3; + inst[1] = NOP; + inst[2] = J; + inst[3] = NOP; + jump->addr += sizeof(sljit_ins); + return inst + 3; + } + } + else { + /* J instuctions. */ + if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? JAL : J; + jump->addr -= sizeof(sljit_ins); + return inst; + } + + if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (jump->flags & IS_JAL) ? 
JAL : J; + inst[1] = NOP; + return inst + 1; + } + } + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +keep_address: + if (target_addr <= 0x7fffffff) { + jump->flags |= PATCH_ABS32; + if (jump->flags & IS_COND) { + inst[0] -= 4; + inst++; + } + inst[2] = inst[6]; + inst[3] = inst[7]; + return inst + 3; + } + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + if (jump->flags & IS_COND) { + inst[0] -= 2; + inst++; + } + inst[4] = inst[6]; + inst[5] = inst[7]; + return inst + 5; + } +#endif + + return code_ptr; +} + +#ifdef __GNUC__ +static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr) +{ + SLJIT_CACHE_FLUSH(code, code_ptr); +} +#endif + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label < 0x80000000l) { + put_label->flags = 0; + return 1; + } + + if (max_label < 0x800000000000l) { + put_label->flags = 1; + return 3; + } + + put_label->flags = 2; + return 5; +} + +static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) +{ + sljit_uw addr = put_label->label->addr; + sljit_ins *inst = (sljit_ins *)put_label->addr; + sljit_s32 reg = *inst; + + if (put_label->flags == 0) { + SLJIT_ASSERT(addr < 0x80000000l); + inst[0] = LUI | T(reg) | IMM(addr >> 16); + } + else if (put_label->flags == 1) { + SLJIT_ASSERT(addr < 0x800000000000l); + inst[0] = LUI | T(reg) | IMM(addr >> 32); + inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + inst += 2; + } + else { + inst[0] = LUI | T(reg) | IMM(addr >> 48); + inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff); + inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); + inst += 4; + } + + inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); +} + 
+#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. 
*/ + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 7); +#endif + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + word_count += 5; +#endif + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); + break; + } + if (jump->flags & PATCH_J) { + SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff)); + buf_ptr[0] |= (addr >> 2) & 0x03ffffff; + break; + } + + /* Set the fields of immediate loads. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); +#else + if (jump->flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= 0x7fffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + } + else if (jump->flags & PATCH_ABS48) { + SLJIT_ASSERT(addr <= 0x7fffffffffffl); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); + } + else { + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[5] = (buf_ptr[5] & 0xffff0000) | (addr & 0xffff); + } +#endif + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + addr = put_label->label->addr; + buf_ptr = (sljit_ins *)put_label->addr; + + SLJIT_ASSERT((buf_ptr[0] & 0xffe00000) == LUI && (buf_ptr[1] & 0xfc000000) == ORI); + buf_ptr[0] |= (addr >> 16) & 0xffff; + buf_ptr[1] |= addr & 0xffff; 
+#else + put_label_set(put_label); +#endif + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + +#ifndef __GNUC__ + SLJIT_CACHE_FLUSH(code, code_ptr); +#else + /* GCC workaround for invalid code generation with -O2. */ + sljit_cache_flush(code, code_ptr); +#endif + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + sljit_sw fir = 0; + + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif defined(__GNUC__) + __asm__ ("cfc1 %0, $0" : "=r"(fir)); + return (fir >> 22) & 0x1; +#else +#error "FIR check is not implemented for this architecture" +#endif + case SLJIT_HAS_ZERO_REGISTER: + return 1; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + return 1; +#endif /* SLJIT_MIPS_REV >= 1 */ + + default: + return fir; + } +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. 
*/ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define ARG_TEST 0x00020 +#define ALT_KEEP_CACHE 0x00040 +#define CUMULATIVE_OP 0x00080 +#define LOGICAL_OP 0x00100 +#define IMM_OP 0x00200 +#define SRC2_IMM 0x00400 + +#define UNUSED_DEST 0x00800 +#define REG_DEST 0x01000 +#define REG1_SOURCE 0x02000 +#define REG2_SOURCE 0x04000 +#define SLOW_SRC1 0x08000 +#define SLOW_SRC2 0x10000 +#define SLOW_DEST 0x20000 + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define STACK_STORE SW +#define STACK_LOAD LW +#else +#define STACK_STORE SD +#define STACK_LOAD LD +#endif + +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#include "sljitNativeMIPS_32.c" +#else +#include "sljitNativeMIPS_64.c" +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_ins base; + sljit_s32 args, i, tmp, offs; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + local_size = (local_size + 15) & ~0xf; +#else + local_size = (local_size + 31) & ~0x1f; +#endif + compiler->local_size = local_size; + + if (local_size <= SIMM_MAX) { + /* Frequent case. 
*/ + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); + base = S(SLJIT_SP); + offs = local_size - (sljit_sw)sizeof(sljit_sw); + } + else { + FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); + base = S(TMP_REG2); + local_size = 0; + offs = -(sljit_sw)sizeof(sljit_sw); + } + + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_s32)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_s32)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } + + args = get_arg_count(arg_types); + + if (args >= 1) + FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); + if (args >= 2) + FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1))); + if (args >= 3) + FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2))); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + 
SLJIT_LOCALS_OFFSET; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->local_size = (local_size + 15) & ~0xf; +#else + compiler->local_size = (local_size + 31) & ~0x1f; +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 local_size, i, tmp, offs; + sljit_ins base; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + local_size = compiler->local_size; + if (local_size <= SIMM_MAX) + base = S(SLJIT_SP); + else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); + base = S(TMP_REG1); + local_size = 0; + } + + FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_s32)sizeof(sljit_sw)), RETURN_ADDR_REG)); + offs = local_size - (sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_s32)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_s32)(sizeof(sljit_sw)); + } + + SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (compiler->local_size <= SIMM_MAX) + return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS); + else + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS); +} + +#undef STACK_STORE +#undef STACK_LOAD + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* u b s */ HI(40) /* sb */, +/* u b l */ HI(36) /* lbu */, +/* u h s */ HI(41) /* sh */, +/* u h l */ HI(37) /* lhu */, +/* u i s */ HI(43) /* sw */, +/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */), + +/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* s b s */ HI(40) /* sb */, +/* s b l */ HI(32) /* lb */, +/* s h s */ HI(41) /* sh */, +/* s h l */ HI(33) /* lh */, +/* s i s */ HI(43) /* sw */, +/* s i l */ HI(35) /* lw */, + +/* d s */ HI(61) /* sdc1 */, +/* d l */ HI(53) /* ldc1 */, +/* s s */ HI(57) /* swc1 */, +/* s l */ HI(49) /* lwc1 */, +}; + +#undef ARCH_32_64 + +/* reg_ar is an absoulute register! */ + +/* Can perform an operation using at most 1 instruction. 
*/ +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { + /* Works for both absoulte and relative addresses. */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & REG_MASK) + | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS)); + return -1; + } + return 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + next_argw &= 0x3; + if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) + return 1; + return 0; + } + + if (arg == next_arg) { + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) + return 1; + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + sljit_s32 tmp_ar, base, delay_slot; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } + else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + /* Using the cache. 
*/ + if (argw == compiler->cache_argw) { + if (arg == compiler->cache_arg) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3))); + } + + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + tmp_ar = DR(TMP_REG3); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? 
OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + + if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + } + else { + compiler->cache_arg = SLJIT_MEM; + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + } + compiler->cache_argw = argw; + + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); +} + +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +{ + sljit_s32 tmp_ar, base, delay_slot; + + if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) + return compiler->error; + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar 
= reg_ar; + delay_slot = reg_ar; + } + else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar)); + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar)); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + + FAIL_IF(load_immediate(compiler, tmp_ar, argw)); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar)); + + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); +} + +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w) { + if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN)) + || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) { + flags |= SRC2_IMM; + src2_r = src2w; + } + } + if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { + if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN)) + || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1_r = TMP_REG1; + } + else + src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. 
*/ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); + src2_r = sugg_src2_r; + } + else { + src2_r = 0; + if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM)) + dst_r = 0; + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_s32 int_op = op & SLJIT_I32_OP; +#endif + + CHECK_ERROR(); + 
CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BREAK, UNMOVABLE_INS); + case SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULU : DMUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMUHU : DMUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULU : MUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MUHU : MUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1)); +#else /* SLJIT_MIPS_REV < 6 */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? 
MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (int_op) { + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); + } + else { + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DMODU : DMOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); + } +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0))); + return (op >= SLJIT_DIV_UW) ? 
SLJIT_SUCCESS : push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1)); +#else /* SLJIT_MIPS_REV < 6 */ +#if !(defined SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (int_op) + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + if (!(src & OFFS_REG_MASK)) { + if (srcw <= SIMM_MAX && srcw >= SIMM_MIN) + return push_inst(compiler, PREF | S(src & REG_MASK) | IMM(srcw), MOVABLE_INS); + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + + srcw &= 0x3; + + if (SLJIT_UNLIKELY(srcw != 0)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(src)) | D(TMP_REG1) | SH_IMM(srcw), DR(TMP_REG1))); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(OFFS_REG(src)), MOVABLE_INS); +} +#endif /* SLJIT_MIPS_REV >= 1 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_s32 flags = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) + flags |= INT_DATA | SIGNED_DATA; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_U32: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); +#endif + + case SLJIT_MOV_S32: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); +#endif + + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + + case SLJIT_CLZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_s32 flags = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op & SLJIT_I32_OP) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 & SLJIT_IMM) + src2w = (sljit_s32)src2w; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case 
SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + if (src2 & SLJIT_IMM) { + if (op & SLJIT_I32_OP) + src2w &= 0x1f; + else + src2w &= 0x3f; + } +#endif + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG)); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return emit_prefetch(compiler, src, srcw); +#else /* SLJIT_MIPS_REV < 1 */ + return SLJIT_SUCCESS; +#endif /* SLJIT_MIPS_REV >= 1 */ + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return FR(reg); +} + 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) +#define FMT(op) (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) << (21 - 8)) + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) << 21; +#endif + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS); + + /* Store the integer value from a VFP register. */ + return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef is_long +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21; +#endif + + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); + } + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins inst; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_EQUAL_F64: + case SLJIT_NOT_EQUAL_F64: + inst = C_UEQ_S; + break; + case SLJIT_LESS_F64: + case SLJIT_GREATER_EQUAL_F64: + inst = C_ULT_S; + break; + case SLJIT_GREATER_F64: + case SLJIT_LESS_EQUAL_F64: + inst = C_ULE_S; + break; + default: + SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED_F64 || GET_FLAG_TYPE(op) == SLJIT_ORDERED_F64); + inst = C_UN_S; + break; + } + return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1) | C_FD, 
UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(dst_r), src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_F32_OP) ? 
1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_F32_OP; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(dst_r), dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, 
dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG2), dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), UNMOVABLE_INS); + + /* Memory. 
*/ + FAIL_IF(emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw)); + compiler->delay_slot = UNMOVABLE_INS; + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define JUMP_LENGTH 4 +#else +#define JUMP_LENGTH 8 +#endif + +#define BR_Z(src) \ + inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#define BR_NZ(src) \ + inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +#define BR_T() \ + inst = BC1NEZ; \ + flags = IS_BIT23_COND; \ + delay_check = FCSR_FCC; +#define BR_F() \ + inst = BC1EQZ; \ + flags = IS_BIT23_COND; \ + delay_check = FCSR_FCC; + +#else /* SLJIT_MIPS_REV < 6 */ + +#define BR_T() \ + inst = BC1T | JUMP_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; +#define BR_F() \ + inst = BC1F | JUMP_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; + +#endif /* SLJIT_MIPS_REV >= 6 */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins inst; + sljit_s32 flags = 0; + sljit_s32 delay_check = UNMOVABLE_INS; + + CHECK_ERROR_PTR(); + 
CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + BR_NZ(EQUAL_FLAG); + break; + case SLJIT_NOT_EQUAL: + BR_Z(EQUAL_FLAG); + break; + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + BR_Z(OTHER_FLAG); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + BR_NZ(OTHER_FLAG); + break; + case SLJIT_NOT_EQUAL_F64: + case SLJIT_GREATER_EQUAL_F64: + case SLJIT_GREATER_F64: + case SLJIT_ORDERED_F64: + BR_T(); + break; + case SLJIT_EQUAL_F64: + case SLJIT_LESS_F64: + case SLJIT_LESS_EQUAL_F64: + case SLJIT_UNORDERED_F64: + BR_F(); + break; + default: + /* Not conditional branch. */ + inst = 0; + break; + } + + jump->flags |= flags; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check)) + jump->flags |= IS_MOVABLE; + + if (inst) + PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + else { + jump->flags |= IS_JAL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return jump; +} + +#define RESOLVE_IMM1() \ + if (src1 & SLJIT_IMM) { \ + if (src1w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \ + src1 = TMP_REG1; \ + } \ + else \ + src1 = 0; \ + } + +#define RESOLVE_IMM2() \ + if (src2 & SLJIT_IMM) { \ + if (src2w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ + src2 = TMP_REG2; \ + } \ + else \ + src2 = 0; 
\ + } + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_s32 flags; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + flags = ((type & SLJIT_I32_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); + src2 = TMP_REG2; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type <= SLJIT_NOT_EQUAL) { + RESOLVE_IMM1(); + RESOLVE_IMM2(); + jump->flags |= IS_BIT26_COND; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) + jump->flags |= IS_MOVABLE; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? 
BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS)); + } + else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { + inst = NOP; + if ((src1 & SLJIT_IMM) && (src1w == 0)) { + RESOLVE_IMM2(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + } + src1 = src2; + } + else { + RESOLVE_IMM1(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + } + } + PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS)); + } + else { + if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) { + RESOLVE_IMM1(); + if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); + else { + RESOLVE_IMM2(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_LESS || type == SLJIT_SIG_LESS) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + else { + RESOLVE_IMM2(); + if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? 
SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); + else { + RESOLVE_IMM1(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_GREATER || type == SLJIT_SIG_GREATER) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + + jump->flags |= IS_BIT26_COND; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS)); + } + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return jump; +} + +#undef RESOLVE_IMM1 +#undef RESOLVE_IMM2 + +#undef JUMP_LENGTH +#undef BR_Z +#undef BR_NZ +#undef BR_T +#undef BR_F + +#undef FLOAT_DATA +#undef FMT + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? 
IS_JAL : 0)); + jump->u.target = srcw; + + if (compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + src = TMP_REG2; + } + else if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG2), src, srcw)); + src = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); + if (jump) + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_ar, dst_ar; + sljit_s32 saved_op = op; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_s32 mem_type = WORD_DATA; +#else + sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op == SLJIT_MOV_S32) + mem_type = INT_DATA | SIGNED_DATA; +#endif + dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), dst, dstw, dst, dstw)); + + switch (type & 0xff) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB) { + src_ar = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + type ^= 0x1; /* Flip type bit for the XORI below. 
*/ + break; + case SLJIT_GREATER_F64: + case SLJIT_LESS_EQUAL_F64: + type ^= 0x1; /* Flip type bit for the XORI below. */ + case SLJIT_EQUAL_F64: + case SLJIT_NOT_EQUAL_F64: + case SLJIT_LESS_F64: + case SLJIT_GREATER_EQUAL_F64: + case SLJIT_UNORDERED_F64: + case SLJIT_ORDERED_F64: +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, MFC1 | TA(dst_ar) | FS(TMP_FREG3), dst_ar)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar)); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar)); + FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + break; + + default: + src_ar = OTHER_FLAG; + break; + } + + if (type & 0x1) { + FAIL_IF(push_inst(compiler, XORI | SA(src_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + } + + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_ar, dst, dstw); + + if (src_ar != dst_ar) + return push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | DA(dst_ar), dst_ar); + return SLJIT_SUCCESS; + } + + /* OTHER_FLAG cannot be specified as src2 argument at the moment. 
*/ + if (DR(TMP_REG2) != src_ar) + FAIL_IF(push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + + mem_type |= CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + sljit_ins ins; +#endif /* SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (dst_reg & SLJIT_I32_OP) + srcw = (sljit_s32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + src = TMP_REG1; + srcw = 0; + } + + dst_reg &= ~SLJIT_I32_OP; + + switch (type & 0xff) { + case SLJIT_EQUAL: + ins = MOVZ | TA(EQUAL_FLAG); + break; + case SLJIT_NOT_EQUAL: + ins = MOVN | TA(EQUAL_FLAG); + break; + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + ins = MOVN | TA(OTHER_FLAG); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + ins = MOVZ | TA(OTHER_FLAG); + break; + case SLJIT_EQUAL_F64: + case SLJIT_LESS_F64: + case SLJIT_LESS_EQUAL_F64: + case SLJIT_UNORDERED_F64: + ins = MOVT; + break; + case SLJIT_NOT_EQUAL_F64: + case SLJIT_GREATER_EQUAL_F64: + case SLJIT_GREATER_F64: + case SLJIT_ORDERED_F64: + ins = MOVF; + break; + default: + ins = MOVZ | TA(OTHER_FLAG); + SLJIT_UNREACHABLE(); + break; + } + + return 
push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg)); + +#else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */ + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#endif /* SLJIT_MIPS_REV >= 1 */ +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); +#else + PTR_FAIL_IF(push_inst(compiler, dst_r, UNMOVABLE_INS)); + compiler->size += 5; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + + return put_label; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_32.c new file mode 100644 index 0000000000..6ddb5508ec --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_32.c @@ -0,0 +1,281 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 32-bit arch dependent functions. */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; +} + +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1)) + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) +{ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + 
SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, CNTLZW | S(src2) | A(dst)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + } + + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ + SLJIT_ASSERT(src2 == TMP_REG2); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + SLJIT_ASSERT(!(flags & ALT_FORM4)); + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + if (flags & ALT_FORM5) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { + /* Setting XER SO is not enough, CR SO is also needed. 
*/ + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_SUBC: + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI 
| S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); + } + return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); + } + return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)); + } + return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + SLJIT_ASSERT((inst[0] & 
0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_64.c new file mode 100644 index 0000000000..cbdf2dd8a2 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_64.c @@ -0,0 +1,507 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 64-bit arch dependent functions. */ + +#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +#define ASM_SLJIT_CLZ(src, dst) \ + __asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) ) +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements" +#else +#error "Must implement count leading zeroes" +#endif + +#define PUSH_RLDICR(reg, shift) \ + push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1)) + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + sljit_uw tmp; + sljit_uw shift; + sljit_uw tmp2; + sljit_uw shift2; + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; + } + + /* Count leading zeroes. */ + tmp = (imm >= 0) ? 
imm : ~imm; + ASM_SLJIT_CLZ(tmp, shift); + SLJIT_ASSERT(shift > 0); + shift--; + tmp = (imm << shift); + + if ((tmp & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + shift += 15; + return PUSH_RLDICR(reg, shift); + } + + if ((tmp & ~0xffffffff00000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); + shift += 31; + return PUSH_RLDICR(reg, shift); + } + + /* Cut out the 16 bit from immediate. */ + shift += 15; + tmp2 = imm & ((1ul << (63 - shift)) - 1); + + if (tmp2 <= 0xffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_RLDICR(reg, shift)); + return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2); + } + + if (tmp2 <= 0xffffffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_RLDICR(reg, shift)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; + } + + ASM_SLJIT_CLZ(tmp2, shift2); + tmp2 <<= shift2; + + if ((tmp2 & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + shift2 += 15; + shift += (63 - shift2); + FAIL_IF(PUSH_RLDICR(reg, shift)); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48))); + return PUSH_RLDICR(reg, shift2); + } + + /* The general version. */ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); + FAIL_IF(PUSH_RLDICR(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)); +} + +/* Simplified mnemonics: clrldi. 
*/ +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5)) + +/* Sign extension for integer operations. */ +#define UN_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } + +#define BIN_EXTS() \ + if (flags & ALT_SIGN_EXT) { \ + if (flags & REG1_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } \ + if (flags & REG2_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } \ + } + +#define BIN_IMM_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) +{ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S32) + return push_inst(compiler, EXTSW | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + 
SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + + if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2))); + return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); + } + + UN_EXTS(); + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + if (flags & ALT_FORM1) + return push_inst(compiler, CNTLZW | S(src2) | A(dst)); + return push_inst(compiler, CNTLZD | S(src2) | A(dst)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + if (flags & ALT_SIGN_EXT) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } + /* Setting XER SO is not enough, CR SO is also needed. */ + FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2))); + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); + return SLJIT_SUCCESS; + } + + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ + SLJIT_ASSERT(src2 == TMP_REG2); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + BIN_IMM_EXTS(); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM4) { + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm)); + else + FAIL_IF(push_inst(compiler, ADD | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, CMPI | A(dst) | 0); + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + BIN_EXTS(); + if (flags & ALT_FORM5) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + BIN_EXTS(); + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 
0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { + if (flags & ALT_SIGN_EXT) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } + /* Setting XER SO is not enough, CR SO is also needed. */ + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); + return SLJIT_SUCCESS; + } + + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + BIN_EXTS(); + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_SUBC: + BIN_EXTS(); + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + BIN_EXTS(); + if (flags & ALT_FORM2) + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLD | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | 
S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); + } + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags)); + } + return push_inst(compiler, ((flags & ALT_FORM2) ? 
SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); + } + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags)); + } + return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)); + } + compiler->imm &= 0x3f; + return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)); + } + return push_inst(compiler, ((flags & ALT_FORM2) ? 
SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 reg = 0; + + if (src) + reg = *src & REG_MASK; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count++; + break; + default: + arg_count++; + word_arg_count++; + + if (arg_count != word_arg_count && arg_count == reg) { + FAIL_IF(push_inst(compiler, OR | S(reg) | A(TMP_CALL_REG) | B(reg))); + *src = TMP_CALL_REG; + } + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count--; + break; + default: + if (arg_count != word_arg_count) + FAIL_IF(push_inst(compiler, OR | S(word_arg_count) | A(arg_count) | B(word_arg_count))); + + arg_count--; + word_arg_count--; + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32))); + FAIL_IF(PUSH_RLDICR(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); + inst[0] = (inst[0] & 0xffff0000) | 
((new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_common.c new file mode 100644 index 0000000000..2174dbb07b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativePPC_common.c @@ -0,0 +1,2447 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "PowerPC" SLJIT_CPUINFO; +} + +/* Length of an instruction word. + Both for ppc-32 and ppc-64. */ +typedef sljit_u32 sljit_ins; + +#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_PPC_STACK_FRAME_V2 1 +#endif + +#ifdef _AIX +#include +#endif + +#if (defined _CALL_ELF && _CALL_ELF == 2) +#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1 +#endif + +#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) + +static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) +{ +#ifdef _AIX + _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from)); +#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +# if defined(_ARCH_PWR) || defined(_ARCH_PWR2) + /* Cache flush for POWER architecture. */ + while (from < to) { + __asm__ volatile ( + "clf 0, %0\n" + "dcs\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "ics" ); +# elif defined(_ARCH_COM) && !defined(_ARCH_PPC) +# error "Cache flush is not implemented for PowerPC/POWER common mode." +# else + /* Cache flush for PowerPC architecture. 
*/ + while (from < to) { + __asm__ volatile ( + "dcbf 0, %0\n" + "sync\n" + "icbi 0, %0\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "isync" ); +# endif +# ifdef __xlc__ +# warning "This file may fail to compile if -qfuncsect is used" +# endif +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc" +#else +#error "This platform requires a cache flush implementation." +#endif /* _AIX */ +} + +#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */ + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 4) + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 5) +#else +#define TMP_CALL_REG TMP_REG2 +#endif + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 1, 2, 3, 4, 5, 6, 0, 7 +}; + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ +#define D(d) (reg_map[d] << 21) +#define S(s) (reg_map[s] << 21) +#define A(a) (reg_map[a] << 16) +#define B(b) (reg_map[b] << 11) +#define C(c) (reg_map[c] << 6) +#define FD(fd) (freg_map[fd] << 21) +#define FS(fs) (freg_map[fs] << 21) +#define FA(fa) (freg_map[fa] << 16) +#define FB(fb) (freg_map[fb] << 11) +#define FC(fc) (freg_map[fc] << 6) +#define IMM(imm) ((imm) & 0xffff) +#define CRD(d) ((d) << 21) + +/* Instruction bit sections. + OE and Rc flag (see ALT_SET_FLAGS). 
*/ +#define OE(flags) ((flags) & ALT_SET_FLAGS) +/* Rc flag (see ALT_SET_FLAGS). */ +#define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10) +#define HI(opcode) ((opcode) << 26) +#define LO(opcode) ((opcode) << 1) + +#define ADD (HI(31) | LO(266)) +#define ADDC (HI(31) | LO(10)) +#define ADDE (HI(31) | LO(138)) +#define ADDI (HI(14)) +#define ADDIC (HI(13)) +#define ADDIS (HI(15)) +#define ADDME (HI(31) | LO(234)) +#define AND (HI(31) | LO(28)) +#define ANDI (HI(28)) +#define ANDIS (HI(29)) +#define Bx (HI(18)) +#define BCx (HI(16)) +#define BCCTR (HI(19) | LO(528) | (3 << 11)) +#define BLR (HI(19) | LO(16) | (0x14 << 21)) +#define CNTLZD (HI(31) | LO(58)) +#define CNTLZW (HI(31) | LO(26)) +#define CMP (HI(31) | LO(0)) +#define CMPI (HI(11)) +#define CMPL (HI(31) | LO(32)) +#define CMPLI (HI(10)) +#define CROR (HI(19) | LO(449)) +#define DCBT (HI(31) | LO(278)) +#define DIVD (HI(31) | LO(489)) +#define DIVDU (HI(31) | LO(457)) +#define DIVW (HI(31) | LO(491)) +#define DIVWU (HI(31) | LO(459)) +#define EXTSB (HI(31) | LO(954)) +#define EXTSH (HI(31) | LO(922)) +#define EXTSW (HI(31) | LO(986)) +#define FABS (HI(63) | LO(264)) +#define FADD (HI(63) | LO(21)) +#define FADDS (HI(59) | LO(21)) +#define FCFID (HI(63) | LO(846)) +#define FCMPU (HI(63) | LO(0)) +#define FCTIDZ (HI(63) | LO(815)) +#define FCTIWZ (HI(63) | LO(15)) +#define FDIV (HI(63) | LO(18)) +#define FDIVS (HI(59) | LO(18)) +#define FMR (HI(63) | LO(72)) +#define FMUL (HI(63) | LO(25)) +#define FMULS (HI(59) | LO(25)) +#define FNEG (HI(63) | LO(40)) +#define FRSP (HI(63) | LO(12)) +#define FSUB (HI(63) | LO(20)) +#define FSUBS (HI(59) | LO(20)) +#define LD (HI(58) | 0) +#define LWZ (HI(32)) +#define MFCR (HI(31) | LO(19)) +#define MFLR (HI(31) | LO(339) | 0x80000) +#define MFXER (HI(31) | LO(339) | 0x10000) +#define MTCTR (HI(31) | LO(467) | 0x90000) +#define MTLR (HI(31) | LO(467) | 0x80000) +#define MTXER (HI(31) | LO(467) | 0x10000) +#define MULHD (HI(31) | LO(73)) +#define MULHDU (HI(31) | LO(9)) 
+#define MULHW (HI(31) | LO(75)) +#define MULHWU (HI(31) | LO(11)) +#define MULLD (HI(31) | LO(233)) +#define MULLI (HI(7)) +#define MULLW (HI(31) | LO(235)) +#define NEG (HI(31) | LO(104)) +#define NOP (HI(24)) +#define NOR (HI(31) | LO(124)) +#define OR (HI(31) | LO(444)) +#define ORI (HI(24)) +#define ORIS (HI(25)) +#define RLDICL (HI(30)) +#define RLWINM (HI(21)) +#define SLD (HI(31) | LO(27)) +#define SLW (HI(31) | LO(24)) +#define SRAD (HI(31) | LO(794)) +#define SRADI (HI(31) | LO(413 << 1)) +#define SRAW (HI(31) | LO(792)) +#define SRAWI (HI(31) | LO(824)) +#define SRD (HI(31) | LO(539)) +#define SRW (HI(31) | LO(536)) +#define STD (HI(62) | 0) +#define STDU (HI(62) | 1) +#define STDUX (HI(31) | LO(181)) +#define STFIWX (HI(31) | LO(983)) +#define STW (HI(36)) +#define STWU (HI(37)) +#define STWUX (HI(31) | LO(183)) +#define SUBF (HI(31) | LO(40)) +#define SUBFC (HI(31) | LO(8)) +#define SUBFE (HI(31) | LO(136)) +#define SUBFIC (HI(8)) +#define XOR (HI(31) | LO(316)) +#define XORI (HI(26)) +#define XORIS (HI(27)) + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +#define RLDI(dst, src, sh, mb, type) \ + (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20)) + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func) +{ + sljit_sw* ptrs; + if (func_ptr) + *func_ptr = (void*)context; + ptrs = (sljit_sw*)func; + context->addr = addr ? 
addr : ptrs[0]; + context->r2 = ptrs[1]; + context->r11 = ptrs[2]; +} +#endif + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_sw extra_jump_flags; + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + return 0; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (jump->flags & IS_CALL) + goto keep_address; +#endif + + diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l; + + extra_jump_flags = 0; + if (jump->flags & IS_COND) { + if (diff <= 0x7fff && diff >= -0x8000) { + jump->flags |= PATCH_B; + return 1; + } + if (target_addr <= 0xffff) { + jump->flags |= PATCH_B | PATCH_ABS_B; + return 1; + } + extra_jump_flags = REMOVE_COND; + + diff -= sizeof(sljit_ins); + } + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + jump->flags |= PATCH_B | extra_jump_flags; + return 1; + } + + if (target_addr <= 0x03ffffff) { + jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; + return 1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +keep_address: +#endif + if (target_addr <= 
0x7fffffff) { + jump->flags |= PATCH_ABS32; + return 1; + } + + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + return 1; + } +#endif + + return 0; +} + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label < 0x100000000l) { + put_label->flags = 0; + return 1; + } + + if (max_label < 0x1000000000000l) { + put_label->flags = 1; + return 3; + } + + put_label->flags = 2; + return 4; +} + +static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) +{ + sljit_uw addr = put_label->label->addr; + sljit_ins *inst = (sljit_ins *)put_label->addr; + sljit_s32 reg = *inst; + + if (put_label->flags == 0) { + SLJIT_ASSERT(addr < 0x100000000l); + inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 16); + } + else { + if (put_label->flags == 1) { + SLJIT_ASSERT(addr < 0x1000000000000l); + inst[0] = ORI | S(TMP_ZERO) | A(reg) | IMM(addr >> 32); + } + else { + inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48); + inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff); + inst ++; + } + + inst[1] = RLDI(reg, reg, 32, 31, 1); + inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff); + inst += 2; + } + + inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff); +} + +#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) 
+ compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#else + compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#endif +#endif + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. 
*/ + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + if (detect_jump_type(jump, code_ptr, code, executable_offset)) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + code_ptr[-3] = code_ptr[0]; + code_ptr -= 3; +#else + if (jump->flags & PATCH_ABS32) { + code_ptr -= 3; + code_ptr[-1] = code_ptr[2]; + code_ptr[0] = code_ptr[3]; + } + else if (jump->flags & PATCH_ABS48) { + code_ptr--; + code_ptr[-1] = code_ptr[0]; + code_ptr[0] = code_ptr[1]; + /* rldicr rX,rX,32,31 -> rX,rX,16,47 */ + SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6); + code_ptr[-3] ^= 0x8422; + /* oris -> ori */ + code_ptr[-2] ^= 0x4000000; + } + else { + code_ptr[-6] = code_ptr[0]; + code_ptr -= 6; + } +#endif + if (jump->flags & REMOVE_COND) { + code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); + code_ptr++; + jump->addr += sizeof(sljit_ins); + code_ptr[0] = Bx; + jump->flags -= IS_COND; + } + } + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + word_count += 4; +#endif + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, 
executable_offset); + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins))); +#else + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); +#endif + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + if (jump->flags & IS_COND) { + if (!(jump->flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); + *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); + } + else { + SLJIT_ASSERT(addr <= 0xffff); + *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); + } + } + else { + if (!(jump->flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); + *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); + } + else { + SLJIT_ASSERT(addr <= 0x03ffffff); + *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); + } + } + break; + } + + /* Set the fields of immediate loads. 
*/ +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); +#else + if (jump->flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= 0x7fffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + break; + } + if (jump->flags & PATCH_ABS48) { + SLJIT_ASSERT(addr <= 0x7fffffffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); + break; + } + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff); +#endif + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + addr = put_label->label->addr; + buf_ptr = (sljit_ins *)put_label->addr; + + SLJIT_ASSERT((buf_ptr[0] & 0xfc1f0000) == ADDIS && (buf_ptr[1] & 0xfc000000) == ORI); + buf_ptr[0] |= (addr >> 16) & 0xffff; + buf_ptr[1] |= addr & 0xffff; +#else + put_label_set(put_label); +#endif + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (((sljit_sw)code_ptr) & 0x4) + code_ptr++; +#endif + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); +#endif + + code_ptr = (sljit_ins 
*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + return code_ptr; +#else + return code; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + /* A saved register is set to a zero value. */ + case SLJIT_HAS_ZERO_REGISTER: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_PREFETCH: + return 1; + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* inp_flags: */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define INDEXED 0x02 +#define SIGNED_DATA 0x04 + +#define WORD_DATA 0x00 +#define BYTE_DATA 0x08 +#define HALF_DATA 0x10 +#define INT_DATA 0x18 +/* Separates integer and floating point registers */ +#define GPR_REG 0x1f +#define DOUBLE_DATA 0x20 + +#define MEM_MASK 0x7f + +/* Other inp_flags. */ + +/* Integer opertion and set flags -> requires exts on 64 bit systems. */ +#define ALT_SIGN_EXT 0x000100 +/* This flag affects the RC() and OERC() macros. */ +#define ALT_SET_FLAGS 0x000400 +#define ALT_FORM1 0x001000 +#define ALT_FORM2 0x002000 +#define ALT_FORM3 0x004000 +#define ALT_FORM4 0x008000 +#define ALT_FORM5 0x010000 + +/* Source and destination is register. */ +#define REG_DEST 0x000001 +#define REG1_SOURCE 0x000002 +#define REG2_SOURCE 0x000004 +/* +ALT_SIGN_EXT 0x000100 +ALT_SET_FLAGS 0x000200 +ALT_FORM1 0x001000 +... 
+ALT_FORM5 0x010000 */ + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#include "sljitNativePPC_32.c" +#else +#include "sljitNativePPC_64.c" +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define STACK_STORE STW +#define STACK_LOAD LWZ +#else +#define STACK_STORE STD +#define STACK_LOAD LD +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args, i, tmp, offs; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + FAIL_IF(push_inst(compiler, MFLR | D(0))); + offs = -(sljit_s32)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_s32)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_s32)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); + +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); +#else + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); +#endif + + FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); + + args = get_arg_count(arg_types); + + if (args >= 1) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); + if (args >= 2) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1))); + if (args >= 3) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2))); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; + compiler->local_size = local_size; + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); + } +#else + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); + } +#endif + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct 
sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + compiler->local_size = (local_size + 15) & ~0xf; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 i, tmp, offs; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + if (compiler->local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); + FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0))); + } + +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); +#else + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); +#endif + + offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_s32)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_s32)(sizeof(sljit_sw)); + } + + FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw))); + + FAIL_IF(push_inst(compiler, MTLR | S(0))); + FAIL_IF(push_inst(compiler, BLR)); + + return SLJIT_SUCCESS; +} + +#undef STACK_STORE +#undef STACK_LOAD + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* s/l - store/load (1 bit) + i/x - immediate/indexed form + u/s - signed/unsigned (1 bit) + w/b/h/i - word/byte/half/int allowed (2 bit) + + Some opcodes are repeated (e.g. store signed / unsigned byte is the same instruction). */ + +/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define INT_ALIGNED 0x10000 +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define ARCH_32_64(a, b) a +#define INST_CODE_AND_DST(inst, flags, reg) \ + ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#else +#define ARCH_32_64(a, b) b +#define INST_CODE_AND_DST(inst, flags, reg) \ + (((inst) & ~INT_ALIGNED) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#endif + +static const sljit_ins data_transfer_insts[64 + 16] = { + +/* -------- Integer -------- */ + +/* Word. 
*/ + +/* w u i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* w u i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* w u x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* w u x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* w s i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* w s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* w s x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* w s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* Byte. */ + +/* b u i s */ HI(38) /* stb */, +/* b u i l */ HI(34) /* lbz */, +/* b u x s */ HI(31) | LO(215) /* stbx */, +/* b u x l */ HI(31) | LO(87) /* lbzx */, + +/* b s i s */ HI(38) /* stb */, +/* b s i l */ HI(34) /* lbz */ /* EXTS_REQ */, +/* b s x s */ HI(31) | LO(215) /* stbx */, +/* b s x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */, + +/* Half. */ + +/* h u i s */ HI(44) /* sth */, +/* h u i l */ HI(40) /* lhz */, +/* h u x s */ HI(31) | LO(407) /* sthx */, +/* h u x l */ HI(31) | LO(279) /* lhzx */, + +/* h s i s */ HI(44) /* sth */, +/* h s i l */ HI(42) /* lha */, +/* h s x s */ HI(31) | LO(407) /* sthx */, +/* h s x l */ HI(31) | LO(343) /* lhax */, + +/* Int. 
*/ + +/* i u i s */ HI(36) /* stw */, +/* i u i l */ HI(32) /* lwz */, +/* i u x s */ HI(31) | LO(151) /* stwx */, +/* i u x l */ HI(31) | LO(23) /* lwzx */, + +/* i s i s */ HI(36) /* stw */, +/* i s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */), +/* i s x s */ HI(31) | LO(151) /* stwx */, +/* i s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), + +/* -------- Floating point -------- */ + +/* d i s */ HI(54) /* stfd */, +/* d i l */ HI(50) /* lfd */, +/* d x s */ HI(31) | LO(727) /* stfdx */, +/* d x l */ HI(31) | LO(599) /* lfdx */, + +/* s i s */ HI(52) /* stfs */, +/* s i l */ HI(48) /* lfs */, +/* s x s */ HI(31) | LO(663) /* stfsx */, +/* s x l */ HI(31) | LO(535) /* lfsx */, +}; + +static const sljit_ins updated_data_transfer_insts[64] = { + +/* -------- Integer -------- */ + +/* Word. */ + +/* w u i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* w u i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* w u x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* w u x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* w s i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* w s i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* w s x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* w s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* Byte. */ + +/* b u i s */ HI(39) /* stbu */, +/* b u i l */ HI(35) /* lbzu */, +/* b u x s */ HI(31) | LO(247) /* stbux */, +/* b u x l */ HI(31) | LO(119) /* lbzux */, + +/* b s i s */ HI(39) /* stbu */, +/* b s i l */ 0 /* no such instruction */, +/* b s x s */ HI(31) | LO(247) /* stbux */, +/* b s x l */ 0 /* no such instruction */, + +/* Half. 
*/ + +/* h u i s */ HI(45) /* sthu */, +/* h u i l */ HI(41) /* lhzu */, +/* h u x s */ HI(31) | LO(439) /* sthux */, +/* h u x l */ HI(31) | LO(311) /* lhzux */, + +/* h s i s */ HI(45) /* sthu */, +/* h s i l */ HI(43) /* lhau */, +/* h s x s */ HI(31) | LO(439) /* sthux */, +/* h s x l */ HI(31) | LO(375) /* lhaux */, + +/* Int. */ + +/* i u i s */ HI(37) /* stwu */, +/* i u i l */ HI(33) /* lwzu */, +/* i u x s */ HI(31) | LO(183) /* stwux */, +/* i u x l */ HI(31) | LO(55) /* lwzux */, + +/* i s i s */ HI(37) /* stwu */, +/* i s i l */ ARCH_32_64(HI(33) /* lwzu */, 0 /* no such instruction */), +/* i s x s */ HI(31) | LO(183) /* stwux */, +/* i s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */), + +/* -------- Floating point -------- */ + +/* d i s */ HI(55) /* stfdu */, +/* d i l */ HI(51) /* lfdu */, +/* d x s */ HI(31) | LO(759) /* stfdux */, +/* d x l */ HI(31) | LO(631) /* lfdux */, + +/* s i s */ HI(53) /* stfsu */, +/* s i l */ HI(49) /* lfsu */, +/* s x s */ HI(31) | LO(695) /* stfsux */, +/* s x l */ HI(31) | LO(567) /* lfsux */, +}; + +#undef ARCH_32_64 + +/* Simple cases, (no caching is required). */ +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_ins inst; + sljit_s32 offs_reg; + sljit_sw high_short; + + /* Should work when (arg & REG_MASK) == 0. 
*/ + SLJIT_ASSERT(A(0) == 0); + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + offs_reg = OFFS_REG(arg); + + if (argw != 0) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | (argw << 11) | ((31 - argw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1))); +#endif + offs_reg = tmp_reg; + } + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + SLJIT_ASSERT(!(inst & INT_ALIGNED)); +#endif + + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg)); + } + + inst = data_transfer_insts[inp_flags & MEM_MASK]; + arg &= REG_MASK; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inst & INT_ALIGNED) && (argw & 0x3) != 0) { + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg)); + } +#endif + + if (argw <= SIMM_MAX && argw >= SIMM_MIN) + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (argw <= 0x7fff7fffl && argw >= -0x80000000l) { +#endif + + high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l); +#else + SLJIT_ASSERT(high_short); +#endif + + FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM(high_short >> 16))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } + + /* The rest is PPC-64 only. 
*/ + + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg)); +#endif +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + result goes to TMP_REG2, so put result can use TMP_REG1. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_s32 src2_r; + sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); + + /* Destination check. */ + if (SLOW_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + sugg_src2_r = dst_r; + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_r = TMP_REG1; + } + + /* Source 2. 
*/ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); + src2_r = sugg_src2_r; + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, input_flags, dst_r, dst, dstw, TMP_REG1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 int_op = op & SLJIT_I32_OP; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + case SLJIT_NOP: + return push_inst(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#else + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#endif + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (int_op ? 
MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#else + FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#endif + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#else + return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#endif + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + if (!(src & OFFS_REG_MASK)) { + if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED) + return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + /* Works with SLJIT_MEM0() case as well. */ + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1)); + } + + srcw &= 0x3; + + if (srcw == 0) + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src))); + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1))); +#endif + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1)); +} + +#define EMIT_MOV(type, type_flags, type_cast) \ + emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
type_cast srcw : srcw) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0; + sljit_s32 op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + op = GET_OPCODE(op); + if ((src & SLJIT_IMM) && srcw == 0) + src = TMP_ZERO; + + if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + + if (op < SLJIT_NOT && FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op_flags & SLJIT_I32_OP) { + if (op < SLJIT_NOT) { + if (src & SLJIT_MEM) { + if (op == SLJIT_MOV_S32) + op = SLJIT_MOV_U32; + } + else if (src & SLJIT_IMM) { + if (op == SLJIT_MOV_U32) + op = SLJIT_MOV_S32; + } + } + else { + /* Most operations expect sign extended arguments. 
*/ + flags |= INT_DATA | SIGNED_DATA; + if (HAS_FLAGS(op_flags)) + flags |= ALT_SIGN_EXT; + } + } +#endif + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: +#endif + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + case SLJIT_MOV_U32: + return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32)); + + case SLJIT_MOV_S32: + return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32)); +#endif + + case SLJIT_MOV_U8: + return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8)); + + case SLJIT_MOV_S8: + return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8)); + + case SLJIT_MOV_U16: + return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16)); + + case SLJIT_MOV_S16: + return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16)); + + case SLJIT_NOT: + return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_CLZ: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 
0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw); +#endif + } + + return SLJIT_SUCCESS; +} + +#undef EMIT_MOV + +#define TEST_SL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) + +#define TEST_UL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffff)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l) +#else +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff)) +#endif + +#define TEST_UH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l) +#else +#define TEST_ADD_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_UI_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff)) +#else +#define TEST_UI_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z | SLJIT_SET_CARRY)) +#define TEST_SUB_FORM2(op) \ + ((GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z)) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) +#else +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#define TEST_SUB_FORM2(op) \ + (GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= 
SLJIT_SIG_LESS_EQUAL) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + + if ((src1 & SLJIT_IMM) && src1w == 0) + src1 = TMP_ZERO; + if ((src2 & SLJIT_IMM) && src2w == 0) + src2 = TMP_ZERO; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_I32_OP) { + /* Most operations expect sign extended arguments. */ + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_s32)(src1w); + if (src2 & SLJIT_IMM) + src2w = (sljit_s32)(src2w); + if (HAS_FLAGS(op)) + flags |= ALT_SIGN_EXT; + } +#endif + if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (TEST_ADD_FORM1(op)) + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); + + if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src2, src2w)) { + compiler->imm = (src2w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src1, src1w)) { + compiler->imm = 
(src1w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + /* Range between -1 and -32768 is covered above. */ + if (TEST_ADD_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_ADD_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src2, src2w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } +#endif + if (HAS_FLAGS(op)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_ADDC: + return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { + if (dst == SLJIT_UNUSED) { + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); + } + + if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) { + compiler->imm = src2w; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); + } + + if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w); + } + + if (TEST_SUB_FORM2(op)) { + if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } + + if (TEST_SUB_FORM3(op)) + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); + + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | (!HAS_FLAGS(op) ? 
ALT_FORM2 : ALT_FORM3), dst, dstw, src1, src1w, TMP_REG2, 0); + } + + if (TEST_SL_IMM(src1, src1w) && !(op & SLJIT_SET_Z)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } + + if (!HAS_FLAGS(op)) { + if (TEST_SH_IMM(src2, -src2w)) { + compiler->imm = ((-src2w) >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + /* Range between -1 and -32768 is covered above. */ + if (TEST_ADD_IMM(src2, -src2w)) { + compiler->imm = -src2w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + } + + /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ + return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUBC: + return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_I32_OP) + flags |= ALT_FORM2; +#endif + if (!HAS_FLAGS(op)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + else + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + /* Commutative unsigned operations. 
*/ + if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UL_IMM(src1, src1w)) { + compiler->imm = src1w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src2, src2w)) { + compiler->imm = (src2w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src1, src1w)) { + compiler->imm = (src1w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (GET_OPCODE(op) != SLJIT_AND && GET_OPCODE(op) != SLJIT_AND) { + /* Unlike or and xor, and resets unwanted bits as well. */ + if (TEST_UI_IMM(src2, src2w)) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UI_IMM(src1, src1w)) { + compiler->imm = src1w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_I32_OP) + flags |= ALT_FORM2; +#endif + if (src2 & SLJIT_IMM) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +#undef TEST_ADD_FORM1 +#undef TEST_SUB_FORM2 +#undef TEST_SUB_FORM3 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + 
CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MTLR | S(src))); + else { + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2))); + } + + return push_inst(compiler, BLR); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return emit_prefetch(compiler, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? 
single : double) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw)) + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw)) +#endif + +#endif /* SLJIT_CONFIG_PPC_64 */ + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + /* We can ignore the temporary data store on the stack from caching point of view. */ + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1)); + src = TMP_FREG1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + op = GET_OPCODE(op); + FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? 
FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); + + if (op == SLJIT_CONV_SW_FROM_F64) { + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + return emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1); + } + return emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, TMP_REG1); + } +#else + FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src))); +#endif + + if (FAST_IS_REG(dst)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1))); + return emit_op_mem(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1); + } + + SLJIT_ASSERT(dst & SLJIT_MEM); + + if (dst & OFFS_REG_MASK) { + dstw &= 0x3; + if (dstw) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1))); +#endif + dstw = TMP_REG1; + } + else + dstw = OFFS_REG(dst); + } + else { + if ((dst & REG_MASK) && !dstw) { + dstw = dst & REG_MASK; + dst = 0; + } + else { + /* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */ + FAIL_IF(load_immediate(compiler, TMP_REG1, dstw)); + dstw = TMP_REG1; + } + } + + return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src & SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); + else + FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + } + else + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1)); + + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + if (op & SLJIT_F32_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#else + + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_s32 invert_sign = 1; + + if (src & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000)); + src = TMP_REG1; + invert_sign = 0; + } + else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31))) + The double precision format has exactly 53 bit precision, so the lower 32 bit represents + the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000 + to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating + point value, we need to substract 2^53 + 2^31 from the constructed value. 
*/ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + if (invert_sign) + FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2)); + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + if (op & SLJIT_F32_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2)); + src2 = TMP_FREG2; + } + + return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + 
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, TMP_REG1)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_CONV_F64_FROM_F32: + op ^= SLJIT_F32_OP; + if (op & SLJIT_F32_OP) { + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); + break; + } + /* Fall through. */ + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src))); + break; + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), dst_r, dst, dstw, TMP_REG1)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2)); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2))); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2))); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2))); + break; + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, TMP_REG1)); + + return SLJIT_SUCCESS; +} + +#undef SELECT_FOP + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MFLR | D(dst)); + + /* Memory. 
*/ + FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2))); + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +static sljit_ins get_bo_bi_flags(sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + return (12 << 21) | (2 << 16); + + case SLJIT_NOT_EQUAL: + return (4 << 21) | (2 << 16); + + case SLJIT_LESS: + case SLJIT_SIG_LESS: + return (12 << 21) | (0 << 16); + + case SLJIT_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + return (4 << 21) | (0 << 16); + + case SLJIT_GREATER: + case SLJIT_SIG_GREATER: + return (12 << 21) | (1 << 16); + + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + return (4 << 21) | (1 << 16); + + case SLJIT_LESS_F64: + return (12 << 21) | ((4 + 0) << 16); + + case SLJIT_GREATER_EQUAL_F64: + return (4 << 21) | ((4 + 0) << 16); + + case SLJIT_GREATER_F64: + return (12 << 21) | ((4 + 1) << 16); + + case SLJIT_LESS_EQUAL_F64: + return (4 << 21) | ((4 + 1) << 16); + + case SLJIT_OVERFLOW: + return (12 << 21) | (3 << 16); + + case SLJIT_NOT_OVERFLOW: + return (4 << 21) | (3 << 16); + + case SLJIT_EQUAL_F64: + return (12 << 21) | ((4 + 2) << 16); + + case SLJIT_NOT_EQUAL_F64: + return (4 << 21) | ((4 + 2) << 16); + + case SLJIT_UNORDERED_F64: + return (12 << 21) | ((4 + 3) << 16); + + case SLJIT_ORDERED_F64: + return (4 << 21) | ((4 + 3) << 16); + + default: + 
SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL); + return (20 << 21); + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins bo_bi_flags; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + bo_bi_flags = get_bo_bi_flags(type & 0xff); + if (!bo_bi_flags) + return NULL; + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + /* In PPC, we don't need to touch the arguments. */ + if (type < SLJIT_JUMP) + jump->flags |= IS_COND; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL) + jump->flags |= IS_CALL; +#endif + + PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG))); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 
1 : 0))); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + sljit_s32 src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) { +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src_r = TMP_CALL_REG; + } + else + src_r = src; +#else + src_r = src; +#endif + } else if (src & SLJIT_IMM) { + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = srcw; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL) + jump->flags |= IS_CALL; +#endif + FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + src_r = TMP_CALL_REG; + } + else { + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); + src_r = TMP_CALL_REG; + } + + FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); + if (jump) + jump->addr = compiler->size; + return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 
1 : 0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); + src = TMP_CALL_REG; + } + + FAIL_IF(call_with_args(compiler, arg_types, &src)); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 reg, input_flags, cr_bit, invert; + sljit_s32 saved_op = op; + sljit_sw saved_dstw = dstw; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA; +#else + input_flags = WORD_DATA; +#endif + + op = GET_OPCODE(op); + reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1)); + + invert = 0; + cr_bit = 0; + + switch (type & 0xff) { + case SLJIT_LESS: + case SLJIT_SIG_LESS: + break; + + case SLJIT_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + invert = 1; + break; + + case SLJIT_GREATER: + case SLJIT_SIG_GREATER: + cr_bit = 1; + break; + + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + cr_bit = 1; + invert = 1; + break; + + case SLJIT_EQUAL: + cr_bit = 2; + break; + + case SLJIT_NOT_EQUAL: + cr_bit = 2; + invert = 1; + break; + + case SLJIT_OVERFLOW: + cr_bit = 3; + break; + + case SLJIT_NOT_OVERFLOW: + cr_bit = 3; + invert = 1; + break; + + case SLJIT_LESS_F64: + cr_bit = 4 + 0; + break; + + case SLJIT_GREATER_EQUAL_F64: + cr_bit = 4 + 0; + invert = 1; + break; + + case SLJIT_GREATER_F64: + cr_bit = 4 + 1; + break; + + case SLJIT_LESS_EQUAL_F64: + cr_bit = 4 + 1; + invert = 1; + break; + + case SLJIT_EQUAL_F64: + cr_bit = 4 + 2; + break; + + case SLJIT_NOT_EQUAL_F64: + cr_bit = 4 + 2; + invert = 1; + break; + + case SLJIT_UNORDERED_F64: + cr_bit = 4 + 3; + break; + + case SLJIT_ORDERED_F64: + cr_bit = 4 + 3; + invert = 1; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + + FAIL_IF(push_inst(compiler, MFCR | D(reg))); + FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1))); + + if (invert) + FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1)); + + if (op < SLJIT_ADD) { + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1); + } + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + if (dst & SLJIT_MEM) + return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0); + return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, 
TMP_REG2, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 mem_flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (type & SLJIT_MEM_POST) + return SLJIT_ERR_UNSUPPORTED; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: +#endif + mem_flags = WORD_DATA; + break; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + case SLJIT_MOV_U32: + mem_flags = INT_DATA; + break; + + case SLJIT_MOV_S32: + mem_flags = INT_DATA; + + if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_I32_OP)) { + if (mem & OFFS_REG_MASK) + mem_flags |= SIGNED_DATA; + else + return SLJIT_ERR_UNSUPPORTED; + } + break; +#endif + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + mem_flags = BYTE_DATA; + break; + + case SLJIT_MOV_U16: + mem_flags = HALF_DATA; + break; + + case SLJIT_MOV_S16: + mem_flags = HALF_DATA | SIGNED_DATA; + break; + + default: + SLJIT_UNREACHABLE(); + mem_flags = WORD_DATA; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + mem_flags |= LOAD_DATA; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (memw != 0) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + inst = updated_data_transfer_insts[mem_flags | INDEXED]; + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | B(OFFS_REG(mem)))); + } + else { + if (memw > SIMM_MAX || memw < SIMM_MIN) + return SLJIT_ERR_UNSUPPORTED; + + inst = 
updated_data_transfer_insts[mem_flags]; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inst & INT_ALIGNED) && (memw & 0x3) != 0) + return SLJIT_ERR_UNSUPPORTED; +#endif + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | IMM(memw))); + } + + if ((mem_flags & LOAD_DATA) && (type & 0xff) == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(reg) | A(reg)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 mem_flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & SLJIT_MEM_POST) + return SLJIT_ERR_UNSUPPORTED; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (memw != 0) + return SLJIT_ERR_UNSUPPORTED; + } + else { + if (memw > SIMM_MAX || memw < SIMM_MIN) + return SLJIT_ERR_UNSUPPORTED; + } + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + mem_flags = FLOAT_DATA(type); + + if (!(type & SLJIT_MEM_STORE)) + mem_flags |= LOAD_DATA; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + inst = updated_data_transfer_insts[mem_flags | INDEXED]; + return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | B(OFFS_REG(mem))); + } + + inst = updated_data_transfer_insts[mem_flags]; + return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | IMM(memw)); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + 
set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); +#else + PTR_FAIL_IF(push_inst(compiler, dst_r)); + compiler->size += 4; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + + return put_label; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeS390X.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeS390X.c new file mode 100644 index 0000000000..716491ec72 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeS390X.c @@ -0,0 +1,3034 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#ifdef __ARCH__ +#define ENABLE_STATIC_FACILITY_DETECTION 1 +#else +#define ENABLE_STATIC_FACILITY_DETECTION 0 +#endif +#define ENABLE_DYNAMIC_FACILITY_DETECTION 1 + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "s390x" SLJIT_CPUINFO; +} + +/* Instructions. */ +typedef sljit_uw sljit_ins; + +/* Instruction tags (most significant halfword). */ +static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 +}; + +/* there are also a[2-15] available, but they are slower to access and + * their use is limited as mundaym explained: + * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689 + */ + +/* General Purpose Registers [0-15]. 
*/ +typedef sljit_uw sljit_gpr; + +/* + * WARNING + * the following code is non standard and should be improved for + * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based + * registers because r0 and r1 are the ABI recommended volatiles. + * there is a gpr() function that maps sljit to physical register numbers + * that should be used instead of the usual index into reg_map[] and + * will be retired ASAP (TODO: carenas) + */ + +static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ +static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ +static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ +static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ +static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ +static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ +static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ +static const sljit_gpr r7 = 7; /* reg_map[6] */ +static const sljit_gpr r8 = 8; /* reg_map[7] */ +static const sljit_gpr r9 = 9; /* reg_map[8] */ +static const sljit_gpr r10 = 10; /* reg_map[9] */ +static const sljit_gpr r11 = 11; /* reg_map[10] */ +static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ +static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ +static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ +static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ + +/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */ +/* TODO(carenas): r12 might conflict in PIC code, reserve? */ +/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp + * like we do know might be faster though, reserve? 
+ */ + +/* TODO(carenas): should be named TMP_REG[1-2] for consistency */ +#define tmp0 r0 +#define tmp1 r1 + +/* TODO(carenas): flags should move to a different register so that + * link register doesn't need to change + */ + +/* Link registers. The normal link register is r14, but since + we use that for flags we need to use r0 instead to do fast + calls so that flags are preserved. */ +static const sljit_gpr link_r = 14; /* r14 */ +static const sljit_gpr fast_link_r = 0; /* r0 */ + +/* Flag register layout: + + 0 32 33 34 36 64 + +---------------+---+---+-------+-------+ + | ZERO | 0 | 0 | C C |///////| + +---------------+---+---+-------+-------+ +*/ +static const sljit_gpr flag_r = 14; /* r14 */ + +struct sljit_s390x_const { + struct sljit_const const_; /* must be first */ + sljit_sw init_value; /* required to build literal pool */ +}; + +/* Convert SLJIT register to hardware register. */ +static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) +{ + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0]))); + return reg_map[r]; +} + +/* Size of instruction in bytes. Tags must already be cleared. 
*/ +static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) +{ + /* keep faulting instructions */ + if (ins == 0) + return 2; + + if ((ins & 0x00000000ffffL) == ins) + return 2; + if ((ins & 0x0000ffffffffL) == ins) + return 4; + if ((ins & 0xffffffffffffL) == ins) + return 6; + + SLJIT_UNREACHABLE(); + return (sljit_uw)-1; +} + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ibuf); + *ibuf = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 encode_inst(void **ptr, sljit_ins ins) +{ + sljit_u16 *ibuf = (sljit_u16 *)*ptr; + sljit_uw size = sizeof_ins(ins); + + SLJIT_ASSERT((size & 6) == size); + switch (size) { + case 6: + *ibuf++ = (sljit_u16)(ins >> 32); + /* fallthrough */ + case 4: + *ibuf++ = (sljit_u16)(ins >> 16); + /* fallthrough */ + case 2: + *ibuf++ = (sljit_u16)(ins); + } + *ptr = (void*)ibuf; + return SLJIT_SUCCESS; +} + +#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + +/* Map the given type to a 4-bit condition code mask. 
*/ +static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { + const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */ + const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */ + const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */ + const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */ + + switch (type) { + case SLJIT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return cc0; + if (type == SLJIT_OVERFLOW) + return (cc0 | cc3); + return (cc0 | cc2); + } + + case SLJIT_EQUAL_F64: + return cc0; + + case SLJIT_NOT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return (cc1 | cc2 | cc3); + if (type == SLJIT_OVERFLOW) + return (cc1 | cc2); + return (cc1 | cc3); + } + + case SLJIT_NOT_EQUAL_F64: + return (cc1 | cc2 | cc3); + + case SLJIT_LESS: + return cc1; + + case SLJIT_GREATER_EQUAL: + return (cc0 | cc2 | cc3); + + case SLJIT_GREATER: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return cc2; + return cc3; + + case SLJIT_LESS_EQUAL: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return (cc0 | cc1); + return (cc0 | cc1 | cc2); + + case SLJIT_SIG_LESS: + case SLJIT_LESS_F64: + return cc1; + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: + return (cc0 | cc1); + + case SLJIT_SIG_GREATER: + /* Overflow is considered greater, see SLJIT_SUB. 
*/ + return cc2 | cc3; + + case SLJIT_SIG_GREATER_EQUAL: + return (cc0 | cc2 | cc3); + + case SLJIT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc2 | cc3); + + case SLJIT_UNORDERED_F64: + return cc3; + + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc0 | cc1); + + case SLJIT_ORDERED_F64: + return (cc0 | cc1 | cc2); + + case SLJIT_GREATER_F64: + return cc2; + + case SLJIT_GREATER_EQUAL_F64: + return (cc0 | cc2); + } + + SLJIT_UNREACHABLE(); + return (sljit_u8)-1; +} + +/* Facility to bit index mappings. + Note: some facilities share the same bit index. */ +typedef sljit_uw facility_bit; +#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7 +#define FAST_LONG_DISPLACEMENT_FACILITY 19 +#define EXTENDED_IMMEDIATE_FACILITY 21 +#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34 +#define DISTINCT_OPERAND_FACILITY 45 +#define HIGH_WORD_FACILITY 45 +#define POPULATION_COUNT_FACILITY 45 +#define LOAD_STORE_ON_CONDITION_1_FACILITY 45 +#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49 +#define LOAD_STORE_ON_CONDITION_2_FACILITY 53 +#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58 +#define VECTOR_FACILITY 129 +#define VECTOR_ENHANCEMENTS_1_FACILITY 135 + +/* Report whether a facility is known to be present due to the compiler + settings. This function should always be compiled to a constant + value given a constant argument. 
*/ +static SLJIT_INLINE int have_facility_static(facility_bit x) +{ +#if ENABLE_STATIC_FACILITY_DETECTION + switch (x) { + case FAST_LONG_DISPLACEMENT_FACILITY: + return (__ARCH__ >= 6 /* z990 */); + case EXTENDED_IMMEDIATE_FACILITY: + case STORE_FACILITY_LIST_EXTENDED_FACILITY: + return (__ARCH__ >= 7 /* z9-109 */); + case GENERAL_INSTRUCTION_EXTENSION_FACILITY: + return (__ARCH__ >= 8 /* z10 */); + case DISTINCT_OPERAND_FACILITY: + return (__ARCH__ >= 9 /* z196 */); + case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY: + return (__ARCH__ >= 10 /* zEC12 */); + case LOAD_STORE_ON_CONDITION_2_FACILITY: + case VECTOR_FACILITY: + return (__ARCH__ >= 11 /* z13 */); + case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY: + case VECTOR_ENHANCEMENTS_1_FACILITY: + return (__ARCH__ >= 12 /* z14 */); + default: + SLJIT_UNREACHABLE(); + } +#endif + return 0; +} + +static SLJIT_INLINE unsigned long get_hwcap() +{ + static unsigned long hwcap = 0; + if (SLJIT_UNLIKELY(!hwcap)) { + hwcap = getauxval(AT_HWCAP); + SLJIT_ASSERT(hwcap != 0); + } + return hwcap; +} + +static SLJIT_INLINE int have_stfle() +{ + if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY)) + return 1; + + return (get_hwcap() & HWCAP_S390_STFLE); +} + +/* Report whether the given facility is available. This function always + performs a runtime check. 
*/ +static int have_facility_dynamic(facility_bit x) +{ +#if ENABLE_DYNAMIC_FACILITY_DETECTION + static struct { + sljit_uw bits[4]; + } cpu_features; + size_t size = sizeof(cpu_features); + const sljit_uw word_index = x >> 6; + const sljit_uw bit_index = ((1UL << 63) >> (x & 63)); + + SLJIT_ASSERT(x < size * 8); + if (SLJIT_UNLIKELY(!have_stfle())) + return 0; + + if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) { + __asm__ __volatile__ ( + "lgr %%r0, %0;" + "stfle 0(%1);" + /* outputs */: + /* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features) + /* clobbers */: "r0", "cc", "memory" + ); + SLJIT_ASSERT(cpu_features.bits[0] != 0); + } + return (cpu_features.bits[word_index] & bit_index) != 0; +#else + return 0; +#endif +} + +#define HAVE_FACILITY(name, bit) \ +static SLJIT_INLINE int name() \ +{ \ + static int have = -1; \ + /* Static check first. May allow the function to be optimized away. */ \ + if (have_facility_static(bit)) \ + have = 1; \ + else if (SLJIT_UNLIKELY(have < 0)) \ + have = have_facility_dynamic(bit) ? 
1 : 0; \ +\ + return have; \ +} + +HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY) +HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY) +HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY) +HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY) +HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY) +HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY) +#undef HAVE_FACILITY + +#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL) +#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL) + +#define CHECK_SIGNED(v, bitlen) \ + ((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1))) + +#define is_s8(d) CHECK_SIGNED((d), 8) +#define is_s16(d) CHECK_SIGNED((d), 16) +#define is_s20(d) CHECK_SIGNED((d), 20) +#define is_s32(d) ((d) == (sljit_s32)(d)) + +static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d) +{ + SLJIT_ASSERT(is_s20(d)); + + sljit_uw dh = (d >> 12) & 0xff; + sljit_uw dl = (d << 8) & 0xfff00; + return (dh | dl) << 8; +} + +/* TODO(carenas): variadic macro is not strictly needed */ +#define SLJIT_S390X_INSTRUCTION(op, ...) \ +static SLJIT_INLINE sljit_ins op(__VA_ARGS__) + +/* RR form instructions. */ +#define SLJIT_S390X_RR(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ +{ \ + return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ +} + +/* AND */ +SLJIT_S390X_RR(nr, 0x1400) + +/* BRANCH AND SAVE */ +SLJIT_S390X_RR(basr, 0x0d00) + +/* BRANCH ON CONDITION */ +SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? 
*/ + +/* DIVIDE */ +SLJIT_S390X_RR(dr, 0x1d00) + +/* EXCLUSIVE OR */ +SLJIT_S390X_RR(xr, 0x1700) + +/* LOAD */ +SLJIT_S390X_RR(lr, 0x1800) + +/* LOAD COMPLEMENT */ +SLJIT_S390X_RR(lcr, 0x1300) + +/* OR */ +SLJIT_S390X_RR(or, 0x1600) + +#undef SLJIT_S390X_RR + +/* RRE form instructions */ +#define SLJIT_S390X_RRE(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ +{ \ + return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ +} + +/* AND */ +SLJIT_S390X_RRE(ngr, 0xb9800000) + +/* DIVIDE LOGICAL */ +SLJIT_S390X_RRE(dlr, 0xb9970000) +SLJIT_S390X_RRE(dlgr, 0xb9870000) + +/* DIVIDE SINGLE */ +SLJIT_S390X_RRE(dsgr, 0xb90d0000) + +/* EXCLUSIVE OR */ +SLJIT_S390X_RRE(xgr, 0xb9820000) + +/* LOAD */ +SLJIT_S390X_RRE(lgr, 0xb9040000) +SLJIT_S390X_RRE(lgfr, 0xb9140000) + +/* LOAD BYTE */ +SLJIT_S390X_RRE(lbr, 0xb9260000) +SLJIT_S390X_RRE(lgbr, 0xb9060000) + +/* LOAD COMPLEMENT */ +SLJIT_S390X_RRE(lcgr, 0xb9030000) + +/* LOAD HALFWORD */ +SLJIT_S390X_RRE(lhr, 0xb9270000) +SLJIT_S390X_RRE(lghr, 0xb9070000) + +/* LOAD LOGICAL */ +SLJIT_S390X_RRE(llgfr, 0xb9160000) + +/* LOAD LOGICAL CHARACTER */ +SLJIT_S390X_RRE(llcr, 0xb9940000) +SLJIT_S390X_RRE(llgcr, 0xb9840000) + +/* LOAD LOGICAL HALFWORD */ +SLJIT_S390X_RRE(llhr, 0xb9950000) +SLJIT_S390X_RRE(llghr, 0xb9850000) + +/* MULTIPLY LOGICAL */ +SLJIT_S390X_RRE(mlgr, 0xb9860000) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RRE(msgfr, 0xb91c0000) + +/* OR */ +SLJIT_S390X_RRE(ogr, 0xb9810000) + +/* SUBTRACT */ +SLJIT_S390X_RRE(sgr, 0xb9090000) + +#undef SLJIT_S390X_RRE + +/* RI-a form instructions */ +#define SLJIT_S390X_RIA(name, pattern, imm_type) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ +{ \ + return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \ +} + +/* ADD HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16) + +/* LOAD HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16) +SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16) + +/* LOAD LOGICAL IMMEDIATE */ 
+SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16) +SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16) +SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16) +SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16) + +/* MULTIPLY HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16) +SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16) + +/* OR IMMEDIATE */ +SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) + +#undef SLJIT_S390X_RIA + +/* RIL-a form instructions (requires extended immediate facility) */ +#define SLJIT_S390X_RILA(name, pattern, imm_type) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ +{ \ + SLJIT_ASSERT(have_eimm()); \ + return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \ +} + +/* ADD IMMEDIATE */ +SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32) + +/* ADD IMMEDIATE HIGH */ +SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */ + +/* AND IMMEDIATE */ +SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32) + +/* EXCLUSIVE OR IMMEDIATE */ +SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32) + +/* INSERT IMMEDIATE */ +SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32) +SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32) + +/* LOAD IMMEDIATE */ +SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32) + +/* LOAD LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32) +SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32) + +/* SUBTRACT LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) + +#undef SLJIT_S390X_RILA + +/* RX-a form instructions */ +#define SLJIT_S390X_RXA(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \ +{ \ + sljit_ins ri, xi, bi, di; \ +\ + SLJIT_ASSERT((d & 0xfff) == d); \ + ri = (sljit_ins)(r & 0xf) << 20; \ + xi = (sljit_ins)(x & 0xf) << 16; \ + bi = (sljit_ins)(b & 0xf) << 12; \ + di = (sljit_ins)(d & 0xfff); \ +\ + return (pattern) | ri | xi | bi | di; \ +} + +/* LOAD */ +SLJIT_S390X_RXA(l, 0x58000000) + +/* LOAD 
ADDRESS */ +SLJIT_S390X_RXA(la, 0x41000000) + +/* LOAD HALFWORD */ +SLJIT_S390X_RXA(lh, 0x48000000) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RXA(ms, 0x71000000) + +/* STORE */ +SLJIT_S390X_RXA(st, 0x50000000) + +/* STORE CHARACTER */ +SLJIT_S390X_RXA(stc, 0x42000000) + +/* STORE HALFWORD */ +SLJIT_S390X_RXA(sth, 0x40000000) + +#undef SLJIT_S390X_RXA + +/* RXY-a instructions */ +#define SLJIT_S390X_RXYA(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ +{ \ + sljit_ins ri, xi, bi, di; \ +\ + SLJIT_ASSERT(cond); \ + ri = (sljit_ins)(r & 0xf) << 36; \ + xi = (sljit_ins)(x & 0xf) << 32; \ + bi = (sljit_ins)(b & 0xf) << 28; \ + di = disp_s20(d); \ +\ + return (pattern) | ri | xi | bi | di; \ +} + +/* LOAD */ +SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp()) +SLJIT_S390X_RXYA(lg, 0xe30000000004, 1) +SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1) + +/* LOAD BYTE */ +SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp()) +SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp()) + +/* LOAD HALFWORD */ +SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp()) +SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1) + +/* LOAD LOGICAL */ +SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1) + +/* LOAD LOGICAL CHARACTER */ +SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm()) +SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1) + +/* LOAD LOGICAL HALFWORD */ +SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm()) +SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp()) +SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1) + +/* STORE */ +SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp()) +SLJIT_S390X_RXYA(stg, 0xe30000000024, 1) + +/* STORE CHARACTER */ +SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp()) + +/* STORE HALFWORD */ +SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) + +#undef SLJIT_S390X_RXYA + +/* RSY-a instructions */ +#define SLJIT_S390X_RSYA(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, 
sljit_gpr src, sljit_sw d, sljit_gpr b) \ +{ \ + sljit_ins r1, r3, b2, d2; \ +\ + SLJIT_ASSERT(cond); \ + r1 = (sljit_ins)(dst & 0xf) << 36; \ + r3 = (sljit_ins)(src & 0xf) << 32; \ + b2 = (sljit_ins)(b & 0xf) << 28; \ + d2 = disp_s20(d); \ +\ + return (pattern) | r1 | r3 | b2 | d2; \ +} + +/* LOAD MULTIPLE */ +SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1) + +/* SHIFT LEFT LOGICAL */ +SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1) + +/* SHIFT RIGHT SINGLE */ +SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1) + +/* STORE MULTIPLE */ +SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) + +#undef SLJIT_S390X_RSYA + +/* RIE-f instructions (require general-instructions-extension facility) */ +#define SLJIT_S390X_RIEF(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \ +{ \ + sljit_ins r1, r2, i3, i4, i5; \ +\ + SLJIT_ASSERT(have_genext()); \ + r1 = (sljit_ins)(dst & 0xf) << 36; \ + r2 = (sljit_ins)(src & 0xf) << 32; \ + i3 = (sljit_ins)start << 24; \ + i4 = (sljit_ins)end << 16; \ + i5 = (sljit_ins)rot << 8; \ +\ + return (pattern) | r1 | r2 | i3 | i4 | i5; \ +} + +/* ROTATE THEN AND SELECTED BITS */ +/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */ + +/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */ +/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */ + +/* ROTATE THEN OR SELECTED BITS */ +SLJIT_S390X_RIEF(rosbg, 0xec0000000056) + +/* ROTATE THEN INSERT SELECTED BITS */ +/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */ +/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */ + +/* ROTATE THEN INSERT SELECTED BITS HIGH */ +SLJIT_S390X_RIEF(risbhg, 0xec000000005d) + +/* ROTATE THEN INSERT SELECTED BITS LOW */ +/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */ + +#undef SLJIT_S390X_RIEF + +/* RRF-c instructions (require load/store-on-condition 1 facility) */ +#define SLJIT_S390X_RRFC(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ +{ \ + sljit_ins r1, r2, m3; \ +\ + SLJIT_ASSERT(have_lscond1()); \ + r1 = 
(sljit_ins)(dst & 0xf) << 4; \ + r2 = (sljit_ins)(src & 0xf); \ + m3 = (sljit_ins)(mask & 0xf) << 12; \ +\ + return (pattern) | m3 | r1 | r2; \ +} + +/* LOAD HALFWORD IMMEDIATE ON CONDITION */ +SLJIT_S390X_RRFC(locr, 0xb9f20000) +SLJIT_S390X_RRFC(locgr, 0xb9e20000) + +#undef SLJIT_S390X_RRFC + +/* RIE-g instructions (require load/store-on-condition 2 facility) */ +#define SLJIT_S390X_RIEG(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \ +{ \ + sljit_ins r1, m3, i2; \ +\ + SLJIT_ASSERT(have_lscond2()); \ + r1 = (sljit_ins)(reg & 0xf) << 36; \ + m3 = (sljit_ins)(mask & 0xf) << 32; \ + i2 = (sljit_ins)(imm & 0xffffL) << 16; \ +\ + return (pattern) | r1 | m3 | i2; \ +} + +/* LOAD HALFWORD IMMEDIATE ON CONDITION */ +SLJIT_S390X_RIEG(lochi, 0xec0000000042) +SLJIT_S390X_RIEG(locghi, 0xec0000000046) + +#undef SLJIT_S390X_RIEG + +#define SLJIT_S390X_RILB(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \ +{ \ + sljit_ins r1, ri2; \ +\ + SLJIT_ASSERT(cond); \ + r1 = (sljit_ins)(reg & 0xf) << 36; \ + ri2 = (sljit_ins)(ri & 0xffffffff); \ +\ + return (pattern) | r1 | ri2; \ +} + +/* BRANCH RELATIVE AND SAVE LONG */ +SLJIT_S390X_RILB(brasl, 0xc00500000000, 1) + +/* LOAD ADDRESS RELATIVE LONG */ +SLJIT_S390X_RILB(larl, 0xc00000000000, 1) + +/* LOAD RELATIVE LONG */ +SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext()) + +#undef SLJIT_S390X_RILB + +SLJIT_S390X_INSTRUCTION(br, sljit_gpr target) +{ + return 0x07f0 | target; +} + +SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20; + sljit_ins ri2 = (sljit_ins)target & 0xffff; + return 0xa7040000L | m1 | ri2; +} + +SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36; + sljit_ins ri2 = (sljit_ins)target & 0xffffffff; + return 0xc00400000000L | m1 | ri2; +} + +SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src) +{ + sljit_ins r1 = 
((sljit_ins)dst & 0xf) << 8; + sljit_ins r2 = ((sljit_ins)src & 0xf); + SLJIT_ASSERT(have_eimm()); + return 0xb9830000 | r1 | r2; +} + +/* INSERT PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) +{ + return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4); +} + +/* SET PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) +{ + return 0x0400 | ((sljit_ins)(dst & 0xf) << 4); +} + +/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ +SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) +{ + return risbhg(dst, src, start, 0x8 | end, rot); +} + +#undef SLJIT_S390X_INSTRUCTION + +static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r) +{ + /* Condition codes: bits 18 and 19. + Transformation: + 0 (zero and no overflow) : unchanged + 1 (non-zero and no overflow) : unchanged + 2 (zero and overflow) : decreased by 1 + 3 (non-zero and overflow) : decreased by 1 if non-zero */ + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_I32_OP) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(flag_r))); + FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? 
or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); + FAIL_IF(push_inst(compiler, slfi(flag_r, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(flag_r))); + return SLJIT_SUCCESS; +} + +/* load 64-bit immediate into register without clobbering flags */ +static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v) +{ + /* 4 byte instructions */ + if (is_s16(v)) + return push_inst(compiler, lghi(target, (sljit_s16)v)); + + if ((sljit_uw)v == (v & 0x000000000000ffffU)) + return push_inst(compiler, llill(target, (sljit_u16)v)); + + if ((sljit_uw)v == (v & 0x00000000ffff0000U)) + return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16))); + + if ((sljit_uw)v == (v & 0x0000ffff00000000U)) + return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32))); + + if ((sljit_uw)v == (v & 0xffff000000000000U)) + return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48))); + + /* 6 byte instructions (requires extended immediate facility) */ + if (have_eimm()) { + if (is_s32(v)) + return push_inst(compiler, lgfi(target, (sljit_s32)v)); + + if ((sljit_uw)v == (v & 0x00000000ffffffffU)) + return push_inst(compiler, llilf(target, (sljit_u32)v)); + + if ((sljit_uw)v == (v & 0xffffffff00000000U)) + return push_inst(compiler, llihf(target, (sljit_u32)(v >> 32))); + + FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v))); + return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); + } + /* TODO(mundaym): instruction sequences that don't use extended immediates */ + abort(); +} + +struct addr { + sljit_gpr base; + sljit_gpr index; + sljit_sw offset; +}; + +/* transform memory operand into D(X,B) form with a signed 20-bit offset */ +static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, + struct addr *addr, sljit_s32 mem, sljit_sw off, + sljit_gpr tmp /* clobbered, must not be r0 */) +{ + sljit_gpr base = r0; + sljit_gpr index = r0; + + SLJIT_ASSERT(tmp != r0); + if (mem & REG_MASK) + 
base = gpr(mem & REG_MASK); + + if (mem & OFFS_REG_MASK) { + index = gpr(OFFS_REG(mem)); + if (off != 0) { + /* shift and put the result into tmp */ + SLJIT_ASSERT(0 <= off && off < 64); + FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + index = tmp; + off = 0; /* clear offset */ + } + } + else if (!is_s20(off)) { + FAIL_IF(push_load_imm_inst(compiler, tmp, off)); + index = tmp; + off = 0; /* clear offset */ + } + addr->base = base; + addr->index = index; + addr->offset = off; + return SLJIT_SUCCESS; +} + +/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */ +static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, + struct addr *addr, sljit_s32 mem, sljit_sw off, + sljit_gpr tmp /* clobbered, must not be r0 */) +{ + sljit_gpr base = r0; + sljit_gpr index = r0; + + SLJIT_ASSERT(tmp != r0); + if (mem & REG_MASK) + base = gpr(mem & REG_MASK); + + if (mem & OFFS_REG_MASK) { + index = gpr(OFFS_REG(mem)); + if (off != 0) { + /* shift and put the result into tmp */ + SLJIT_ASSERT(0 <= off && off < 64); + FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + index = tmp; + off = 0; /* clear offset */ + } + } + else if (!is_u12(off)) { + FAIL_IF(push_load_imm_inst(compiler, tmp, off)); + index = tmp; + off = 0; /* clear offset */ + } + addr->base = base; + addr->index = index; + addr->offset = off; + return SLJIT_SUCCESS; +} + +#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base) +#define WHEN(cond, r, i1, i2, addr) \ + (cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr) + +/* May clobber tmp1. 
*/ +static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst, + sljit_s32 src, sljit_sw srcw, + sljit_s32 is_32bit) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(src & SLJIT_MEM); + if (have_ldisp() || !is_32bit) + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); + else + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + + if (is_32bit) + ins = WHEN(is_u12(addr.offset), dst, l, ly, addr); + else + ins = lg(dst, addr.offset, addr.index, addr.base); + + return push_inst(compiler, ins); +} + +/* May clobber tmp1. */ +static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 is_32bit) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(dst & SLJIT_MEM); + if (have_ldisp() || !is_32bit) + FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1)); + else + FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1)); + + if (is_32bit) + ins = WHEN(is_u12(addr.offset), src, st, sty, addr); + else + ins = stg(src, addr.offset, addr.index, addr.base); + + return push_inst(compiler, ins); +} + +#undef WHEN + +static sljit_s32 emit_move(struct sljit_compiler *compiler, + sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_ASSERT(!SLOW_IS_REG(src) || dst_r != gpr(src & REG_MASK)); + + if (src & SLJIT_IMM) + return push_load_imm_inst(compiler, dst_r, srcw); + + if (src & SLJIT_MEM) + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_I32_OP) != 0); + + sljit_gpr src_r = gpr(src & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? 
lr(dst_r, src_r) : lgr(dst_r, src_r)); +} + +static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = tmp0; + sljit_gpr src_r = tmp1; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + + if (dst == src1) + needs_move = 0; + else if (dst == src2) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (FAST_IS_REG(src2)) + src_r = gpr(src2 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + FAIL_IF(push_inst(compiler, ins | (dst_r << 4) | src_r)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + sljit_gpr src1_r = tmp0; + sljit_gpr src2_r = tmp1; + + if (FAST_IS_REG(src1)) + src1_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (FAST_IS_REG(src2)) + src2_r = gpr(src2 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + return push_inst(compiler, ins | (dst_r << 4) | src1_r | (src2_r << 12)); +} + +typedef enum { + RI_A, + RIL_A, +} emit_ril_type; + +static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w, + emit_ril_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == RIL_A) + return push_inst(compiler, ins | (dst_r << 36) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | (dst_r << 20) | (src2w & 0xffff)); +} + +static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w) +{ + sljit_gpr dst_r = SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + + if (!SLOW_IS_REG(src1)) + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + else + src_r = gpr(src1 & REG_MASK); + + return push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (src2w & 0xffff) << 16); +} + +typedef enum { + RX_A, + RXY_A, +} emit_rx_type; + +static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w, + emit_rx_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + sljit_gpr base, index; + + SLJIT_ASSERT(src2 & SLJIT_MEM); + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + base = gpr(src2 & REG_MASK); + index = tmp0; + + if (src2 & OFFS_REG_MASK) { + index = gpr(OFFS_REG(src2)); + + if (src2w != 0) { + FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0))); + src2w = 0; + index = tmp1; + } + } else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w)); + + if (src2 & REG_MASK) + index = tmp1; + else + base = tmp1; + src2w = 0; + } + + if (type == RX_A) + ins |= (dst_r << 20) | (index << 16) | (base << 12) | src2w; + else + ins |= (dst_r << 36) | (index << 32) | (base << 28) | disp_s20(src2w); + + FAIL_IF(push_inst(compiler, ins)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? 
lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_sw srcw) +{ + SLJIT_ASSERT(dst & SLJIT_MEM); + + sljit_gpr dst_r = tmp1; + + if (dst & OFFS_REG_MASK) { + sljit_gpr index = tmp1; + + if ((dstw & 0x3) == 0) + index = gpr(OFFS_REG(dst)); + else + FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0))); + + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index))); + dstw = 0; + } + else if (!is_s20(dstw)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw)); + + if (dst & REG_MASK) + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1))); + + dstw = 0; + } + else + dst_r = gpr(dst & REG_MASK); + + return push_inst(compiler, ins | ((srcw & 0xff) << 32) | (dst_r << 28) | disp_s20(dstw)); +} + +struct ins_forms { + sljit_ins op_r; + sljit_ins op_gr; + sljit_ins op_rk; + sljit_ins op_grk; + sljit_ins op; + sljit_ins op_y; + sljit_ins op_g; +}; + +static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins, ins_k; + + if ((src1 | src2) & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_I32_OP) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + /* Extra instructions needed for address computation can be executed independently. */ + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? 
(src1w & 0x3) == 0 : is_s20(src1w)))) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + if (src1 & SLJIT_MEM) { + if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w)) + return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A); + + return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A); + } + } + else if (ins12 || ins20) { + emit_rx_type rx_type; + + if (ins12) { + rx_type = RX_A; + ins = ins12; + } + else { + rx_type = RXY_A; + ins = ins20; + } + + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w))))) + return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type); + + if (src1 & SLJIT_MEM) + return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type); + } + } + + if (mode & SLJIT_I32_OP) { + ins = forms->op_r; + ins_k = forms->op_rk; + } + else { + ins = forms->op_gr; + ins_k = forms->op_grk; + } + + SLJIT_ASSERT(ins != 0 || ins_k != 0); + + if (ins && SLOW_IS_REG(dst)) { + if (dst == src1) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + if (dst == src2) + return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w); + } + + if (ins_k == 0) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins; + + if (src2 & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_I32_OP) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && 
ins20) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + else if (ins12) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + else if (ins20) + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + ins = (mode & SLJIT_I32_OP) ? forms->op_rk : forms->op_grk; + + if (ins == 0 || (SLOW_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_I32_OP) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_s390x_const *const_; + struct sljit_put_label *put_label; + sljit_sw executable_offset; + sljit_uw ins_size = 0; /* instructions */ + sljit_uw pool_size = 0; /* literal pool */ + sljit_uw pad_size; + sljit_uw i, j = 0; + struct sljit_memory_fragment *buf; + void *code, *code_ptr; + sljit_uw *pool, *pool_ptr; + + sljit_uw source; + sljit_sw offset; /* TODO(carenas): only need 32 bit */ + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* branch handling */ + label = compiler->labels; + jump = compiler->jumps; + put_label = compiler->put_labels; + + /* TODO(carenas): compiler->executable_size could be calculated + * before to avoid the following loop (except for + * pool_size) + */ + /* calculate the size of the code */ + for (buf = compiler->buf; buf != NULL; buf = buf->next) { + sljit_uw len = buf->used_size / sizeof(sljit_ins); + sljit_ins *ibuf = (sljit_ins *)buf->memory; + for (i = 0; i < len; ++i, ++j) { + sljit_ins ins = ibuf[i]; + + /* TODO(carenas): instruction tag vs size/addr == j + * using instruction tags for const is creative + * but unlike all 
other architectures, and is not + * done consistently for all other objects. + * This might need reviewing later. + */ + if (ins & sljit_ins_const) { + pool_size += sizeof(*pool); + ins &= ~sljit_ins_const; + } + if (label && label->size == j) { + label->size = ins_size; + label = label->next; + } + if (jump && jump->addr == j) { + if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { + /* encoded: */ + /* brasl %r14, (or brcl , ) */ + /* replace with: */ + /* lgrl %r1, */ + /* bras %r14, %r1 (or bcr , %r1) */ + pool_size += sizeof(*pool); + ins_size += 2; + } + jump = jump->next; + } + if (put_label && put_label->addr == j) { + pool_size += sizeof(*pool); + put_label = put_label->next; + } + ins_size += sizeof_ins(ins); + } + } + + /* emit trailing label */ + if (label && label->size == j) { + label->size = ins_size; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!put_label); + + /* pad code size to 8 bytes so is accessible with half word offsets */ + /* the literal pool needs to be doubleword aligned */ + pad_size = ((ins_size + 7UL) & ~7UL) - ins_size; + SLJIT_ASSERT(pad_size < 8UL); + + /* allocate target buffer */ + code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size, + compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + code_ptr = code; + executable_offset = SLJIT_EXEC_OFFSET(code); + + /* TODO(carenas): pool is optional, and the ABI recommends it to + * be created before the function code, instead of + * globally; if generated code is too big could + * need offsets bigger than 32bit words and asser() + */ + pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size); + pool_ptr = pool; + const_ = (struct sljit_s390x_const *)compiler->consts; + + /* update label addresses */ + label = compiler->labels; + while (label) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET( + (sljit_uw)code_ptr + label->size, executable_offset); + label = label->next; + } + + /* reset jumps */ + jump = 
compiler->jumps; + put_label = compiler->put_labels; + + /* emit the code */ + j = 0; + for (buf = compiler->buf; buf != NULL; buf = buf->next) { + sljit_uw len = buf->used_size / sizeof(sljit_ins); + sljit_ins *ibuf = (sljit_ins *)buf->memory; + for (i = 0; i < len; ++i, ++j) { + sljit_ins ins = ibuf[i]; + if (ins & sljit_ins_const) { + /* clear the const tag */ + ins &= ~sljit_ins_const; + + /* update instruction with relative address of constant */ + source = (sljit_uw)code_ptr; + offset = (sljit_uw)pool_ptr - source; + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; /* halfword (not byte) offset */ + SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; + + /* update address */ + const_->const_.addr = (sljit_uw)pool_ptr; + + /* store initial value into pool and update pool address */ + *(pool_ptr++) = const_->init_value; + + /* move to next constant */ + const_ = (struct sljit_s390x_const *)const_->const_.next; + } + if (jump && jump->addr == j) { + sljit_sw target = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { + jump->addr = (sljit_uw)pool_ptr; + + /* load address into tmp1 */ + source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + encode_inst(&code_ptr, + lgrl(tmp1, offset & 0xffffffff)); + + /* store jump target into pool and update pool address */ + *(pool_ptr++) = target; + + /* branch to tmp1 */ + sljit_ins op = (ins >> 32) & 0xf; + sljit_ins arg = (ins >> 36) & 0xf; + switch (op) { + case 4: /* brcl -> bcr */ + ins = bcr(arg, tmp1); + break; + case 5: /* brasl -> basr */ + ins = basr(arg, tmp1); + break; + default: + abort(); + } + } + else { + jump->addr = (sljit_uw)code_ptr + 2; + source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = target - source; + + /* offset must be halfword aligned */ + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */ + + /* patch jump target */ + ins |= (sljit_ins)offset & 0xffffffff; + } + jump = jump->next; + } + if (put_label && put_label->addr == j) { + source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + + /* store target into pool */ + *pool_ptr = put_label->label->addr; + offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + pool_ptr++; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; + + put_label = put_label->next; + } + encode_inst(&code_ptr, ins); + } + } + SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr); + SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr); + + compiler->error = SLJIT_ERR_COMPILED; + 
compiler->executable_offset = executable_offset; + compiler->executable_size = ins_size; + code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + /* TODO(mundaym): implement all */ + switch (feature_type) { + case SLJIT_HAS_CLZ: + return have_eimm() ? 1 : 0; /* FLOGR instruction */ + case SLJIT_HAS_CMOV: + return have_lscond1() ? 1 : 0; + case SLJIT_HAS_FPU: + return 0; + } + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args = get_arg_count(arg_types); + sljit_sw frame_size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* saved registers go in callee allocated save area */ + compiler->local_size = (local_size + 0xf) & ~0xf; + frame_size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE; + + FAIL_IF(push_inst(compiler, stmg(r6, r15, r6 * sizeof(sljit_sw), r15))); /* save registers TODO(MGM): optimize */ + if (frame_size != 0) { + if (is_s16(-frame_size)) + FAIL_IF(push_inst(compiler, aghi(r15, -((sljit_s16)frame_size)))); + else if (is_s32(-frame_size)) + FAIL_IF(push_inst(compiler, agfi(r15, -((sljit_s32)frame_size)))); + else { + FAIL_IF(push_load_imm_inst(compiler, tmp1, -frame_size)); + FAIL_IF(push_inst(compiler, la(r15, 0, 
tmp1, r15))); + } + } + + if (args >= 1) + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0), gpr(SLJIT_R0)))); + if (args >= 2) + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S1), gpr(SLJIT_R1)))); + if (args >= 3) + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S2), gpr(SLJIT_R2)))); + SLJIT_ASSERT(args < 4); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* TODO(mundaym): stack space for saved floating point registers */ + compiler->local_size = (local_size + 0xf) & ~0xf; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_sw size; + sljit_gpr end; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + (r6 * sizeof(sljit_sw)); + if (!is_s20(size)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE)); + FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15))); + size = r6 * sizeof(sljit_sw); + end = r14; /* r15 has been restored already */ + } + else + end = r15; + + FAIL_IF(push_inst(compiler, lmg(r6, end, size, r15))); /* restore registers TODO(MGM): optimize */ + FAIL_IF(push_inst(compiler, br(r14))); /* return */ + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* 
--------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_gpr arg0 = gpr(SLJIT_R0); + sljit_gpr arg1 = gpr(SLJIT_R1); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op) | (op & SLJIT_I32_OP); + switch (op) { + case SLJIT_BREAKPOINT: + /* The following invalid instruction is emitted by gdb. */ + return push_inst(compiler, 0x0001 /* 2-byte trap */); + case SLJIT_NOP: + return push_inst(compiler, 0x0700 /* 2-byte nop */); + case SLJIT_LMUL_UW: + FAIL_IF(push_inst(compiler, mlgr(arg0, arg0))); + break; + case SLJIT_LMUL_SW: + /* signed multiplication from: */ + /* Hacker's Delight, Second Edition: Chapter 8-3. */ + FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0))); + FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0))); + FAIL_IF(push_inst(compiler, ngr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, ngr(tmp1, arg0))); + + /* unsigned multiplication */ + FAIL_IF(push_inst(compiler, mlgr(arg0, arg0))); + + FAIL_IF(push_inst(compiler, sgr(arg0, tmp0))); + FAIL_IF(push_inst(compiler, sgr(arg0, tmp1))); + break; + case SLJIT_DIV_U32: + case SLJIT_DIVMOD_U32: + FAIL_IF(push_inst(compiler, lhi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dlr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_U32) + return push_inst(compiler, lr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_S32: + case SLJIT_DIVMOD_S32: + FAIL_IF(push_inst(compiler, lhi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_S32) + return push_inst(compiler, lr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_UW: + case SLJIT_DIVMOD_UW: + 
FAIL_IF(push_inst(compiler, lghi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lgr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_UW) + return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_SW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, lgr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_SW) + return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_ENDBR: + return SLJIT_SUCCESS; + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + /* swap result registers */ + FAIL_IF(push_inst(compiler, lgr(tmp0, arg0))); + FAIL_IF(push_inst(compiler, lgr(arg0, arg1))); + return push_inst(compiler, lgr(arg1, tmp0)); +} + +/* LEVAL will be defined later with different parameters as needed */ +#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + struct addr mem; + sljit_gpr dst_r; + sljit_gpr src_r; + sljit_s32 opcode = GET_OPCODE(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if ((dst == SLJIT_UNUSED) && !HAS_FLAGS(op)) { + /* TODO(carenas): implement prefetch? 
*/ + return SLJIT_SUCCESS; + } + + if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { + /* LOAD REGISTER */ + if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { + dst_r = gpr(dst); + src_r = gpr(src); + switch (opcode | (op & SLJIT_I32_OP)) { + /* 32-bit */ + case SLJIT_MOV32_U8: + ins = llcr(dst_r, src_r); + break; + case SLJIT_MOV32_S8: + ins = lbr(dst_r, src_r); + break; + case SLJIT_MOV32_U16: + ins = llhr(dst_r, src_r); + break; + case SLJIT_MOV32_S16: + ins = lhr(dst_r, src_r); + break; + case SLJIT_MOV32: + ins = lr(dst_r, src_r); + break; + /* 64-bit */ + case SLJIT_MOV_U8: + ins = llgcr(dst_r, src_r); + break; + case SLJIT_MOV_S8: + ins = lgbr(dst_r, src_r); + break; + case SLJIT_MOV_U16: + ins = llghr(dst_r, src_r); + break; + case SLJIT_MOV_S16: + ins = lghr(dst_r, src_r); + break; + case SLJIT_MOV_U32: + ins = llgfr(dst_r, src_r); + break; + case SLJIT_MOV_S32: + ins = lgfr(dst_r, src_r); + break; + case SLJIT_MOV: + case SLJIT_MOV_P: + ins = lgr(dst_r, src_r); + break; + default: + ins = 0; + SLJIT_UNREACHABLE(); + } + FAIL_IF(push_inst(compiler, ins)); + return SLJIT_SUCCESS; + } + /* LOAD IMMEDIATE */ + if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) { + switch (opcode) { + case SLJIT_MOV_U8: + srcw = (sljit_sw)((sljit_u8)(srcw)); + break; + case SLJIT_MOV_S8: + srcw = (sljit_sw)((sljit_s8)(srcw)); + break; + case SLJIT_MOV_U16: + srcw = (sljit_sw)((sljit_u16)(srcw)); + break; + case SLJIT_MOV_S16: + srcw = (sljit_sw)((sljit_s16)(srcw)); + break; + case SLJIT_MOV_U32: + srcw = (sljit_sw)((sljit_u32)(srcw)); + break; + case SLJIT_MOV_S32: + srcw = (sljit_sw)((sljit_s32)(srcw)); + break; + } + return push_load_imm_inst(compiler, gpr(dst), srcw); + } + /* LOAD */ + /* TODO(carenas): avoid reg being defined later */ + #define LEVAL(i) EVAL(i, reg, mem) + if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) { + sljit_gpr reg = gpr(dst); + + FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); + /* TODO(carenas): convert all calls below to LEVAL */ + switch (opcode | (op & 
SLJIT_I32_OP)) { + case SLJIT_MOV32_U8: + ins = llc(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_S8: + ins = lb(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_U16: + ins = llh(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_S16: + ins = WHEN2(is_u12(mem.offset), lh, lhy); + break; + case SLJIT_MOV32: + ins = WHEN2(is_u12(mem.offset), l, ly); + break; + case SLJIT_MOV_U8: + ins = LEVAL(llgc); + break; + case SLJIT_MOV_S8: + ins = lgb(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_U16: + ins = LEVAL(llgh); + break; + case SLJIT_MOV_S16: + ins = lgh(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_U32: + ins = LEVAL(llgf); + break; + case SLJIT_MOV_S32: + ins = lgf(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_P: + case SLJIT_MOV: + ins = lg(reg, mem.offset, mem.index, mem.base); + break; + default: + SLJIT_UNREACHABLE(); + } + FAIL_IF(push_inst(compiler, ins)); + return SLJIT_SUCCESS; + } + /* STORE and STORE IMMEDIATE */ + if ((dst & SLJIT_MEM) + && (FAST_IS_REG(src) || (src & SLJIT_IMM))) { + sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0; + if (src & SLJIT_IMM) { + /* TODO(mundaym): MOVE IMMEDIATE? 
*/ + FAIL_IF(push_load_imm_inst(compiler, reg, srcw)); + } + struct addr mem; + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + switch (opcode) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), stc, stcy)); + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), sth, sthy)); + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), st, sty)); + case SLJIT_MOV_P: + case SLJIT_MOV: + FAIL_IF(push_inst(compiler, LEVAL(stg))); + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + } + #undef LEVAL + /* MOVE CHARACTERS */ + if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) { + struct addr mem; + FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); + switch (opcode) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + FAIL_IF(push_inst(compiler, + EVAL(llgc, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(stcy, tmp0, mem)); + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + FAIL_IF(push_inst(compiler, + EVAL(llgh, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(sthy, tmp0, mem)); + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + FAIL_IF(push_inst(compiler, + EVAL(ly, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(sty, tmp0, mem)); + case SLJIT_MOV_P: + case SLJIT_MOV: + FAIL_IF(push_inst(compiler, + EVAL(lg, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + FAIL_IF(push_inst(compiler, + EVAL(stg, tmp0, mem))); + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + } + SLJIT_UNREACHABLE(); + } + + SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */ + + dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; + src_r = FAST_IS_REG(src) ? 
gpr(REG_MASK & src) : tmp0; + if (src & SLJIT_MEM) + FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_I32_OP)); + + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + /* TODO(mundaym): optimize loads and stores */ + switch (opcode | (op & SLJIT_I32_OP)) { + case SLJIT_NOT: + /* emulate ~x with x^-1 */ + FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); + if (src_r != dst_r) + FAIL_IF(push_inst(compiler, lgr(dst_r, src_r))); + + FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1))); + break; + case SLJIT_NOT32: + /* emulate ~x with x^-1 */ + if (have_eimm()) + FAIL_IF(push_inst(compiler, xilf(dst_r, -1))); + else { + FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); + if (src_r != dst_r) + FAIL_IF(push_inst(compiler, lr(dst_r, src_r))); + + FAIL_IF(push_inst(compiler, xr(dst_r, tmp1))); + } + break; + case SLJIT_NEG: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; + FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r))); + break; + case SLJIT_NEG32: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; + FAIL_IF(push_inst(compiler, lcr(dst_r, src_r))); + break; + case SLJIT_CLZ: + if (have_eimm()) { + FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */ + if (dst_r != tmp0) + FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0))); + } else { + abort(); /* TODO(mundaym): no eimm (?) */ + } + break; + case SLJIT_CLZ32: + if (have_eimm()) { + FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0))); + FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff))); + FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */ + if (dst_r != tmp0) + FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); + } else { + abort(); /* TODO(mundaym): no eimm (?) 
*/ + } + break; + default: + SLJIT_UNREACHABLE(); + } + + if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW)) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); + + /* TODO(carenas): doesn't need FAIL_IF */ + if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM)) + FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int is_commutative(sljit_s32 op) +{ + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_MUL: + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return 1; + } + return 0; +} + +static SLJIT_INLINE int is_shift(sljit_s32 op) { + sljit_s32 v = GET_OPCODE(op); + return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0; +} + +static SLJIT_INLINE int sets_signed_flag(sljit_s32 op) +{ + switch (GET_FLAG_TYPE(op)) { + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + case SLJIT_SIG_LESS: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_SIG_GREATER: + case SLJIT_SIG_GREATER_EQUAL: + return 1; + } + return 0; +} + +static const struct ins_forms add_forms = { + 0x1a00, /* ar */ + 0xb9080000, /* agr */ + 0xb9f80000, /* ark */ + 0xb9e80000, /* agrk */ + 0x5a000000, /* a */ + 0xe3000000005a, /* ay */ + 0xe30000000008, /* ag */ +}; + +static const struct ins_forms logical_add_forms = { + 0x1e00, /* alr */ + 0xb90a0000, /* algr */ + 0xb9fa0000, /* alrk */ + 0xb9ea0000, /* algrk */ + 0x5e000000, /* al */ + 0xe3000000005e, /* aly */ + 0xe3000000000a, /* alg */ +}; + +static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW; + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (src2 & SLJIT_IMM) { + if (!sets_zero_overflow && is_s8(src2w) 
&& (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_overflow) + ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, src2w); + } + + if (is_s16(src2w)) { + if (sets_overflow) + ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); + goto done; + } + + if (!sets_overflow) { + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(-src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_I32_OP) || is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + } + + forms = sets_overflow ? &add_forms : &logical_add_forms; + FAIL_IF(emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + +done: + if (sets_zero_overflow) + FAIL_IF(update_zero_overflow(compiler, op, SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0)); + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms sub_forms = { + 0x1b00, /* sr */ + 0xb9090000, /* sgr */ + 0xb9f90000, /* srk */ + 0xb9e90000, /* sgrk */ + 0x5b000000, /* s */ + 0xe3000000005b, /* sy */ + 0xe30000000009, /* sg */ +}; + +static const struct ins_forms logical_sub_forms = { + 0x1f00, /* slr */ + 0xb90b0000, /* slgr */ + 0xb9fb0000, /* slrk */ + 0xb9eb0000, /* slgrk */ + 0x5f000000, /* sl */ + 0xe3000000005f, /* sly */ + 0xe3000000000b, /* slg */ +}; + +static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + int sets_signed = sets_signed_flag(op); + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; + + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; + + if (src2 & SLJIT_IMM) { + if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) + { + if ((op & SLJIT_I32_OP) || is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); + } + } + else { + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); + } + if (is_s16(src2w)) + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, SLJIT_UNUSED, src1, src1w, src2w); + } + } + else if (src2 & SLJIT_MEM) { + if ((op & SLJIT_I32_OP) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + ins = compare_signed ? 
0x59000000 /* c */ : 0x55000000 /* cl */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); + } + + if (compare_signed) + ins = (op & SLJIT_I32_OP) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + else + ins = (op & SLJIT_I32_OP) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); + } + + if (compare_signed) + ins = (op & SLJIT_I32_OP) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + else + ins = (op & SLJIT_I32_OP) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); + } + + if (src2 & SLJIT_IMM) { + sljit_sw neg_src2w = -src2w; + + if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { + if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_signed) + ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, neg_src2w); + } + + if (is_s16(neg_src2w)) { + if (sets_signed) + ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); + goto done; + } + } + + if (!sets_signed) { + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(neg_src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_I32_OP) || is_s32(neg_src2w)) { + ins = (op & SLJIT_I32_OP) ? 
0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } + } + + forms = sets_signed ? &sub_forms : &logical_sub_forms; + FAIL_IF(emit_non_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + +done: + if (sets_signed) { + sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + + if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { + /* In case of overflow, the sign bit of the two source operands must be different, and + - the first operand is greater if the sign bit of the result is set + - the first operand is less if the sign bit of the result is not set + The -result operation sets the corrent sign, because the result cannot be zero. + The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ + FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); + FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms multiply_forms = { + 0xb2520000, /* msr */ + 0xb90c0000, /* msgr */ + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0x71000000, /* ms */ + 0xe30000000051, /* msy */ + 0xe3000000000c, /* msg */ +}; + +static const struct ins_forms multiply_overflow_forms = { + 0, + 0, + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0, + 0xe30000000053, /* msc */ + 0xe30000000083, /* msgc */ +}; + +static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins; + + if (HAS_FLAGS(op)) { + /* if have_misc2 fails, this operation should be emulated. 
32 bit emulation: + FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r))); + FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r))); + if (dst_r != tmp0) { + FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); + } + FAIL_IF(push_inst(compiler, aih(tmp0, 1))); + FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); + FAIL_IF(push_inst(compiler, ipm(flag_r))); + FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); */ + + return emit_commutative(compiler, &multiply_overflow_forms, dst, dstw, src1, src1w, src2, src2w); + } + + if (src2 & SLJIT_IMM) { + if (is_s16(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); + } + + if (is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); + } + } + + return emit_commutative(compiler, &multiply_forms, dst, dstw, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_uw imm, sljit_s32 count16) +{ + sljit_s32 mode = compiler->mode; + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == SLJIT_AND) { + if (!(mode & SLJIT_I32_OP)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | (dst_r << 36) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | (dst_r << 36) | (imm & 0xffffffff)); + } + else if (type == SLJIT_OR) { + if (count16 >= 3) { + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + } + + if (count16 >= 2) { + if ((imm & 0x00000000ffffffffull) == 0) + return 
push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32)); + if ((imm & 0xffffffff00000000ull) == 0) + return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + } + + if ((imm & 0xffff000000000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | (dst_r << 20) | (imm >> 48))); + if ((imm & 0x0000ffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | (dst_r << 20) | ((imm >> 32) & 0xffff))); + if ((imm & 0x00000000ffff0000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | (dst_r << 20) | ((imm >> 16) & 0xffff))); + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa50b0000 /* oill */ | (dst_r << 20) | (imm & 0xffff)); + return SLJIT_SUCCESS; + } + + if ((imm & 0xffffffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | (dst_r << 36) | (imm >> 32))); + if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) + return push_inst(compiler, 0xc00700000000 /* xilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return SLJIT_SUCCESS; +} + +static const struct ins_forms bitwise_and_forms = { + 0x1400, /* nr */ + 0xb9800000, /* ngr */ + 0xb9f40000, /* nrk */ + 0xb9e40000, /* ngrk */ + 0x54000000, /* n */ + 0xe30000000054, /* ny */ + 0xe30000000080, /* ng */ +}; + +static const struct ins_forms bitwise_or_forms = { + 0x1600, /* or */ + 0xb9810000, /* ogr */ + 0xb9f60000, /* ork */ + 0xb9e60000, /* ogrk */ + 0x56000000, /* o */ + 0xe30000000056, /* oy */ + 0xe30000000081, /* og */ +}; + +static const struct ins_forms bitwise_xor_forms = { + 0x1700, /* xr */ + 0xb9820000, /* xgr */ + 0xb9f70000, /* xrk */ + 0xb9e70000, /* xgrk */ + 0x57000000, /* x */ + 0xe30000000057, /* xy */ + 0xe30000000082, /* xg */ +}; + +static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = 
GET_OPCODE(op); + const struct ins_forms *forms; + + if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == SLJIT_UNUSED))) { + sljit_s32 count16 = 0; + sljit_uw imm = (sljit_uw)src2w; + + if (op & SLJIT_I32_OP) + imm &= 0xffffffffull; + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + count16++; + if ((imm & 0x00000000ffff0000ull) != 0) + count16++; + if ((imm & 0x0000ffff00000000ull) != 0) + count16++; + if ((imm & 0xffff000000000000ull) != 0) + count16++; + + if (type == SLJIT_AND && dst == SLJIT_UNUSED && count16 == 1) { + sljit_gpr src_r = tmp0; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa7010000 | (src_r << 20) | imm); + if ((imm & 0x00000000ffff0000ull) != 0) + return push_inst(compiler, 0xa7000000 | (src_r << 20) | (imm >> 16)); + if ((imm & 0x0000ffff00000000ull) != 0) + return push_inst(compiler, 0xa7030000 | (src_r << 20) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 | (src_r << 20) | (imm >> 48)); + } + + if (!(op & SLJIT_SET_Z)) + return sljit_emit_bitwise_imm(compiler, type, dst, dstw, src1, src1w, imm, count16); + } + + if (type == SLJIT_AND) + forms = &bitwise_and_forms; + else if (type == SLJIT_OR) + forms = &bitwise_or_forms; + else + forms = &bitwise_xor_forms; + + return emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + sljit_gpr dst_r = SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + sljit_gpr base_r = tmp0; + sljit_ins imm = 0; + sljit_ins ins; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (src2 & SLJIT_IMM) + imm = src2w & ((op & SLJIT_I32_OP) ? 0x1f : 0x3f); + else if (FAST_IS_REG(src2)) + base_r = gpr(src2 & REG_MASK); + else { + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + base_r = tmp1; + } + + if ((op & SLJIT_I32_OP) && dst_r == src_r) { + if (type == SLJIT_SHL) + ins = 0x89000000 /* sll */; + else if (type == SLJIT_LSHR) + ins = 0x88000000 /* srl */; + else + ins = 0x8a000000 /* sra */; + + FAIL_IF(push_inst(compiler, ins | (dst_r << 20) | (base_r << 12) | imm)); + } + else { + if (type == SLJIT_SHL) + ins = (op & SLJIT_I32_OP) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + else if (type == SLJIT_LSHR) + ins = (op & SLJIT_I32_OP) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + + FAIL_IF(push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (base_r << 28) | (imm << 16))); + } + + if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) + return push_inst(compiler, (op & SLJIT_I32_OP) ? 
or(dst_r, dst_r) : ogr(dst_r, dst_r)); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms addc_forms = { + 0xb9980000, /* alcr */ + 0xb9880000, /* alcgr */ + 0, + 0, + 0, + 0xe30000000098, /* alc */ + 0xe30000000088, /* alcg */ +}; + +static const struct ins_forms subc_forms = { + 0xb9990000, /* slbr */ + 0xb9890000, /* slbgr */ + 0, + 0, + 0, + 0xe30000000099, /* slb */ + 0xe30000000089, /* slbg */ +}; + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + + compiler->mode = op & SLJIT_I32_OP; + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC) + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; + + if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { + src1 ^= src2; + src2 ^= src1; + src1 ^= src2; + + src1w ^= src2w; + src2w ^= src1w; + src1w ^= src2w; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, dstw, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return SLJIT_SUCCESS; + case SLJIT_SUB: + return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, dstw, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return SLJIT_SUCCESS; + case 
SLJIT_MUL: + FAIL_IF(sljit_emit_multiply(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + FAIL_IF(sljit_emit_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( + struct sljit_compiler *compiler, + sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_gpr src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + src_r = FAST_IS_REG(src) ? gpr(src) : tmp1; + if (src & SLJIT_MEM) + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0)); + + return push_inst(compiler, br(src_r)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + /* TODO(carenas): implement? 
*/ + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + /* TODO(carenas): implement */ + return SLJIT_SUCCESS; + default: + /* TODO(carenas): probably should not success by default */ + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return gpr(reg); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + abort(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size); + return push_inst(compiler, ins); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + abort(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + abort(); +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + 
ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, lgr(gpr(dst), fast_link_r)); + + /* memory */ + return store_word(compiler, fast_link_r, dst, dstw, 0); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + /* record jump */ + struct sljit_jump *jump = (struct sljit_jump *) + ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->addr = compiler->size; + + /* emit jump instruction */ + type &= 0xff; + if (type >= SLJIT_FAST_CALL) + PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? 
fast_link_r : link_r, 0))); + else + PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0))); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src & SLJIT_IMM) { + SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */ + FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); + } + else if (src & SLJIT_MEM) + FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); + + /* emit jump instruction */ + if (type >= SLJIT_FAST_CALL) + return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? 
fast_link_r : link_r, src_r)); + + return push_inst(compiler, br(src_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_u8 mask = get_cc(compiler, type & 0xff); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr loc_r = tmp1; + switch (GET_OPCODE(op)) { + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + compiler->status_flags_state = op & SLJIT_SET_Z; + + /* dst is also source operand */ + if (dst & SLJIT_MEM) + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + + break; + case SLJIT_MOV: + case (SLJIT_MOV32 & ~SLJIT_I32_OP): + /* can write straight into destination */ + loc_r = dst_r; + break; + default: + SLJIT_UNREACHABLE(); + } + + /* TODO(mundaym): fold into cmov helper function? */ + #define LEVAL(i) i(loc_r, 1, mask) + if (have_lscond2()) { + FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_I32_OP, lochi, locghi))); + } else { + /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) 
*/ + abort(); + } + #undef LEVAL + + /* apply bitwise op and set condition codes */ + switch (GET_OPCODE(op)) { + #define LEVAL(i) i(dst_r, loc_r) + case SLJIT_AND: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_I32_OP, nr, ngr))); + break; + case SLJIT_OR: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_I32_OP, or, ogr))); + break; + case SLJIT_XOR: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_I32_OP, xr, xgr))); + break; + #undef LEVAL + } + + /* store result to memory if required */ + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 mask = get_cc(compiler, type & 0xff); + sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP); + sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + if (src & SLJIT_IMM) { + /* TODO(mundaym): fast path with lscond2 */ + FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); + } + + #define LEVAL(i) i(dst_r, src_r, mask) + if (have_lscond1()) + return push_inst(compiler, + WHEN2(dst_reg & SLJIT_I32_OP, locr, locgr)); + + #undef LEVAL + + /* TODO(mundaym): implement */ + return SLJIT_ERR_UNSUPPORTED; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +/* On s390x we build a literal pool to hold constants. This has two main + advantages: + + 1. we only need one instruction in the instruction stream (LGRL) + 2. we can store 64 bit addresses and use 32 bit offsets + + To retrofit the extra information needed to build the literal pool we + add a new sljit_s390x_const struct that contains the initial value but + can still be cast to a sljit_const. 
*/ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_s390x_const *const_; + sljit_gpr dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + + const_ = (struct sljit_s390x_const*)ensure_abuf(compiler, + sizeof(struct sljit_s390x_const)); + PTR_FAIL_IF(!const_); + set_const((struct sljit_const*)const_, compiler); + const_->init_value = init_value; + + dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + if (have_genext()) + PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0))); + else { + PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); + } + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */)); + + return (struct sljit_const*)const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + /* Update the constant pool. 
*/ + sljit_uw *ptr = (sljit_uw *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + *ptr = new_target; + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + SLJIT_CACHE_FLUSH(ptr, ptr + 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( + struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_gpr dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + + if (have_genext()) + PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0))); + else { + PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); + } + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0)); + + return put_label; +} + +/* TODO(carenas): EVAL probably should move up or be refactored */ +#undef WHEN2 +#undef EVAL + +#undef tmp1 +#undef tmp0 + +/* TODO(carenas): undef other macros that spill like is_u12? */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeSPARC_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeSPARC_32.c new file mode 100644 index 0000000000..28886405af --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeSPARC_32.c @@ -0,0 +1,286 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst)); + + FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst))); + return (imm & 0x3ff) ? push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS; +} + +#define ARG2(flags, src2) ((flags & SRC2_IMM) ? 
IMM(src2) : S2(src2)) + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_U8) + return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); + return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); + } + else if (dst != src2) + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); + return push_inst(compiler, (op == SLJIT_MOV_S16 ? 
SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); + } + else if (dst != src2) + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS)); + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst))); + + /* Loop. */ + FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); + FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); + return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); + + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_ADDC: + return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SUBC: + return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, 
src2), DR(dst))); + if (!(flags & SET_FLAGS)) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK))); + return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); + + case SLJIT_AND: + return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_OR: + return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_XOR: + return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SHL: + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + + case SLJIT_LSHR: + FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + + case SLJIT_ASHR: + FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? 
SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 reg_index = 8; + sljit_s32 word_reg_index = 8; + sljit_s32 float_arg_index = 1; + sljit_s32 double_arg_count = 0; + sljit_s32 float_offset = (16 + 6) * sizeof(sljit_sw); + sljit_s32 types = 0; + sljit_s32 reg = 0; + sljit_s32 move_to_tmp2 = 0; + + if (src) + reg = reg_map[*src & REG_MASK]; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_index++; + if (reg_index == reg) + move_to_tmp2 = 1; + reg_index++; + break; + case SLJIT_ARG_TYPE_F64: + float_arg_index++; + double_arg_count++; + if (reg_index == reg || reg_index + 1 == reg) + move_to_tmp2 = 1; + reg_index += 2; + break; + default: + if (reg_index != word_reg_index && reg_index < 14 && reg_index == reg) + move_to_tmp2 = 1; + reg_index++; + word_reg_index++; + break; + } + + if (move_to_tmp2) { + move_to_tmp2 = 0; + if (reg < 14) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); + *src = TMP_REG1; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + arg_types = types; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_index--; + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset -= sizeof(sljit_f64); + break; + case SLJIT_ARG_TYPE_F64: + float_arg_index--; + if (float_arg_index == 4 && double_arg_count == 4) { + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), 
MOVABLE_INS)); + } + else + FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset -= sizeof(sljit_f64); + break; + default: + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + float_offset = (16 + 6) * sizeof(sljit_sw); + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + reg_index--; + if (reg_index < 14) + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + float_offset -= sizeof(sljit_f64); + break; + case SLJIT_ARG_TYPE_F64: + reg_index -= 2; + if (reg_index < 14) { + if ((reg_index & 0x1) != 0) { + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + if (reg_index < 13) + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1)); + } + else + FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + } + float_offset -= sizeof(sljit_f64); + break; + default: + reg_index--; + word_reg_index--; + + if (reg_index != word_reg_index) { + if (reg_index < 14) + FAIL_IF(push_inst(compiler, OR | DA(reg_index) | S1(0) | S2A(word_reg_index), reg_index)); + else + FAIL_IF(push_inst(compiler, STW | DA(word_reg_index) | S1(SLJIT_SP) | IMM(92), word_reg_index)); + } + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); + return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 
0); + SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000)); + inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff); + inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, new_constant, executable_offset); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeSPARC_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeSPARC_common.c new file mode 100644 index 0000000000..e833f09d7a --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeSPARC_common.c @@ -0,0 +1,1558 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "SPARC" SLJIT_CPUINFO; +} + +/* Length of an instruction word + Both for sparc-32 and sparc-64 */ +typedef sljit_u32 sljit_ins; + +#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) + +static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) +{ +#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590 + __asm ( + /* if (from == to) return */ + "cmp %i0, %i1\n" + "be .leave\n" + "nop\n" + + /* loop until from >= to */ + ".mainloop:\n" + "flush %i0\n" + "add %i0, 8, %i0\n" + "cmp %i0, %i1\n" + "bcs .mainloop\n" + "nop\n" + + /* The comparison was done above. */ + "bne .leave\n" + /* nop is not necessary here, since the + sub operation has no side effect. */ + "sub %i0, 4, %i0\n" + "flush %i0\n" + ".leave:" + ); +#else + if (SLJIT_UNLIKELY(from == to)) + return; + + do { + __asm__ volatile ( + "flush %0\n" + : : "r"(from) + ); + /* Operates at least on doubleword. */ + from += 2; + } while (from < to); + + if (from == to) { + /* Flush the last word. 
*/ + from --; + __asm__ volatile ( + "flush %0\n" + : : "r"(from) + ); + } +#endif +} + +#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */ + +/* TMP_REG2 is not used by getput_arg */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +/* This register is modified by calls, which affects the instruction + in the delay slot if it is used as a source register. */ +#define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 8, 9, 10, 11, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 12, 13, 15 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 2, 4, 6, 8, 10, 12, 14 +}; + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define D(d) (reg_map[d] << 25) +#define FD(d) (freg_map[d] << 25) +#define FDN(d) ((freg_map[d] | 0x1) << 25) +#define DA(d) ((d) << 25) +#define S1(s1) (reg_map[s1] << 14) +#define FS1(s1) (freg_map[s1] << 14) +#define S1A(s1) ((s1) << 14) +#define S2(s2) (reg_map[s2]) +#define FS2(s2) (freg_map[s2]) +#define FS2N(s2) (freg_map[s2] | 0x1) +#define S2A(s2) (s2) +#define IMM_ARG 0x2000 +#define DOP(op) ((op) << 5) +#define IMM(imm) (((imm) & 0x1fff) | IMM_ARG) + +#define DR(dr) (reg_map[dr]) +#define OPC1(opcode) ((opcode) << 30) +#define OPC2(opcode) ((opcode) << 22) +#define OPC3(opcode) ((opcode) << 19) +#define SET_FLAGS OPC3(0x10) + +#define ADD (OPC1(0x2) | OPC3(0x00)) +#define ADDC (OPC1(0x2) | OPC3(0x08)) +#define AND (OPC1(0x2) | OPC3(0x01)) +#define ANDN (OPC1(0x2) | OPC3(0x05)) +#define CALL (OPC1(0x1)) +#define FABSS (OPC1(0x2) | OPC3(0x34) | DOP(0x09)) 
+#define FADDD (OPC1(0x2) | OPC3(0x34) | DOP(0x42)) +#define FADDS (OPC1(0x2) | OPC3(0x34) | DOP(0x41)) +#define FCMPD (OPC1(0x2) | OPC3(0x35) | DOP(0x52)) +#define FCMPS (OPC1(0x2) | OPC3(0x35) | DOP(0x51)) +#define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e)) +#define FDIVS (OPC1(0x2) | OPC3(0x34) | DOP(0x4d)) +#define FDTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd2)) +#define FDTOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc6)) +#define FITOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc8)) +#define FITOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc4)) +#define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01)) +#define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a)) +#define FMULS (OPC1(0x2) | OPC3(0x34) | DOP(0x49)) +#define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05)) +#define FSTOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc9)) +#define FSTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd1)) +#define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) +#define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) +#define JMPL (OPC1(0x2) | OPC3(0x38)) +#define LDD (OPC1(0x3) | OPC3(0x03)) +#define LDUW (OPC1(0x3) | OPC3(0x00)) +#define NOP (OPC1(0x0) | OPC2(0x04)) +#define OR (OPC1(0x2) | OPC3(0x02)) +#define ORN (OPC1(0x2) | OPC3(0x06)) +#define RDY (OPC1(0x2) | OPC3(0x28) | S1A(0)) +#define RESTORE (OPC1(0x2) | OPC3(0x3d)) +#define SAVE (OPC1(0x2) | OPC3(0x3c)) +#define SETHI (OPC1(0x0) | OPC2(0x04)) +#define SLL (OPC1(0x2) | OPC3(0x25)) +#define SLLX (OPC1(0x2) | OPC3(0x25) | (1 << 12)) +#define SRA (OPC1(0x2) | OPC3(0x27)) +#define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) +#define SRL (OPC1(0x2) | OPC3(0x26)) +#define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) +#define STDF (OPC1(0x3) | OPC3(0x27)) +#define STF (OPC1(0x3) | OPC3(0x24)) +#define STW (OPC1(0x3) | OPC3(0x04)) +#define SUB (OPC1(0x2) | OPC3(0x04)) +#define SUBC (OPC1(0x2) | OPC3(0x0c)) +#define TA (OPC1(0x2) | OPC3(0x3a) | (8 << 25)) +#define WRY (OPC1(0x2) | OPC3(0x30) | DA(0)) +#define XOR (OPC1(0x2) | OPC3(0x03)) +#define XNOR (OPC1(0x2) | OPC3(0x07)) + +#if (defined 
SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define MAX_DISP (0x1fffff) +#define MIN_DISP (-0x200000) +#define DISP_MASK (0x3fffff) + +#define BICC (OPC1(0x0) | OPC2(0x2)) +#define FBFCC (OPC1(0x0) | OPC2(0x6)) +#define SLL_W SLL +#define SDIV (OPC1(0x2) | OPC3(0x0f)) +#define SMUL (OPC1(0x2) | OPC3(0x0b)) +#define UDIV (OPC1(0x2) | OPC3(0x0e)) +#define UMUL (OPC1(0x2) | OPC3(0x0a)) +#else +#define SLL_W SLLX +#endif + +#define SIMM_MAX (0x0fff) +#define SIMM_MIN (-0x1000) + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. */ +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) +{ + sljit_ins *ptr; + SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS + || (delay_slot & DST_INS_MASK) == MOVABLE_INS + || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f)); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return code_ptr; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + inst = (sljit_ins*)jump->addr; + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (jump->flags & IS_CALL) { + /* Call is always patchable on sparc 32. 
*/ + jump->flags |= PATCH_CALL; + if (jump->flags & IS_MOVABLE) { + inst[0] = inst[-1]; + inst[-1] = CALL; + jump->addr -= sizeof(sljit_ins); + return inst; + } + inst[0] = CALL; + inst[1] = NOP; + return inst + 1; + } +#else + /* Both calls and BPr instructions shall not pass this point. */ +#error "Implementation required" +#endif + + if (jump->flags & IS_COND) + inst--; + + diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2; + + if (jump->flags & IS_MOVABLE) { + if (diff <= MAX_DISP && diff >= MIN_DISP) { + jump->flags |= PATCH_B; + inst--; + if (jump->flags & IS_COND) { + saved_inst = inst[0]; + inst[0] = inst[1] ^ (1 << 28); + inst[1] = saved_inst; + } else { + inst[1] = inst[0]; + inst[0] = BICC | DA(0x8); + } + jump->addr = (sljit_uw)inst; + return inst + 1; + } + } + + diff += sizeof(sljit_ins); + + if (diff <= MAX_DISP && diff >= MIN_DISP) { + jump->flags |= PATCH_B; + if (jump->flags & IS_COND) + inst[0] ^= (1 << 28); + else + inst[0] = BICC | DA(0x8); + inst[1] = NOP; + jump->addr = (sljit_uw)inst; + return inst + 1; + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = 
compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. */ + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + /* Just recording the address. */ + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_CALL) { + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); + buf_ptr[0] = CALL | (addr & 0x3fffffff); + break; + } + if (jump->flags & PATCH_B) { + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); + buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); + break; + } + + /* Set the fields of immediate loads. */ +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000)); + buf_ptr[0] |= (addr >> 10) & 0x3fffff; + buf_ptr[1] |= addr & 0x3ff; +#else +#error "Implementation required" +#endif + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + addr = put_label->label->addr; + buf_ptr = (sljit_ins *)put_label->addr; + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000)); + buf_ptr[0] |= (addr >> 10) & 0x3fffff; + buf_ptr[1] |= addr & 0x3ff; +#else +#error "Implementation required" +#endif + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef 
SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_ZERO_REGISTER: + return 1; + +#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) + case SLJIT_HAS_CMOV: + return 1; +#endif + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define ARG_TEST 0x00020 +#define ALT_KEEP_CACHE 0x00040 +#define CUMULATIVE_OP 0x00080 +#define IMM_OP 0x00100 +#define SRC2_IMM 0x00200 + +#define REG_DEST 0x00400 +#define REG2_SOURCE 0x00800 +#define SLOW_SRC1 0x01000 +#define SLOW_SRC2 0x02000 +#define SLOW_DEST 0x04000 + +/* SET_FLAGS (0x10 << 19) also belong here! 
*/ + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#include "sljitNativeSPARC_32.c" +#else +#include "sljitNativeSPARC_64.c" +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; + compiler->local_size = local_size; + + if (local_size <= SIMM_MAX) { + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); + } + + /* Arguments are in their appropriate registers. 
*/ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + if (op != SLJIT_MOV || !FAST_IS_REG(src)) { + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + src = SLJIT_R0; + } + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), +/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), +/* u b s */ OPC1(3) | OPC3(0x05) /* stb */, +/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */, +/* u h s */ OPC1(3) | OPC3(0x06) /* sth */, +/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */, +/* u i s */ OPC1(3) | OPC3(0x04) /* stw */, +/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */, + +/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, 
OPC1(3) | OPC3(0x0e) /* stx */), +/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), +/* s b s */ OPC1(3) | OPC3(0x05) /* stb */, +/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */, +/* s h s */ OPC1(3) | OPC3(0x06) /* sth */, +/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */, +/* s i s */ OPC1(3) | OPC3(0x04) /* stw */, +/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */), + +/* d s */ OPC1(3) | OPC3(0x27), +/* d l */ OPC1(3) | OPC3(0x23), +/* s s */ OPC1(3) | OPC3(0x24), +/* s l */ OPC1(3) | OPC3(0x20), +}; + +#undef ARCH_32_64 + +/* Can perform an operation using at most 1 instruction. */ +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) + || ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) { + /* Works for both absoulte and relative addresses (immediate case). */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] + | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)) + | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)), + ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS)); + return -1; + } + return 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. 
*/ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + SLJIT_ASSERT(argw); + next_argw &= 0x3; + if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw) + return 1; + return 0; + } + + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) + return 1; + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + sljit_s32 base, arg2, delay_slot; + sljit_ins dest; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + base = arg & REG_MASK; + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + /* Using the cache. */ + if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw)) + arg2 = TMP_REG3; + else { + if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + arg2 = TMP_REG3; + } + else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg)) + arg2 = reg; + else /* It must be a mov operation, so tmp1 must be free to use. */ + arg2 = TMP_REG1; + FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2))); + } + } + else { + /* Using the cache. 
*/ + if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + arg2 = TMP_REG3; + } else { + if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) { + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + arg2 = TMP_REG3; + } + else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base) + arg2 = reg; + else /* It must be a mov operation, so tmp1 must be free to use. */ + arg2 = TMP_REG1; + FAIL_IF(load_immediate(compiler, arg2, argw)); + } + } + + dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)); + delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS; + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot); +} + +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg, arg, argw, 0, 0); +} + +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 
src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (dst != SLJIT_UNUSED) { + if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + } + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w) { + if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } + } + if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { + if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_r = src1; + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + else + src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. 
*/ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + else { + src2_r = 0; + if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM)) + dst_r = 0; + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, TA, UNMOVABLE_INS); + case 
SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1)); +#else +#error "Implementation required" +#endif + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if ((op | 0x2) == SLJIT_DIV_UW) + FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); + else { + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); + } + if (op <= SLJIT_DIVMOD_SW) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + if (op >= SLJIT_DIV_UW) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); + return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)); +#else +#error "Implementation required" +#endif + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_S32: + return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + + case SLJIT_NOT: + case SLJIT_CLZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_MUL: + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + SLJIT_UNREACHABLE(); +#endif + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK))); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw)); + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double) +#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | FD(TMP_FREG1) | FS2(src), MOVABLE_INS)); + + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET); + } + + /* Store the integer value from a VFP register. */ + return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + srcw = 0; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = SLJIT_MEM1(SLJIT_SP); + srcw = FLOAT_TMP_MEM_OFFSET; + } + + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | FD(dst_r) | FS2(TMP_FREG1), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | FS1(src1) | FS2(src2), FCC_IS_SET | MOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) { + FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS)); + if (!(op & SLJIT_F32_OP)) + FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); + } + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_F32_OP)) + FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_F32_OP)) + FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), MOVABLE_INS)); + op ^= SLJIT_F32_OP; + break; + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, 
FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +#undef FLOAT_DATA +#undef SELECT_FOP + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), UNMOVABLE_INS); + + /* Memory. */ + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw)); + compiler->delay_slot = UNMOVABLE_INS; + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL_F64: /* Unordered. */ + return DA(0x1); + + case SLJIT_NOT_EQUAL: + case SLJIT_EQUAL_F64: + return DA(0x9); + + case SLJIT_LESS: + case SLJIT_GREATER_F64: /* Unordered. */ + return DA(0x5); + + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL_F64: + return DA(0xd); + + case SLJIT_GREATER: + case SLJIT_GREATER_EQUAL_F64: /* Unordered. 
*/ + return DA(0xc); + + case SLJIT_LESS_EQUAL: + case SLJIT_LESS_F64: + return DA(0x4); + + case SLJIT_SIG_LESS: + return DA(0x3); + + case SLJIT_SIG_GREATER_EQUAL: + return DA(0xb); + + case SLJIT_SIG_GREATER: + return DA(0xa); + + case SLJIT_SIG_LESS_EQUAL: + return DA(0x2); + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return DA(0x9); + + case SLJIT_UNORDERED_F64: + return DA(0x7); + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return DA(0x1); + + case SLJIT_ORDERED_F64: + return DA(0xf); + + default: + SLJIT_UNREACHABLE(); + return DA(0x8); + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type < SLJIT_EQUAL_F64) { + jump->flags |= IS_COND; + if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) + jump->flags |= IS_MOVABLE; +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); +#else +#error "Implementation required" +#endif + } + else if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET)) + jump->flags |= IS_MOVABLE; +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); +#else +#error "Implementation required" +#endif + } + else { + if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + if (type >= SLJIT_FAST_CALL) + jump->flags |= 
IS_CALL; + } + + PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG1) | IMM(0), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + sljit_s32 src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + src_r = src; + else if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = srcw; + + if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_CALL; + + FAIL_IF(emit_const(compiler, TMP_REG1, 0)); + src_r = TMP_REG1; + } + else { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src_r = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? 
TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS)); + if (jump) + jump->addr = compiler->size; + return push_inst(compiler, NOP, UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + FAIL_IF(call_with_args(compiler, arg_types, &src)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + op = GET_OPCODE(op); + reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); + + type &= 0xff; + if (type < SLJIT_EQUAL_F64) + FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); + + FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); + + if (op >= SLJIT_ADD) { + flags |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; + if (dst & SLJIT_MEM) + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return emit_op(compiler, op, flags, dst, 0, dst, 0, TMP_REG2, 0); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw); +#else +#error "Implementation required" +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; +#else +#error "Implementation required" +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + return put_label; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_32.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_32.c new file mode 100644 index 0000000000..79a7e8bba5 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_32.c @@ -0,0 +1,928 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 32-bit arch dependent functions. */ + +static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(1 + sizeof(sljit_sw)); + *inst++ = opcode; + sljit_unaligned_store_sw(inst, imm); + return SLJIT_SUCCESS; +} + +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) +{ + sljit_s32 type = jump->flags >> TYPE_SHIFT; + + if (type == SLJIT_JUMP) { + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + *code_ptr++ = CALL_i32; + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MW; + else + sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset); + code_ptr += 4; + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 
fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args, size; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Emit ENDBR32 at function entry if needed. */ + FAIL_IF(emit_endbranch(compiler)); + + args = get_arg_count(arg_types); + compiler->args = args; + + /* [esp+0] for saving temporaries and function calls. */ + compiler->stack_tmp_size = 2 * sizeof(sljit_sw); + +#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (scratches > 3) + compiler->stack_tmp_size = 3 * sizeof(sljit_sw); +#endif + + compiler->saveds_offset = compiler->stack_tmp_size; + if (scratches > 3) + compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); + + compiler->locals_offset = compiler->saveds_offset; + + if (saveds > 3) + compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); + + if (options & SLJIT_F64_ALIGNMENT) + compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); + + size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); +#else + size += (args > 0 ? 
(2 + args * 3) : 0); +#endif + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + PUSH_REG(reg_map[TMP_REG1]); +#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args > 0) { + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; + } +#endif + if (saveds > 2 || scratches > 9) + PUSH_REG(reg_map[SLJIT_S2]); + if (saveds > 1 || scratches > 10) + PUSH_REG(reg_map[SLJIT_S1]); + if (saveds > 0 || scratches > 11) + PUSH_REG(reg_map[SLJIT_S0]); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args > 0) { + inst[0] = MOV_r_rm; + inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; + inst += 2; + } + if (args > 1) { + inst[0] = MOV_r_rm; + inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; + inst += 2; + } + if (args > 2) { + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; + inst[2] = 0x24; + inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ + } +#else + if (args > 0) { + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; + inst[2] = sizeof(sljit_sw) * 2; + inst += 3; + } + if (args > 1) { + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; + inst[2] = sizeof(sljit_sw) * 3; + inst += 3; + } + if (args > 2) { + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; + inst[2] = sizeof(sljit_sw) * 4; + } +#endif + + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); + +#if defined(__APPLE__) + /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ + saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? 
saveds : 3)) * sizeof(sljit_uw); + local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; +#else + if (options & SLJIT_F64_ALIGNMENT) + local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); + else + local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); +#endif + + compiler->local_size = local_size; + +#ifdef _WIN32 + if (local_size > 0) { + if (local_size <= 4 * 4096) { + if (local_size > 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); + EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12); + + SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096); + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = JNE_i8; + inst[1] = (sljit_s8) -16; + } + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + } +#endif + + SLJIT_ASSERT(local_size > 0); + +#if !defined(__APPLE__) + if (options & SLJIT_F64_ALIGNMENT) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0); + + /* Some space might allocated during sljit_grow_stack() above on WIN32. 
*/ + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw))); + +#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->local_size > 1024) + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), + TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw))); +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); + + INC_SIZE(6); + inst[0] = GROUP_BINARY_81; + inst[1] = MOD_REG | AND | reg_map[SLJIT_SP]; + sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1)); + + /* The real local size must be used. */ + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0); + } +#endif + return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->args = get_arg_count(arg_types); + + /* [esp+0] for saving temporaries and function calls. */ + compiler->stack_tmp_size = 2 * sizeof(sljit_sw); + +#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (scratches > 3) + compiler->stack_tmp_size = 3 * sizeof(sljit_sw); +#endif + + compiler->saveds_offset = compiler->stack_tmp_size; + if (scratches > 3) + compiler->saveds_offset += ((scratches > (3 + 6)) ? 
6 : (scratches - 3)) * sizeof(sljit_sw); + + compiler->locals_offset = compiler->saveds_offset; + + if (saveds > 3) + compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); + + if (options & SLJIT_F64_ALIGNMENT) + compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); + +#if defined(__APPLE__) + saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; +#else + if (options & SLJIT_F64_ALIGNMENT) + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); + else + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 size; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + SLJIT_ASSERT(compiler->args >= 0); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + SLJIT_ASSERT(compiler->local_size > 0); + +#if !defined(__APPLE__) + if (compiler->options & SLJIT_F64_ALIGNMENT) + EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size) + else + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); +#else + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); +#endif + + size = 2 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args > 2) + size += 2; +#endif + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + + if (compiler->saveds > 0 || compiler->scratches > 11) + POP_REG(reg_map[SLJIT_S0]); + if (compiler->saveds > 1 || compiler->scratches > 10) + POP_REG(reg_map[SLJIT_S1]); + if (compiler->saveds > 2 || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S2]); + POP_REG(reg_map[TMP_REG1]); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args > 2) + RET_I16(sizeof(sljit_sw)); + else + RET(); +#else + RET(); +#endif + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* Size contains the flags as well. */ +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_s32 flags = size & ~0xf; + sljit_s32 inst_size; + + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. 
*/ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + /* We don't support (%ebp). */ + SLJIT_ASSERT(!(b & SLJIT_MEM) || immb || reg_map[b & REG_MASK] != 5); + + size &= 0xf; + inst_size = size; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if ((b & REG_MASK) == SLJIT_UNUSED) + inst_size += sizeof(sljit_sw); + else if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_sw); + } + + if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) + inst_size += 1; /* SIB byte. */ + } + + /* Calculate size of a. */ + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= 0x1f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_sw); + } + else + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + + buf_ptr = inst + size; + + /* Encode mod/rm byte. 
*/ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a & SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = reg_map[a] << 3; + else + *buf_ptr = a << 3; + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b); + else if ((b & REG_MASK) != SLJIT_UNUSED) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) + *buf_ptr++ |= reg_map[b & REG_MASK]; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3); + } + + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = immb; /* 8 bit displacement. */ + else { + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6); + } + } + else { + *buf_ptr++ |= 0x05; + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = imma; + else if (flags & EX86_HALF_ARG) + sljit_unaligned_store_s16(buf_ptr, imma); + else if (!(flags & EX86_SHIFT_INS)) + sljit_unaligned_store_sw(buf_ptr, imma); + } + + return !(flags & EX86_SHIFT_INS) ? 
inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + +static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +{ + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count = 0; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + stack_size += sizeof(sljit_f64); + break; + default: + word_arg_count++; + if (word_arg_count > 2) + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (word_arg_count_ptr) + *word_arg_count_ptr = word_arg_count; + + return stack_size; +} + +static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) +{ + sljit_u8 *inst; + sljit_s32 float_arg_count; + + if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + PUSH_REG(reg_map[SLJIT_R2]); + } + else if (stack_size > 0) { + if (word_arg_count >= 4) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + + stack_size = 0; + arg_types >>= SLJIT_DEF_SHIFT; + word_arg_count = 0; + float_arg_count = 0; + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + 
FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f64); + break; + default: + word_arg_count++; + if (word_arg_count == 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0); + stack_size += sizeof(sljit_sw); + } + else if (word_arg_count == 4) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0); + stack_size += sizeof(sljit_sw); + } + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + } + + if (word_arg_count > 0) { + if (swap_args) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + + *inst++ = XCHG_EAX_r | reg_map[SLJIT_R2]; + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; + } + } + + return SLJIT_SUCCESS; +} + +#endif + +static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +{ + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count = 0; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + stack_size += sizeof(sljit_f64); + break; + default: + word_arg_count++; + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (word_arg_count_ptr) + *word_arg_count_ptr = word_arg_count; + + if (stack_size <= compiler->stack_tmp_size) + return 0; + +#if defined(__APPLE__) + return ((stack_size - compiler->stack_tmp_size + 15) & ~15); +#else + return stack_size - compiler->stack_tmp_size; +#endif +} + +static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count) +{ + sljit_s32 float_arg_count = 0; + + if (word_arg_count >= 4) + EMIT_MOV(compiler, TMP_REG1, 0, 
SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + + if (stack_size > 0) + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + + stack_size = 0; + word_arg_count = 0; + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f64); + break; + default: + word_arg_count++; + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0); + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size) +{ + sljit_u8 *inst; + sljit_s32 single; + + if (stack_size > 0) + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + + if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + return SLJIT_SUCCESS; + + single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + inst[0] = single ? 
FSTPS : FSTPD; + inst[1] = (0x03 << 3) | 0x04; + inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; + + return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if ((type & 0xff) == SLJIT_CALL) { + stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); + PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0)); + return jump; + } +#endif + + stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); + PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size)); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + sljit_s32 swap_args; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 
+ SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3); + + if ((type & 0xff) == SLJIT_CALL) { + stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); + swap_args = 0; + + if (word_arg_count > 0) { + if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) { + swap_args = 1; + if (((src & REG_MASK) | 0x2) == SLJIT_R2) + src ^= 0x2; + if ((OFFS_REG(src) | 0x2) == SLJIT_R2) + src ^= TO_OFFS_REG(0x2); + } + } + + FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); + + compiler->saveds_offset += stack_size; + compiler->locals_offset += stack_size; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + compiler->saveds_offset -= stack_size; + compiler->locals_offset -= stack_size; + + return post_call_with_args(compiler, arg_types, 0); + } +#endif + + stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); + FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); + + compiler->saveds_offset += stack_size; + compiler->locals_offset += stack_size; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + compiler->saveds_offset -= stack_size; + compiler->locals_offset -= stack_size; + + return post_call_with_args(compiler, arg_types, stack_size); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + /* For UNUSED dst. Uncommon, but possible. 
*/ + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + + if (FAST_IS_REG(dst)) { + /* Unused dest is possible here. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1); + POP_REG(reg_map[dst]); + return SLJIT_SUCCESS; + } + + /* Memory. */ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (FAST_IS_REG(src)) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_map[src]); + } + else { + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + + RET(); + return SLJIT_SUCCESS; +} + +static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) +{ + sljit_s32 size, saved_size; + sljit_s32 has_f64_aligment; + + /* Don't adjust shadow stack if it isn't enabled. */ + if (!cpu_has_shadow_stack ()) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(compiler->args >= 0); + SLJIT_ASSERT(compiler->local_size > 0); + +#if !defined(__APPLE__) + has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT; +#else + has_f64_aligment = 0; +#endif + + size = compiler->local_size; + saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * sizeof(sljit_uw); + if (has_f64_aligment) { + /* mov TMP_REG1, [esp + local_size]. */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size); + /* mov TMP_REG1, [TMP_REG1+ saved_size]. */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size); + /* Move return address to [esp]. 
*/ + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0); + size = 0; + } else + size += saved_size; + + return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_64.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_64.c new file mode 100644 index 0000000000..e85b56a61a --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_64.c @@ -0,0 +1,918 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 64-bit arch dependent functions. 
*/ + +static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + sljit_u8 *inst; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(2 + sizeof(sljit_sw)); + *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); + *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); + sljit_unaligned_store_sw(inst, imm); + return SLJIT_SUCCESS; +} + +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) +{ + sljit_s32 type = jump->flags >> TYPE_SHIFT; + + int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); + + /* The relative jump below specialized for this case. */ + SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); + + if (type < SLJIT_JUMP) { + /* Invert type. */ + *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; + *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); + } + + *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B); + *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MD; + else if (short_addr) + sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); + else + sljit_unaligned_store_sw(code_ptr, jump->u.target); + + code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw); + + *code_ptr++ = REX_B; + *code_ptr++ = GROUP_FF; + *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]; + + return code_ptr; +} + +static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label) +{ + if (max_label > HALFWORD_MAX) { + put_label->addr -= put_label->flags; + put_label->flags = PATCH_MD; + return code_ptr; + } + + if (put_label->flags == 0) { + /* Destination is register. 
*/ + code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw); + + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); + + if ((code_ptr[0] & 0x07) != 0) { + code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08); + code_ptr += 2 + sizeof(sljit_s32); + } + else { + code_ptr[0] = code_ptr[1]; + code_ptr += 1 + sizeof(sljit_s32); + } + + put_label->addr = (sljit_uw)code_ptr; + return code_ptr; + } + + code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); + SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); + + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + + if ((code_ptr[1] & 0xf8) == MOV_r_i32) { + code_ptr += 2 + sizeof(sljit_uw); + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + } + + SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); + + code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4); + code_ptr[1] = MOV_rm_i32; + code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3)); + + code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); + put_label->addr = (sljit_uw)code_ptr; + put_label->flags = 0; + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args, i, tmp, size, saved_register_size; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Emit ENDBR64 at function entry if needed. */ + FAIL_IF(emit_endbranch(compiler)); + + compiler->mode32 = 0; + +#ifdef _WIN64 + /* Two/four register slots for parameters plus space for xmm6 register if needed. 
*/ + if (fscratches >= 6 || fsaveds >= 1) + compiler->locals_offset = 6 * sizeof(sljit_sw); + else + compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); +#endif + + /* Including the return address saved by the call instruction. */ + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + + args = get_arg_count(arg_types); + + if (args > 0) { + size = args * 3; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + +#ifndef _WIN64 + if (args > 0) { + inst[0] = REX_W; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; + inst += 3; + } + if (args > 1) { + inst[0] = REX_W | REX_R; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; + inst += 3; + } + if (args > 2) { + inst[0] = REX_W | REX_R; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; + } +#else + if (args > 0) { + inst[0] = REX_W; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; + inst += 3; + } + if (args > 1) { + inst[0] = REX_W; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; + inst += 3; + } + if (args > 2) { + inst[0] = REX_W | REX_B; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; + } +#endif + } + + local_size = ((local_size + 
SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; + compiler->local_size = local_size; + +#ifdef _WIN64 + if (local_size > 0) { + if (local_size <= 4 * 4096) { + if (local_size > 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12); + + SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); + + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096); + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = JNE_i8; + inst[1] = (sljit_s8) -19; + } + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + } +#endif + + if (local_size > 0) { + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + } + +#ifdef _WIN64 + /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ + if (fscratches >= 6 || fsaveds >= 1) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + sljit_unaligned_store_s32(inst, 0x20247429); + } +#endif + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_register_size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + 
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + +#ifdef _WIN64 + /* Two/four register slots for parameters plus space for xmm6 register if needed. */ + if (fscratches >= 6 || fsaveds >= 1) + compiler->locals_offset = 6 * sizeof(sljit_sw); + else + compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); +#endif + + /* Including the return address saved by the call instruction. */ + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 i, tmp, size; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + +#ifdef _WIN64 + /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ + if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + sljit_unaligned_store_s32(inst, 0x20247428); + } +#endif + + if (compiler->local_size > 0) { + if (compiler->local_size <= 127) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | ADD | 4; + *inst = compiler->local_size; + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); + INC_SIZE(7); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | ADD | 4; + sljit_unaligned_store_s32(inst, compiler->local_size); + } + } + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 
2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + length); + FAIL_IF(!inst); + INC_SIZE(length); + if (rex) + *inst++ = rex; + *inst++ = opcode; + sljit_unaligned_store_s32(inst, imm); + return SLJIT_SUCCESS; +} + +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 rex = 0; + sljit_s32 flags = size & ~0xf; + sljit_s32 inst_size; + + /* The immediate operand must be 32 bit. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. 
*/ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (!compiler->mode32 && !(flags & EX86_NO_REXW)) + rex |= REX_W; + else if (flags & EX86_REX) + rex |= REX; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK)) { + if (NOT_HALFWORD(immb)) { + PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG2); + else + b |= TMP_REG2; + } + else if (reg_lmap[b & REG_MASK] == 4) + b |= TO_OFFS_REG(SLJIT_SP); + } + + if ((b & REG_MASK) == SLJIT_UNUSED) + inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ + else { + if (reg_map[b & REG_MASK] >= 8) + rex |= REX_B; + + if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_s32); + } + else if (reg_lmap[b & REG_MASK] == 5) + inst_size += sizeof(sljit_s8); + + if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { + inst_size += 1; /* SIB byte. 
*/ + if (reg_map[OFFS_REG(b)] >= 8) + rex |= REX_X; + } + } + } + else if (!(flags & EX86_SSE2_OP2)) { + if (reg_map[b] >= 8) + rex |= REX_B; + } + else if (freg_map[b] >= 8) + rex |= REX_B; + + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= compiler->mode32 ? 0x1f : 0x3f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_s32); + } + else { + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ + if (!(flags & EX86_SSE2_OP1)) { + if (reg_map[a] >= 8) + rex |= REX_R; + } + else if (freg_map[a] >= 8) + rex |= REX_R; + } + + if (rex) + inst_size++; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + if (rex) + *inst++ = rex; + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a & SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = reg_lmap[a] << 3; + else + *buf_ptr = freg_lmap[a] << 3; + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? 
reg_lmap[b] : freg_lmap[b]); + else if ((b & REG_MASK) != SLJIT_UNUSED) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) + *buf_ptr++ |= reg_lmap[b & REG_MASK]; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3); + } + + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = immb; /* 8 bit displacement. */ + else { + sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); + } + } + } + else { + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr |= 0x40; + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr++ = 0; + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = 0x25; + sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = imma; + else if (flags & EX86_HALF_ARG) + sljit_unaligned_store_s16(buf_ptr, imma); + else if (!(flags & EX86_SHIFT_INS)) + sljit_unaligned_store_s32(buf_ptr, imma); + } + + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +#ifndef _WIN64 + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +{ + sljit_s32 src = src_ptr ? (*src_ptr) : 0; + sljit_s32 word_arg_count = 0; + + SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2); + + compiler->mode32 = 0; + + /* Remove return value. 
*/ + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + word_arg_count++; + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (word_arg_count == 0) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + *src_ptr = TMP_REG2; + } + else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2) + *src_ptr = TMP_REG1; + + if (word_arg_count >= 3) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0); + return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0); +} + +#else + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +{ + sljit_s32 src = src_ptr ? (*src_ptr) : 0; + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 data_trandfer = 0; + static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 }; + + SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9); + + compiler->mode32 = 0; + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count++; + float_arg_count++; + + if (arg_count != float_arg_count) + data_trandfer = 1; + break; + default: + arg_count++; + word_arg_count++; + + if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) { + data_trandfer = 1; + + if (src == word_arg_regs[arg_count]) { + EMIT_MOV(compiler, TMP_REG2, 0, src, 0); + *src_ptr = TMP_REG2; + } + } + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (!data_trandfer) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + *src_ptr = TMP_REG2; + } + + while (types) { + switch (types & 
SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0)); + arg_count--; + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F64: + if (arg_count != float_arg_count) + FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0)); + arg_count--; + float_arg_count--; + break; + default: + if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) + EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0); + arg_count--; + word_arg_count--; + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + FAIL_IF(call_with_args(compiler, arg_types, &src, srcw)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. 
*/ + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + + if (FAST_IS_REG(dst)) { + if (reg_map[dst] < 8) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + POP_REG(reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = REX_B; + POP_REG(reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + + if (FAST_IS_REG(src)) { + if (reg_map[src] < 8) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_lmap[src]); + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1); + FAIL_IF(!inst); + + INC_SIZE(2 + 1); + *inst++ = REX_B; + PUSH_REG(reg_lmap[src]); + } + } + else { + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + + RET(); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Extend input */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + + compiler->mode32 = 0; + + if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) + return SLJIT_SUCCESS; /* Empty instruction. 
*/ + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { + if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + return emit_load_imm64(compiler, dst, srcw); + } + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + compiler->mode32 = 0; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else { + if (sign) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = MOVSXD_r_rm; + } else { + compiler->mode32 = 1; + FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); + compiler->mode32 = 0; + } + } + + if (dst & SLJIT_MEM) { + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + compiler->mode32 = 0; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) +{ + sljit_s32 tmp, size; + + /* Don't adjust shadow stack if it isn't enabled. */ + if (!cpu_has_shadow_stack ()) + return SLJIT_SUCCESS; + + size = compiler->local_size; + tmp = compiler->scratches; + if (tmp >= SLJIT_FIRST_SAVED_REG) + size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * sizeof(sljit_uw); + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + if (SLJIT_S0 >= tmp) + size += (SLJIT_S0 - tmp + 1) * sizeof(sljit_uw); + + return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_common.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_common.c new file mode 100644 index 0000000000..515d98aefd --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitNativeX86_common.c @@ -0,0 +1,3140 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + return "x86" SLJIT_CPUINFO " ABI:fastcall"; +#else + return "x86" SLJIT_CPUINFO; +#endif +} + +/* + 32b register indexes: + 0 - EAX + 1 - ECX + 2 - EDX + 3 - EBX + 4 - ESP + 5 - EBP + 6 - ESI + 7 - EDI +*/ + +/* + 64b register indexes: + 0 - RAX + 1 - RCX + 2 - RDX + 3 - RBX + 4 - RSP + 5 - RBP + 6 - RSI + 7 - RDI + 8 - R8 - From now on REX prefix is required + 9 - R9 + 10 - R10 + 11 - R11 + 12 - R12 + 13 - R13 + 14 - R14 + 15 - R15 +*/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { + 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5 +}; + +#define CHECK_EXTRA_REGS(p, w, do) \ + if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ + if (p <= compiler->scratches) \ + w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \ + else \ + w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \ + p = SLJIT_MEM1(SLJIT_SP); \ + do; \ + } + +#else /* SLJIT_CONFIG_X86_32 */ + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + +/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present + Note: avoid to use r12 and r13 for memory addessing + therefore r12 is better to be a higher saved register. */ +#ifndef _WIN64 +/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9 +}; +/* low-map. reg_map & 0x7. */ +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 +}; +#else +/* Args: rcx(=1), rdx(=2), r8, r9. 
Scratches: rax(=0), r10, r11 */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10 +}; +/* low-map. reg_map & 0x7. */ +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2 +}; +#endif + +/* Args: xmm0-xmm3 */ +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 4, 0, 1, 2, 3, 5, 6 +}; +/* low-map. freg_map & 0x7. */ +static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 4, 0, 1, 2, 3, 5, 6 +}; + +#define REX_W 0x48 +#define REX_R 0x44 +#define REX_X 0x42 +#define REX_B 0x41 +#define REX 0x40 + +#ifndef _WIN64 +#define HALFWORD_MAX 0x7fffffffl +#define HALFWORD_MIN -0x80000000l +#else +#define HALFWORD_MAX 0x7fffffffll +#define HALFWORD_MIN -0x80000000ll +#endif + +#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) +#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) + +#define CHECK_EXTRA_REGS(p, w, do) + +#endif /* SLJIT_CONFIG_X86_32 */ + +#define TMP_FREG (0) + +/* Size flags for emit_x86_instruction: */ +#define EX86_BIN_INS 0x0010 +#define EX86_SHIFT_INS 0x0020 +#define EX86_REX 0x0040 +#define EX86_NO_REXW 0x0080 +#define EX86_BYTE_ARG 0x0100 +#define EX86_HALF_ARG 0x0200 +#define EX86_PREF_66 0x0400 +#define EX86_PREF_F2 0x0800 +#define EX86_PREF_F3 0x1000 +#define EX86_SSE2_OP1 0x2000 +#define EX86_SSE2_OP2 0x4000 +#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define ADD (/* BINARY */ 0 << 3) +#define ADD_EAX_i32 0x05 +#define ADD_r_rm 0x03 +#define ADD_rm_r 0x01 +#define ADDSD_x_xm 0x58 +#define ADC (/* BINARY */ 2 << 3) +#define ADC_EAX_i32 0x15 +#define ADC_r_rm 0x13 +#define ADC_rm_r 0x11 +#define AND (/* BINARY */ 4 << 3) +#define AND_EAX_i32 0x25 +#define 
AND_r_rm 0x23 +#define AND_rm_r 0x21 +#define ANDPD_x_xm 0x54 +#define BSR_r_rm (/* GROUP_0F */ 0xbd) +#define CALL_i32 0xe8 +#define CALL_rm (/* GROUP_FF */ 2 << 3) +#define CDQ 0x99 +#define CMOVE_r_rm (/* GROUP_0F */ 0x44) +#define CMP (/* BINARY */ 7 << 3) +#define CMP_EAX_i32 0x3d +#define CMP_r_rm 0x3b +#define CMP_rm_r 0x39 +#define CVTPD2PS_x_xm 0x5a +#define CVTSI2SD_x_rm 0x2a +#define CVTTSD2SI_r_xm 0x2c +#define DIV (/* GROUP_F7 */ 6 << 3) +#define DIVSD_x_xm 0x5e +#define FSTPS 0xd9 +#define FSTPD 0xdd +#define INT3 0xcc +#define IDIV (/* GROUP_F7 */ 7 << 3) +#define IMUL (/* GROUP_F7 */ 5 << 3) +#define IMUL_r_rm (/* GROUP_0F */ 0xaf) +#define IMUL_r_rm_i8 0x6b +#define IMUL_r_rm_i32 0x69 +#define JE_i8 0x74 +#define JNE_i8 0x75 +#define JMP_i8 0xeb +#define JMP_i32 0xe9 +#define JMP_rm (/* GROUP_FF */ 4 << 3) +#define LEA_r_m 0x8d +#define MOV_r_rm 0x8b +#define MOV_r_i32 0xb8 +#define MOV_rm_r 0x89 +#define MOV_rm_i32 0xc7 +#define MOV_rm8_i8 0xc6 +#define MOV_rm8_r8 0x88 +#define MOVSD_x_xm 0x10 +#define MOVSD_xm_x 0x11 +#define MOVSXD_r_rm 0x63 +#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) +#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) +#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) +#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) +#define MUL (/* GROUP_F7 */ 4 << 3) +#define MULSD_x_xm 0x59 +#define NEG_rm (/* GROUP_F7 */ 3 << 3) +#define NOP 0x90 +#define NOT_rm (/* GROUP_F7 */ 2 << 3) +#define OR (/* BINARY */ 1 << 3) +#define OR_r_rm 0x0b +#define OR_EAX_i32 0x0d +#define OR_rm_r 0x09 +#define OR_rm8_r8 0x08 +#define POP_r 0x58 +#define POP_rm 0x8f +#define POPF 0x9d +#define PREFETCH 0x18 +#define PUSH_i32 0x68 +#define PUSH_r 0x50 +#define PUSH_rm (/* GROUP_FF */ 6 << 3) +#define PUSHF 0x9c +#define RET_near 0xc3 +#define RET_i16 0xc2 +#define SBB (/* BINARY */ 3 << 3) +#define SBB_EAX_i32 0x1d +#define SBB_r_rm 0x1b +#define SBB_rm_r 0x19 +#define SAR (/* SHIFT */ 7 << 3) +#define SHL (/* SHIFT */ 4 << 3) +#define SHR (/* SHIFT */ 5 << 3) +#define SUB (/* 
BINARY */ 5 << 3) +#define SUB_EAX_i32 0x2d +#define SUB_r_rm 0x2b +#define SUB_rm_r 0x29 +#define SUBSD_x_xm 0x5c +#define TEST_EAX_i32 0xa9 +#define TEST_rm_r 0x85 +#define UCOMISD_x_xm 0x2e +#define UNPCKLPD_x_xm 0x14 +#define XCHG_EAX_r 0x90 +#define XCHG_r_rm 0x87 +#define XOR (/* BINARY */ 6 << 3) +#define XOR_EAX_i32 0x35 +#define XOR_r_rm 0x33 +#define XOR_rm_r 0x31 +#define XORPD_x_xm 0x57 + +#define GROUP_0F 0x0f +#define GROUP_F7 0xf7 +#define GROUP_FF 0xff +#define GROUP_BINARY_81 0x81 +#define GROUP_BINARY_83 0x83 +#define GROUP_SHIFT_1 0xd1 +#define GROUP_SHIFT_N 0xc1 +#define GROUP_SHIFT_CL 0xd3 + +#define MOD_REG 0xc0 +#define MOD_DISP8 0x40 + +#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) + +#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) +#define POP_REG(r) (*inst++ = (POP_r + (r))) +#define RET() (*inst++ = (RET_near)) +#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) +/* r32, r/m32 */ +#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) + +/* Multithreading does not affect these static variables, since they store + built-in CPU features. Therefore they can be overwritten by different threads + if they detect the CPU features in the same time. 
*/ +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +static sljit_s32 cpu_has_sse2 = -1; +#endif +static sljit_s32 cpu_has_cmov = -1; + +#ifdef _WIN32_WCE +#include +#elif defined(_MSC_VER) && _MSC_VER >= 1400 +#include +#endif + +/******************************************************/ +/* Unaligned-store functions */ +/******************************************************/ + +static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +/******************************************************/ +/* Utility functions */ +/******************************************************/ + +static void get_cpu_features(void) +{ + sljit_u32 features; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + int CPUInfo[4]; + __cpuid(CPUInfo, 1); + features = (sljit_u32)CPUInfo[3]; + +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) + + /* AT&T syntax. */ + __asm__ ( + "movl $0x1, %%eax\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + /* On x86-32, there is no red zone, so this + should work (no need for a local variable). */ + "push %%ebx\n" +#endif + "cpuid\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + "pop %%ebx\n" +#endif + "movl %%edx, %0\n" + : "=g" (features) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "%eax", "%ecx", "%edx" +#else + : "%rax", "%rbx", "%rcx", "%rdx" +#endif + ); + +#else /* _MSC_VER && _MSC_VER >= 1400 */ + + /* Intel syntax. 
*/ + __asm { + mov eax, 1 + cpuid + mov features, edx + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + cpu_has_sse2 = (features >> 26) & 0x1; +#endif + cpu_has_cmov = (features >> 15) & 0x1; +} + +static sljit_u8 get_jump_code(sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_EQUAL_F64: + return 0x84 /* je */; + + case SLJIT_NOT_EQUAL: + case SLJIT_NOT_EQUAL_F64: + return 0x85 /* jne */; + + case SLJIT_LESS: + case SLJIT_LESS_F64: + return 0x82 /* jc */; + + case SLJIT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL_F64: + return 0x83 /* jae */; + + case SLJIT_GREATER: + case SLJIT_GREATER_F64: + return 0x87 /* jnbe */; + + case SLJIT_LESS_EQUAL: + case SLJIT_LESS_EQUAL_F64: + return 0x86 /* jbe */; + + case SLJIT_SIG_LESS: + return 0x8c /* jl */; + + case SLJIT_SIG_GREATER_EQUAL: + return 0x8d /* jnl */; + + case SLJIT_SIG_GREATER: + return 0x8f /* jnle */; + + case SLJIT_SIG_LESS_EQUAL: + return 0x8e /* jle */; + + case SLJIT_OVERFLOW: + return 0x80 /* jo */; + + case SLJIT_NOT_OVERFLOW: + return 0x81 /* jno */; + + case SLJIT_UNORDERED_F64: + return 0x8a /* jp */; + + case SLJIT_ORDERED_F64: + return 0x8b /* jpo */; + } + return 0; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); +#else +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr); +static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label); +#endif + +static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) +{ + sljit_s32 type = jump->flags >> TYPE_SHIFT; + sljit_s32 short_jump; + sljit_uw label_addr; + + if (jump->flags & JUMP_LABEL) + label_addr = (sljit_uw)(code + jump->u.label->size); + else + label_addr = jump->u.target - executable_offset; 
+ + short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) + return generate_far_jump_code(jump, code_ptr); +#endif + + if (type == SLJIT_JUMP) { + if (short_jump) + *code_ptr++ = JMP_i8; + else + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + short_jump = 0; + *code_ptr++ = CALL_i32; + jump->addr++; + } + else if (short_jump) { + *code_ptr++ = get_jump_code(type) - 0x10; + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (short_jump) { + jump->flags |= PATCH_MB; + code_ptr += sizeof(sljit_s8); + } else { + jump->flags |= PATCH_MW; + code_ptr += sizeof(sljit_s32); + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_u8 *code; + sljit_u8 *code_ptr; + sljit_u8 *buf_ptr; + sljit_u8 *buf_end; + sljit_u8 len; + sljit_sw executable_offset; + sljit_sw jump_addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* Second code generation pass. */ + code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + executable_offset = SLJIT_EXEC_OFFSET(code); + + do { + buf_ptr = buf->memory; + buf_end = buf_ptr + buf->used_size; + do { + len = *buf_ptr++; + if (len > 0) { + /* The code is already generated. 
*/ + SLJIT_MEMCPY(code_ptr, buf_ptr, len); + code_ptr += len; + buf_ptr += len; + } + else { + switch (*buf_ptr) { + case 0: + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = code_ptr - code; + label = label->next; + break; + case 1: + jump->addr = (sljit_uw)code_ptr; + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset); + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset); +#else + code_ptr = generate_far_jump_code(jump, code_ptr); +#endif + } + jump = jump->next; + break; + case 2: + const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); + const_ = const_->next; + break; + default: + SLJIT_ASSERT(*buf_ptr == 3); + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size); +#endif + put_label = put_label->next; + break; + } + buf_ptr++; + } + } while (buf_ptr < buf_end); + SLJIT_ASSERT(buf_ptr == buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr <= code + compiler->size); + + jump = compiler->jumps; + while (jump) { + jump_addr = jump->addr + executable_offset; + + if (jump->flags & PATCH_MB) { + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); + *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); + } else if (jump->flags & PATCH_MW) { + if (jump->flags & JUMP_LABEL) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)jump->addr, 
(sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw)))); +#else + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32)))); +#endif + } + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw)))); +#else + SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32)))); +#endif + } + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + else if (jump->flags & PATCH_MD) + sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); +#endif + + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); +#else + if (put_label->flags & PATCH_MD) { + SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX); + sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); + } + else { + SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr); + } +#endif + + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = code_ptr - code; + + code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + + SLJIT_UPDATE_WX_FLAGS(code, 
(sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1); + return (void*)code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else /* SLJIT_DETECT_SSE2 */ + return 1; +#endif /* SLJIT_DETECT_SSE2 */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_HAS_VIRTUAL_REGISTERS: + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + if (cpu_has_cmov == -1) + get_cpu_features(); + return cpu_has_cmov; + + case SLJIT_HAS_PREFETCH: + return 1; + + case SLJIT_HAS_SSE2: +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else + return 1; +#endif + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) + +static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +static sljit_s32 emit_mov(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); + +#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 
single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src); + +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); + +static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) + /* Emit endbr32/endbr64 when CET is enabled. */ + sljit_u8 *inst; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = 0xf3; + *inst++ = 0x0f; + *inst++ = 0x1e; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *inst = 0xfb; +#else + *inst = 0xfa; +#endif +#else /* !SLJIT_CONFIG_X86_CET */ + SLJIT_UNUSED_ARG(compiler); +#endif /* SLJIT_CONFIG_X86_CET */ + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + +static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_s32 size; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = 5; +#else + size = 4; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + *inst++ = 0xf3; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + *inst++ = REX_W | (reg_map[reg] <= 7 ? 
0 : REX_B); +#endif + *inst++ = 0x0f; + *inst++ = 0x1e; + *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_s32 size; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = 5; +#else + size = 4; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + *inst++ = 0xf3; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); +#endif + *inst++ = 0x0f; + *inst++ = 0xae; + *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7); + return SLJIT_SUCCESS; +} + +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ + +static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + return _get_ssp() != 0; +#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ + return 0; +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ +} + +static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw, sljit_s32 base, sljit_sw disp) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + sljit_u8 *inst, *jz_after_cmp_inst; + sljit_uw size_jz_after_cmp_inst; + + sljit_uw size_before_rdssp_inst = compiler->size; + + /* Generate "RDSSP TMP_REG1". */ + FAIL_IF(emit_rdssp(compiler, TMP_REG1)); + + /* Load return address on shadow stack into TMP_REG1. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + SLJIT_ASSERT(reg_map[TMP_REG1] == 5); + + /* Hand code unsupported "mov 0x0(%ebp),%ebp". 
*/ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + *inst++ = 0x8b; + *inst++ = 0x6d; + *inst = 0; +#else /* !SLJIT_CONFIG_X86_32 */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0); +#endif /* SLJIT_CONFIG_X86_32 */ + + if (src == SLJIT_UNUSED) { + /* Return address is on stack. */ + src = SLJIT_MEM1(base); + srcw = disp; + } + + /* Compare return address against TMP_REG1. */ + FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw)); + + /* Generate JZ to skip shadow stack ajdustment when shadow + stack matches normal stack. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10; + size_jz_after_cmp_inst = compiler->size; + jz_after_cmp_inst = inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary. */ + compiler->mode32 = 1; +#endif + /* Load 1 into TMP_REG1. */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); + + /* Generate "INCSSP TMP_REG1". */ + FAIL_IF(emit_incssp(compiler, TMP_REG1)); + + /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. 
*/ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = JMP_i8; + *inst = size_before_rdssp_inst - compiler->size; + + *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst; +#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNUSED_ARG(base); + SLJIT_UNUSED_ARG(disp); +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#include "sljitNativeX86_32.c" +#else +#include "sljitNativeX86_64.c" +#endif + +static sljit_s32 emit_mov(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + SLJIT_ASSERT(dst != SLJIT_UNUSED); + + if (FAST_IS_REG(src)) { + inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; + } + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); +#else + if (!compiler->mode32) { + if (NOT_HALFWORD(srcw)) + return emit_load_imm64(compiler, dst, srcw); + } + else + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); +#endif + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (!compiler->mode32 && NOT_HALFWORD(srcw)) { + /* Immediate to memory move. Only SLJIT_MOV operation copies + an immediate directly into memory so TMP_REG1 can be used. 
*/ + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; + } +#endif + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + return SLJIT_SUCCESS; + } + + /* Memory to memory move. Only SLJIT_MOV operation copies + data from memory to memory so TMP_REG1 can be used. */ + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_u8 *inst; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 size; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = INT3; + break; + case SLJIT_NOP: + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = NOP; + break; + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifdef _WIN64 + SLJIT_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] == 2 + && reg_map[TMP_REG1] > 7); +#else + SLJIT_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] < 7 + && reg_map[TMP_REG1] == 2); +#endif + compiler->mode32 = op & SLJIT_I32_OP; +#endif + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + + 
op = GET_OPCODE(op); + if ((op | 0x2) == SLJIT_DIV_UW) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); +#else + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); +#endif + FAIL_IF(!inst); + *inst = XOR_r_rm; + } + + if ((op | 0x2) == SLJIT_DIV_SW) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = CDQ; +#else + if (compiler->mode32) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = CDQ; + } else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = REX_W; + *inst = CDQ; + } +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); +#else +#ifdef _WIN64 + size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; +#else + size = (!compiler->mode32) ? 3 : 2; +#endif + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); +#ifdef _WIN64 + if (!compiler->mode32) + *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0); + else if (op >= SLJIT_DIVMOD_UW) + *inst++ = REX_B; + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? 
reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); +#else + if (!compiler->mode32) + *inst++ = REX_W; + *inst++ = GROUP_F7; + *inst = MOD_REG | reg_map[SLJIT_R1]; +#endif +#endif + switch (op) { + case SLJIT_LMUL_UW: + *inst |= MUL; + break; + case SLJIT_LMUL_SW: + *inst |= IMUL; + break; + case SLJIT_DIVMOD_UW: + case SLJIT_DIV_UW: + *inst |= DIV; + break; + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_SW: + *inst |= IDIV; + break; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) + if (op <= SLJIT_DIVMOD_SW) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#else + if (op >= SLJIT_DIV_UW) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#endif + break; + case SLJIT_ENDBR: + return emit_endbranch(compiler); + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return skip_frames_before_return(compiler); + } + + return SLJIT_SUCCESS; +} + +#define ENCODE_PREFIX(prefix) \ + do { \ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ + FAIL_IF(!inst); \ + INC_SIZE(1); \ + *inst = (prefix); \ + } while (0) + +static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 work_r; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_i8; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg_map[src] >= 4) { + SLJIT_ASSERT(dst_r == TMP_REG1); + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + } else + dst_r = src; +#else + dst_r = src; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else if (FAST_IS_REG(src) && reg_map[src] >= 4) { + /* src, dst are registers. */ + SLJIT_ASSERT(SLOW_IS_REG(dst)); + if (reg_map[dst] < 4) { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; + } + else { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + if (sign) { + /* shl reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SHL; + /* sar reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SAR; + } + else { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); + FAIL_IF(!inst); + *(inst + 1) |= AND; + } + } + return SLJIT_SUCCESS; + } +#endif + else { + /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; + } + + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_r == TMP_REG1) { + /* Find a non-used register, whose reg_map[src] < 4. 
*/ + if ((dst & REG_MASK) == SLJIT_R0) { + if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) + work_r = SLJIT_R2; + else + work_r = SLJIT_R1; + } + else { + if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + work_r = SLJIT_R0; + else if ((dst & REG_MASK) == SLJIT_R1) + work_r = SLJIT_R2; + else + work_r = SLJIT_R1; + } + + if (work_r == SLJIT_R0) { + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + } + else { + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + } + + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; + + if (work_r == SLJIT_R0) { + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + } + else { + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + } + } + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; + } +#else + inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; +#endif + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst++ = PREFETCH; + + if (op == SLJIT_PREFETCH_L1) + *inst |= (1 << 3); + else if (op == SLJIT_PREFETCH_L2) + *inst |= (2 << 3); + else if (op == SLJIT_PREFETCH_L3) + *inst |= (3 << 3); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { + if 
(FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; + } + + if (dst & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + if (dst == src && dstw == srcw) { + /* Same input and output */ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + return SLJIT_SUCCESS; + } + + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) + dst = TMP_REG1; + + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + 
sljit_u8* inst; + + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static const sljit_sw emit_clz_arg = 32 + 31; +#endif + +static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + + SLJIT_UNUSED_ARG(op_flags); + + if (cpu_has_cmov == -1) + get_cpu_features(); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = BSR_r_rm; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (cpu_has_cmov) { + if (dst_r != TMP_REG1) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); + } + else + inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg); + + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVE_r_rm; + } + else + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31)); + + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); +#else + if (cpu_has_cmov) { + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 
(64 + 63) : (32 + 31)); + + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVE_r_rm; + } + else + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31))); + + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0); +#endif + + FAIL_IF(!inst); + *(inst + 1) |= XOR; + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 op_flags = GET_ALL_FLAGS(op); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 dst_is_ereg = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); + CHECK_EXTRA_REGS(src, srcw, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op_flags & SLJIT_I32_OP; +#endif + + op = GET_OPCODE(op); + + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } + + if (op_flags & SLJIT_I32_OP) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src & SLJIT_MEM) { + if (op == SLJIT_MOV_S32) + op = SLJIT_MOV_U32; + } + else if (src & SLJIT_IMM) { + if (op == SLJIT_MOV_U32) + op = SLJIT_MOV_S32; + } +#endif + } + + if (src & SLJIT_IMM) { + switch (op) { + case SLJIT_MOV_U8: + srcw = (sljit_u8)srcw; + break; + case SLJIT_MOV_S8: + srcw = (sljit_s8)srcw; + break; + case SLJIT_MOV_U16: + srcw = (sljit_u16)srcw; + break; + case SLJIT_MOV_S16: + srcw = 
(sljit_s16)srcw; + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_U32: + srcw = (sljit_u32)srcw; + break; + case SLJIT_MOV_S32: + srcw = (sljit_s32)srcw; + break; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg)) + return emit_mov(compiler, dst, dstw, src, srcw); +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { + SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); + dst = TMP_REG1; + } +#endif + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: +#endif + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_U8: + FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S8: + FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_U16: + FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S16: + FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_U32: + FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S32: + FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); + break; +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); +#endif + return SLJIT_SUCCESS; + } + + switch (op) { + case SLJIT_NOT: + if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z)) + return emit_not_with_flags(compiler, dst, dstw, src, srcw); + return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); + + case SLJIT_NEG: + return emit_unary(compiler, NEG_rm, dst, dstw, src, 
srcw); + + case SLJIT_CLZ: + return emit_clz(compiler, op_flags, dst, dstw, src, srcw); + } + + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ + } + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) + +#else + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) + +#endif + +static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_u8 op_eax_imm = (op_types >> 24); + sljit_u8 op_rm = (op_types >> 16) & 0xff; + sljit_u8 op_mr = (op_types >> 8) & 0xff; + sljit_u8 op_imm = op_types & 0xff; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + return SLJIT_SUCCESS; + } + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && 
(compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + /* Special exception for sljit_emit_op_flags. */ + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* Only for cumulative operations. */ + if (dst == src2 && dstw == src2w) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src1w); + } + else { + BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src1)) { + inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. 
*/ + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_u8 op_eax_imm = (op_types >> 24); + sljit_u8 op_rm = (op_types >> 16) & 0xff; + sljit_u8 op_mr = (op_types >> 8) & 0xff; + sljit_u8 op_imm = op_types & 0xff; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + return SLJIT_SUCCESS; + } + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + 
*inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if (FAST_IS_REG(dst) && dst != src2) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mul(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_s32 dst_r; + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + + /* Register destination. 
*/ + if (dst_r == src1 && !(src2 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + else if (src1 & SLJIT_IMM) { + if (src2 & SLJIT_IMM) { + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); + src2 = dst_r; + src2w = 0; + } + + if (src1w <= 127 && src1w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_s8)src1w; + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_sw(inst, src1w); + } +#else + else if (IS_HALFWORD(src1w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_s32(inst, (sljit_s32)src1w); + } + else { + if (dst_r != src2) + EMIT_MOV(compiler, dst_r, 0, src2, src2w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } +#endif + } + else if (src2 & SLJIT_IMM) { + /* Note: src1 is NOT immediate. 
*/ + + if (src2w <= 127 && src2w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_s8)src2w; + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_sw(inst, src2w); + } +#else + else if (IS_HALFWORD(src2w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_s32(inst, (sljit_s32)src2w); + } + else { + if (dst_r != src1) + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } +#endif + } + else { + /* Neither argument is immediate. */ + if (ADDRESSING_DEPENDS_ON(src2, dst_r)) + dst_r = TMP_REG1; + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_s32 dst_r, done = 0; + + /* These cases better be left to handled by normal way. */ + if (dst == src1 && dstw == src1w) + return SLJIT_ERR_UNSUPPORTED; + if (dst == src2 && dstw == src2w) + return SLJIT_ERR_UNSUPPORTED; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if (FAST_IS_REG(src1)) { + if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); +#else + if (src2 & SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + else if (FAST_IS_REG(src2)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); +#else + if (src1 & SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + + if (done) { + if (dst_r == TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + return SLJIT_ERR_UNSUPPORTED; +} + +static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(CMP_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src1)) { + if (src2 & SLJIT_IMM) { + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { + 
inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = CMP_rm_r; + return SLJIT_SUCCESS; + } + + if (src2 & SLJIT_IMM) { + if (src1 & SLJIT_IMM) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = TMP_REG1; + src1w = 0; + } + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src1w); + return SLJIT_SUCCESS; + } + + if (!(src1 & SLJIT_IMM)) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + return SLJIT_SUCCESS; + } + else if 
(FAST_IS_REG(src1)) { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + return SLJIT_SUCCESS; + } + } + + if (!(src2 & SLJIT_IMM)) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src1w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + return SLJIT_SUCCESS; + } + else if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + return SLJIT_SUCCESS; + } + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_shift(struct sljit_compiler *compiler, + sljit_u8 mode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + + if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { + if (dst == src1 && dstw == src1w) { + 
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + + if (dst == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + } + else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + if (src1 != dst) + EMIT_MOV(compiler, dst, 0, src1, src1w); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + } + else { + /* This case 
is complex since ecx itself may be used for + addressing, and this case must be supported as well. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); +#else + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); +#endif + if (dst != SLJIT_UNUSED) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, + sljit_u8 mode, sljit_s32 set_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* The CPU does not set flags if the shift count is 0. 
*/ + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0)) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); +#else + if ((src2w & 0x1f) != 0) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); +#endif + if (!set_flags) + return emit_mov(compiler, dst, dstw, src1, src1w); + /* OR dst, src, 0 */ + return emit_cum_binary(compiler, BINARY_OPCODE(OR), + dst, dstw, src1, src1w, SLJIT_IMM, 0); + } + + if (!set_flags) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); + + if (!FAST_IS_REG(dst)) + FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); + + FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); + + if (FAST_IS_REG(dst)) + return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_I32_OP; +#endif + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (!HAS_FLAGS(op)) { + if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + } + return emit_cum_binary(compiler, BINARY_OPCODE(ADD), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + return 
emit_cum_binary(compiler, BINARY_OPCODE(ADC), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUB: + if (!HAS_FLAGS(op)) { + if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + if (SLOW_IS_REG(dst) && src2 == dst) { + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); + return emit_unary(compiler, NEG_rm, dst, 0, dst, 0); + } + } + + if (dst == SLJIT_UNUSED) + return emit_cmp_binary(compiler, src1, src1w, src2, src2w); + return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_MUL: + return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_AND: + if (dst == SLJIT_UNUSED) + return emit_test_binary(compiler, src1, src1w, src2, src2w); + return emit_cum_binary(compiler, BINARY_OPCODE(AND), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: + return emit_cum_binary(compiler, BINARY_OPCODE(OR), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_XOR: + return emit_cum_binary(compiler, BINARY_OPCODE(XOR), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SHL: + return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_LSHR: + return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ASHR: + return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + switch (op) { + case 
SLJIT_FAST_RETURN: + return emit_fast_return(compiler, src, srcw); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + /* Don't adjust shadow stack if it isn't enabled. */ + if (!cpu_has_shadow_stack ()) + return SLJIT_SUCCESS; + return adjust_shadow_stack(compiler, src, srcw, SLJIT_UNUSED, 0); + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return emit_prefetch(compiler, op, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg >= SLJIT_R3 && reg <= SLJIT_R8) + return -1; +#endif + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return reg; +#else + return freg_map[reg]; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_s32 size) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + SLJIT_MEMCPY(inst, instruction, size); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +/* Alignment(3) + 4 * 16 bytes. */ +static sljit_s32 sse2_data[3 + (4 * 4)]; +static sljit_s32 *sse2_buffer; + +static void init_compiler(void) +{ + /* Align to 16 bytes. */ + sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); + + /* Single precision constants (each constant is 16 byte long). 
*/ + sse2_buffer[0] = 0x80000000; + sse2_buffer[4] = 0x7fffffff; + /* Double precision constants (each constant is 16 byte long). */ + sse2_buffer[8] = 0; + sse2_buffer[9] = 0x80000000; + sse2_buffer[12] = 0xffffffff; + sse2_buffer[13] = 0x7fffffff; +} + +static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) +{ + sljit_u8 *inst; + + inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) +{ + sljit_u8 *inst; + + inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) +{ + return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); +} + +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) +{ + return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + sljit_u8 *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + compiler->mode32 = 0; +#endif + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTTSD2SI_r_xm; + + if (dst & SLJIT_MEM) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_u8 *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTSI2SD_x_rm; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (!FAST_IS_REG(src1)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; + } + + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + CHECK_ERROR(); + 
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_MOV_F64) { + if (FAST_IS_REG(dst)) + return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw); + if (FAST_IS_REG(src)) + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + } + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + if (FAST_IS_REG(src)) { + /* We overwrite the high bits of source. From SLJIT point of view, + this is not an issue. + Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0)); + } + else { + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw)); + src = TMP_FREG; + } + + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0)); + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(dst)) { + dst_r = dst; + if (dst != src) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_NEG_F64: + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8))); + break; + + case SLJIT_ABS_F64: + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? 
sse2_buffer + 4 : sse2_buffer + 12))); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (FAST_IS_REG(dst)) { + dst_r = dst; + if (dst == src1) + ; /* Do nothing here. */ + else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) { + /* Swap arguments. */ + src2 = src1; + src2w = src1w; + } + else if (dst != src2) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w)); + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + } + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + break; + + case SLJIT_SUB_F64: + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + break; + + case SLJIT_MUL_F64: + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* 
--------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + sljit_u8 *inst; + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 0; + + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + sljit_u8 *inst; + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF_NULL(jump); + set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)); + type &= 0xff; + + /* Worst case size. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; +#else + compiler->size += (type >= SLJIT_JUMP) ? 
(10 + 3) : (2 + 10 + 3); +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF_NULL(inst); + + *inst++ = 0; + *inst++ = 1; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (src == SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF_NULL(jump); + set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT)); + jump->u.target = srcw; + + /* Worst case size. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->size += 5; +#else + compiler->size += 10 + 3; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + FAIL_IF_NULL(inst); + + *inst++ = 0; + *inst++ = 1; + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; +#endif + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_u8 *inst; + sljit_u8 cond_set = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 reg; +#endif + /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ + sljit_s32 dst_save = dst; + sljit_sw dstw_save = dstw; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + type &= 0xff; + /* setcc = jcc + 0x10. 
*/ + cond_set = get_jump_code(type) + 0x10; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); + FAIL_IF(!inst); + INC_SIZE(4 + 3); + /* Set low register to conditional flag. */ + *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[TMP_REG1]; + *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]; + return SLJIT_SUCCESS; + } + + reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); + FAIL_IF(!inst); + INC_SIZE(4 + 4); + /* Set low register to conditional flag. */ + *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[reg]; + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + /* The movzx instruction does not affect flags. */ + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; + + if (reg != TMP_REG1) + return SLJIT_SUCCESS; + + if (GET_OPCODE(op) < SLJIT_ADD) { + compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); + +#else + /* The SLJIT_CONFIG_X86_32 code path starts here. */ + if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { + if (reg_map[dst] <= 4) { + /* Low byte is accessible. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); + INC_SIZE(3 + 3); + /* Set low byte to conditional flag. 
*/ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_map[dst]; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; + return SLJIT_SUCCESS; + } + + /* Low byte is not accessible. */ + if (cpu_has_cmov == -1) + get_cpu_features(); + + if (cpu_has_cmov) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); + /* a xor reg, reg operation would overwrite the flags. */ + EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + + *inst++ = GROUP_0F; + /* cmovcc = setcc - 0x50. */ + *inst++ = cond_set - 0x50; + *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]; + return SLJIT_SUCCESS; + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + /* Set al to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + return SLJIT_SUCCESS; + } + + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) { + SLJIT_ASSERT(reg_map[SLJIT_R0] == 0); + + if (dst != SLJIT_R0) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 2 + 1); + /* Set low register to conditional flag. */ + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); + FAIL_IF(!inst); + INC_SIZE(2 + 3 + 2 + 2); + /* Set low register to conditional flag. 
*/ + *inst++ = XCHG_r_rm; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 1 /* ecx */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; + *inst++ = XCHG_r_rm; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + } + return SLJIT_SUCCESS; + } + + /* Set TMP_REG1 to the bit. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + /* Set al to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; + + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + + if (GET_OPCODE(op) < SLJIT_ADD) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_64 */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + dst_reg &= ~SLJIT_I32_OP; + + if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3)) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#else + if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#endif + + /* ADJUST_LOCAL_OFFSET is not needed. 
*/ + CHECK_EXTRA_REGS(src, srcw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = dst_reg & SLJIT_I32_OP; + dst_reg &= ~SLJIT_I32_OP; +#endif + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = get_jump_code(type & 0xff) - 0x40; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (NOT_HALFWORD(offset)) { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); + return compiler->error; +#else + return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); +#endif + } +#endif + + if (offset != 0) + return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + sljit_u8 *inst; + struct sljit_const *const_; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 reg; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + const_ = (struct 
sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (emit_load_imm64(compiler, reg, init_value)) + return NULL; +#else + if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) + return NULL; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 2; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst & SLJIT_MEM) + if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) + return NULL; +#endif + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_u8 *inst; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 reg; + sljit_uw start_size; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + reg = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if (emit_load_imm64(compiler, reg, 0)) + return NULL; +#else + if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0)) + return NULL; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst & SLJIT_MEM) { + start_size = compiler->size; + if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) + return NULL; + put_label->flags = compiler->size - start_size; + } +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 3; + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset); +#else + sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target); +#endif + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0); + sljit_unaligned_store_sw((void*)addr, new_constant); + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitProtExecAllocator.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitProtExecAllocator.c new file mode 100644 index 0000000000..147175afa6 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitProtExecAllocator.c @@ -0,0 +1,474 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple executable memory allocator + + It is assumed, that executable code blocks are usually medium (or sometimes + large) memory blocks, and the allocator is not too frequently called (less + optimized than other allocators). Thus, using it as a generic allocator is + not suggested. + + How does it work: + Memory is allocated in continuous memory areas called chunks by alloc_chunk() + Chunk format: + [ block ][ block ] ... [ block ][ block terminator ] + + All blocks and the block terminator is started with block_header. The block + header contains the size of the previous and the next block. 
These sizes + can also contain special values. + Block size: + 0 - The block is a free_block, with a different size member. + 1 - The block is a block terminator. + n - The block is used at the moment, and the value contains its size. + Previous block size: + 0 - This is the first block of the memory chunk. + n - The size of the previous block. + + Using these size values we can go forward or backward on the block chain. + The unused blocks are stored in a chain list pointed by free_blocks. This + list is useful if we need to find a suitable memory area when the allocator + is called. + + When a block is freed, the new free block is connected to its adjacent free + blocks if possible. + + [ free block ][ used block ][ free block ] + and "used block" is freed, the three blocks are connected together: + [ one big free block ] +*/ + +/* --------------------------------------------------------------------- */ +/* System (OS) functions */ +/* --------------------------------------------------------------------- */ + +/* 64 KByte. 
*/ +#define CHUNK_SIZE 0x10000 + +struct chunk_header { + void *executable; +}; + +/* + alloc_chunk / free_chunk : + * allocate executable system memory chunks + * the size is always divisible by CHUNK_SIZE + SLJIT_ALLOCATOR_LOCK / SLJIT_ALLOCATOR_UNLOCK : + * provided as part of sljitUtils + * only the allocator requires this lock, sljit is fully thread safe + as it only uses local variables +*/ + +#ifndef __NetBSD__ +#include +#include +#include +#include + +#ifndef O_NOATIME +#define O_NOATIME 0 +#endif + +/* this is a linux extension available since kernel 3.11 */ +#ifndef O_TMPFILE +#define O_TMPFILE 020200000 +#endif + +#ifndef _GNU_SOURCE +char *secure_getenv(const char *name); +int mkostemp(char *template, int flags); +#endif + +static SLJIT_INLINE int create_tempfile(void) +{ + int fd; + char tmp_name[256]; + size_t tmp_name_len = 0; + char *dir; + struct stat st; +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + mode_t mode; +#endif + +#ifdef HAVE_MEMFD_CREATE + /* this is a GNU extension, make sure to use -D_GNU_SOURCE */ + fd = memfd_create("sljit", MFD_CLOEXEC); + if (fd != -1) { + fchmod(fd, 0); + return fd; + } +#endif + + dir = secure_getenv("TMPDIR"); + + if (dir) { + tmp_name_len = strlen(dir); + if (tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)) { + if ((stat(dir, &st) == 0) && S_ISDIR(st.st_mode)) + strcpy(tmp_name, dir); + } + } + +#ifdef P_tmpdir + if (!tmp_name_len) { + tmp_name_len = strlen(P_tmpdir); + if (tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)) + strcpy(tmp_name, P_tmpdir); + } +#endif + if (!tmp_name_len) { + strcpy(tmp_name, "/tmp"); + tmp_name_len = 4; + } + + SLJIT_ASSERT(tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)); + + if (tmp_name[tmp_name_len - 1] == '/') + tmp_name[--tmp_name_len] = '\0'; + +#ifdef __linux__ + /* + * the previous trimming might had left an empty string if TMPDIR="/" + * so work around the problem below + */ + fd = open(tmp_name_len ? 
tmp_name : "/", + O_TMPFILE | O_EXCL | O_RDWR | O_NOATIME | O_CLOEXEC, 0); + if (fd != -1) + return fd; +#endif + + if (tmp_name_len + 7 >= sizeof(tmp_name)) + return -1; + + strcpy(tmp_name + tmp_name_len, "/XXXXXX"); +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + mode = umask(0777); +#endif + fd = mkostemp(tmp_name, O_CLOEXEC | O_NOATIME); +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + umask(mode); +#else + fchmod(fd, 0); +#endif + + if (fd == -1) + return -1; + + if (unlink(tmp_name)) { + close(fd); + return -1; + } + + return fd; +} + +static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size) +{ + struct chunk_header *retval; + int fd; + + fd = create_tempfile(); + if (fd == -1) + return NULL; + + if (ftruncate(fd, size)) { + close(fd); + return NULL; + } + + retval = (struct chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (retval == MAP_FAILED) { + close(fd); + return NULL; + } + + retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + + if (retval->executable == MAP_FAILED) { + munmap((void *)retval, size); + close(fd); + return NULL; + } + + close(fd); + return retval; +} +#else +/* + * MAP_REMAPDUP is a NetBSD extension available sinde 8.0, make sure to + * adjust your feature macros (ex: -D_NETBSD_SOURCE) as needed + */ +static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size) +{ + struct chunk_header *retval; + + retval = (struct chunk_header *)mmap(NULL, size, + PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC), + MAP_ANON | MAP_SHARED, -1, 0); + + if (retval == MAP_FAILED) + return NULL; + + retval->executable = mremap(retval, size, NULL, size, MAP_REMAPDUP); + if (retval->executable == MAP_FAILED) { + munmap((void *)retval, size); + return NULL; + } + + if (mprotect(retval->executable, size, PROT_READ | PROT_EXEC) == -1) { + munmap(retval->executable, size); + munmap((void *)retval, size); + return NULL; + } + + return retval; +} +#endif 
/* NetBSD */ + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + struct chunk_header *header = ((struct chunk_header *)chunk) - 1; + + munmap(header->executable, size); + munmap((void *)header, size); +} + +/* --------------------------------------------------------------------- */ +/* Common functions */ +/* --------------------------------------------------------------------- */ + +#define CHUNK_MASK (~(CHUNK_SIZE - 1)) + +struct block_header { + sljit_uw size; + sljit_uw prev_size; + sljit_sw executable_offset; +}; + +struct free_block { + struct block_header header; + struct free_block *next; + struct free_block *prev; + sljit_uw size; +}; + +#define AS_BLOCK_HEADER(base, offset) \ + ((struct block_header*)(((sljit_u8*)base) + offset)) +#define AS_FREE_BLOCK(base, offset) \ + ((struct free_block*)(((sljit_u8*)base) + offset)) +#define MEM_START(base) ((void*)((base) + 1)) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) + +static struct free_block* free_blocks; +static sljit_uw allocated_size; +static sljit_uw total_size; + +static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) +{ + free_block->header.size = 0; + free_block->size = size; + + free_block->next = free_blocks; + free_block->prev = NULL; + if (free_blocks) + free_blocks->prev = free_block; + free_blocks = free_block; +} + +static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) +{ + if (free_block->next) + free_block->next->prev = free_block->prev; + + if (free_block->prev) + free_block->prev->next = free_block->next; + else { + SLJIT_ASSERT(free_blocks == free_block); + free_blocks = free_block->next; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + struct chunk_header *chunk_header; + struct block_header *header; + struct block_header *next_header; + struct free_block *free_block; + sljit_uw chunk_size; + sljit_sw executable_offset; + + 
SLJIT_ALLOCATOR_LOCK(); + if (size < (64 - sizeof(struct block_header))) + size = (64 - sizeof(struct block_header)); + size = ALIGN_SIZE(size); + + free_block = free_blocks; + while (free_block) { + if (free_block->size >= size) { + chunk_size = free_block->size; + if (chunk_size > size + 64) { + /* We just cut a block from the end of the free block. */ + chunk_size -= size; + free_block->size = chunk_size; + header = AS_BLOCK_HEADER(free_block, chunk_size); + header->prev_size = chunk_size; + header->executable_offset = free_block->header.executable_offset; + AS_BLOCK_HEADER(header, size)->prev_size = size; + } + else { + sljit_remove_free_block(free_block); + header = (struct block_header*)free_block; + size = chunk_size; + } + allocated_size += size; + header->size = size; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); + } + free_block = free_block->next; + } + + chunk_size = sizeof(struct chunk_header) + sizeof(struct block_header); + chunk_size = (chunk_size + size + CHUNK_SIZE - 1) & CHUNK_MASK; + + chunk_header = alloc_chunk(chunk_size); + if (!chunk_header) { + SLJIT_ALLOCATOR_UNLOCK(); + return NULL; + } + + executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header); + + chunk_size -= sizeof(struct chunk_header) + sizeof(struct block_header); + total_size += chunk_size; + + header = (struct block_header *)(chunk_header + 1); + + header->prev_size = 0; + header->executable_offset = executable_offset; + if (chunk_size > size + 64) { + /* Cut the allocated space into a free and a used block. */ + allocated_size += size; + header->size = size; + chunk_size -= size; + + free_block = AS_FREE_BLOCK(header, size); + free_block->header.prev_size = size; + free_block->header.executable_offset = executable_offset; + sljit_insert_free_block(free_block, chunk_size); + next_header = AS_BLOCK_HEADER(free_block, chunk_size); + } + else { + /* All space belongs to this allocation. 
*/ + allocated_size += chunk_size; + header->size = chunk_size; + next_header = AS_BLOCK_HEADER(header, chunk_size); + } + next_header->size = 1; + next_header->prev_size = chunk_size; + next_header->executable_offset = executable_offset; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + struct block_header *header; + struct free_block* free_block; + + SLJIT_ALLOCATOR_LOCK(); + header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); + header = AS_BLOCK_HEADER(header, -header->executable_offset); + allocated_size -= header->size; + + /* Connecting free blocks together if possible. */ + + /* If header->prev_size == 0, free_block will equal to header. + In this case, free_block->header.size will be > 0. */ + free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size); + if (SLJIT_UNLIKELY(!free_block->header.size)) { + free_block->size += header->size; + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + else { + free_block = (struct free_block*)header; + sljit_insert_free_block(free_block, header->size); + } + + header = AS_BLOCK_HEADER(free_block, free_block->size); + if (SLJIT_UNLIKELY(!header->size)) { + free_block->size += ((struct free_block*)header)->size; + sljit_remove_free_block((struct free_block*)header); + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + + /* The whole chunk is free. */ + if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + /* If this block is freed, we still have (allocated_size / 2) free space. 
*/ + if (total_size - free_block->size > (allocated_size * 3 / 2)) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + + sizeof(struct chunk_header) + + sizeof(struct block_header)); + } + } + + SLJIT_ALLOCATOR_UNLOCK(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + struct free_block* free_block; + struct free_block* next_free_block; + + SLJIT_ALLOCATOR_LOCK(); + + free_block = free_blocks; + while (free_block) { + next_free_block = free_block->next; + if (!free_block->header.prev_size && + AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + + sizeof(struct chunk_header) + + sizeof(struct block_header)); + } + free_block = next_free_block; + } + + SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_ALLOCATOR_UNLOCK(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr) +{ + return ((struct block_header *)(ptr))[-1].executable_offset; +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitUtils.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitUtils.c new file mode 100644 index 0000000000..9bce714735 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitUtils.c @@ -0,0 +1,341 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ------------------------------------------------------------------------ */ +/* Locks */ +/* ------------------------------------------------------------------------ */ + +/* Executable Allocator */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) \ + && !(defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR) +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) +#define SLJIT_ALLOCATOR_LOCK() +#define SLJIT_ALLOCATOR_UNLOCK() +#elif !(defined _WIN32) +#include + +static pthread_mutex_t allocator_lock = PTHREAD_MUTEX_INITIALIZER; + +#define SLJIT_ALLOCATOR_LOCK() pthread_mutex_lock(&allocator_lock) +#define SLJIT_ALLOCATOR_UNLOCK() pthread_mutex_unlock(&allocator_lock) +#else /* windows */ +static HANDLE allocator_lock; + +static SLJIT_INLINE void allocator_grab_lock(void) +{ + HANDLE lock; + if (SLJIT_UNLIKELY(!InterlockedCompareExchangePointer(&allocator_lock, NULL, NULL))) { + lock = CreateMutex(NULL, FALSE, NULL); + if (InterlockedCompareExchangePointer(&allocator_lock, lock, NULL)) + CloseHandle(lock); + } + WaitForSingleObject(allocator_lock, INFINITE); +} + +#define SLJIT_ALLOCATOR_LOCK() allocator_grab_lock() +#define SLJIT_ALLOCATOR_UNLOCK() ReleaseMutex(allocator_lock) +#endif /* thread implementation */ +#endif /* SLJIT_EXECUTABLE_ALLOCATOR && !SLJIT_WX_EXECUTABLE_ALLOCATOR */ + +/* ------------------------------------------------------------------------ */ +/* Stack */ +/* ------------------------------------------------------------------------ */ + +#if ((defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) \ + && !(defined SLJIT_UTIL_SIMPLE_STACK_ALLOCATION && SLJIT_UTIL_SIMPLE_STACK_ALLOCATION)) \ + || ((defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) \ + && !((defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) \ + || (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR))) + +#ifndef _WIN32 +/* Provides mmap function. 
*/ +#include +#include + +#ifndef MAP_ANON +#ifdef MAP_ANONYMOUS +#define MAP_ANON MAP_ANONYMOUS +#endif /* MAP_ANONYMOUS */ +#endif /* !MAP_ANON */ + +#ifndef MAP_ANON + +#include + +#ifdef O_CLOEXEC +#define SLJIT_CLOEXEC O_CLOEXEC +#else /* !O_CLOEXEC */ +#define SLJIT_CLOEXEC 0 +#endif /* O_CLOEXEC */ + +/* Some old systems do not have MAP_ANON. */ +static int dev_zero = -1; + +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) + +static SLJIT_INLINE int open_dev_zero(void) +{ + dev_zero = open("/dev/zero", O_RDWR | SLJIT_CLOEXEC); + + return dev_zero < 0; +} + +#else /* !SLJIT_SINGLE_THREADED */ + +#include + +static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER; + +static SLJIT_INLINE int open_dev_zero(void) +{ + pthread_mutex_lock(&dev_zero_mutex); + if (SLJIT_UNLIKELY(dev_zero < 0)) + dev_zero = open("/dev/zero", O_RDWR | SLJIT_CLOEXEC); + + pthread_mutex_unlock(&dev_zero_mutex); + return dev_zero < 0; +} + +#endif /* SLJIT_SINGLE_THREADED */ +#undef SLJIT_CLOEXEC +#endif /* !MAP_ANON */ +#endif /* !_WIN32 */ +#endif /* open_dev_zero */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) \ + || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#ifdef _WIN32 + +static SLJIT_INLINE sljit_sw get_page_alignment(void) { + SYSTEM_INFO si; + static sljit_sw sljit_page_align; + if (!sljit_page_align) { + GetSystemInfo(&si); + sljit_page_align = si.dwPageSize - 1; + } + return sljit_page_align; +} + +#else + +#include + +static SLJIT_INLINE sljit_sw get_page_alignment(void) { + static sljit_sw sljit_page_align = -1; + if (sljit_page_align < 0) { +#ifdef _SC_PAGESIZE + sljit_page_align = sysconf(_SC_PAGESIZE); +#else + sljit_page_align = getpagesize(); +#endif + /* Should never happen. 
*/ + if (sljit_page_align < 0) + sljit_page_align = 4096; + sljit_page_align--; + } + return sljit_page_align; +} + +#endif /* _WIN32 */ + +#endif /* get_page_alignment() */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +#if (defined SLJIT_UTIL_SIMPLE_STACK_ALLOCATION && SLJIT_UTIL_SIMPLE_STACK_ALLOCATION) + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data) +{ + struct sljit_stack *stack; + void *ptr; + + SLJIT_UNUSED_ARG(allocator_data); + + if (start_size > max_size || start_size < 1) + return NULL; + + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); + if (stack == NULL) + return NULL; + + ptr = SLJIT_MALLOC(max_size, allocator_data); + if (ptr == NULL) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end - start_size; + stack->top = stack->end; + return stack; +} + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + SLJIT_FREE((void*)stack->min_start, allocator_data); + SLJIT_FREE(stack, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start) +{ + if ((new_start < stack->min_start) || (new_start >= stack->end)) + return NULL; + stack->start = new_start; + return new_start; +} + +#else /* !SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */ + +#ifdef _WIN32 + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + VirtualFree((void*)stack->min_start, 0, MEM_RELEASE); + SLJIT_FREE(stack, allocator_data); +} + +#else /* !_WIN32 */ + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + 
SLJIT_UNUSED_ARG(allocator_data); + munmap((void*)stack->min_start, stack->end - stack->min_start); + SLJIT_FREE(stack, allocator_data); +} + +#endif /* _WIN32 */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data) +{ + struct sljit_stack *stack; + void *ptr; + sljit_sw page_align; + + SLJIT_UNUSED_ARG(allocator_data); + + if (start_size > max_size || start_size < 1) + return NULL; + + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); + if (stack == NULL) + return NULL; + + /* Align max_size. */ + page_align = get_page_alignment(); + max_size = (max_size + page_align) & ~page_align; + +#ifdef _WIN32 + ptr = VirtualAlloc(NULL, max_size, MEM_RESERVE, PAGE_READWRITE); + if (!ptr) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end; + + if (sljit_stack_resize(stack, stack->end - start_size) == NULL) { + sljit_free_stack(stack, allocator_data); + return NULL; + } +#else /* !_WIN32 */ +#ifdef MAP_ANON + ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); +#else /* !MAP_ANON */ + if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); +#endif /* MAP_ANON */ + if (ptr == MAP_FAILED) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end - start_size; +#endif /* _WIN32 */ + + stack->top = stack->end; + return stack; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start) +{ +#if defined _WIN32 || defined(POSIX_MADV_DONTNEED) + sljit_uw aligned_old_start; + sljit_uw aligned_new_start; + sljit_sw 
page_align; +#endif + + if ((new_start < stack->min_start) || (new_start >= stack->end)) + return NULL; + +#ifdef _WIN32 + page_align = get_page_alignment(); + + aligned_new_start = (sljit_uw)new_start & ~page_align; + aligned_old_start = ((sljit_uw)stack->start) & ~page_align; + if (aligned_new_start != aligned_old_start) { + if (aligned_new_start < aligned_old_start) { + if (!VirtualAlloc((void*)aligned_new_start, aligned_old_start - aligned_new_start, MEM_COMMIT, PAGE_READWRITE)) + return NULL; + } + else { + if (!VirtualFree((void*)aligned_old_start, aligned_new_start - aligned_old_start, MEM_DECOMMIT)) + return NULL; + } + } +#elif defined(POSIX_MADV_DONTNEED) + if (stack->start < new_start) { + page_align = get_page_alignment(); + + aligned_new_start = (sljit_uw)new_start & ~page_align; + aligned_old_start = ((sljit_uw)stack->start) & ~page_align; + + if (aligned_new_start > aligned_old_start) { + posix_madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, POSIX_MADV_DONTNEED); +#ifdef MADV_FREE + madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, MADV_FREE); +#endif /* MADV_FREE */ + } + } +#endif /* _WIN32 */ + + stack->start = new_start; + return new_start; +} + +#endif /* SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */ + +#endif /* SLJIT_UTIL_STACK */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitWXExecAllocator.c b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitWXExecAllocator.c new file mode 100644 index 0000000000..72d5b8dd2b --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/sljit_src/sljitWXExecAllocator.c @@ -0,0 +1,229 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple W^X executable memory allocator for POSIX + like systems and Windows + + In *NIX, MAP_ANON is required (that is considered a feature) so make + sure to set the right availability macros for your system or the code + will fail to build. + + If your system doesn't support mapping of anonymous pages (ex: IRIX) it + is also likely that it doesn't need this allocator and should be using + the standard one instead. + + It allocates a separate map for each code block and may waste a lot of + memory, because whatever was requested, will be rounded up to the page + size (minimum 4KB, but could be even bigger). 
+ + It changes the page permissions (RW <-> RX) as needed and therefore, if you + will be updating the code after it has been generated, need to make sure to + block any concurrent execution, or could result in a SIGBUS, that could + even manifest itself at a different address than the one that was being + modified. + + Only use if you are unable to use the regular allocator because of security + restrictions and adding exceptions to your application or the system are + not possible. +*/ + +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + sljit_update_wx_flags((from), (to), (enable_exec)) + +#ifndef _WIN32 +#include +#include + +#ifdef __NetBSD__ +#if defined(PROT_MPROTECT) +#define check_se_protected(ptr, size) (0) +#define SLJIT_PROT_WX PROT_MPROTECT(PROT_EXEC) +#else /* !PROT_MPROTECT */ +#ifdef _NETBSD_SOURCE +#include +#else /* !_NETBSD_SOURCE */ +typedef unsigned int u_int; +#define devmajor_t sljit_s32 +#endif /* _NETBSD_SOURCE */ +#include +#include + +#define check_se_protected(ptr, size) netbsd_se_protected() + +static SLJIT_INLINE int netbsd_se_protected(void) +{ + int mib[3]; + int paxflags; + size_t len = sizeof(paxflags); + + mib[0] = CTL_PROC; + mib[1] = getpid(); + mib[2] = PROC_PID_PAXFLAGS; + + if (SLJIT_UNLIKELY(sysctl(mib, 3, &paxflags, &len, NULL, 0) < 0)) + return -1; + + return (paxflags & CTL_PROC_PAXFLAGS_MPROTECT) ? 
-1 : 0; +} +#endif /* PROT_MPROTECT */ +#else /* POSIX */ +#define check_se_protected(ptr, size) generic_se_protected(ptr, size) + +static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size) +{ + if (SLJIT_LIKELY(!mprotect(ptr, size, PROT_EXEC))) + return mprotect(ptr, size, PROT_READ | PROT_WRITE); + + return -1; +} +#endif /* NetBSD */ + +#if defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED +#define SLJIT_SE_LOCK() +#define SLJIT_SE_UNLOCK() +#else /* !SLJIT_SINGLE_THREADED */ +#include +#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock) +#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock) +#endif /* SLJIT_SINGLE_THREADED */ + +#ifndef SLJIT_PROT_WX +#define SLJIT_PROT_WX 0 +#endif /* !SLJIT_PROT_WX */ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ +#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) + static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER; +#endif + static int se_protected = !SLJIT_PROT_WX; + int prot = PROT_READ | PROT_WRITE | SLJIT_PROT_WX; + sljit_uw* ptr; + + if (SLJIT_UNLIKELY(se_protected < 0)) + return NULL; + +#ifdef PROT_MAX + prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + + size += sizeof(sljit_uw); + ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0); + + if (ptr == MAP_FAILED) + return NULL; + + if (SLJIT_UNLIKELY(se_protected > 0)) { + SLJIT_SE_LOCK(); + se_protected = check_se_protected(ptr, size); + SLJIT_SE_UNLOCK(); + if (SLJIT_UNLIKELY(se_protected < 0)) { + munmap((void *)ptr, size); + return NULL; + } + } + + *ptr++ = size; + return ptr; +} + +#undef SLJIT_PROT_WX +#undef SLJIT_SE_UNLOCK +#undef SLJIT_SE_LOCK + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + sljit_uw *start_ptr = ((sljit_uw*)ptr) - 1; + munmap((void*)start_ptr, *start_ptr); +} + +static void sljit_update_wx_flags(void *from, void *to, sljit_s32 enable_exec) +{ + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + sljit_uw start = 
(sljit_uw)from; + sljit_uw end = (sljit_uw)to; + int prot = PROT_READ | (enable_exec ? PROT_EXEC : PROT_WRITE); + + SLJIT_ASSERT(start < end); + + start &= ~page_mask; + end = (end + page_mask) & ~page_mask; + + mprotect((void*)start, end - start, prot); +} + +#else /* windows */ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + sljit_uw *ptr; + + size += sizeof(sljit_uw); + ptr = (sljit_uw*)VirtualAlloc(NULL, size, + MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + + if (!ptr) + return NULL; + + *ptr++ = size; + + return ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + sljit_uw start = (sljit_uw)ptr - sizeof(sljit_uw); +#if defined(SLJIT_DEBUG) && SLJIT_DEBUG + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + + SLJIT_ASSERT(!(start & page_mask)); +#endif + VirtualFree((void*)start, 0, MEM_RELEASE); +} + +static void sljit_update_wx_flags(void *from, void *to, sljit_s32 enable_exec) +{ + DWORD oldprot; + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + sljit_uw start = (sljit_uw)from; + sljit_uw end = (sljit_uw)to; + DWORD prot = enable_exec ? PAGE_EXECUTE : PAGE_READWRITE; + + SLJIT_ASSERT(start < end); + + start &= ~page_mask; + end = (end + page_mask) & ~page_mask; + + VirtualProtect((void*)start, end - start, prot, &oldprot); +} + +#endif /* !windows */ + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + /* This allocator does not keep unused memory for future allocations. */ +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPost.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPost.h new file mode 100644 index 0000000000..04f003e00f --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPost.h @@ -0,0 +1,33 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_POST_H_ +#define SLJIT_CONFIG_POST_H_ + +void *sljit_test_malloc_exec(sljit_uw size, void *exec_allocator_data); +void sljit_test_free_code(void* code, void *exec_allocator_data); + +#endif /* SLJIT_CONFIG_POST_H_ */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPre.h b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPre.h new file mode 100644 index 0000000000..c2c86d6b43 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitConfigPre.h @@ -0,0 +1,35 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SLJIT_CONFIG_PRE_H_ +#define SLJIT_CONFIG_PRE_H_ + +#define SLJIT_HAVE_CONFIG_POST 1 + +#define SLJIT_MALLOC_EXEC(size, exec_allocator_data) sljit_test_malloc_exec((size), (exec_allocator_data)) +#define SLJIT_FREE_EXEC(ptr, exec_allocator_data) sljit_test_free_code((ptr), (exec_allocator_data)) + +#endif /* SLJIT_CONFIG_PRE_H_ */ diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitMain.c b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitMain.c new file mode 100644 index 0000000000..5df20e4a30 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitMain.c @@ -0,0 +1,84 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sljitLir.h" + +#include +#include + +int sljit_test(int argc, char* argv[]); + +void error(const char* str) +{ + printf("An error occured: %s\n", str); + exit(-1); +} + +union executable_code { + void* code; + sljit_sw (SLJIT_FUNC *func)(sljit_sw* a); +}; +typedef union executable_code executable_code; + +void devel(void) +{ + executable_code code; + + struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[4]; + + if (!compiler) + error("Not enough of memory"); + buf[0] = 5; + buf[1] = 12; + buf[2] = 0; + buf[3] = 0; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + sljit_compiler_verbose(compiler, stdout); +#endif + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 4, 5, 4, 0, 2 * sizeof(sljit_sw)); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + sljit_free_compiler(compiler); + + printf("Code at: %p\n", (void*)SLJIT_FUNC_OFFSET(code.code)); + + printf("Function returned with %ld\n", (long)code.func((sljit_sw*)buf)); + printf("buf[0] = %ld\n", (long)buf[0]); + printf("buf[1] = %ld\n", (long)buf[1]); + printf("buf[2] = %ld\n", (long)buf[2]); + printf("buf[3] = %ld\n", (long)buf[3]); + sljit_free_code(code.code, NULL); +} + +int main(int argc, char* argv[]) +{ + /* devel(); */ + return sljit_test(argc, argv); +} diff --git a/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTest.c 
b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTest.c new file mode 100644 index 0000000000..4ddf57ce36 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljit/test_src/sljitTest.c @@ -0,0 +1,6785 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Must be the first one. Must not depend on any other include. 
*/ +#include "sljitLir.h" + +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4127) /* conditional expression is constant */ +#endif + +#if defined _WIN32 || defined _WIN64 +#define COLOR_RED +#define COLOR_GREEN +#define COLOR_ARCH +#define COLOR_DEFAULT +#else +#define COLOR_RED "\33[31m" +#define COLOR_GREEN "\33[32m" +#define COLOR_ARCH "\33[33m" +#define COLOR_DEFAULT "\33[0m" +#endif + +union executable_code { + void* code; + sljit_sw (SLJIT_FUNC *func0)(void); + sljit_sw (SLJIT_FUNC *func1)(sljit_sw a); + sljit_sw (SLJIT_FUNC *func2)(sljit_sw a, sljit_sw b); + sljit_sw (SLJIT_FUNC *func3)(sljit_sw a, sljit_sw b, sljit_sw c); +}; +typedef union executable_code executable_code; + +static sljit_s32 successful_tests = 0; +static sljit_s32 verbose = 0; +static sljit_s32 silent = 0; + +#define FAILED(cond, text) \ + if (SLJIT_UNLIKELY(cond)) { \ + printf(text); \ + return; \ + } + +#define CHECK(compiler) \ + if (sljit_get_compiler_error(compiler) != SLJIT_ERR_COMPILED) { \ + printf("Compiler error: %d\n", sljit_get_compiler_error(compiler)); \ + sljit_free_compiler(compiler); \ + return; \ + } + +static void cond_set(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) +{ + /* Testing both sljit_emit_op_flags and sljit_emit_jump. */ + struct sljit_jump* jump; + struct sljit_label* label; + + sljit_emit_op_flags(compiler, SLJIT_MOV, dst, dstw, type); + jump = sljit_emit_jump(compiler, type); + sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, dst, dstw, SLJIT_IMM, 2); + label = sljit_emit_label(compiler); + sljit_set_label(jump, label); +} + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +/* For interface testing and for test64. */ +void *sljit_test_malloc_exec(sljit_uw size, void *exec_allocator_data) +{ + if (exec_allocator_data) + return exec_allocator_data; + + return SLJIT_BUILTIN_MALLOC_EXEC(size, exec_allocator_data); +} + +/* For interface testing. 
*/ +void sljit_test_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_BUILTIN_FREE_EXEC(code, exec_allocator_data); +} + +#define MALLOC_EXEC(result, size) \ + result = SLJIT_MALLOC_EXEC(size, NULL); \ + if (!result) { \ + printf("Cannot allocate executable memory\n"); \ + return; \ + } \ + memset(result, 255, size); + +#define FREE_EXEC(ptr) \ + SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); + +static void test_exec_allocator(void) +{ + /* This is not an sljit test. */ + void *ptr1; + void *ptr2; + void *ptr3; + + if (verbose) + printf("Run executable allocator test\n"); + + MALLOC_EXEC(ptr1, 32); + MALLOC_EXEC(ptr2, 512); + MALLOC_EXEC(ptr3, 512); + FREE_EXEC(ptr2); + FREE_EXEC(ptr3); + FREE_EXEC(ptr1); + MALLOC_EXEC(ptr1, 262104); + MALLOC_EXEC(ptr2, 32000); + FREE_EXEC(ptr1); + MALLOC_EXEC(ptr1, 262104); + FREE_EXEC(ptr1); + FREE_EXEC(ptr2); + MALLOC_EXEC(ptr1, 512); + MALLOC_EXEC(ptr2, 512); + MALLOC_EXEC(ptr3, 512); + FREE_EXEC(ptr2); + MALLOC_EXEC(ptr2, 512); +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + sljit_free_unused_memory_exec(); +#endif + FREE_EXEC(ptr3); + FREE_EXEC(ptr1); + FREE_EXEC(ptr2); + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + sljit_free_unused_memory_exec(); +#endif +} + +#undef MALLOC_EXEC + +#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ + +static void test1(void) +{ + /* Enter and return from an sljit function. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + + if (verbose) + printf("Run test1\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + /* 3 arguments passed, 3 arguments used. 
*/ + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 3, 3, 0, 0, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_S1, 0); + + SLJIT_ASSERT(sljit_get_generated_code_size(compiler) == 0); + code.code = sljit_generate_code(compiler); + CHECK(compiler); + SLJIT_ASSERT(compiler->error == SLJIT_ERR_COMPILED); + SLJIT_ASSERT(sljit_get_generated_code_size(compiler) > 0); + sljit_free_compiler(compiler); + + FAILED(code.func3(3, -21, 86) != -21, "test1 case 1 failed\n"); + FAILED(code.func3(4789, 47890, 997) != 47890, "test1 case 2 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test2(void) +{ + /* Test mov. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[8]; + static sljit_sw data[2] = { 0, -9876 }; + + if (verbose) + printf("Run test2\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = 5678; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 2, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 9999); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_S1, SLJIT_S0), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_S1, SLJIT_S0), SLJIT_WORD_SHIFT, SLJIT_MEM0(), (sljit_sw)&buf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_sw)); + sljit_emit_op1(compiler, 
SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), (sljit_sw)&data); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf - 0x12345678); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 0x12345678); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3456); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf - 0xff890 + 6 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0xff890, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf + 0xff890 + 7 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), -0xff890, SLJIT_R0, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R2, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 9999, "test2 case 1 failed\n"); + FAILED(buf[1] != 9999, "test2 case 2 failed\n"); + FAILED(buf[2] != 9999, "test2 case 3 failed\n"); + FAILED(buf[3] != 5678, "test2 case 4 failed\n"); + FAILED(buf[4] != -9876, "test2 case 5 failed\n"); + FAILED(buf[5] != 5678, "test2 case 6 failed\n"); + FAILED(buf[6] != 3456, "test2 case 6 failed\n"); + FAILED(buf[7] != 3456, "test2 case 6 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test3(void) +{ + /* Test not. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[5]; + + if (verbose) + printf("Run test3\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 1234; + buf[1] = 0; + buf[2] = 9876; + buf[3] = 0; + buf[4] = 0x12345678; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_NOT, SLJIT_MEM0(), (sljit_sw)&buf[1], SLJIT_MEM0(), (sljit_sw)&buf[1]); + sljit_emit_op1(compiler, SLJIT_NOT, SLJIT_RETURN_REG, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_NOT, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf[4] - 0xff0000 - 0x20); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&buf[4] - 0xff0000); + sljit_emit_op1(compiler, SLJIT_NOT, SLJIT_MEM1(SLJIT_R1), 0xff0000 + 0x20, SLJIT_MEM1(SLJIT_R2), 0xff0000); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != ~1234, "test3 case 1 failed\n"); + FAILED(buf[1] != ~1234, "test3 case 2 failed\n"); + FAILED(buf[3] != ~9876, "test3 case 3 failed\n"); + FAILED(buf[4] != ~0x12345678, "test3 case 4 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test4(void) +{ + /* Test neg. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[4]; + + if (verbose) + printf("Run test4\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 1234; + buf[2] = 0; + buf[3] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 2, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_NEG, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_NEG, SLJIT_MEM0(), (sljit_sw)&buf[0], SLJIT_MEM0(), (sljit_sw)&buf[1]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 299); + sljit_emit_op1(compiler, SLJIT_NEG, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_NEG, SLJIT_RETURN_REG, 0, SLJIT_S1, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func2((sljit_sw)&buf, 4567) != -4567, "test4 case 1 failed\n"); + FAILED(buf[0] != -1234, "test4 case 2 failed\n"); + FAILED(buf[2] != -4567, "test4 case 3 failed\n"); + FAILED(buf[3] != -299, "test4 case 4 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test5(void) +{ + /* Test add. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[9]; + + if (verbose) + printf("Run test5\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 100; + buf[1] = 200; + buf[2] = 300; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + buf[8] = 313; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 2, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 50); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 1, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 1, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_sw) + 2); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 50); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_IMM, 4, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_IMM, 50, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_IMM, 50, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 
0x1e7d39f2); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R1, 0, SLJIT_IMM, 0x23de7c06); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_IMM, 0x3d72e452, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_IMM, -43, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_IMM, 1000, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1430); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_IMM, -99, SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 2437 + 2 * sizeof(sljit_sw), "test5 case 1 failed\n"); + FAILED(buf[0] != 202 + 2 * sizeof(sljit_sw), "test5 case 2 failed\n"); + FAILED(buf[2] != 500, "test5 case 3 failed\n"); + FAILED(buf[3] != 400, "test5 case 4 failed\n"); + FAILED(buf[4] != 200, "test5 case 5 failed\n"); + FAILED(buf[5] != 250, "test5 case 6 failed\n"); + FAILED(buf[6] != 0x425bb5f8, "test5 case 7 failed\n"); + FAILED(buf[7] != 0x5bf01e44, "test5 case 8 failed\n"); + FAILED(buf[8] != 270, "test5 case 9 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test6(void) +{ + /* Test addc, sub, subc. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[11]; + + if (verbose) + printf("Run test6\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + buf[8] = 0; + buf[9] = 0; + buf[10] = 4000; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADDC, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADDC, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 4); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 100); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_R0, 0, SLJIT_IMM, 50); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 6000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_IMM, 10); + sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 100); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5000); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, 
SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5000); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_IMM, 6000, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 100); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 32768); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -32767); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 8, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x52cd3bf4); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 9, SLJIT_R0, 0, SLJIT_IMM, 0x3da297c6); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 6000); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 10); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 5); + sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 2); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, -2220); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 2223, "test6 case 1 failed\n"); + FAILED(buf[0] != 1, "test6 case 2 failed\n"); + FAILED(buf[1] != 5, "test6 case 3 failed\n"); + FAILED(buf[2] != 50, "test6 case 4 failed\n"); + FAILED(buf[3] != 4, 
"test6 case 5 failed\n"); + FAILED(buf[4] != 50, "test6 case 6 failed\n"); + FAILED(buf[5] != 50, "test6 case 7 failed\n"); + FAILED(buf[6] != 1000, "test6 case 8 failed\n"); + FAILED(buf[7] != 100 - 32768, "test6 case 9 failed\n"); + FAILED(buf[8] != 100 + 32767, "test6 case 10 failed\n"); + FAILED(buf[9] != 0x152aa42e, "test6 case 11 failed\n"); + FAILED(buf[10] != -2000, "test6 case 12 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test7(void) +{ + /* Test logical operators. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[8]; + + if (verbose) + printf("Run test7\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0xff80; + buf[1] = 0x0f808080; + buf[2] = 0; + buf[3] = 0xaaaaaa; + buf[4] = 0; + buf[5] = 0x4040; + buf[6] = 0; + buf[7] = 0xc43a7f95; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xf0C000); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_R0, 0, SLJIT_IMM, 0x308f); + sljit_emit_op2(compiler, SLJIT_XOR, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_AND, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_IMM, 0xf0f0f0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xC0F0); + sljit_emit_op2(compiler, SLJIT_XOR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xff0000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xC0F0); + sljit_emit_op2(compiler, SLJIT_AND, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5); + sljit_emit_op2(compiler, SLJIT_OR, 
SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_R2, 0, SLJIT_IMM, 0xff0000); + sljit_emit_op2(compiler, SLJIT_XOR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 0xFFFFFF, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6, SLJIT_IMM, 0xa56c82c0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7); + sljit_emit_op2(compiler, SLJIT_XOR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7, SLJIT_IMM, 0xff00ff00, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xff00ff00); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 0x0f); + sljit_emit_op2(compiler, SLJIT_AND, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0x888888, SLJIT_R1, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 0x8808, "test7 case 1 failed\n"); + FAILED(buf[0] != 0x0F807F00, "test7 case 2 failed\n"); + FAILED(buf[1] != 0x0F7F7F7F, "test7 case 3 failed\n"); + FAILED(buf[2] != 0x00F0F08F, "test7 case 4 failed\n"); + FAILED(buf[3] != 0x00A0A0A0, "test7 case 5 failed\n"); + FAILED(buf[4] != 0x00FF80B0, "test7 case 6 failed\n"); + FAILED(buf[5] != 0x00FF4040, "test7 case 7 failed\n"); + FAILED(buf[6] != 0xa56c82c0, "test7 case 8 failed\n"); + FAILED(buf[7] != 0x3b3a8095, "test7 case 9 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test8(void) +{ + /* Test flags (neg, cmp, test). 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[13]; + + if (verbose) + printf("Run test8\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 100; + buf[1] = 3; + buf[2] = 3; + buf[3] = 3; + buf[4] = 3; + buf[5] = 3; + buf[6] = 3; + buf[7] = 3; + buf[8] = 3; + buf[9] = 3; + buf[10] = 3; + buf[11] = 3; + buf[12] = 3; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 2, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 20); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 10); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_IMM, 6, SLJIT_IMM, 5); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_NOT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 3000); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 3000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_S1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_LESS); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, -15); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_SIG_GREATER); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -(sljit_sw)(~(sljit_uw)0 >> 1) - 1); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | 
SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_NEG | SLJIT_SET_Z | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R0, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6, SLJIT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_NOT | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7, SLJIT_ZERO); + sljit_emit_op1(compiler, SLJIT_NOT | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 8, SLJIT_ZERO); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_IMM, 0xffff, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffff); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 9, SLJIT_NOT_ZERO); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_IMM, 0xffff, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_IMM, 0xffff); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 0x1); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_NOT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -(sljit_sw)(~(sljit_uw)0 >> 1) - 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_SUB | 
SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 11, SLJIT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 12, SLJIT_OVERFLOW); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[1] != 1, "test8 case 1 failed\n"); + FAILED(buf[2] != 0, "test8 case 2 failed\n"); + FAILED(buf[3] != 0, "test8 case 3 failed\n"); + FAILED(buf[4] != 1, "test8 case 4 failed\n"); + FAILED(buf[5] != 1, "test8 case 5 failed\n"); + FAILED(buf[6] != 1, "test8 case 6 failed\n"); + FAILED(buf[7] != 1, "test8 case 7 failed\n"); + FAILED(buf[8] != 0, "test8 case 8 failed\n"); + FAILED(buf[9] != 1, "test8 case 9 failed\n"); + FAILED(buf[10] != 0, "test8 case 10 failed\n"); + FAILED(buf[11] != 1, "test8 case 11 failed\n"); + FAILED(buf[12] != 0, "test8 case 12 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test9(void) +{ + /* Test shift. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[13]; +#ifdef SLJIT_PREF_SHIFT_REG + sljit_s32 shift_reg = SLJIT_PREF_SHIFT_REG; +#else + sljit_s32 shift_reg = SLJIT_R2; +#endif + + SLJIT_ASSERT(shift_reg >= SLJIT_R2 && shift_reg <= SLJIT_R3); + + if (verbose) + printf("Run test9\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 1 << 10; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + buf[8] = 0; + buf[9] = 3; + buf[10] = 0; + buf[11] = 0; + buf[12] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 4, 2, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xf); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 3); + sljit_emit_op2(compiler, SLJIT_LSHR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -64); + sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 2); + sljit_emit_op2(compiler, SLJIT_ASHR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_R0, 0, shift_reg, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 0xff); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 4); + sljit_emit_op2(compiler, SLJIT_SHL, shift_reg, 0, shift_reg, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, shift_reg, 0); + sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 0xff); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_op2(compiler, SLJIT_LSHR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_R0, 0); + 
sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, shift_reg, 0, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 0xf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6, SLJIT_S1, 0); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xf00); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 4); + sljit_emit_op2(compiler, SLJIT_LSHR, SLJIT_R1, 0, SLJIT_R2, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 8, SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)buf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 9); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), SLJIT_WORD_SHIFT); + + sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 4); + sljit_emit_op2(compiler, SLJIT_SHL, shift_reg, 0, SLJIT_IMM, 2, shift_reg, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, shift_reg, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xa9); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0x7d00); + sljit_emit_op2(compiler, SLJIT_LSHR32, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 32); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); +#endif + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R1, 0, SLJIT_R1, 0, 
SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xe30000); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op2(compiler, SLJIT_ASHR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffc0); +#else + sljit_emit_op2(compiler, SLJIT_ASHR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffe0); +#endif + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0x25000000); + sljit_emit_op2(compiler, SLJIT_SHL32, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xfffe1); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); +#endif + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 11, SLJIT_R1, 0, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x5c); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R1, 0, SLJIT_R0, 0, shift_reg, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0xf600); + sljit_emit_op2(compiler, SLJIT_LSHR32, SLJIT_R0, 0, SLJIT_R0, 0, shift_reg, 0); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + /* Alternative form of uint32 type cast. 
*/ + sljit_emit_op2(compiler, SLJIT_AND, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffffffff); +#endif + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x630000); + sljit_emit_op2(compiler, SLJIT_ASHR, SLJIT_R0, 0, SLJIT_R0, 0, shift_reg, 0); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 12, SLJIT_R1, 0, SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 0x3c, "test9 case 1 failed\n"); + FAILED(buf[1] != 0xf0, "test9 case 2 failed\n"); + FAILED(buf[2] != -16, "test9 case 3 failed\n"); + FAILED(buf[3] != 0xff0, "test9 case 4 failed\n"); + FAILED(buf[4] != 4, "test9 case 5 failed\n"); + FAILED(buf[5] != 0xff00, "test9 case 6 failed\n"); + FAILED(buf[6] != 0x3c, "test9 case 7 failed\n"); + FAILED(buf[7] != 0xf0, "test9 case 8 failed\n"); + FAILED(buf[8] != 0xf0, "test9 case 9 failed\n"); + FAILED(buf[9] != 0x18, "test9 case 10 failed\n"); + FAILED(buf[10] != 32, "test9 case 11 failed\n"); + FAILED(buf[11] != 0x4ae37da9, "test9 case 12 failed\n"); + FAILED(buf[12] != 0x63f65c, "test9 case 13 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test10(void) +{ + /* Test multiplications. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[7]; + + if (verbose) + printf("Run test10\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 3; + buf[1] = 0; + buf[2] = 0; + buf[3] = 6; + buf[4] = -10; + buf[5] = 0; + buf[6] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 7); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_R2, 0, SLJIT_IMM, 8); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_IMM, -3, SLJIT_IMM, -4); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -2); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_sw) / 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&buf[3]); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 1, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 1, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 9); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_R0, 0); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, 
SLJIT_W(0x123456789)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6, SLJIT_R0, 0); +#endif + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 11, SLJIT_IMM, 10); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 110, "test10 case 1 failed\n"); + FAILED(buf[0] != 15, "test10 case 2 failed\n"); + FAILED(buf[1] != 56, "test10 case 3 failed\n"); + FAILED(buf[2] != 12, "test10 case 4 failed\n"); + FAILED(buf[3] != -12, "test10 case 5 failed\n"); + FAILED(buf[4] != 100, "test10 case 6 failed\n"); + FAILED(buf[5] != 81, "test10 case 7 failed\n"); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[6] != SLJIT_W(0x123456789) * 3, "test10 case 8 failed\n"); +#endif + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test11(void) +{ + /* Test rewritable constants. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_const* const1; + struct sljit_const* const2; + struct sljit_const* const3; + struct sljit_const* const4; + void* value; + sljit_sw executable_offset; + sljit_uw const1_addr; + sljit_uw const2_addr; + sljit_uw const3_addr; + sljit_uw const4_addr; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_sw word_value1 = SLJIT_W(0xaaaaaaaaaaaaaaaa); + sljit_sw word_value2 = SLJIT_W(0xfee1deadfbadf00d); +#else + sljit_sw word_value1 = 0xaaaaaaaal; + sljit_sw word_value2 = 0xfbadf00dl; +#endif + sljit_sw buf[3]; + + if (verbose) + printf("Run test11\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + + SLJIT_ASSERT(!sljit_alloc_memory(compiler, 0)); + SLJIT_ASSERT(!sljit_alloc_memory(compiler, 16 * sizeof(sljit_sw) + 1)); + + const1 = sljit_emit_const(compiler, SLJIT_MEM0(), (sljit_sw)&buf[0], -0x81b9); + + value = sljit_alloc_memory(compiler, 16 * sizeof(sljit_sw)); + if (value != NULL) + { + SLJIT_ASSERT(!((sljit_sw)value & (sizeof(sljit_sw) - 1))); + memset(value, 255, 16 * sizeof(sljit_sw)); + } + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + const2 = sljit_emit_const(compiler, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_WORD_SHIFT - 1, -65535); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[0] + 2 * sizeof(sljit_sw) - 2); + const3 = sljit_emit_const(compiler, SLJIT_MEM1(SLJIT_R0), 0, word_value1); + + value = sljit_alloc_memory(compiler, 17); + if (value != NULL) + { + SLJIT_ASSERT(!((sljit_sw)value & (sizeof(sljit_sw) - 1))); + memset(value, 255, 16); + } + + const4 = sljit_emit_const(compiler, SLJIT_RETURN_REG, 0, 0xf7afcdb7); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + 
executable_offset = sljit_get_executable_offset(compiler); + const1_addr = sljit_get_const_addr(const1); + const2_addr = sljit_get_const_addr(const2); + const3_addr = sljit_get_const_addr(const3); + const4_addr = sljit_get_const_addr(const4); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 0xf7afcdb7, "test11 case 1 failed\n"); + FAILED(buf[0] != -0x81b9, "test11 case 2 failed\n"); + FAILED(buf[1] != -65535, "test11 case 3 failed\n"); + FAILED(buf[2] != word_value1, "test11 case 4 failed\n"); + + sljit_set_const(const1_addr, -1, executable_offset); + sljit_set_const(const2_addr, word_value2, executable_offset); + sljit_set_const(const3_addr, 0xbab0fea1, executable_offset); + sljit_set_const(const4_addr, -60089, executable_offset); + + FAILED(code.func1((sljit_sw)&buf) != -60089, "test11 case 5 failed\n"); + FAILED(buf[0] != -1, "test11 case 6 failed\n"); + FAILED(buf[1] != word_value2, "test11 case 7 failed\n"); + FAILED(buf[2] != 0xbab0fea1, "test11 case 8 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test12(void) +{ + /* Test rewriteable jumps. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *label1; + struct sljit_label *label2; + struct sljit_label *label3; + struct sljit_jump *jump1; + struct sljit_jump *jump2; + struct sljit_jump *jump3; + sljit_sw executable_offset; + void* value; + sljit_uw jump1_addr; + sljit_uw label1_addr; + sljit_uw label2_addr; + sljit_sw buf[1]; + + if (verbose) + printf("Run test12\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 2, 0, 0, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_S1, 0, SLJIT_IMM, 10); + jump1 = sljit_emit_jump(compiler, SLJIT_REWRITABLE_JUMP | SLJIT_SIG_GREATER); + /* Default handler. 
*/ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 5); + jump2 = sljit_emit_jump(compiler, SLJIT_JUMP); + + value = sljit_alloc_memory(compiler, 15); + if (value != NULL) + { + SLJIT_ASSERT(!((sljit_sw)value & (sizeof(sljit_sw) - 1))); + memset(value, 255, 15); + } + + /* Handler 1. */ + label1 = sljit_emit_label(compiler); + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 6); + jump3 = sljit_emit_jump(compiler, SLJIT_JUMP); + /* Handler 2. */ + label2 = sljit_emit_label(compiler); + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 7); + /* Exit. */ + label3 = sljit_emit_label(compiler); + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_set_label(jump2, label3); + sljit_set_label(jump3, label3); + /* By default, set to handler 1. */ + sljit_set_label(jump1, label1); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + value = sljit_alloc_memory(compiler, 8); + if (value != NULL) + { + SLJIT_ASSERT(!((sljit_sw)value & (sizeof(sljit_sw) - 1))); + memset(value, 255, 8); + } + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + executable_offset = sljit_get_executable_offset(compiler); + jump1_addr = sljit_get_jump_addr(jump1); + label1_addr = sljit_get_label_addr(label1); + label2_addr = sljit_get_label_addr(label2); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, 4); + FAILED(buf[0] != 5, "test12 case 1 failed\n"); + + code.func2((sljit_sw)&buf, 11); + FAILED(buf[0] != 6, "test12 case 2 failed\n"); + + sljit_set_jump_addr(jump1_addr, label2_addr, executable_offset); + code.func2((sljit_sw)&buf, 12); + FAILED(buf[0] != 7, "test12 case 3 failed\n"); + + sljit_set_jump_addr(jump1_addr, label1_addr, executable_offset); + code.func2((sljit_sw)&buf, 13); + FAILED(buf[0] != 6, "test12 case 4 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void 
test13(void) +{ + /* Test fpu monadic functions. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_f64 buf[7]; + sljit_sw buf2[6]; + + if (verbose) + printf("Run test13\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test13 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 7.75; + buf[1] = -4.5; + buf[2] = 0.0; + buf[3] = 0.0; + buf[4] = 0.0; + buf[5] = 0.0; + buf[6] = 0.0; + + buf2[0] = 10; + buf2[1] = 10; + buf2[2] = 10; + buf2[3] = 10; + buf2[4] = 10; + buf2[5] = 10; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 2, 6, 0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&buf[2], SLJIT_MEM0(), (sljit_sw)&buf[1]); + sljit_emit_fop1(compiler, SLJIT_ABS_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM0(), (sljit_sw)&buf[0]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0); + sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_FR2, 0, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_FR2, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM0(), (sljit_sw)&buf[4], SLJIT_FR3, 0); + sljit_emit_fop1(compiler, SLJIT_ABS_F64, SLJIT_FR4, 0, SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR4, 0); + sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR4, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_GREATER_F, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 
sizeof(sljit_f64)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_GREATER_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_GREATER_F, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR5, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_GREATER_F64); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw), SLJIT_EQUAL_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_LESS_F, SLJIT_FR1, 0, SLJIT_FR1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_LESS_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_EQUAL_F, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_EQUAL_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_NOT_EQUAL_F, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_NOT_EQUAL_F64); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&buf2); + FAILED(buf[2] != -4.5, "test13 case 1 failed\n"); + FAILED(buf[3] != 4.5, "test13 case 2 failed\n"); + FAILED(buf[4] != -7.75, "test13 case 3 failed\n"); + FAILED(buf[5] != 4.5, "test13 case 4 failed\n"); + FAILED(buf[6] != -4.5, "test13 case 5 failed\n"); + + FAILED(buf2[0] != 1, "test13 case 6 failed\n"); + FAILED(buf2[1] != 0, "test13 case 7 failed\n"); + FAILED(buf2[2] != 1, "test13 case 8 failed\n"); + FAILED(buf2[3] != 0, "test13 case 9 failed\n"); + FAILED(buf2[4] != 0, "test13 case 10 failed\n"); + FAILED(buf2[5] != 1, "test13 
case 11 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test14(void) +{ + /* Test fpu diadic functions. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_f64 buf[15]; + + if (verbose) + printf("Run test14\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test14 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + buf[0] = 7.25; + buf[1] = 3.5; + buf[2] = 1.75; + buf[3] = 0.0; + buf[4] = 0.0; + buf[5] = 0.0; + buf[6] = 0.0; + buf[7] = 0.0; + buf[8] = 0.0; + buf[9] = 0.0; + buf[10] = 0.0; + buf[11] = 0.0; + buf[12] = 8.0; + buf[13] = 4.0; + buf[14] = 0.0; + + FAILED(!compiler, "cannot create compiler\n"); + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 6, 0, 0); + + /* ADD */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 3, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR1, 0, SLJIT_FR0, 0, SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 4, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 5, SLJIT_FR1, 0); + + /* SUB */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); + sljit_emit_fop2(compiler, 
SLJIT_SUB_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 6, SLJIT_FR3, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_F64_SHIFT); + sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_FR2, 0, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2); + sljit_emit_fop2(compiler, SLJIT_SUB_F64, SLJIT_FR3, 0, SLJIT_FR2, 0, SLJIT_FR3, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 7, SLJIT_FR2, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 8, SLJIT_FR3, 0); + + /* MUL */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 9, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_F64_SHIFT, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_FR1, 0, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_fop2(compiler, SLJIT_MUL_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 2, SLJIT_FR2, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 10, SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 11, SLJIT_FR5, 0); + + /* DIV */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 13); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR4, 0, SLJIT_FR5, 0); + sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 12, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_FR5, 0, SLJIT_FR5, 0, SLJIT_FR1, 0); + sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_FR4, 0, SLJIT_FR1, 0, SLJIT_FR4, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 13, SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64) * 14, SLJIT_FR4, 0); 
+ + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[3] != 10.75, "test14 case 1 failed\n"); + FAILED(buf[4] != 5.25, "test14 case 2 failed\n"); + FAILED(buf[5] != 7.0, "test14 case 3 failed\n"); + FAILED(buf[6] != 0.0, "test14 case 4 failed\n"); + FAILED(buf[7] != 5.5, "test14 case 5 failed\n"); + FAILED(buf[8] != 3.75, "test14 case 6 failed\n"); + FAILED(buf[9] != 24.5, "test14 case 7 failed\n"); + FAILED(buf[10] != 38.5, "test14 case 8 failed\n"); + FAILED(buf[11] != 9.625, "test14 case 9 failed\n"); + FAILED(buf[12] != 2.0, "test14 case 10 failed\n"); + FAILED(buf[13] != 2.0, "test14 case 11 failed\n"); + FAILED(buf[14] != 0.5, "test14 case 12 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static sljit_sw SLJIT_FUNC func(sljit_sw a, sljit_sw b, sljit_sw c) +{ + return a + b + c + 5; +} + +static void test15(void) +{ + /* Test function call. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump = NULL; + sljit_sw buf[7]; + + if (verbose) + printf("Run test15\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = SLJIT_FUNC_OFFSET(func); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 4, 1, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 7); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(func)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_RETURN_REG, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -10); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); + jump = sljit_emit_call(compiler, SLJIT_CALL | SLJIT_REWRITABLE_JUMP, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW)); + sljit_set_target(jump, (sljit_uw)-1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(func)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 40); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -3); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -60); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(func)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, 
SLJIT_IMM, -30); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 10); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(func)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 100); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 110); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 120); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(func)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -10); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 6); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_RETURN_REG, 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_set_jump_addr(sljit_get_jump_addr(jump), SLJIT_FUNC_OFFSET(func), sljit_get_executable_offset(compiler)); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != -15, 
"test15 case 1 failed\n"); + FAILED(buf[0] != 14, "test15 case 2 failed\n"); + FAILED(buf[1] != -8, "test15 case 3 failed\n"); + FAILED(buf[2] != SLJIT_FUNC_OFFSET(func) + 42, "test15 case 4 failed\n"); + FAILED(buf[3] != SLJIT_FUNC_OFFSET(func) - 85, "test15 case 5 failed\n"); + FAILED(buf[4] != SLJIT_FUNC_OFFSET(func) + 31, "test15 case 6 failed\n"); + FAILED(buf[5] != 335, "test15 case 7 failed\n"); + FAILED(buf[6] != -15, "test15 case 8 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test16(void) +{ + /* Ackermann benchmark. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label *entry; + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_jump *jump1; + struct sljit_jump *jump2; + + if (verbose) + printf("Run test16\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + entry = sljit_emit_label(compiler); + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 2, 0, 0, 0); + /* If x == 0. */ + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_S0, 0, SLJIT_IMM, 0); + jump1 = sljit_emit_jump(compiler, SLJIT_EQUAL); + /* If y == 0. */ + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_S1, 0, SLJIT_IMM, 0); + jump2 = sljit_emit_jump(compiler, SLJIT_EQUAL); + + /* Ack(x,y-1). */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 1); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW)); + sljit_set_label(jump, entry); + + /* Returns with Ack(x-1, Ack(x,y-1)). 
*/ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_RETURN_REG, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW)); + sljit_set_label(jump, entry); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + /* Returns with y+1. */ + label = sljit_emit_label(compiler); + sljit_set_label(jump1, label); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1, SLJIT_S1, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + /* Returns with Ack(x-1,1) */ + label = sljit_emit_label(compiler); + sljit_set_label(jump2, label); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW)); + sljit_set_label(jump, entry); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func2(3, 3) != 61, "test16 case 1 failed\n"); + /* For benchmarking. */ + /* FAILED(code.func2(3, 11) != 16381, "test16 case 1 failed\n"); */ + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test17(void) +{ + /* Test arm constant pool. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + sljit_sw buf[5]; + + if (verbose) + printf("Run test17\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 5; i++) + buf[i] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + for (i = 0; i <= 0xfff; i++) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x81818000 | i); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x81818000 | i); + if ((i & 0x3ff) == 0) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), (i >> 10) * sizeof(sljit_sw), SLJIT_R0, 0); + } + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED((sljit_uw)buf[0] != 0x81818000, "test17 case 1 failed\n"); + FAILED((sljit_uw)buf[1] != 0x81818400, "test17 case 2 failed\n"); + FAILED((sljit_uw)buf[2] != 0x81818800, "test17 case 3 failed\n"); + FAILED((sljit_uw)buf[3] != 0x81818c00, "test17 case 4 failed\n"); + FAILED((sljit_uw)buf[4] != 0x81818fff, "test17 case 5 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test18(void) +{ + /* Test 64 bit. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[11]; + + if (verbose) + printf("Run test18\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 100; + buf[6] = 100; + buf[7] = 100; + buf[8] = 100; + buf[9] = 0; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) + buf[10] = SLJIT_W(1) << 32; +#else + buf[10] = 1; +#endif + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 2, 0, 0, 0); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, SLJIT_W(0x1122334455667788)); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x1122334455667788)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(1000000000000)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(1000000000000)); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_IMM, SLJIT_W(5000000000000), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x1108080808)); + sljit_emit_op2(compiler, SLJIT_ADD32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x1120202020)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x1108080808)); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x1120202020)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, 
SLJIT_IMM, SLJIT_W(0x1120202020)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6, SLJIT_ZERO); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x1108080808)); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x2208080808)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7, SLJIT_LESS); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x1104040404)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 8, SLJIT_NOT_ZERO); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 4); + sljit_emit_op2(compiler, SLJIT_SHL32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 9, SLJIT_IMM, SLJIT_W(0xffff0000), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_MUL32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 10, SLJIT_IMM, -1); +#else + /* 32 bit operations. 
*/ + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 0x11223344); + sljit_emit_op2(compiler, SLJIT_ADD32, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 0x44332211); + +#endif + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[0] != SLJIT_W(0x1122334455667788), "test18 case 1 failed\n"); +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + FAILED(buf[1] != 0x55667788, "test18 case 2 failed\n"); +#else + FAILED(buf[1] != SLJIT_W(0x5566778800000000), "test18 case 2 failed\n"); +#endif + FAILED(buf[2] != SLJIT_W(2000000000000), "test18 case 3 failed\n"); + FAILED(buf[3] != SLJIT_W(4000000000000), "test18 case 4 failed\n"); +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + FAILED(buf[4] != 0x28282828, "test18 case 5 failed\n"); +#else + FAILED(buf[4] != SLJIT_W(0x2828282800000000), "test18 case 5 failed\n"); +#endif + FAILED(buf[5] != 0, "test18 case 6 failed\n"); + FAILED(buf[6] != 1, "test18 case 7 failed\n"); + FAILED(buf[7] != 1, "test18 case 8 failed\n"); + FAILED(buf[8] != 0, "test18 case 9 failed\n"); +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + FAILED(buf[9] != 0xfff00000, "test18 case 10 failed\n"); + FAILED(buf[10] != 0xffffffff, "test18 case 11 failed\n"); +#else + FAILED(buf[9] != SLJIT_W(0xfff0000000000000), "test18 case 10 failed\n"); + FAILED(buf[10] != SLJIT_W(0xffffffff00000000), "test18 case 11 failed\n"); +#endif +#else + FAILED(buf[0] != 0x11223344, "test18 case 1 failed\n"); + FAILED(buf[1] != 0x44332211, "test18 case 2 failed\n"); +#endif + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test19(void) +{ + /* Test arm partial instruction caching. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[10]; + + if (verbose) + printf("Run test19\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 6; + buf[1] = 4; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 2; + buf[7] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM0(), (sljit_sw)&buf[2], SLJIT_MEM0(), (sljit_sw)&buf[1], SLJIT_MEM0(), (sljit_sw)&buf[0]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_MEM1(SLJIT_R0), (sljit_sw)&buf[0], SLJIT_MEM1(SLJIT_R1), (sljit_sw)&buf[0]); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_MEM0(), (sljit_sw)&buf[0], SLJIT_IMM, 2); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_MEM1(SLJIT_R0), (sljit_sw)&buf[0] + 4 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7, SLJIT_IMM, 10); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 7); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_R1), (sljit_sw)&buf[5], SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_MEM1(SLJIT_R1), (sljit_sw)&buf[5]); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 10, "test19 case 1 failed\n"); + FAILED(buf[1] != 4, "test19 case 2 failed\n"); + FAILED(buf[2] != 14, "test19 case 3 failed\n"); + FAILED(buf[3] != 14, "test19 case 4 
failed\n"); + FAILED(buf[4] != 8, "test19 case 5 failed\n"); + FAILED(buf[5] != 6, "test19 case 6 failed\n"); + FAILED(buf[6] != 12, "test19 case 7 failed\n"); + FAILED(buf[7] != 10, "test19 case 8 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test20(void) +{ + /* Test stack. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump; + struct sljit_label* label; + sljit_sw buf[6]; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_sw offset_value = SLJIT_W(0x1234567812345678); +#else + sljit_sw offset_value = SLJIT_W(0x12345678); +#endif + + if (verbose) + printf("Run test20\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 5; + buf[1] = 12; + buf[2] = 0; + buf[3] = 0; + buf[4] = 111; + buf[5] = -12345; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, 4 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw), SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_uw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_uw), SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_uw), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw), SLJIT_MEM1(SLJIT_SP), 0); + sljit_get_local_base(compiler, SLJIT_R0, 0, -offset_value); + sljit_get_local_base(compiler, SLJIT_MEM1(SLJIT_S0), 0, -0x1234); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_MEM1(SLJIT_S0), 4 * 
sizeof(sljit_uw), SLJIT_MEM1(SLJIT_R0), offset_value, SLJIT_MEM1(SLJIT_R1), 0x1234 + sizeof(sljit_sw)); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_uw)); + /* Dummy last instructions. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, 23); + sljit_emit_label(compiler); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != -12345, "test20 case 1 failed\n") + + FAILED(buf[2] != 60, "test20 case 2 failed\n"); + FAILED(buf[3] != 17, "test20 case 3 failed\n"); + FAILED(buf[4] != 7, "test20 case 4 failed\n"); + + sljit_free_code(code.code, NULL); + + compiler = sljit_create_compiler(NULL, NULL); + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 3, 3, 0, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_get_local_base(compiler, SLJIT_R0, 0, SLJIT_MAX_LOCAL_SIZE - sizeof(sljit_sw)); + sljit_get_local_base(compiler, SLJIT_R1, 0, -(sljit_sw)sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -1); + label = sljit_emit_label(compiler); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_R0, 0); + jump = sljit_emit_jump(compiler, SLJIT_NOT_EQUAL); + sljit_set_label(jump, label); + + /* Saved registers should keep their value. 
*/ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_S1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_S2, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func3(1234, 5678, 9012) != 15924, "test20 case 5 failed\n"); + + sljit_free_code(code.code, NULL); + + compiler = sljit_create_compiler(NULL, NULL); + sljit_emit_enter(compiler, SLJIT_F64_ALIGNMENT, 0, 3, 0, 0, 0, SLJIT_MAX_LOCAL_SIZE); + + sljit_get_local_base(compiler, SLJIT_R0, 0, SLJIT_MAX_LOCAL_SIZE - sizeof(sljit_sw)); + sljit_get_local_base(compiler, SLJIT_R1, 0, -(sljit_sw)sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -1); + label = sljit_emit_label(compiler); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_R0, 0); + jump = sljit_emit_jump(compiler, SLJIT_NOT_EQUAL); + sljit_set_label(jump, label); + + sljit_get_local_base(compiler, SLJIT_R0, 0, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func0() % sizeof(sljit_f64) != 0, "test20 case 6 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test21(void) +{ + /* Test set context. The parts of the jit code can be separated in the memory. 
*/ + executable_code code1; + executable_code code2; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump = NULL; + sljit_uw addr; + sljit_sw executable_offset; + sljit_sw buf[4]; + + if (verbose) + printf("Run test21\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 9; + buf[1] = -6; + buf[2] = 0; + buf[3] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 2, 0, 0, 2 * sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, 10); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_SP), 0); + + jump = sljit_emit_jump(compiler, SLJIT_JUMP | SLJIT_REWRITABLE_JUMP); + sljit_set_target(jump, 0); + + code1.code = sljit_generate_code(compiler); + CHECK(compiler); + + executable_offset = sljit_get_executable_offset(compiler); + addr = sljit_get_jump_addr(jump); + + sljit_free_compiler(compiler); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + /* Other part of the jit code. 
*/ + sljit_set_context(compiler, 0, 1, 3, 2, 0, 0, 2 * sizeof(sljit_sw)); + + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code2.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + sljit_set_jump_addr(addr, SLJIT_FUNC_OFFSET(code2.code), executable_offset); + + FAILED(code1.func1((sljit_sw)&buf) != 19, "test21 case 1 failed\n"); + FAILED(buf[2] != -16, "test21 case 2 failed\n"); + FAILED(buf[3] != 100, "test21 case 3 failed\n"); + + sljit_free_code(code1.code, NULL); + sljit_free_code(code2.code, NULL); + successful_tests++; +} + +static void test22(void) +{ + /* Test simple byte and half-int data transfers. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[2]; + sljit_s16 sbuf[9]; + sljit_s8 bbuf[5]; + + if (verbose) + printf("Run test22\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + + sbuf[0] = 0; + sbuf[1] = 0; + sbuf[2] = -9; + sbuf[3] = 0; + sbuf[4] = 0; + sbuf[5] = 0; + sbuf[6] = 0; + + bbuf[0] = 0; + bbuf[1] = 0; + bbuf[2] = -56; + bbuf[3] = 0; + bbuf[4] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 3, 3, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_IMM, -13); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s16), SLJIT_IMM, 0x1234); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, 2 * sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s16), SLJIT_MEM1(SLJIT_S1), -(sljit_sw)sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xff0000 + 8000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM2(SLJIT_S1, SLJIT_R1), 1, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_S1, 0, SLJIT_IMM, 0x1234 - 3 * sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_R2), 0x1234, SLJIT_IMM, -9317); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S1, 0, SLJIT_IMM, 0x1234 + 4 * sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_R2), -0x1234, SLJIT_IMM, -9317); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_S1, 0, SLJIT_IMM, 0x12348 - 5 * sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_R2), 0x12348, SLJIT_IMM, 
-8888); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S1, 0, SLJIT_IMM, 0x12348 + 6 * sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_R2), -0x12348, SLJIT_IMM, -8888); + + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_IMM, -45); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_s8), SLJIT_IMM, 0x12); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 4 * sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_S1, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_S1, 0, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R2, 0, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_s8), SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM2(SLJIT_S2, SLJIT_R0), 0, SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&buf, (sljit_sw)&sbuf, (sljit_sw)&bbuf); + FAILED(buf[0] != -9, "test22 case 1 failed\n"); + FAILED(buf[1] != -56, "test22 case 2 failed\n"); + + FAILED(sbuf[0] != -13, "test22 case 3 failed\n"); + FAILED(sbuf[1] != 0x1234, "test22 case 4 failed\n"); + FAILED(sbuf[3] != 0x1234, "test22 case 5 failed\n"); + FAILED(sbuf[4] != 8000, "test22 case 6 failed\n"); + FAILED(sbuf[5] != -9317, "test22 case 7 failed\n"); + FAILED(sbuf[6] != -9317, "test22 case 8 failed\n"); + FAILED(sbuf[7] != -8888, "test22 case 9 failed\n"); + FAILED(sbuf[8] != -8888, "test22 case 10 failed\n"); + + FAILED(bbuf[0] != -45, "test22 case 11 failed\n"); + FAILED(bbuf[1] != 0x12, "test22 case 12 failed\n"); + FAILED(bbuf[3] != -56, "test22 case 13 failed\n"); + FAILED(bbuf[4] != 
4, "test22 case 14 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test23(void) +{ + /* Test 32 bit / 64 bit signed / unsigned int transfer and conversion. + This test has do real things on 64 bit systems, but works on 32 bit systems as well. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[9]; + sljit_s32 ibuf[5]; + union { + sljit_s32 asint; + sljit_u8 asbytes[4]; + } u; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_sw garbage = SLJIT_W(0x1234567812345678); +#else + sljit_sw garbage = 0x12345678; +#endif + + if (verbose) + printf("Run test23\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + buf[8] = 0; + + ibuf[0] = 0; + ibuf[1] = 0; + ibuf[2] = -5791; + ibuf[3] = 43579; + ibuf[4] = 658923; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 3, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_IMM, 34567); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 4); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), 0, SLJIT_IMM, -7654); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, garbage); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, garbage); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, garbage); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_s32)); + 
sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x0f00f00); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 0x7777); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0x7777 + 3 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 0x7777); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), -0x7777 + 4 * (sljit_sw)sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_LSHR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_R1, SLJIT_R1), 0, SLJIT_IMM, 16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1, SLJIT_IMM, 64, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)&buf[7], SLJIT_IMM, 0x123456); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), (sljit_sw)&buf[6], SLJIT_MEM0(), (sljit_sw)&buf[7]); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 5 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 7 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_LSHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM2(SLJIT_R2, SLJIT_R2), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[8] - 0x12340); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0x12340, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_R0), 0x12340, 
SLJIT_MEM1(SLJIT_R2), 3 * sizeof(sljit_sw), SLJIT_IMM, 6); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_s32), SLJIT_IMM, 0x12345678); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x2bd700 | 243); + sljit_emit_return(compiler, SLJIT_MOV_S8, SLJIT_R1, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func2((sljit_sw)&buf, (sljit_sw)&ibuf) != -13, "test23 case 1 failed\n"); + FAILED(buf[0] != -5791, "test23 case 2 failed\n"); + FAILED(buf[1] != 43579, "test23 case 3 failed\n"); + FAILED(buf[2] != 658923, "test23 case 4 failed\n"); + FAILED(buf[3] != 0x0f00f00, "test23 case 5 failed\n"); + FAILED(buf[4] != 0x0f00f00, "test23 case 6 failed\n"); + FAILED(buf[5] != 80, "test23 case 7 failed\n"); + FAILED(buf[6] != 0x123456, "test23 case 8 failed\n"); + FAILED(buf[7] != (sljit_sw)&buf[5], "test23 case 9 failed\n"); + FAILED(buf[8] != (sljit_sw)&buf[5] + 6, "test23 case 10 failed\n"); + + FAILED(ibuf[0] != 34567, "test23 case 11 failed\n"); + FAILED(ibuf[1] != -7654, "test23 case 12 failed\n"); + u.asint = ibuf[4]; +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) + FAILED(u.asbytes[0] != 0x78, "test23 case 13 failed\n"); + FAILED(u.asbytes[1] != 0x56, "test23 case 14 failed\n"); + FAILED(u.asbytes[2] != 0x34, "test23 case 15 failed\n"); + FAILED(u.asbytes[3] != 0x12, "test23 case 16 failed\n"); +#else + FAILED(u.asbytes[0] != 0x12, "test23 case 13 failed\n"); + FAILED(u.asbytes[1] != 0x34, "test23 case 14 failed\n"); + FAILED(u.asbytes[2] != 0x56, "test23 case 15 failed\n"); + FAILED(u.asbytes[3] != 0x78, "test23 case 16 failed\n"); +#endif + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test24(void) +{ + /* Some complicated addressing modes. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[9]; + sljit_s16 sbuf[5]; + sljit_s8 bbuf[7]; + + if (verbose) + printf("Run test24\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = 100567; + buf[1] = 75799; + buf[2] = 0; + buf[3] = -8; + buf[4] = -50; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + buf[8] = 0; + + sbuf[0] = 30000; + sbuf[1] = 0; + sbuf[2] = 0; + sbuf[3] = -12345; + sbuf[4] = 0; + + bbuf[0] = -128; + bbuf[1] = 0; + bbuf[2] = 0; + bbuf[3] = 99; + bbuf[4] = 0; + bbuf[5] = 0; + bbuf[6] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 3, 3, 0, 0, 0); + + /* Nothing should be updated. */ + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM0(), (sljit_sw)&sbuf[1], SLJIT_MEM0(), (sljit_sw)&sbuf[0]); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_MEM0(), (sljit_sw)&bbuf[1], SLJIT_MEM0(), (sljit_sw)&bbuf[0]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), 1, SLJIT_MEM0(), (sljit_sw)&sbuf[3]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[0]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM2(SLJIT_R0, SLJIT_R2), SLJIT_WORD_SHIFT, SLJIT_MEM0(), (sljit_sw)&buf[0], SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_R0), (sljit_sw)&bbuf[1], SLJIT_MEM1(SLJIT_R0), (sljit_sw)&bbuf[2]); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_s16)); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_MEM1(SLJIT_R1), (sljit_sw)&sbuf[3], SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3); + sljit_emit_op2(compiler, 
SLJIT_MUL, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_WORD_SHIFT); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 4); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_WORD_SHIFT); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 9 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 4 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -4 << SLJIT_WORD_SHIFT); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_R0, SLJIT_R2), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf - 0x7fff8000 + 6 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 952467); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0x7fff8000, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0x7fff8000 + sizeof(sljit_sw), SLJIT_MEM1(SLJIT_R0), 0x7fff8000); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf + 0x7fff7fff + 6 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_R0), -0x7fff7fff + 2 * (sljit_sw)sizeof(sljit_sw), SLJIT_MEM1(SLJIT_R0), -0x7fff7fff + (sljit_sw)sizeof(sljit_sw), SLJIT_MEM1(SLJIT_R0), -0x7fff7fff); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&bbuf - 0x7fff7ffe + 3 * sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_MEM1(SLJIT_R0), 0x7fff7fff, SLJIT_MEM1(SLJIT_R0), 0x7fff7ffe); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&bbuf + 0x7fff7fff + 5 * sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV_S8, 
SLJIT_MEM1(SLJIT_R0), -0x7fff7fff, SLJIT_MEM1(SLJIT_R0), -0x7fff8000); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&bbuf - SLJIT_W(0x123456123456)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)&bbuf - SLJIT_W(0x123456123456)); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_MEM1(SLJIT_R0), SLJIT_W(0x123456123456) + 6 * sizeof(sljit_s8), SLJIT_MEM1(SLJIT_R1), SLJIT_W(0x123456123456)); +#endif + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&buf, (sljit_sw)&sbuf, (sljit_sw)&bbuf); + FAILED(buf[2] != 176366, "test24 case 1 failed\n"); + FAILED(buf[3] != 64, "test24 case 2 failed\n"); + FAILED(buf[4] != -100, "test24 case 3 failed\n"); + FAILED(buf[5] != 100567, "test24 case 4 failed\n"); + FAILED(buf[6] != 952467, "test24 case 5 failed\n"); + FAILED(buf[7] != 952467, "test24 case 6 failed\n"); + FAILED(buf[8] != 952467 * 2, "test24 case 7 failed\n"); + + FAILED(sbuf[1] != 30000, "test24 case 8 failed\n"); + FAILED(sbuf[2] != -12345, "test24 case 9 failed\n"); + FAILED(sbuf[4] != sizeof(sljit_s16), "test24 case 10 failed\n"); + + FAILED(bbuf[1] != -128, "test24 case 11 failed\n"); + FAILED(bbuf[2] != 99, "test24 case 12 failed\n"); + FAILED(bbuf[4] != 99, "test24 case 13 failed\n"); + FAILED(bbuf[5] != 99, "test24 case 14 failed\n"); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(bbuf[6] != -128, "test24 case 15 failed\n"); +#endif + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test25(void) +{ +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + /* 64 bit loads. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[14]; + + if (verbose) + printf("Run test25\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 7; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 0; + buf[8] = 0; + buf[9] = 0; + buf[10] = 0; + buf[11] = 0; + buf[12] = 0; + buf[13] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 1 * sizeof(sljit_sw), SLJIT_IMM, 0x7fff); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_IMM, -0x8000); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_IMM, 0x7fffffff); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(-0x80000000)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x1234567887654321)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0xff80000000)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x3ff0000000)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0xfffffff800100000)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0xfffffff80010f000)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x07fff00000008001)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x07fff00080010000)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x07fff00080018001)); + sljit_emit_op1(compiler, SLJIT_MOV, 
SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x07fff00ffff00000)); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 0, "test25 case 1 failed\n"); + FAILED(buf[1] != 0x7fff, "test25 case 2 failed\n"); + FAILED(buf[2] != -0x8000, "test25 case 3 failed\n"); + FAILED(buf[3] != 0x7fffffff, "test25 case 4 failed\n"); + FAILED(buf[4] != SLJIT_W(-0x80000000), "test25 case 5 failed\n"); + FAILED(buf[5] != SLJIT_W(0x1234567887654321), "test25 case 6 failed\n"); + FAILED(buf[6] != SLJIT_W(0xff80000000), "test25 case 7 failed\n"); + FAILED(buf[7] != SLJIT_W(0x3ff0000000), "test25 case 8 failed\n"); + FAILED((sljit_uw)buf[8] != SLJIT_W(0xfffffff800100000), "test25 case 9 failed\n"); + FAILED((sljit_uw)buf[9] != SLJIT_W(0xfffffff80010f000), "test25 case 10 failed\n"); + FAILED(buf[10] != SLJIT_W(0x07fff00000008001), "test25 case 11 failed\n"); + FAILED(buf[11] != SLJIT_W(0x07fff00080010000), "test25 case 12 failed\n"); + FAILED(buf[12] != SLJIT_W(0x07fff00080018001), "test25 case 13 failed\n"); + FAILED(buf[13] != SLJIT_W(0x07fff00ffff00000), "test25 case 14 failed\n"); + + sljit_free_code(code.code, NULL); +#endif + successful_tests++; +} + +static void test26(void) +{ + /* Aligned access without aligned offsets. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[4]; + sljit_s32 ibuf[4]; + sljit_f64 dbuf[4]; + + if (verbose) + printf("Run test26\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = -2789; + buf[1] = 0; + buf[2] = 4; + buf[3] = -4; + + ibuf[0] = -689; + ibuf[1] = 0; + ibuf[2] = -6; + ibuf[3] = 3; + + dbuf[0] = 5.75; + dbuf[1] = 0.0; + dbuf[2] = 0.0; + dbuf[3] = -4.0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 3, 3, 0, 0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 3); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), -3); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32) - 1, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), -1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) - 3, SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 100); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw) * 2 - 103, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2 - 3, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 3 - 3); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 100); + sljit_emit_op2(compiler, SLJIT_MUL32, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_s32) * 2 - 101, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32) * 2 - 1, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32) * 3 - 1); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_IMM, 3); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f64) - 3, SLJIT_MEM1(SLJIT_S2), -3); + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f64) * 2 - 3, SLJIT_MEM1(SLJIT_S2), -3, SLJIT_MEM1(SLJIT_S2), 
sizeof(sljit_f64) - 3); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S2, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sizeof(sljit_f64) * 3 - 4) >> 1); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S2, 0, SLJIT_IMM, 1); + sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_f64) * 3 - 5, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f64) * 2 - 3, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 1); + } + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&buf, (sljit_sw)&ibuf, (sljit_sw)&dbuf); + + FAILED(buf[1] != -689, "test26 case 1 failed\n"); + FAILED(buf[2] != -16, "test26 case 2 failed\n"); + FAILED(ibuf[1] != -2789, "test26 case 3 failed\n"); + FAILED(ibuf[2] != -18, "test26 case 4 failed\n"); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + FAILED(dbuf[1] != 5.75, "test26 case 5 failed\n"); + FAILED(dbuf[2] != 11.5, "test26 case 6 failed\n"); + FAILED(dbuf[3] != -2.875, "test26 case 7 failed\n"); + } + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test27(void) +{ +#define SET_NEXT_BYTE(type) \ + cond_set(compiler, SLJIT_R2, 0, type); \ + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 1, SLJIT_R2, 0); \ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define RESULT(i) i +#else +#define RESULT(i) (3 - i) +#endif + + /* Playing with conditional flags. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_u8 buf[41]; + sljit_s32 i; +#ifdef SLJIT_PREF_SHIFT_REG + sljit_s32 shift_reg = SLJIT_PREF_SHIFT_REG; +#else + sljit_s32 shift_reg = SLJIT_R2; +#endif + + SLJIT_ASSERT(shift_reg >= SLJIT_R2 && shift_reg <= SLJIT_R3); + + if (verbose) + printf("Run test27\n"); + + for (i = 0; i < sizeof(buf); ++i) + buf[i] = 10; + + FAILED(!compiler, "cannot create compiler\n"); + + /* 3 arguments passed, 3 arguments used. */ + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 4, 3, 0, 0, 0); + + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x1001); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 20); + /* 0x100100000 on 64 bit machines, 0x100000 on 32 bit machines. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x800000); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op0(compiler, SLJIT_ENDBR); /* ENDBR should keep the flags. */ + sljit_emit_op0(compiler, SLJIT_NOP); /* Nop should keep the flags. */ + SET_NEXT_BYTE(SLJIT_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_LESS); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op0(compiler, SLJIT_ENDBR); /* ENDBR should keep the flags. */ + sljit_emit_op0(compiler, SLJIT_NOP); /* Nop should keep the flags. 
*/ + SET_NEXT_BYTE(SLJIT_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_LESS); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x1000); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 20); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0x10); + /* 0x100000010 on 64 bit machines, 0x10 on 32 bit machines. */ + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0x80); + SET_NEXT_BYTE(SLJIT_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0x80); + SET_NEXT_BYTE(SLJIT_LESS); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0x80); + SET_NEXT_BYTE(SLJIT_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0x80); + SET_NEXT_BYTE(SLJIT_LESS); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + /* 0xff..ff on all machines. 
*/ + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + SET_NEXT_BYTE(SLJIT_LESS_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + SET_NEXT_BYTE(SLJIT_GREATER_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R2, 0, SLJIT_R1, 0, SLJIT_IMM, -1); + SET_NEXT_BYTE(SLJIT_SIG_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, -1); + SET_NEXT_BYTE(SLJIT_SIG_LESS); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R1, 0, SLJIT_R0, 0); + SET_NEXT_BYTE(SLJIT_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_R0, 0); + SET_NEXT_BYTE(SLJIT_NOT_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, -2); + SET_NEXT_BYTE(SLJIT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, -2); + SET_NEXT_BYTE(SLJIT_NOT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, -2); + SET_NEXT_BYTE(SLJIT_GREATER_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, -2); + SET_NEXT_BYTE(SLJIT_LESS_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)-1 << ((8 * sizeof(sljit_sw)) - 1)); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + SET_NEXT_BYTE(SLJIT_SIG_LESS); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, -1); + SET_NEXT_BYTE(SLJIT_SIG_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER_EQUAL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, -2); + SET_NEXT_BYTE(SLJIT_SIG_GREATER_EQUAL); + sljit_emit_op2(compiler, 
SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2); + SET_NEXT_BYTE(SLJIT_SIG_GREATER); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x80000000); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 16); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 16); + /* 0x80..0 on 64 bit machines, 0 on 32 bit machines. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0xffffffff); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_NOT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_NOT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SUBC | SLJIT_SET_CARRY, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_R0, 0, SLJIT_IMM, 6, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 1, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_CARRY, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_ADDC | SLJIT_SET_CARRY, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_ADDC, 
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 9); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 1, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, (8 * sizeof(sljit_sw)) - 1); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0); + SET_NEXT_BYTE(SLJIT_EQUAL); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R0, 0); + SET_NEXT_BYTE(SLJIT_EQUAL); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_ASHR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0); + SET_NEXT_BYTE(SLJIT_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_LSHR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffffc0); + SET_NEXT_BYTE(SLJIT_NOT_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_ASHR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R0, 0, shift_reg, 0); + SET_NEXT_BYTE(SLJIT_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV, shift_reg, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_LSHR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R0, 0, shift_reg, 0); + SET_NEXT_BYTE(SLJIT_NOT_EQUAL); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_CARRY, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SUBC | SLJIT_SET_CARRY, SLJIT_R2, 0, SLJIT_IMM, 1, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 1, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_SUBC | 
SLJIT_SET_CARRY, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_SUBC, SLJIT_R2, 0, SLJIT_IMM, 1, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 2, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 2); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -34); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0x1234); + SET_NEXT_BYTE(SLJIT_LESS); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0x1234); + SET_NEXT_BYTE(SLJIT_SIG_LESS); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x12300000000) - 43); +#else + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, -43); +#endif + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -96); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_LESS); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + SET_NEXT_BYTE(SLJIT_SIG_GREATER); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED(buf[0] != RESULT(1), "test27 case 1 failed\n"); + FAILED(buf[1] != RESULT(2), "test27 case 2 failed\n"); + FAILED(buf[2] != 2, "test27 case 3 failed\n"); + FAILED(buf[3] != 1, "test27 case 4 failed\n"); + FAILED(buf[4] != RESULT(1), "test27 case 5 failed\n"); + FAILED(buf[5] != RESULT(2), "test27 case 6 failed\n"); + FAILED(buf[6] != 2, "test27 case 7 failed\n"); + FAILED(buf[7] != 1, "test27 case 8 failed\n"); + + FAILED(buf[8] != 2, "test27 case 9 failed\n"); + FAILED(buf[9] != 1, "test27 case 10 failed\n"); + FAILED(buf[10] != 2, "test27 case 11 failed\n"); 
+ FAILED(buf[11] != 1, "test27 case 12 failed\n"); + FAILED(buf[12] != 1, "test27 case 13 failed\n"); + FAILED(buf[13] != 2, "test27 case 14 failed\n"); + FAILED(buf[14] != 2, "test27 case 15 failed\n"); + FAILED(buf[15] != 1, "test27 case 16 failed\n"); + FAILED(buf[16] != 1, "test27 case 17 failed\n"); + FAILED(buf[17] != 2, "test27 case 18 failed\n"); + FAILED(buf[18] != 1, "test27 case 19 failed\n"); + FAILED(buf[19] != 1, "test27 case 20 failed\n"); + FAILED(buf[20] != 1, "test27 case 21 failed\n"); + FAILED(buf[21] != 2, "test27 case 22 failed\n"); + + FAILED(buf[22] != RESULT(1), "test27 case 23 failed\n"); + FAILED(buf[23] != RESULT(2), "test27 case 24 failed\n"); + FAILED(buf[24] != 2, "test27 case 25 failed\n"); + FAILED(buf[25] != 1, "test27 case 26 failed\n"); + + FAILED(buf[26] != 5, "test27 case 27 failed\n"); + FAILED(buf[27] != 9, "test27 case 28 failed\n"); + + FAILED(buf[28] != 2, "test27 case 29 failed\n"); + FAILED(buf[29] != 1, "test27 case 30 failed\n"); + + FAILED(buf[30] != 1, "test27 case 31 failed\n"); + FAILED(buf[31] != 1, "test27 case 32 failed\n"); + FAILED(buf[32] != 1, "test27 case 33 failed\n"); + FAILED(buf[33] != 1, "test27 case 34 failed\n"); + + FAILED(buf[34] != 1, "test27 case 35 failed\n"); + FAILED(buf[35] != 0, "test27 case 36 failed\n"); + + FAILED(buf[36] != 2, "test27 case 37 failed\n"); + FAILED(buf[37] != 1, "test27 case 38 failed\n"); + FAILED(buf[38] != 2, "test27 case 39 failed\n"); + FAILED(buf[39] != 1, "test27 case 40 failed\n"); + FAILED(buf[40] != 10, "test27 case 41 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +#undef SET_NEXT_BYTE +#undef RESULT +} + +static void test28(void) +{ + /* Test mov. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_const* const1 = NULL; + struct sljit_label* label = NULL; + sljit_uw label_addr = 0; + sljit_sw buf[5]; + + if (verbose) + printf("Run test28\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = -36; + buf[1] = 8; + buf[2] = 0; + buf[3] = 10; + buf[4] = 0; + + FAILED(!compiler, "cannot create compiler\n"); + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -234); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_S3, 0, SLJIT_R3, 0, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_S3, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_S3, 0, SLJIT_IMM, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_NOT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_S3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw)); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S4, 0, SLJIT_S4, 0, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_S4, 0); + + const1 = sljit_emit_const(compiler, SLJIT_S3, 0, 0); + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_S3, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S3, 0, SLJIT_S3, 0, SLJIT_IMM, 100); + label = sljit_emit_label(compiler); + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_S3, 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R4, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + label_addr = sljit_get_label_addr(label); + sljit_set_const(sljit_get_const_addr(const1), label_addr, 
sljit_get_executable_offset(compiler)); + + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 8, "test28 case 1 failed\n"); + FAILED(buf[1] != -1872, "test28 case 2 failed\n"); + FAILED(buf[2] != 1, "test28 case 3 failed\n"); + FAILED(buf[3] != 2, "test28 case 4 failed\n"); + FAILED(buf[4] != label_addr, "test28 case 5 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test29(void) +{ + /* Test signed/unsigned bytes and halfs. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[25]; + sljit_s32 i; + + if (verbose) + printf("Run test29\n"); + + for (i = 0; i < 25; i++) + buf[i] = 0; + + FAILED(!compiler, "cannot create compiler\n"); + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_IMM, -187); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_IMM, -605); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_IMM, -56); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R4, 0, SLJIT_IMM, 0xcde5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_uw), SLJIT_R4, 0); + + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_IMM, -45896); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_IMM, -1472797); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R0, 0, SLJIT_IMM, -12890); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_uw), SLJIT_R0, 0); + 
sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R4, 0, SLJIT_IMM, 0x9cb0a6); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_uw), SLJIT_R4, 0); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-3580429715)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-100722768662)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-1457052677972)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, SLJIT_W(0xcef97a70b5)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_uw), SLJIT_R4, 0); +#endif + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -187); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -605); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -56); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0xcde5); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R4, 0, SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_uw), SLJIT_R4, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 
-45896); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -1472797); + sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 17 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -12890); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R0, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0x9cb0a6); + sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R4, 0, SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_uw), SLJIT_R4, 0); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(-3580429715)); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 20 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, SLJIT_W(-100722768662)); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 21 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, SLJIT_W(-1457052677972)); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 22 * sizeof(sljit_uw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_W(0xcef97a70b5)); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_uw), SLJIT_R4, 0); +#endif + + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 0x9faa5); + sljit_emit_op1(compiler, SLJIT_MOV_S8, SLJIT_S2, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_uw), SLJIT_S2, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 69, "test29 case 1 failed\n"); + FAILED(buf[1] != -93, "test29 case 2 failed\n"); + FAILED(buf[2] != 200, "test29 case 3 failed\n"); + FAILED(buf[3] != 0xe5, "test29 case 4 failed\n"); + FAILED(buf[4] != 19640, "test29 case 5 failed\n"); + FAILED(buf[5] != -31005, "test29 case 6 failed\n"); + FAILED(buf[6] != 52646, "test29 case 7 failed\n"); + FAILED(buf[7] != 0xb0a6, "test29 case 8 failed\n"); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[8] != SLJIT_W(714537581), "test29 case 9 failed\n"); + FAILED(buf[9] != SLJIT_W(-1938520854), "test29 case 10 failed\n"); + FAILED(buf[10] != SLJIT_W(3236202668), "test29 case 11 failed\n"); + FAILED(buf[11] != SLJIT_W(0xf97a70b5), "test29 case 12 failed\n"); +#endif + + FAILED(buf[12] != 69, "test29 case 13 failed\n"); + FAILED(buf[13] != -93, "test29 case 14 failed\n"); + FAILED(buf[14] != 200, "test29 case 15 failed\n"); + FAILED(buf[15] != 0xe5, "test29 case 16 failed\n"); + FAILED(buf[16] != 19640, "test29 case 17 failed\n"); + FAILED(buf[17] != -31005, "test29 case 18 failed\n"); + FAILED(buf[18] != 52646, "test29 case 19 failed\n"); + FAILED(buf[19] != 0xb0a6, "test29 case 20 failed\n"); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[20] != SLJIT_W(714537581), "test29 case 21 failed\n"); + FAILED(buf[21] != SLJIT_W(-1938520854), "test29 case 22 failed\n"); + FAILED(buf[22] != SLJIT_W(3236202668), "test29 case 23 failed\n"); + FAILED(buf[23] != SLJIT_W(0xf97a70b5), "test29 case 24 failed\n"); +#endif + + FAILED(buf[24] != 
-91, "test29 case 25 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test30(void) +{ + /* Test unused results. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[1]; + + if (verbose) + printf("Run test30\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, 1); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_IMM, SLJIT_W(-0x123ffffffff)); +#else + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S1, 0, SLJIT_IMM, 1); +#endif + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_IMM, 1); + + /* Some calculations with unused results. 
*/ + sljit_emit_op1(compiler, SLJIT_NOT | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_NEG | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_SHL | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_S3, 0, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_LSHR | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 5); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_IMM, 0xff); + sljit_emit_op1(compiler, SLJIT_NOT32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_S1, 0); + + /* Testing that any change happens. */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R2, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R3, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_S1, 0, SLJIT_S1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S2, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S3, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0, SLJIT_S4, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 9, "test30 case 1 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + 
+static void test31(void) +{ + /* Integer mul and set flags. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[12]; + sljit_s32 i; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_sw big_word = SLJIT_W(0x7fffffff00000000); + sljit_sw big_word2 = SLJIT_W(0x7fffffff00000012); +#else + sljit_sw big_word = 0x7fffffff; + sljit_sw big_word2 = 0x00000012; +#endif + + if (verbose) + printf("Run test31\n"); + + for (i = 0; i < 12; i++) + buf[i] = 3; + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 5, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_IMM, -45); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_NOT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_IMM, -45); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, big_word); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R2, 0, SLJIT_S2, 0, SLJIT_IMM, -2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 33); /* Should not change flags. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); /* Should not change flags. 
*/ + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R2, 0, SLJIT_S2, 0, SLJIT_IMM, -2); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_NOT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S3, 0, SLJIT_IMM, 0x3f6b0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_S4, 0, SLJIT_IMM, 0x2a783); + sljit_emit_op2(compiler, SLJIT_MUL32 | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_S3, 0, SLJIT_S4, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, big_word2); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_MUL32 | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 23); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_OVERFLOW); + + sljit_emit_op2(compiler, SLJIT_MUL32 | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, -23); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_NOT_OVERFLOW); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, -23); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_NOT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 67); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, -23); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED(buf[0] != 1, "test31 case 1 failed\n"); + FAILED(buf[1] != 2, "test31 case 2 failed\n"); +/* Qemu issues for 64 bit 
muls. */ +#if !(defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[2] != 1, "test31 case 3 failed\n"); + FAILED(buf[3] != 2, "test31 case 4 failed\n"); +#endif + FAILED(buf[4] != 1, "test31 case 5 failed\n"); + FAILED((buf[5] & 0xffffffff) != 0x85540c10, "test31 case 6 failed\n"); + FAILED(buf[6] != 2, "test31 case 7 failed\n"); + FAILED(buf[7] != 1, "test31 case 8 failed\n"); +#if !(defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[8] != 1, "test31 case 9 failed\n"); +#endif + FAILED(buf[9] != -1541, "test31 case 10 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test32(void) +{ + /* Floating point set flags. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + + sljit_sw buf[16]; + union { + sljit_f64 value; + struct { + sljit_s32 value1; + sljit_s32 value2; + } u; + } dbuf[4]; + + if (verbose) + printf("Run test32\n"); + + for (i = 0; i < 16; i++) + buf[i] = 5; + + /* Two NaNs */ + dbuf[0].u.value1 = 0x7fffffff; + dbuf[0].u.value2 = 0x7fffffff; + dbuf[1].u.value1 = 0x7fffffff; + dbuf[1].u.value2 = 0x7fffffff; + dbuf[2].value = -13.0; + dbuf[3].value = 27.0; + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test32 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + SLJIT_ASSERT(sizeof(sljit_f64) == 8 && sizeof(sljit_s32) == 4 && sizeof(dbuf[0]) == 8); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 1, 2, 4, 0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED_F, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + cond_set(compiler, 
SLJIT_MEM1(SLJIT_S0), 0, SLJIT_UNORDERED_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED_F, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_ORDERED_F64); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_UNORDERED_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_ORDERED_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_LESS_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_LESS_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_GREATER_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_GREATER_EQUAL_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_GREATER_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_GREATER_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_LESS_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_LESS_EQUAL_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_EQUAL_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_NOT_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_NOT_EQUAL_F64); + + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED_F, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR3, 0, 
SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64)); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_UNORDERED_F64); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_EQUAL_F, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64)); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_EQUAL_F64); + + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_ORDERED_F, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_ORDERED_F64); + + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | SLJIT_SET_UNORDERED_F, SLJIT_FR3, 0, SLJIT_FR2, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_UNORDERED_F64); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&dbuf); + + FAILED(buf[0] != 1, "test32 case 1 failed\n"); + FAILED(buf[1] != 2, "test32 case 2 failed\n"); + FAILED(buf[2] != 2, "test32 case 3 failed\n"); + FAILED(buf[3] != 1, "test32 case 4 failed\n"); + FAILED(buf[4] != 1, "test32 case 5 failed\n"); + FAILED(buf[5] != 2, "test32 case 6 failed\n"); + FAILED(buf[6] != 2, "test32 case 7 failed\n"); + FAILED(buf[7] != 1, "test32 case 8 failed\n"); + FAILED(buf[8] != 2, "test32 case 9 failed\n"); + FAILED(buf[9] != 1, "test32 case 10 failed\n"); + FAILED(buf[10] != 2, "test32 case 11 failed\n"); + FAILED(buf[11] != 1, "test32 case 12 failed\n"); + FAILED(buf[12] != 2, "test32 case 13 failed\n"); + FAILED(buf[13] != 1, "test32 case 14 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test33(void) +{ + /* Test setting multiple flags. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump; + sljit_sw buf[10]; + + if (verbose) + printf("Run test33\n"); + + buf[0] = 3; + buf[1] = 3; + buf[2] = 3; + buf[3] = 3; + buf[4] = 3; + buf[5] = 3; + buf[6] = 3; + buf[7] = 3; + buf[8] = 3; + buf[9] = 3; + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 3, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 20); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 10); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_R2, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -10); + jump = sljit_emit_jump(compiler, SLJIT_LESS); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_IMM, 11); + sljit_set_label(jump, sljit_emit_label(compiler)); + + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_R2, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_SIG_GREATER); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_IMM, 45); + jump = sljit_emit_jump(compiler, SLJIT_NOT_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_IMM, 55); + sljit_set_label(jump, sljit_emit_label(compiler)); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x8000000000000000)); +#else + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x80000000)); +#endif + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_OVERFLOW, SLJIT_R2, 0, SLJIT_R0, 0, SLJIT_R1, 
0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_IMM, 33); + jump = sljit_emit_jump(compiler, SLJIT_NOT_OVERFLOW); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_IMM, 13); + sljit_set_label(jump, sljit_emit_label(compiler)); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x80000000)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_OVERFLOW, SLJIT_R2, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_NOT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_IMM, 78); + jump = sljit_emit_jump(compiler, SLJIT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_IMM, 48); + sljit_set_label(jump, sljit_emit_label(compiler)); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x8000000000000000)); +#else + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x80000000)); +#endif + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_OVERFLOW, SLJIT_R2, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_IMM, 30); + jump = sljit_emit_jump(compiler, SLJIT_NOT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_IMM, 50); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_ZERO); + sljit_set_label(jump, sljit_emit_label(compiler)); + + 
sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED(buf[0] != 0, "test33 case 1 failed\n"); + FAILED(buf[1] != 11, "test33 case 2 failed\n"); + FAILED(buf[2] != 1, "test33 case 3 failed\n"); + FAILED(buf[3] != 45, "test33 case 4 failed\n"); + FAILED(buf[4] != 13, "test33 case 5 failed\n"); + FAILED(buf[5] != 0, "test33 case 6 failed\n"); + FAILED(buf[6] != 0, "test33 case 7 failed\n"); + FAILED(buf[7] != 48, "test33 case 8 failed\n"); + FAILED(buf[8] != 50, "test33 case 9 failed\n"); + FAILED(buf[9] != 1, "test33 case 10 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test34(void) +{ + /* Test fast calls. */ + executable_code codeA; + executable_code codeB; + executable_code codeC; + executable_code codeD; + executable_code codeE; + executable_code codeF; + struct sljit_compiler* compiler; + struct sljit_jump *jump; + struct sljit_label* label; + sljit_uw addr; + sljit_p buf[2]; + + if (verbose) + printf("Run test34\n"); + + buf[0] = 0; + buf[1] = 0; + + /* A */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, 1, 5, 5, 0, 0, 2 * sizeof(sljit_p)); + + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_fast_enter(compiler, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_R1, 0); + + codeA.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* B */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, 1, 5, 5, 0, 0, 2 * sizeof(sljit_p)); + + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_fast_enter(compiler, SLJIT_R4, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, 
SLJIT_IMM, 6); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(codeA.code)); + sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_R1, 0); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_R4, 0); + + codeB.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* C */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, 1, 5, 5, 0, 0, 2 * sizeof(sljit_p)); + + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_p)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 8); + jump = sljit_emit_jump(compiler, SLJIT_FAST_CALL | SLJIT_REWRITABLE_JUMP); + sljit_set_target(jump, SLJIT_FUNC_OFFSET(codeB.code)); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_p)); + + codeC.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* D */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, 1, 5, 5, 0, 0, 2 * sizeof(sljit_p)); + + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 10); + sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, SLJIT_FUNC_OFFSET(codeC.code)); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_MEM1(SLJIT_SP), 0); + + codeD.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* E */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, 1, 5, 5, 0, 0, 2 * sizeof(sljit_p)); + + sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 12); + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_p), SLJIT_IMM, SLJIT_FUNC_OFFSET(codeD.code)); + sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_p)); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_MEM1(SLJIT_S0), 0); + + codeE.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* F */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, 2 * sizeof(sljit_p)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, SLJIT_FUNC_OFFSET(codeE.code)); + label = sljit_emit_label(compiler); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + codeF.code = sljit_generate_code(compiler); + CHECK(compiler); + addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + FAILED(codeF.func1((sljit_sw)&buf) != 40, "test34 case 1 failed\n"); + FAILED(buf[0] != addr - SLJIT_RETURN_ADDRESS_OFFSET, "test34 case 2 failed\n"); + + sljit_free_code(codeA.code, NULL); + sljit_free_code(codeB.code, NULL); + sljit_free_code(codeC.code, NULL); + sljit_free_code(codeD.code, NULL); + sljit_free_code(codeE.code, NULL); + sljit_free_code(codeF.code, NULL); + successful_tests++; +} + +static void test35(void) +{ + /* More complicated tests for fast calls. 
*/ + executable_code codeA; + executable_code codeB; + executable_code codeC; + struct sljit_compiler* compiler; + struct sljit_jump *jump = NULL; + struct sljit_label* label; + sljit_sw executable_offset; + sljit_uw return_addr; + sljit_uw jump_addr = 0; + sljit_p buf[1]; + + if (verbose) + printf("Run test35\n"); + + buf[0] = 0; + + /* A */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, 0, 2, 2, 0, 0, 0); + + sljit_emit_fast_enter(compiler, SLJIT_MEM0(), (sljit_sw)&buf[0]); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 5); + + jump = sljit_emit_jump(compiler, SLJIT_FAST_CALL | SLJIT_REWRITABLE_JUMP); + sljit_set_target(jump, 0); + + label = sljit_emit_label(compiler); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_MEM0(), (sljit_sw)&buf[0]); + + codeA.code = sljit_generate_code(compiler); + CHECK(compiler); + executable_offset = sljit_get_executable_offset(compiler); + jump_addr = sljit_get_jump_addr(jump); + sljit_free_compiler(compiler); + + /* B */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, 0, 2, 2, 0, 0, 0); + + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_fast_enter(compiler, SLJIT_R1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 7); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_R1, 0); + + codeB.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + sljit_set_jump_addr(jump_addr, SLJIT_FUNC_OFFSET(codeB.code), executable_offset); + + /* C */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, 0, 2, 2, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, SLJIT_FUNC_OFFSET(codeA.code)); + label = 
sljit_emit_label(compiler); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + codeC.code = sljit_generate_code(compiler); + CHECK(compiler); + return_addr = sljit_get_label_addr(label); + sljit_free_compiler(compiler); + + FAILED(codeC.func0() != 12, "test35 case 1 failed\n"); + FAILED(buf[0] != return_addr - SLJIT_RETURN_ADDRESS_OFFSET, "test35 case 2 failed\n"); + + sljit_free_code(codeA.code, NULL); + sljit_free_code(codeB.code, NULL); + sljit_free_code(codeC.code, NULL); + successful_tests++; +} + +static void cmp_test(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) +{ + /* 2 = true, 1 = false */ + struct sljit_jump* jump; + struct sljit_label* label; + + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 1, SLJIT_IMM, 2); + jump = sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_S0), 1, SLJIT_IMM, 1); + label = sljit_emit_label(compiler); + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_set_label(jump, label); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); +} + +#define TEST_CASES (7 + 10 + 12 + 11 + 4) +static void test36(void) +{ + /* Compare instruction. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + + sljit_s8 buf[TEST_CASES]; + sljit_s8 compare_buf[TEST_CASES] = { + 1, 1, 2, 2, 1, 2, 2, + 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, + 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, + 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, + 2, 1, 1, 2 + }; + sljit_sw data[4]; + sljit_s32 i; + + if (verbose) + printf("Run test36\n"); + + FAILED(!compiler, "cannot create compiler\n"); + for (i = 0; i < TEST_CASES; ++i) + buf[i] = 100; + data[0] = 32; + data[1] = -9; + data[2] = 43; + data[3] = -13; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 2, 0, 0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 13); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 15); + cmp_test(compiler, SLJIT_EQUAL, SLJIT_IMM, 9, SLJIT_R0, 0); + cmp_test(compiler, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3); + cmp_test(compiler, SLJIT_EQUAL, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_IMM, -13); + cmp_test(compiler, SLJIT_NOT_EQUAL, SLJIT_IMM, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + cmp_test(compiler, SLJIT_NOT_EQUAL | SLJIT_REWRITABLE_JUMP, SLJIT_IMM, 0, SLJIT_R0, 0); + cmp_test(compiler, SLJIT_EQUAL, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_WORD_SHIFT); + cmp_test(compiler, SLJIT_EQUAL | SLJIT_REWRITABLE_JUMP, SLJIT_R0, 0, SLJIT_IMM, 0); + + cmp_test(compiler, SLJIT_SIG_LESS, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -8); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_R0, 0, SLJIT_IMM, 0); + cmp_test(compiler, SLJIT_SIG_LESS_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); + cmp_test(compiler, SLJIT_SIG_LESS | 
SLJIT_REWRITABLE_JUMP, SLJIT_R0, 0, SLJIT_IMM, 0); + cmp_test(compiler, SLJIT_SIG_GREATER_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0); + cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_IMM, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw)); + cmp_test(compiler, SLJIT_SIG_LESS_EQUAL, SLJIT_IMM, 0, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_SIG_LESS, SLJIT_IMM, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw)); + cmp_test(compiler, SLJIT_SIG_LESS, SLJIT_IMM, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw)); + cmp_test(compiler, SLJIT_SIG_LESS | SLJIT_REWRITABLE_JUMP, SLJIT_IMM, 0, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + cmp_test(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw)); + cmp_test(compiler, SLJIT_GREATER_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 8); + cmp_test(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_IMM, -10); + cmp_test(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_IMM, 8); + cmp_test(compiler, SLJIT_GREATER_EQUAL, SLJIT_IMM, 8, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_GREATER_EQUAL | SLJIT_REWRITABLE_JUMP, SLJIT_IMM, 8, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_GREATER, SLJIT_IMM, 8, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_LESS_EQUAL, SLJIT_IMM, 7, SLJIT_R0, 0); + cmp_test(compiler, SLJIT_GREATER, SLJIT_IMM, 1, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw)); + cmp_test(compiler, SLJIT_LESS_EQUAL, SLJIT_R0, 0, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_GREATER, SLJIT_R0, 0, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_GREATER | SLJIT_REWRITABLE_JUMP, SLJIT_R0, 0, SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -3); + cmp_test(compiler, SLJIT_SIG_LESS, SLJIT_R0, 0, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_SIG_GREATER_EQUAL, SLJIT_R0, 0, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_SIG_LESS, SLJIT_R0, 0, SLJIT_IMM, -1); + cmp_test(compiler, SLJIT_SIG_GREATER_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 1); + cmp_test(compiler, 
SLJIT_SIG_LESS, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_IMM, -1); + cmp_test(compiler, SLJIT_SIG_LESS | SLJIT_REWRITABLE_JUMP, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_IMM, -1); + cmp_test(compiler, SLJIT_SIG_LESS_EQUAL, SLJIT_R0, 0, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_R0, 0, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_SIG_LESS_EQUAL, SLJIT_IMM, -4, SLJIT_R0, 0); + cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_IMM, -1, SLJIT_R1, 0); + cmp_test(compiler, SLJIT_SIG_GREATER | SLJIT_REWRITABLE_JUMP, SLJIT_R1, 0, SLJIT_IMM, -1); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0xf00000004)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_R0, 0); + cmp_test(compiler, SLJIT_LESS, SLJIT_R1, 0, SLJIT_IMM, 5); + cmp_test(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0xff0000004)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_R0, 0); + cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_R1, 0, SLJIT_IMM, 5); + cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_R0, 0, SLJIT_IMM, 5); +#else + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 4); + cmp_test(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_IMM, 5); + cmp_test(compiler, SLJIT_GREATER, SLJIT_R0, 0, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0xf0000004); + cmp_test(compiler, SLJIT_SIG_GREATER, SLJIT_R0, 0, SLJIT_IMM, 5); + cmp_test(compiler, SLJIT_SIG_LESS, SLJIT_R0, 0, SLJIT_IMM, 5); +#endif + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&data); + + for (i = 0; i < TEST_CASES; ++i) + if (SLJIT_UNLIKELY(buf[i] != compare_buf[i])) { + printf("test36 case %d failed\n", i + 1); + return; + } + + sljit_free_code(code.code, NULL); + successful_tests++; +} +#undef 
TEST_CASES + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define BITN(n) (SLJIT_W(1) << (63 - (n))) +#define RESN(n) (n) +#else +#define BITN(n) (1 << (31 - ((n) & 0x1f))) +#define RESN(n) ((n) & 0x1f) +#endif + +static void test37(void) +{ + /* Test count leading zeroes. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[9]; + sljit_s32 ibuf[2]; + sljit_s32 i; + + if (verbose) + printf("Run test37\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 9; i++) + buf[i] = -1; + buf[2] = 0; + buf[4] = BITN(13); + ibuf[0] = -1; + ibuf[1] = -1; + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 1, 3, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, BITN(27)); + sljit_emit_op1(compiler, SLJIT_CLZ, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, BITN(47)); + sljit_emit_op1(compiler, SLJIT_CLZ, SLJIT_R0, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_CLZ, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_CLZ, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_CLZ32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -1); + sljit_emit_op1(compiler, SLJIT_CLZ, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, BITN(58)); + sljit_emit_op1(compiler, SLJIT_CLZ, 
SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_CLZ, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R0, 0); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0xff08a00000)); +#else + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0x08a00000); +#endif + sljit_emit_op1(compiler, SLJIT_CLZ32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_CLZ32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R0, 0); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0xffc8a00000)); +#else + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0xc8a00000); +#endif + sljit_emit_op1(compiler, SLJIT_CLZ32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&ibuf); + FAILED(buf[0] != RESN(27), "test37 case 1 failed\n"); + FAILED(buf[1] != RESN(47), "test37 case 2 failed\n"); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[2] != 64, "test37 case 3 failed\n"); +#else + FAILED(buf[2] != 32, "test37 case 3 failed\n"); +#endif + FAILED(buf[3] != 0, "test37 case 4 failed\n"); + FAILED(ibuf[0] != 32, "test37 case 5 failed\n"); + FAILED(buf[4] != RESN(13), "test37 case 6 failed\n"); + FAILED(buf[5] != RESN(58), "test37 case 7 failed\n"); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[6] != 64, "test37 case 8 failed\n"); +#else + FAILED(buf[6] != 32, 
"test37 case 8 failed\n"); +#endif + FAILED(ibuf[1] != 4, "test37 case 9 failed\n"); + + FAILED((buf[7] & 0xffffffff) != 4, "test37 case 10 failed\n"); + FAILED((buf[8] & 0xffffffff) != 0, "test37 case 11 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} +#undef BITN +#undef RESN + +static void test38(void) +{ +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + /* Test stack utility. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* alloc1_fail; + struct sljit_jump* alloc2_fail; + struct sljit_jump* alloc3_fail; + struct sljit_jump* sanity1_fail; + struct sljit_jump* sanity2_fail; + struct sljit_jump* sanity3_fail; + struct sljit_jump* sanity4_fail; + struct sljit_jump* jump; + struct sljit_label* label; + + if (verbose) + printf("Run test38\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, 0, 3, 1, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8192); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 65536); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_allocate_stack)); + alloc1_fail = sljit_emit_cmp(compiler, SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_RETURN_REG, 0); + + /* Write 8k data. 
*/ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, start)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 8192); + label = sljit_emit_label(compiler); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_sw)); + jump = sljit_emit_cmp(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_set_label(jump, label); + + /* Grow stack. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, end), SLJIT_IMM, 65536); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); + alloc2_fail = sljit_emit_cmp(compiler, SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); + sanity1_fail = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, start)); + + /* Write 64k data. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, start)); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 65536); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, min_start)); + sanity2_fail = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_R2, 0); + label = sljit_emit_label(compiler); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_sw)); + jump = sljit_emit_cmp(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_set_label(jump, label); + + /* Shrink stack. 
*/ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, end), SLJIT_IMM, 32768); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); + alloc3_fail = sljit_emit_cmp(compiler, SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); + sanity3_fail = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, start)); + + /* Write 32k data. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, start)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct sljit_stack, end)); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_R1, 0, SLJIT_IMM, 32768); + sanity4_fail = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_R2, 0); + label = sljit_emit_label(compiler); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 0, SLJIT_IMM, -1); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_sw)); + jump = sljit_emit_cmp(compiler, SLJIT_LESS, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_set_label(jump, label); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_free_stack)); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 4567); + + label = sljit_emit_label(compiler); + sljit_set_label(alloc1_fail, label); + sljit_set_label(alloc2_fail, label); + sljit_set_label(alloc3_fail, label); + sljit_set_label(sanity1_fail, label); + sljit_set_label(sanity2_fail, label); + sljit_set_label(sanity3_fail, label); + sljit_set_label(sanity4_fail, label); + 
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* Just survive this. */ + FAILED(code.func0() != 4567, "test38 case 1 failed\n"); + + sljit_free_code(code.code, NULL); +#endif + successful_tests++; +} + +static void test39(void) +{ + /* Test error handling. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump; + + if (verbose) + printf("Run test39\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + /* Such assignment should never happen in a regular program. */ + compiler->error = -3967; + + SLJIT_ASSERT(sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 5, 5, 6, 0, 32) == -3967); + SLJIT_ASSERT(sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R1, 0) == -3967); + SLJIT_ASSERT(sljit_emit_op0(compiler, SLJIT_NOP) == -3967); + SLJIT_ASSERT(sljit_emit_op0(compiler, SLJIT_ENDBR) == -3967); + SLJIT_ASSERT(sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1) == -3967); + SLJIT_ASSERT(sljit_emit_op2(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 64, SLJIT_MEM1(SLJIT_S0), -64) == -3967); + SLJIT_ASSERT(sljit_emit_fop1(compiler, SLJIT_ABS_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_R1), 0) == -3967); + SLJIT_ASSERT(sljit_emit_fop2(compiler, SLJIT_DIV_F64, SLJIT_FR2, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_S0), 0, SLJIT_FR2, 0) == -3967); + SLJIT_ASSERT(!sljit_emit_label(compiler)); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW)); + SLJIT_ASSERT(!jump); + sljit_set_label(jump, (struct sljit_label*)0x123450); + sljit_set_target(jump, 0x123450); + jump = sljit_emit_cmp(compiler, SLJIT_SIG_LESS_EQUAL, SLJIT_R0, 0, SLJIT_R1, 0); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(SLJIT_R0), 8) == -3967); + SLJIT_ASSERT(sljit_emit_op_flags(compiler, SLJIT_MOV, 
SLJIT_R0, 0, SLJIT_OVERFLOW) == -3967); + SLJIT_ASSERT(!sljit_emit_const(compiler, SLJIT_R0, 0, 99)); + + SLJIT_ASSERT(!compiler->labels && !compiler->jumps && !compiler->consts); + SLJIT_ASSERT(!compiler->last_label && !compiler->last_jump && !compiler->last_const); + SLJIT_ASSERT(!compiler->buf->next && !compiler->buf->used_size); + SLJIT_ASSERT(!compiler->abuf->next && !compiler->abuf->used_size); + + sljit_set_compiler_memory_error(compiler); + FAILED(sljit_get_compiler_error(compiler) != -3967, "test39 case 1 failed\n"); + + code.code = sljit_generate_code(compiler); + FAILED(sljit_get_compiler_error(compiler) != -3967, "test39 case 2 failed\n"); + FAILED(!!code.code, "test39 case 3 failed\n"); + sljit_free_compiler(compiler); + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + FAILED(sljit_get_compiler_error(compiler) != SLJIT_SUCCESS, "test39 case 4 failed\n"); + sljit_set_compiler_memory_error(compiler); + FAILED(sljit_get_compiler_error(compiler) != SLJIT_ERR_ALLOC_FAILED, "test39 case 5 failed\n"); + sljit_free_compiler(compiler); + + successful_tests++; +} + +static void test40(void) +{ + /* Test emit_op_flags. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[10]; + + if (verbose) + printf("Run test40\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = -100; + buf[1] = -100; + buf[2] = -100; + buf[3] = -8; + buf[4] = -100; + buf[5] = -100; + buf[6] = 0; + buf[7] = 0; + buf[8] = -100; + buf[9] = -100; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 4, 0, 0, sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -5); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, SLJIT_IMM, -6, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x123456); + sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_R1, 0, SLJIT_SIG_LESS); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -13); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_IMM, -13, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, 0); + sljit_emit_op_flags(compiler, SLJIT_OR | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_NOT_EQUAL); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_SP), 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_IMM, -13, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + sljit_emit_op_flags(compiler, SLJIT_OR | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 2, SLJIT_EQUAL); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -13); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R0, 0, 
SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_WORD_SHIFT, SLJIT_SIG_LESS); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -8); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 33); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 0); + sljit_emit_op_flags(compiler, SLJIT_OR | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_GREATER); + sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_S1, 0, SLJIT_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, 0x88); + sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_S3, 0, SLJIT_NOT_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 4, SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 5, SLJIT_S3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x84); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_IMM, 0x180, SLJIT_R0, 0); + sljit_emit_op_flags(compiler, SLJIT_OR | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 6, SLJIT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 7, SLJIT_EQUAL); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op_flags(compiler, SLJIT_OR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_NOT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 8, SLJIT_NOT_EQUAL); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x123456); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_R0, 0, 
SLJIT_GREATER); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 9, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, 0xbaddead); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != 0xbaddead, "test40 case 1 failed\n"); + FAILED(buf[0] != 0x123457, "test40 case 2 failed\n"); + FAILED(buf[1] != 1, "test40 case 3 failed\n"); + FAILED(buf[2] != 0, "test40 case 4 failed\n"); + FAILED(buf[3] != -7, "test40 case 5 failed\n"); + FAILED(buf[4] != 0, "test40 case 6 failed\n"); + FAILED(buf[5] != 0x89, "test40 case 7 failed\n"); + FAILED(buf[6] != 0, "test40 case 8 failed\n"); + FAILED(buf[7] != 1, "test40 case 9 failed\n"); + FAILED(buf[8] != 1, "test40 case 10 failed\n"); + FAILED(buf[9] != 0x123457, "test40 case 11 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test41(void) +{ + /* Test inline assembly. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + sljit_f64 buf[3]; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_u8 inst[16]; +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_u8 inst[16]; + sljit_s32 reg; +#else + sljit_u32 inst; +#endif + + if (verbose) + printf("Run test41\n"); + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + SLJIT_ASSERT(sljit_has_cpu_feature(SLJIT_HAS_VIRTUAL_REGISTERS) == 0); +#endif + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_R(i) >= SLJIT_R3 && SLJIT_R(i) <= SLJIT_R8) { + SLJIT_ASSERT(sljit_get_register_index(SLJIT_R(i)) == -1); + continue; + } +#endif + SLJIT_ASSERT(sljit_get_register_index(SLJIT_R(i)) >= 0 && sljit_get_register_index(SLJIT_R(i)) < 64); + } + + FAILED(!compiler, "cannot create compiler\n"); + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 3, 0, 0, 0); + + /* Returns with the sum of SLJIT_S0 and SLJIT_S1. 
*/ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + /* lea SLJIT_RETURN_REG, [SLJIT_S0, SLJIT_S1] */ + inst[0] = 0x48; + inst[1] = 0x8d; + inst[2] = 0x04 | ((sljit_get_register_index(SLJIT_RETURN_REG) & 0x7) << 3); + inst[3] = (sljit_get_register_index(SLJIT_S0) & 0x7) + | ((sljit_get_register_index(SLJIT_S1) & 0x7) << 3); + sljit_emit_op_custom(compiler, inst, 4); +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* lea SLJIT_RETURN_REG, [SLJIT_S0, SLJIT_S1] */ + inst[0] = 0x48; /* REX_W */ + inst[1] = 0x8d; + inst[2] = 0x04; + reg = sljit_get_register_index(SLJIT_RETURN_REG); + inst[2] |= ((reg & 0x7) << 3); + if (reg > 7) + inst[0] |= 0x04; /* REX_R */ + reg = sljit_get_register_index(SLJIT_S0); + inst[3] = reg & 0x7; + if (reg > 7) + inst[0] |= 0x01; /* REX_B */ + reg = sljit_get_register_index(SLJIT_S1); + inst[3] |= (reg & 0x7) << 3; + if (reg > 7) + inst[0] |= 0x02; /* REX_X */ + sljit_emit_op_custom(compiler, inst, 4); +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + /* add rd, rn, rm */ + inst = 0xe0800000 | (sljit_get_register_index(SLJIT_RETURN_REG) << 12) + | (sljit_get_register_index(SLJIT_S0) << 16) + | sljit_get_register_index(SLJIT_S1); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + /* add rd, rn, rm */ + inst = 0xeb000000 | (sljit_get_register_index(SLJIT_RETURN_REG) << 8) + | (sljit_get_register_index(SLJIT_S0) << 16) + | sljit_get_register_index(SLJIT_S1); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + /* add rd, rn, rm */ + inst = 0x8b000000 | sljit_get_register_index(SLJIT_RETURN_REG) + | (sljit_get_register_index(SLJIT_S0) << 5) + | (sljit_get_register_index(SLJIT_S1) << 16); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + /* add 
rD, rA, rB */ + inst = (31 << 26) | (266 << 1) | (sljit_get_register_index(SLJIT_RETURN_REG) << 21) + | (sljit_get_register_index(SLJIT_S0) << 16) + | (sljit_get_register_index(SLJIT_S1) << 11); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* addu rd, rs, rt */ + inst = 33 | (sljit_get_register_index(SLJIT_RETURN_REG) << 11) + | (sljit_get_register_index(SLJIT_S0) << 21) + | (sljit_get_register_index(SLJIT_S1) << 16); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + /* daddu rd, rs, rt */ + inst = 45 | (sljit_get_register_index(SLJIT_RETURN_REG) << 11) + | (sljit_get_register_index(SLJIT_S0) << 21) + | (sljit_get_register_index(SLJIT_S1) << 16); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + /* add rd, rs1, rs2 */ + inst = (0x2 << 30) | (sljit_get_register_index(SLJIT_RETURN_REG) << 25) + | (sljit_get_register_index(SLJIT_S0) << 14) + | sljit_get_register_index(SLJIT_S1); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + /* agrk rd, rs1, rs2 */ + inst = (0xb9e8 << 16) + | (sljit_get_register_index(SLJIT_RETURN_REG) << 4) + | (sljit_get_register_index(SLJIT_S0) << 12) + | sljit_get_register_index(SLJIT_S1); + sljit_emit_op_custom(compiler, &inst, sizeof(inst)); +#else + inst = 0; + sljit_emit_op_custom(compiler, &inst, 0); +#endif + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func2(32, -11) != 21, "test41 case 1 failed\n"); + FAILED(code.func2(1000, 234) != 1234, "test41 case 2 failed\n"); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(code.func2(SLJIT_W(0x20f0a04090c06070), SLJIT_W(0x020f0a04090c0607)) != 
SLJIT_W(0x22ffaa4499cc6677), "test41 case 3 failed\n"); +#endif + + sljit_free_code(code.code, NULL); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + buf[0] = 13.5; + buf[1] = -2.25; + buf[2] = 0.0; + + compiler = sljit_create_compiler(NULL, NULL); + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 0, 1, 2, 0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + /* addsd x, xm */ + inst[0] = 0xf2; + inst[1] = 0x0f; + inst[2] = 0x58; + inst[3] = 0xc0 | (sljit_get_float_register_index(SLJIT_FR0) << 3) + | sljit_get_float_register_index(SLJIT_FR1); + sljit_emit_op_custom(compiler, inst, 4); +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* addsd x, xm */ + if (sljit_get_float_register_index(SLJIT_FR0) > 7 || sljit_get_float_register_index(SLJIT_FR1) > 7) { + inst[0] = 0; + if (sljit_get_float_register_index(SLJIT_FR0) > 7) + inst[0] |= 0x04; /* REX_R */ + if (sljit_get_float_register_index(SLJIT_FR1) > 7) + inst[0] |= 0x01; /* REX_B */ + inst[1] = 0xf2; + inst[2] = 0x0f; + inst[3] = 0x58; + inst[4] = 0xc0 | ((sljit_get_float_register_index(SLJIT_FR0) & 0x7) << 3) + | (sljit_get_float_register_index(SLJIT_FR1) & 0x7); + sljit_emit_op_custom(compiler, inst, 5); + } + else { + inst[0] = 0xf2; + inst[1] = 0x0f; + inst[2] = 0x58; + inst[3] = 0xc0 | (sljit_get_float_register_index(SLJIT_FR0) << 3) + | sljit_get_float_register_index(SLJIT_FR1); + sljit_emit_op_custom(compiler, inst, 4); + } +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + /* vadd.f64 dd, dn, dm */ + inst = 0xee300b00 | ((sljit_get_float_register_index(SLJIT_FR0) >> 1) << 12) + | ((sljit_get_float_register_index(SLJIT_FR0) >> 1) << 16) + | (sljit_get_float_register_index(SLJIT_FR1) >> 1); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_ARM_64 && 
SLJIT_CONFIG_ARM_64) + /* fadd rd, rn, rm */ + inst = 0x1e602800 | sljit_get_float_register_index(SLJIT_FR0) + | (sljit_get_float_register_index(SLJIT_FR0) << 5) + | (sljit_get_float_register_index(SLJIT_FR1) << 16); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + /* fadd frD, frA, frB */ + inst = (63 << 26) | (21 << 1) | (sljit_get_float_register_index(SLJIT_FR0) << 21) + | (sljit_get_float_register_index(SLJIT_FR0) << 16) + | (sljit_get_float_register_index(SLJIT_FR1) << 11); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + /* add.d fd, fs, ft */ + inst = (17 << 26) | (17 << 21) | (sljit_get_float_register_index(SLJIT_FR0) << 6) + | (sljit_get_float_register_index(SLJIT_FR0) << 11) + | (sljit_get_float_register_index(SLJIT_FR1) << 16); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + /* faddd rd, rs1, rs2 */ + inst = (0x2 << 30) | (0x34 << 19) | (0x42 << 5) + | (sljit_get_float_register_index(SLJIT_FR0) << 25) + | (sljit_get_float_register_index(SLJIT_FR0) << 14) + | sljit_get_float_register_index(SLJIT_FR1); + sljit_emit_op_custom(compiler, &inst, sizeof(sljit_u32)); +#endif + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[2] != 11.25, "test41 case 3 failed\n"); + + sljit_free_code(code.code, NULL); + } + + successful_tests++; +} + +static void test42(void) +{ + /* Test long multiply and division. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + sljit_sw buf[7 + 4 + 8 + 8]; + + if (verbose) + printf("Run test42\n"); + + FAILED(!compiler, "cannot create compiler\n"); + for (i = 0; i < 7 + 4 + 8 + 8; i++) + buf[i] = -1; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -0x1fb308a); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 0xf50c873); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, 0x8a0475b); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 0x9dc849b); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, -0x7c69a35); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, 0x5a4d0c4); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S4, 0, SLJIT_IMM, 0x9a3b06d); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-0x5dc4f897b8cd67f5)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x3f8b5c026cb088df)); + sljit_emit_op0(compiler, SLJIT_LMUL_UW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-0x5dc4f897b8cd67f5)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x3f8b5c026cb088df)); + sljit_emit_op0(compiler, SLJIT_LMUL_SW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-0x5dc4f897b8cd67f5)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x3f8b5c026cb088df)); + 
sljit_emit_op0(compiler, SLJIT_DIVMOD_UW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-0x5dc4f897b8cd67f5)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x3f8b5c026cb088df)); + sljit_emit_op0(compiler, SLJIT_DIVMOD_SW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x5cf783d3cf0a74b0)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x3d5df42d03a28fc7)); + sljit_emit_op0(compiler, SLJIT_DIVMOD_U32); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x371df5197ba26a28)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x46c78a5cfd6a420c)); + sljit_emit_op0(compiler, SLJIT_DIVMOD_S32); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 17 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0xc456f048c28a611b)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x3d4af2c543)); + 
sljit_emit_op0(compiler, SLJIT_DIV_UW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 20 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-0x720fa4b74c329b14)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0xa64ae42b7d6)); + sljit_emit_op0(compiler, SLJIT_DIV_SW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 21 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 22 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x4af51c027b34)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0x9ba4ff2906b14)); + sljit_emit_op0(compiler, SLJIT_DIV_U32); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0xc40b58a3f20d)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(-0xa63c923)); + sljit_emit_op0(compiler, SLJIT_DIV_S32); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 26 * sizeof(sljit_sw), SLJIT_R1, 0); + +#else + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -0x58cd67f5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x3cb088df); + sljit_emit_op0(compiler, SLJIT_LMUL_UW); + sljit_emit_op1(compiler, 
SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -0x58cd67f5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x3cb088df); + sljit_emit_op0(compiler, SLJIT_LMUL_SW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -0x58cd67f5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x3cb088df); + sljit_emit_op0(compiler, SLJIT_DIVMOD_UW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -0x58cd67f5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0x3cb088df); + sljit_emit_op0(compiler, SLJIT_DIVMOD_SW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0xcf0a74b0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 0x03a28fc7); + sljit_emit_op0(compiler, SLJIT_DIVMOD_U32); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0x7ba26a28); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)0xfd6a420c); + sljit_emit_op0(compiler, SLJIT_DIVMOD_S32); + sljit_emit_op1(compiler, SLJIT_MOV, 
SLJIT_MEM1(SLJIT_S0), 17 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x9d4b7036)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0xb86d0)); + sljit_emit_op0(compiler, SLJIT_DIV_UW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 20 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(-0x58b0692c)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0xd357)); + sljit_emit_op0(compiler, SLJIT_DIV_SW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 21 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 22 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x1c027b34)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(0xf2906b14)); + sljit_emit_op0(compiler, SLJIT_DIV_U32); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x58a3f20d)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, SLJIT_W(-0xa63c923)); + sljit_emit_op0(compiler, SLJIT_DIV_S32); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, 
SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 26 * sizeof(sljit_sw), SLJIT_R1, 0); +#endif + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_S3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_S4, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED(buf[0] != -0x1fb308a, "test42 case 1 failed\n"); + FAILED(buf[1] != 0xf50c873, "test42 case 2 failed\n"); + FAILED(buf[2] != 0x8a0475b, "test42 case 3 failed\n"); + FAILED(buf[3] != 0x9dc849b, "test42 case 4 failed\n"); + FAILED(buf[4] != -0x7c69a35, "test42 case 5 failed\n"); + FAILED(buf[5] != 0x5a4d0c4, "test42 case 6 failed\n"); + FAILED(buf[6] != 0x9a3b06d, "test42 case 7 failed\n"); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[7] != SLJIT_W(-4388959407985636971), "test42 case 8 failed\n"); + FAILED(buf[8] != SLJIT_W(2901680654366567099), "test42 case 9 failed\n"); + FAILED(buf[9] != SLJIT_W(-4388959407985636971), "test42 case 10 failed\n"); + FAILED(buf[10] != SLJIT_W(-1677173957268872740), "test42 case 11 failed\n"); + FAILED(buf[11] != SLJIT_W(2), "test42 case 12 failed\n"); + FAILED(buf[12] != SLJIT_W(2532236178951865933), "test42 case 13 failed\n"); + FAILED(buf[13] != SLJIT_W(-1), "test42 case 14 failed\n"); + FAILED(buf[14] != SLJIT_W(-2177944059851366166), "test42 case 15 failed\n"); +#else + FAILED(buf[7] != 
-1587000939, "test42 case 8 failed\n"); + FAILED(buf[8] != 665003983, "test42 case 9 failed\n"); + FAILED(buf[9] != -1587000939, "test42 case 10 failed\n"); + FAILED(buf[10] != -353198352, "test42 case 11 failed\n"); + FAILED(buf[11] != 2, "test42 case 12 failed\n"); + FAILED(buf[12] != 768706125, "test42 case 13 failed\n"); + FAILED(buf[13] != -1, "test42 case 14 failed\n"); + FAILED(buf[14] != -471654166, "test42 case 15 failed\n"); +#endif + + FAILED(buf[15] != SLJIT_W(56), "test42 case 16 failed\n"); + FAILED(buf[16] != SLJIT_W(58392872), "test42 case 17 failed\n"); + FAILED(buf[17] != SLJIT_W(-47), "test42 case 18 failed\n"); + FAILED(buf[18] != SLJIT_W(35949148), "test42 case 19 failed\n"); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(buf[19] != SLJIT_W(0x3340bfc), "test42 case 20 failed\n"); + FAILED(buf[20] != SLJIT_W(0x3d4af2c543), "test42 case 21 failed\n"); + FAILED(buf[21] != SLJIT_W(-0xaf978), "test42 case 22 failed\n"); + FAILED(buf[22] != SLJIT_W(0xa64ae42b7d6), "test42 case 23 failed\n"); +#else + FAILED(buf[19] != SLJIT_W(0xda5), "test42 case 20 failed\n"); + FAILED(buf[20] != SLJIT_W(0xb86d0), "test42 case 21 failed\n"); + FAILED(buf[21] != SLJIT_W(-0x6b6e), "test42 case 22 failed\n"); + FAILED(buf[22] != SLJIT_W(0xd357), "test42 case 23 failed\n"); +#endif + + FAILED(buf[23] != SLJIT_W(0x0), "test42 case 24 failed\n"); + FAILED(buf[24] != SLJIT_W(0xf2906b14), "test42 case 25 failed\n"); + FAILED(buf[25] != SLJIT_W(-0x8), "test42 case 26 failed\n"); + FAILED(buf[26] != SLJIT_W(-0xa63c923), "test42 case 27 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test43(void) +{ + /* Test floating point compare. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump; + + union { + sljit_f64 value; + struct { + sljit_u32 value1; + sljit_u32 value2; + } u; + } dbuf[4]; + + if (verbose) + printf("Run test43\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test43 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + + dbuf[0].value = 12.125; + /* a NaN */ + dbuf[1].u.value1 = 0x7fffffff; + dbuf[1].u.value2 = 0x7fffffff; + dbuf[2].value = -13.5; + dbuf[3].value = 12.125; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 1, 1, 3, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + /* dbuf[0] < dbuf[2] -> -2 */ + jump = sljit_emit_fcmp(compiler, SLJIT_GREATER_EQUAL_F64, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_F64_SHIFT); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, -2); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + /* dbuf[0] and dbuf[1] is not NaN -> 5 */ + jump = sljit_emit_fcmp(compiler, SLJIT_UNORDERED_F64, SLJIT_MEM0(), (sljit_sw)&dbuf[1], SLJIT_FR1, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, 5); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 11); + /* dbuf[0] == dbuf[3] -> 11 */ + jump = sljit_emit_fcmp(compiler, SLJIT_EQUAL_F64, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_FR2, 0); + + /* else -> -17 */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, -17); + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + 
CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&dbuf) != 11, "test43 case 1 failed\n"); + dbuf[3].value = 12; + FAILED(code.func1((sljit_sw)&dbuf) != -17, "test43 case 2 failed\n"); + dbuf[1].value = 0; + FAILED(code.func1((sljit_sw)&dbuf) != 5, "test43 case 3 failed\n"); + dbuf[2].value = 20; + FAILED(code.func1((sljit_sw)&dbuf) != -2, "test43 case 4 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test44(void) +{ + /* Test mov. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + void *buf[5]; + + if (verbose) + printf("Run test44\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = buf + 2; + buf[1] = NULL; + buf[2] = NULL; + buf[3] = NULL; + buf[4] = NULL; + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 2, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_p), SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_p)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 2); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), SLJIT_POINTER_SHIFT, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, sizeof(sljit_p)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3 << SLJIT_POINTER_SHIFT); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * sizeof(sljit_p)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1 << SLJIT_POINTER_SHIFT); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 2, SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = 
sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)(buf + 2), "test44 case 1 failed\n"); + FAILED(buf[1] != buf + 2, "test44 case 2 failed\n"); + FAILED(buf[2] != buf + 3, "test44 case 3 failed\n"); + FAILED(buf[3] != buf + 4, "test44 case 4 failed\n"); + FAILED(buf[4] != buf + 2, "test44 case 5 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test45(void) +{ + /* Test single precision floating point. */ + + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_f32 buf[12]; + sljit_sw buf2[6]; + struct sljit_jump* jump; + + if (verbose) + printf("Run test45\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test45 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = 5.5; + buf[1] = -7.25; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = 0; + buf[7] = 8.75; + buf[8] = 0; + buf[9] = 16.5; + buf[10] = 0; + buf[11] = 0; + + buf2[0] = -1; + buf2[1] = -1; + buf2[2] = -1; + buf2[3] = -1; + buf2[4] = -1; + buf2[5] = -1; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 2, 6, 0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f32), SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_FR1, 0, SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f32), SLJIT_FR1, 0); + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f32), SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 
0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f32), SLJIT_FR4, 0); + + sljit_emit_fop2(compiler, SLJIT_ADD_F32, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f32), SLJIT_FR0, 0); + sljit_emit_fop2(compiler, SLJIT_SUB_F32, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f32), SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop2(compiler, SLJIT_MUL_F32, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f32), SLJIT_FR0, 0, SLJIT_FR0, 0); + sljit_emit_fop2(compiler, SLJIT_DIV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f32), SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f32), SLJIT_FR2, 0); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 0x3d0ac); + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R0), 0x3d0ac); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 0x3d0ac + sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_ABS_F32, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R0), -0x3d0ac); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_EQUAL_F, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_EQUAL_F32); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_LESS_F, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_LESS_F32); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 2 * 
sizeof(sljit_sw), SLJIT_EQUAL_F32); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_GREATER_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + cond_set(compiler, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_GREATER_EQUAL_F32); + + jump = sljit_emit_fcmp(compiler, SLJIT_LESS_EQUAL_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_IMM, 7); + sljit_set_label(jump, sljit_emit_label(compiler)); + + jump = sljit_emit_fcmp(compiler, SLJIT_GREATER_F32, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_FR2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_IMM, 6); + sljit_set_label(jump, sljit_emit_label(compiler)); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&buf2); + FAILED(buf[2] != -5.5, "test45 case 1 failed\n"); + FAILED(buf[3] != 7.25, "test45 case 2 failed\n"); + FAILED(buf[4] != 7.25, "test45 case 3 failed\n"); + FAILED(buf[5] != -5.5, "test45 case 4 failed\n"); + FAILED(buf[6] != -1.75, "test45 case 5 failed\n"); + FAILED(buf[7] != 16.0, "test45 case 6 failed\n"); + FAILED(buf[8] != 30.25, "test45 case 7 failed\n"); + FAILED(buf[9] != 3, "test45 case 8 failed\n"); + FAILED(buf[10] != -5.5, "test45 case 9 failed\n"); + FAILED(buf[11] != 7.25, "test45 case 10 failed\n"); + FAILED(buf2[0] != 1, "test45 case 11 failed\n"); + FAILED(buf2[1] != 2, "test45 case 12 failed\n"); + FAILED(buf2[2] != 2, "test45 case 13 failed\n"); + FAILED(buf2[3] != 1, "test45 case 14 failed\n"); + FAILED(buf2[4] != 7, "test45 case 15 failed\n"); + FAILED(buf2[5] != -1, "test45 case 16 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test46(void) +{ + /* Test sljit_emit_op_flags with 32 bit operations. 
*/ + + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 buf[24]; + sljit_sw buf2[6]; + sljit_s32 i; + + if (verbose) + printf("Run test46\n"); + + for (i = 0; i < 24; ++i) + buf[i] = -17; + buf[16] = 0; + for (i = 0; i < 6; ++i) + buf2[i] = -13; + buf2[4] = -124; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 3, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -7); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, 13); + sljit_emit_op_flags(compiler, SLJIT_MOV32, SLJIT_MEM0(), (sljit_sw)&buf, SLJIT_LESS); + sljit_emit_op_flags(compiler, SLJIT_AND32, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_s32), SLJIT_NOT_ZERO); + + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, -7); + sljit_emit_op_flags(compiler, SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_s32), SLJIT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_AND32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_s32), SLJIT_NOT_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x1235); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0x1235); + sljit_emit_op_flags(compiler, SLJIT_AND32 | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_ZERO); + sljit_emit_op_flags(compiler, SLJIT_AND32, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_s32), SLJIT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_s32), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, -7); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 12); + sljit_emit_op_flags(compiler, SLJIT_MOV32, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 2, SLJIT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_EQUAL); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_s32), 
SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); + sljit_emit_op_flags(compiler, SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 2, SLJIT_EQUAL); + sljit_emit_op_flags(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_s32), SLJIT_NOT_EQUAL); + + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, -7); + sljit_emit_op_flags(compiler, SLJIT_XOR32 | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_S0), 20 * sizeof(sljit_s32), SLJIT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 39); + sljit_emit_op_flags(compiler, SLJIT_XOR32, SLJIT_R0, 0, SLJIT_NOT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 22 * sizeof(sljit_s32), SLJIT_R0, 0); + + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, -7); + sljit_emit_op_flags(compiler, SLJIT_AND, SLJIT_MEM0(), (sljit_sw)&buf2, SLJIT_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op_flags(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_SIG_LESS); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, 5); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_LESS); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_NOT_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, 5); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_S2, 0); + sljit_emit_op_flags(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_WORD_SHIFT, SLJIT_SIG_LESS); + sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_MEM1(SLJIT_S1), 4 * 
sizeof(sljit_sw), SLJIT_ZERO); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R2, 0, SLJIT_IMM, 0); + sljit_emit_op_flags(compiler, SLJIT_XOR, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_GREATER); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func2((sljit_sw)&buf, (sljit_sw)&buf2); + FAILED(buf[0] != 0, "test46 case 1 failed\n"); + FAILED(buf[1] != -17, "test46 case 2 failed\n"); + FAILED(buf[2] != 1, "test46 case 3 failed\n"); + FAILED(buf[3] != -17, "test46 case 4 failed\n"); + FAILED(buf[4] != 1, "test46 case 5 failed\n"); + FAILED(buf[5] != -17, "test46 case 6 failed\n"); + FAILED(buf[6] != 1, "test46 case 7 failed\n"); + FAILED(buf[7] != -17, "test46 case 8 failed\n"); + FAILED(buf[8] != 0, "test46 case 9 failed\n"); + FAILED(buf[9] != -17, "test46 case 10 failed\n"); + FAILED(buf[10] != 1, "test46 case 11 failed\n"); + FAILED(buf[11] != -17, "test46 case 12 failed\n"); + FAILED(buf[12] != 1, "test46 case 13 failed\n"); + FAILED(buf[13] != -17, "test46 case 14 failed\n"); + FAILED(buf[14] != 1, "test46 case 15 failed\n"); + FAILED(buf[15] != -17, "test46 case 16 failed\n"); + FAILED(buf[16] != 0, "test46 case 17 failed\n"); + FAILED(buf[17] != -17, "test46 case 18 failed\n"); + FAILED(buf[18] != 0, "test46 case 19 failed\n"); + FAILED(buf[19] != -17, "test46 case 20 failed\n"); + FAILED(buf[20] != -18, "test46 case 21 failed\n"); + FAILED(buf[21] != -17, "test46 case 22 failed\n"); + FAILED(buf[22] != 38, "test46 case 23 failed\n"); + FAILED(buf[23] != -17, "test46 case 24 failed\n"); + + FAILED(buf2[0] != 0, "test46 case 25 failed\n"); + FAILED(buf2[1] != 1, "test46 case 26 failed\n"); + FAILED(buf2[2] != 0, "test46 case 27 failed\n"); + FAILED(buf2[3] != 1, "test46 case 28 failed\n"); + FAILED(buf2[4] != -123, "test46 case 29 failed\n"); + FAILED(buf2[5] != -14, "test46 case 30 failed\n"); + + 
sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test47(void) +{ + /* Test jump optimizations. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[3]; + + if (verbose) + printf("Run test47\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x3a5c6f); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 3); + sljit_set_target(sljit_emit_jump(compiler, SLJIT_LESS), 0x11223344); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0xd37c10); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_set_target(sljit_emit_jump(compiler, SLJIT_LESS), SLJIT_W(0x112233445566)); +#endif + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0x59b48e); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_set_target(sljit_emit_jump(compiler, SLJIT_LESS), SLJIT_W(0x1122334455667788)); +#endif + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[0] != 0x3a5c6f, "test47 case 1 failed\n"); + FAILED(buf[1] != 0xd37c10, "test47 case 2 failed\n"); + FAILED(buf[2] != 0x59b48e, "test47 case 3 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test48(void) +{ + /* Test floating point conversions. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + int i; + sljit_f64 dbuf[10]; + sljit_f32 sbuf[10]; + sljit_sw wbuf[10]; + sljit_s32 ibuf[10]; + + if (verbose) + printf("Run test48\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test48 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + for (i = 0; i < 10; i++) { + dbuf[i] = 0.0; + sbuf[i] = 0.0; + wbuf[i] = 0; + ibuf[i] = 0; + } + + dbuf[0] = 123.5; + dbuf[1] = -367; + dbuf[2] = 917.75; + + sbuf[0] = 476.25; + sbuf[1] = -1689.75; + + wbuf[0] = 2345; + + ibuf[0] = 312; + ibuf[1] = -9324; + + sljit_emit_enter(compiler, 0, 0, 3, 3, 6, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&sbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&wbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&ibuf); + + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3); + /* sbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_F32_SHIFT, SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S1), 0); + /* dbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR4, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_FR2, 0, SLJIT_FR3, 0); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 
* sizeof(sljit_f64), SLJIT_FR2, 0); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR3, 0); + + /* wbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 2); + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), SLJIT_F64_SHIFT); + /* wbuf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S1), 0); + /* wbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_sw), SLJIT_FR5, 0); + sljit_emit_fop1(compiler, SLJIT_NEG_F32, SLJIT_FR0, 0, SLJIT_FR5, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4); + /* wbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM2(SLJIT_S2, SLJIT_R1), SLJIT_WORD_SHIFT, SLJIT_FR0, 0); + sljit_emit_fop1(compiler, SLJIT_NEG_F64, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); + /* ibuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32), SLJIT_FR4, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F32, SLJIT_R0, 0, SLJIT_FR1, 0); + /* ibuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 3 * sizeof(sljit_s32), SLJIT_R0, 0); + + /* dbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_FR2, 0, SLJIT_IMM, -6213); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_FR2, 0); + /* dbuf[7] */ + sljit_emit_fop1(compiler, 
SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_f64), SLJIT_MEM0(), (sljit_sw)&ibuf[0]); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32)); + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_FR1, 0, SLJIT_R0, 0); + /* dbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_FR1, 0); + /* dbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM0(), (sljit_sw)(dbuf + 9), SLJIT_IMM, -77); + /* sbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_f32), SLJIT_IMM, -123); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 7190); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_FR3, 0, SLJIT_R0, 0); + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_FR3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 123); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R1, 0, SLJIT_R2, 0, SLJIT_IMM, 123 * sizeof(sljit_s32)); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_FR1, 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 2); + /* sbuf[7] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_f32), SLJIT_FR1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 3812); + /* sbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM2(SLJIT_S1, SLJIT_R0), SLJIT_F32_SHIFT, SLJIT_R1, 0); + /* sbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM0(), (sljit_sw)(sbuf + 9), SLJIT_IMM, -79); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func0(); + FAILED(dbuf[3] != 476.25, "test48 case 1 failed\n"); + FAILED(dbuf[4] != 476.25, "test48 case 2 failed\n"); + 
FAILED(dbuf[5] != 2345.0, "test48 case 3 failed\n"); + FAILED(dbuf[6] != -6213.0, "test48 case 4 failed\n"); + FAILED(dbuf[7] != 312.0, "test48 case 5 failed\n"); + FAILED(dbuf[8] != -9324.0, "test48 case 6 failed\n"); + FAILED(dbuf[9] != -77.0, "test48 case 7 failed\n"); + + FAILED(sbuf[2] != 123.5, "test48 case 8 failed\n"); + FAILED(sbuf[3] != 123.5, "test48 case 9 failed\n"); + FAILED(sbuf[4] != 476.25, "test48 case 10 failed\n"); + FAILED(sbuf[5] != -123, "test48 case 11 failed\n"); + FAILED(sbuf[6] != 7190, "test48 case 12 failed\n"); + FAILED(sbuf[7] != 312, "test48 case 13 failed\n"); + FAILED(sbuf[8] != 3812, "test48 case 14 failed\n"); + FAILED(sbuf[9] != -79.0, "test48 case 15 failed\n"); + + FAILED(wbuf[1] != -367, "test48 case 16 failed\n"); + FAILED(wbuf[2] != 917, "test48 case 17 failed\n"); + FAILED(wbuf[3] != 476, "test48 case 18 failed\n"); + FAILED(wbuf[4] != -476, "test48 case 19 failed\n"); + + FAILED(ibuf[2] != -917, "test48 case 20 failed\n"); + FAILED(ibuf[3] != -1689, "test48 case 21 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test49(void) +{ + /* Test floating point conversions. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + int i; + sljit_f64 dbuf[10]; + sljit_f32 sbuf[9]; + sljit_sw wbuf[9]; + sljit_s32 ibuf[9]; + sljit_s32* dbuf_ptr = (sljit_s32*)dbuf; + sljit_s32* sbuf_ptr = (sljit_s32*)sbuf; + + if (verbose) + printf("Run test49\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test49 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 9; i++) { + dbuf_ptr[i << 1] = -1; + dbuf_ptr[(i << 1) + 1] = -1; + sbuf_ptr[i] = -1; + wbuf[i] = -1; + ibuf[i] = -1; + } + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + dbuf[9] = (sljit_f64)SLJIT_W(0x1122334455); +#endif + dbuf[0] = 673.75; + sbuf[0] = -879.75; + wbuf[0] = 345; + ibuf[0] = -249; + + sljit_emit_enter(compiler, 0, 0, 3, 3, 3, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_IMM, (sljit_sw)&dbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)&sbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)&wbuf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)&ibuf); + + /* dbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_F32, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S1), 0); + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_F64, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), 0); + /* wbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S0), 0); + /* wbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F32, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_sw), SLJIT_MEM1(SLJIT_S1), 0); + /* ibuf[2] */ + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32), SLJIT_MEM1(SLJIT_S0), 0); + /* ibuf[4] */ + 
sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F32, SLJIT_MEM1(SLJIT_R2), 4 * sizeof(sljit_s32), SLJIT_MEM1(SLJIT_S1), 0); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_S2), 0); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S2), 0); + /* dbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64), SLJIT_MEM1(SLJIT_R2), 0); + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_S32, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_R2), 0); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64)); + /* wbuf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 8 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_CONV_S32_FROM_F64, SLJIT_R0, 0, SLJIT_FR2, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_AND32, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0xffff); + /* ibuf[8] */ + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), 8 * sizeof(sljit_s32), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(0x4455667788)); + /* dbuf[8] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_f64), SLJIT_R0, 0); + /* dbuf[9] */ + sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_S32, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_f64), SLJIT_IMM, SLJIT_W(0x7766554433)); +#endif + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func0(); + + FAILED(dbuf_ptr[(1 
* 2) + 0] != -1, "test49 case 1 failed\n"); + FAILED(dbuf_ptr[(1 * 2) + 1] != -1, "test49 case 2 failed\n"); + FAILED(dbuf[2] != -879.75, "test49 case 3 failed\n"); + FAILED(dbuf_ptr[(3 * 2) + 0] != -1, "test49 case 4 failed\n"); + FAILED(dbuf_ptr[(3 * 2) + 1] != -1, "test49 case 5 failed\n"); + FAILED(dbuf[4] != 345, "test49 case 6 failed\n"); + FAILED(dbuf_ptr[(5 * 2) + 0] != -1, "test49 case 7 failed\n"); + FAILED(dbuf_ptr[(5 * 2) + 1] != -1, "test49 case 8 failed\n"); + FAILED(dbuf[6] != -249, "test49 case 9 failed\n"); + FAILED(dbuf_ptr[(7 * 2) + 0] != -1, "test49 case 10 failed\n"); + FAILED(dbuf_ptr[(7 * 2) + 1] != -1, "test49 case 11 failed\n"); + + FAILED(sbuf_ptr[1] != -1, "test49 case 12 failed\n"); + FAILED(sbuf[2] != 673.75, "test49 case 13 failed\n"); + FAILED(sbuf_ptr[3] != -1, "test49 case 14 failed\n"); + FAILED(sbuf[4] != 345, "test49 case 15 failed\n"); + FAILED(sbuf_ptr[5] != -1, "test49 case 16 failed\n"); + FAILED(sbuf[6] != -249, "test49 case 17 failed\n"); + FAILED(sbuf_ptr[7] != -1, "test49 case 18 failed\n"); + + FAILED(wbuf[1] != -1, "test49 case 19 failed\n"); + FAILED(wbuf[2] != 673, "test49 case 20 failed\n"); + FAILED(wbuf[3] != -1, "test49 case 21 failed\n"); + FAILED(wbuf[4] != -879, "test49 case 22 failed\n"); + FAILED(wbuf[5] != -1, "test49 case 23 failed\n"); + + FAILED(ibuf[1] != -1, "test49 case 24 failed\n"); + FAILED(ibuf[2] != 673, "test49 case 25 failed\n"); + FAILED(ibuf[3] != -1, "test49 case 26 failed\n"); + FAILED(ibuf[4] != -879, "test49 case 27 failed\n"); + FAILED(ibuf[5] != -1, "test49 case 28 failed\n"); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + FAILED(dbuf[8] != (sljit_f64)SLJIT_W(0x4455667788), "test49 case 29 failed\n"); + FAILED(dbuf[9] != (sljit_f64)SLJIT_W(0x66554433), "test49 case 30 failed\n"); + FAILED(wbuf[8] != SLJIT_W(0x1122334455), "test48 case 31 failed\n"); + FAILED(ibuf[8] == 0x4455, "test48 case 32 failed\n"); +#endif + + sljit_free_code(code.code, NULL); + 
successful_tests++; +} + +static void test50(void) +{ + /* Test stack and floating point operations. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + sljit_uw size1, size2, size3; + int result; +#endif + sljit_f32 sbuf[7]; + + if (verbose) + printf("Run test50\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test50 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + FAILED(!compiler, "cannot create compiler\n"); + + sbuf[0] = 245.5; + sbuf[1] = -100.25; + sbuf[2] = 713.75; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 3, 6, 0, 8 * sizeof(sljit_f32)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 0); + /* sbuf[3] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32), SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f32)); + sljit_emit_fop2(compiler, SLJIT_ADD_F32, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_f32)); + /* sbuf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32), SLJIT_IMM, 5934); + sljit_emit_fop1(compiler, SLJIT_CONV_F32_FROM_SW, SLJIT_MEM1(SLJIT_SP), 3 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 2 * sizeof(sljit_f32)); + /* sbuf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f32), SLJIT_MEM1(SLJIT_SP), 3 * sizeof(sljit_f32)); + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + size1 = compiler->size; 
+#endif + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f32)); +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + size2 = compiler->size; +#endif + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_FR2, 0); +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + size3 = compiler->size; +#endif + /* sbuf[6] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f32), SLJIT_FR5, 0); +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + result = (compiler->size - size3) == (size3 - size2) && (size3 - size2) == (size2 - size1); +#endif + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&sbuf); + + FAILED(sbuf[3] != 245.5, "test50 case 1 failed\n"); + FAILED(sbuf[4] != 145.25, "test50 case 2 failed\n"); + FAILED(sbuf[5] != 5934, "test50 case 3 failed\n"); + FAILED(sbuf[6] != 713.75, "test50 case 4 failed\n"); +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + FAILED(!result, "test50 case 5 failed\n"); +#endif + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test51(void) +{ + /* Test all registers provided by the CPU. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump; + sljit_sw buf[2]; + sljit_s32 i; + + if (verbose) + printf("Run test51\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + buf[0] = 39; + + sljit_emit_enter(compiler, 0, 0, SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 32); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)buf); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 0); + + for (i = 2; i < SLJIT_NUMBER_OF_REGISTERS; i++) { + if (sljit_get_register_index(SLJIT_R(i)) >= 0) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R(i)), 0); + } else + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, buf[0]); + } + + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 32); + for (i = 2; i < SLJIT_NUMBER_OF_REGISTERS; i++) { + if (sljit_get_register_index(SLJIT_R(i)) >= 0) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R(i)), 32); + } else + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, buf[0]); + } + + for (i = 2; i < SLJIT_NUMBER_OF_REGISTERS; i++) { + if (sljit_get_register_index(SLJIT_R(i)) >= 0) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 32); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM2(SLJIT_R(i), SLJIT_R0), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R(i)), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 8); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R(i)), 2); + } else + 
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 3 * buf[0]); + } + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), 32 + sizeof(sljit_sw), SLJIT_R1, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func0(); + + FAILED(buf[1] != (39 * 5 * (SLJIT_NUMBER_OF_REGISTERS - 2)), "test51 case 1 failed\n"); + + sljit_free_code(code.code, NULL); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, 0, SLJIT_NUMBER_OF_SCRATCH_REGISTERS, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 17); + + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(SW)); + /* SLJIT_R0 contains the first value. */ + for (i = 1; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R(i), 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, 0, SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, 0); + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 35); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func0() != (SLJIT_NUMBER_OF_SCRATCH_REGISTERS * 35 + SLJIT_NUMBER_OF_SAVED_REGISTERS * 17), "test51 case 2 failed\n"); + + sljit_free_code(code.code, NULL); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, 0, SLJIT_NUMBER_OF_SCRATCH_REGISTERS, SLJIT_NUMBER_OF_SAVED_REGISTERS, 0, 0, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 68); + + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(SW)); + /* SLJIT_R0 contains the first value. */ + for (i = 1; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R(i), 0); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, 0, 0, SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0); + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S(i), 0, SLJIT_IMM, 43); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code.func0() != (SLJIT_NUMBER_OF_SCRATCH_REGISTERS * 43 + SLJIT_NUMBER_OF_SAVED_REGISTERS * 68), "test51 case 3 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test52(void) +{ + /* Test all registers provided by the CPU. */ + executable_code code; + struct sljit_compiler* compiler; + struct sljit_jump* jump; + sljit_f64 buf[3]; + sljit_s32 i; + + if (verbose) + printf("Run test52\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test52 skipped\n"); + successful_tests++; + return; + } + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 6.25; + buf[1] = 17.75; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 0, 1, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_S0), 0); + + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(VOID)); + /* SLJIT_FR0 contains the first value. */ + for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR(i), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, 0, 1, 0, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[1]); + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_R0), 0); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[2] != (SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS * 17.75 + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS * 6.25), "test52 case 1 failed\n"); + + sljit_free_code(code.code, NULL); + + /* Next test. 
*/ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = -32.5; + buf[1] = -11.25; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 0, 1, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, 0); + + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR(i), 0, SLJIT_MEM1(SLJIT_S0), 0); + + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(VOID)); + /* SLJIT_FR0 contains the first value. */ + for (i = 1; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR(i), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_enter(compiler, 0, 0, 1, 0, 0, SLJIT_NUMBER_OF_FLOAT_REGISTERS, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)&buf[1]); + for (i = 0; i < SLJIT_NUMBER_OF_FLOAT_REGISTERS; i++) + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FS(i), 0, SLJIT_MEM1(SLJIT_R0), 0); + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + FAILED(buf[2] != (SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS * -11.25 + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS * -32.5), "test52 case 2 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test53(void) +{ + /* Check SLJIT_DOUBLE_ALIGNMENT. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[1]; + + if (verbose) + printf("Run test53\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = -1; + + sljit_emit_enter(compiler, SLJIT_F64_ALIGNMENT, SLJIT_ARG1(SW), 1, 1, 0, 0, 2 * sizeof(sljit_sw)); + + sljit_get_local_base(compiler, SLJIT_R0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED((buf[0] & (sizeof(sljit_f64) - 1)) != 0, "test53 case 1 failed\n"); + + sljit_free_code(code.code, NULL); + + /* Next test. */ + + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = -1; + + /* One more saved register to break the alignment on x86-32. */ + sljit_emit_enter(compiler, SLJIT_F64_ALIGNMENT, SLJIT_ARG1(SW), 1, 2, 0, 0, 2 * sizeof(sljit_sw)); + + sljit_get_local_base(compiler, SLJIT_R0, 0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED((buf[0] & (sizeof(sljit_f64) - 1)) != 0, "test53 case 2 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test54(void) +{ + /* Check cmov. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_sw large_num = SLJIT_W(0x1234567812345678); +#else + sljit_sw large_num = SLJIT_W(0x12345678); +#endif + int i; + sljit_sw buf[19]; + sljit_s32 ibuf[4]; + + union { + sljit_f32 value; + sljit_s32 s32_value; + } sbuf[3]; + + sbuf[0].s32_value = 0x7fffffff; + sbuf[1].value = 7.5; + sbuf[2].value = -14.75; + + if (verbose) + printf("Run test54\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + for (i = 0; i < 19; i++) + buf[i] = 0; + for (i = 0; i < 4; i++) + ibuf[i] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 5, 3, 3, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 17); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 34); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, -10); + sljit_emit_cmov(compiler, SLJIT_SIG_LESS, SLJIT_R0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, -10); + sljit_emit_cmov(compiler, SLJIT_SIG_GREATER, SLJIT_R0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 24); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 24); + sljit_emit_cmov(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, SLJIT_IMM, 66); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_IMM, 78); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_IMM, large_num); + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + SLJIT_ASSERT(sljit_get_register_index(SLJIT_R3) == -1 && sljit_get_register_index(SLJIT_R4) == -1); +#endif + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 7); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -45); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_cmov(compiler, SLJIT_OVERFLOW, SLJIT_R3, SLJIT_IMM, 35); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, large_num); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, large_num); + sljit_emit_cmov(compiler, SLJIT_OVERFLOW, SLJIT_R3, SLJIT_IMM, 35); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 71); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, 13); + sljit_emit_op2(compiler, SLJIT_LSHR | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R3, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 12); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -29); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_cmov(compiler, SLJIT_NOT_OVERFLOW, SLJIT_R0, SLJIT_R3, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -12); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R4, 0, 
SLJIT_IMM, 21); + sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_cmov(compiler, SLJIT_NOT_EQUAL, SLJIT_R3, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_R3, 0); + sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R3, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R3, 0); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_cmov(compiler, SLJIT_EQUAL_F32, SLJIT_R0, SLJIT_IMM, -45); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_GREATER_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_cmov(compiler, SLJIT_GREATER_F32, SLJIT_R0, SLJIT_IMM, -45); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_GREATER_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL_F32, SLJIT_R0, SLJIT_IMM, 33); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_LESS_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_cmov(compiler, SLJIT_LESS_F32, SLJIT_R0, SLJIT_IMM, -70); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, 
SLJIT_CMP_F32 | SLJIT_SET_LESS_EQUAL_F, SLJIT_FR2, 0, SLJIT_FR1, 0); + sljit_emit_cmov(compiler, SLJIT_LESS_EQUAL_F32, SLJIT_R0, SLJIT_IMM, -60); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_NOT_EQUAL_F, SLJIT_FR1, 0, SLJIT_FR2, 0); + sljit_emit_cmov(compiler, SLJIT_NOT_EQUAL_F32, SLJIT_R0, SLJIT_IMM, 31); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 53); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_ORDERED_F, SLJIT_FR1, 0, SLJIT_FR0, 0); + sljit_emit_cmov(compiler, SLJIT_ORDERED_F32, SLJIT_R0, SLJIT_IMM, 17); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 17 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_fop1(compiler, SLJIT_CMP_F32 | SLJIT_SET_UNORDERED_F, SLJIT_FR1, 0, SLJIT_FR0, 0); + sljit_emit_cmov(compiler, SLJIT_UNORDERED_F32, SLJIT_R0, SLJIT_IMM, 59); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 18 * sizeof(sljit_sw), SLJIT_R0, 0); + } + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 177); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 178); + sljit_emit_cmov(compiler, SLJIT_LESS, SLJIT_R0 | SLJIT_I32_OP, SLJIT_IMM, 200); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 95); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R3, 0, SLJIT_IMM, 177); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 95); + sljit_emit_cmov(compiler, SLJIT_LESS_EQUAL, SLJIT_R3 | SLJIT_I32_OP, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_s32), SLJIT_R3, 0); + + sljit_emit_op1(compiler, SLJIT_MOV32, 
SLJIT_R3, 0, SLJIT_IMM, 56); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R4, 0, SLJIT_IMM, -63); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, SLJIT_R3, 0, SLJIT_R4, 0); + sljit_emit_cmov(compiler, SLJIT_SIG_LESS, SLJIT_R3 | SLJIT_I32_OP, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_s32), SLJIT_R3, 0); + sljit_emit_op2(compiler, SLJIT_SUB32 | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_R3, 0, SLJIT_R4, 0); + sljit_emit_cmov(compiler, SLJIT_SIG_GREATER, SLJIT_R3 | SLJIT_I32_OP, SLJIT_R4, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_s32), SLJIT_R3, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&buf, (sljit_sw)&ibuf, (sljit_sw)&sbuf); + + FAILED(buf[0] != 17, "test54 case 1 failed\n"); + FAILED(buf[1] != 34, "test54 case 2 failed\n"); + FAILED(buf[2] != 24, "test54 case 3 failed\n"); + FAILED(buf[3] != 78, "test54 case 4 failed\n"); + FAILED(buf[4] != large_num, "test54 case 5 failed\n"); + FAILED(buf[5] != -45, "test54 case 6 failed\n"); + FAILED(buf[6] != 35, "test54 case 7 failed\n"); + FAILED(buf[7] != 71, "test54 case 8 failed\n"); + FAILED(buf[8] != -29, "test54 case 9 failed\n"); + FAILED(buf[9] != -12, "test54 case 10 failed\n"); + FAILED(buf[10] != 21, "test54 case 11 failed\n"); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + FAILED(buf[11] != 16, "test54 case 12 failed\n"); + FAILED(buf[12] != -45, "test54 case 13 failed\n"); + FAILED(buf[13] != 33, "test54 case 14 failed\n"); + FAILED(buf[14] != 8, "test54 case 15 failed\n"); + FAILED(buf[15] != -60, "test54 case 16 failed\n"); + FAILED(buf[16] != 31, "test54 case 17 failed\n"); + FAILED(buf[17] != 53, "test54 case 18 failed\n"); + FAILED(buf[18] != 59, "test54 case 19 failed\n"); + } + + FAILED(ibuf[0] != 200, "test54 case 12 failed\n"); + 
FAILED(ibuf[1] != 95, "test54 case 13 failed\n"); + FAILED(ibuf[2] != 56, "test54 case 14 failed\n"); + FAILED(ibuf[3] != -63, "test54 case 15 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test55(void) +{ + /* Check value preservation. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[2]; + sljit_s32 i; + + if (verbose) + printf("Run test55\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + + sljit_emit_enter(compiler, 0, 0, SLJIT_NUMBER_OF_REGISTERS, 0, 0, 0, sizeof (sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, 217); + + /* Check 1 */ + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 118); + + sljit_emit_op0(compiler, SLJIT_DIVMOD_SW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + + for (i = 2; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R(i), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 0), SLJIT_R0, 0); + + /* Check 2 */ + for (i = 0; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R(i), 0, SLJIT_IMM, 146); + + sljit_emit_op0(compiler, SLJIT_DIV_SW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + + for (i = 1; i < SLJIT_NUMBER_OF_REGISTERS; i++) + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R(i), 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM0(), (sljit_sw)(buf + 1), SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + 
code.func0(); + + FAILED(buf[0] != (SLJIT_NUMBER_OF_REGISTERS - 2) * 118 + 217, "test55 case 1 failed\n"); + FAILED(buf[1] != (SLJIT_NUMBER_OF_REGISTERS - 1) * 146 + 217, "test55 case 2 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test56(void) +{ + /* Check integer substraction with negative immediate. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[13]; + sljit_s32 i; + + if (verbose) + printf("Run test56\n"); + + for (i = 0; i < 13; i++) + buf[i] = 77; + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 90 << 12); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_SIG_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R1, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_LESS); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER_EQUAL, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_SIG_GREATER_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_LESS_EQUAL); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * 
sizeof(sljit_sw), SLJIT_GREATER); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_SIG_LESS); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 90); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_SIG_GREATER, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, -91); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 9 * sizeof(sljit_sw), SLJIT_SIG_GREATER); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 90); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, -91); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_LESS_EQUAL); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, -0x7fffffff); + sljit_emit_op2(compiler, SLJIT_ADD32 | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, -(91 << 12)); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, -0x7fffffff-1); + sljit_emit_op1(compiler, SLJIT_NEG32 | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R0, 0); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_OVERFLOW); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED(buf[0] != (181 << 12), "test56 case 1 failed\n"); + FAILED(buf[1] != 1, "test56 case 2 failed\n"); + FAILED(buf[2] != (181 << 12), "test56 case 3 failed\n"); + FAILED(buf[3] != 1, "test56 case 4 failed\n"); + FAILED(buf[4] != 1, "test56 case 5 failed\n"); + FAILED(buf[5] != 1, "test56 case 6 failed\n"); + FAILED(buf[6] != 0, 
"test56 case 7 failed\n"); + FAILED(buf[7] != 0, "test56 case 8 failed\n"); + FAILED(buf[8] != 181, "test56 case 9 failed\n"); + FAILED(buf[9] != 1, "test56 case 10 failed\n"); + FAILED(buf[10] != 1, "test56 case 11 failed\n"); + FAILED(buf[11] != 1, "test56 case 12 failed\n"); + FAILED(buf[12] != 1, "test56 case 13 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test57(void) +{ + /* Check prefetch instructions. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_label* labels[5]; + sljit_p addr[5]; + int i; + + if (verbose) + printf("Run test57\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, 0, 3, 1, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + labels[0] = sljit_emit_label(compiler); + /* Should never crash. */ + sljit_emit_op_src(compiler, SLJIT_PREFETCH_L1, SLJIT_MEM2(SLJIT_R0, SLJIT_R0), 2); + labels[1] = sljit_emit_label(compiler); + sljit_emit_op_src(compiler, SLJIT_PREFETCH_L2, SLJIT_MEM0(), 0); + labels[2] = sljit_emit_label(compiler); +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_emit_op_src(compiler, SLJIT_PREFETCH_L3, SLJIT_MEM1(SLJIT_R0), SLJIT_W(0x1122334455667788)); +#else + sljit_emit_op_src(compiler, SLJIT_PREFETCH_L3, SLJIT_MEM1(SLJIT_R0), 0x11223344); +#endif + labels[3] = sljit_emit_label(compiler); + sljit_emit_op_src(compiler, SLJIT_PREFETCH_ONCE, SLJIT_MEM1(SLJIT_R0), sizeof(sljit_sw)); + labels[4] = sljit_emit_label(compiler); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + for (i = 0; i < 5; i++) + addr[i] = sljit_get_label_addr(labels[i]); + + sljit_free_compiler(compiler); + + code.func0(); + + if (sljit_has_cpu_feature(SLJIT_HAS_PREFETCH)) { + FAILED(addr[0] == addr[1], "test57 case 1 failed\n"); + FAILED(addr[1] == addr[2], "test57 case 2 failed\n"); + 
FAILED(addr[2] == addr[3], "test57 case 3 failed\n"); + FAILED(addr[3] == addr[4], "test57 case 4 failed\n"); + } + else { + FAILED(addr[0] != addr[1], "test57 case 1 failed\n"); + FAILED(addr[1] != addr[2], "test57 case 2 failed\n"); + FAILED(addr[2] != addr[3], "test57 case 3 failed\n"); + FAILED(addr[3] != addr[4], "test57 case 4 failed\n"); + } + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static sljit_f64 SLJIT_FUNC test58_f1(sljit_f32 a, sljit_f32 b, sljit_f64 c) +{ + return a + b + c; +} + +static sljit_f32 SLJIT_FUNC test58_f2(sljit_sw a, sljit_f64 b, sljit_f32 c) +{ + return a + b + c; +} + +static sljit_f64 SLJIT_FUNC test58_f3(sljit_sw a, sljit_f32 b, sljit_sw c) +{ + return a + b + c; +} + +static sljit_f64 test58_f4(sljit_f32 a, sljit_sw b) +{ + return a + b; +} + +static sljit_f32 test58_f5(sljit_f32 a, sljit_f64 b, sljit_s32 c) +{ + return a + b + c; +} + +static sljit_sw SLJIT_FUNC test58_f6(sljit_f64 a, sljit_sw b) +{ + return (sljit_sw)a + b; +} + +static void test58(void) +{ + /* Check function calls with floating point arguments. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump = NULL; + sljit_f64 dbuf[7]; + sljit_f32 sbuf[7]; + sljit_sw wbuf[2]; + + if (verbose) + printf("Run test58\n"); + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test58 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + dbuf[0] = 5.25; + dbuf[1] = 0.0; + dbuf[2] = 2.5; + dbuf[3] = 0.0; + dbuf[4] = 0.0; + dbuf[5] = 0.0; + dbuf[6] = -18.0; + + sbuf[0] = 6.75; + sbuf[1] = -3.5; + sbuf[2] = 1.5; + sbuf[3] = 0.0; + sbuf[4] = 0.0; + + wbuf[0] = 0; + wbuf[1] = 0; + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 3, 3, 4, 0, sizeof(sljit_sw)); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(F64) | SLJIT_ARG1(F32) | SLJIT_ARG2(F32) | SLJIT_ARG3(F64), SLJIT_IMM, SLJIT_FUNC_OFFSET(test58_f1)); + /* dbuf[1] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_f64)); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(F64) | SLJIT_ARG1(F32) | SLJIT_ARG2(F32) | SLJIT_ARG3(F64)); + sljit_set_target(jump, SLJIT_FUNC_OFFSET(test58_f1)); + /* dbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + + 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test58_f2)); + sljit_get_local_base(compiler, SLJIT_R1, 0, -16); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64) | SLJIT_ARG3(F32), SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + /* sbuf[3] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -4); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 9); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(F64) | SLJIT_ARG1(SW) | SLJIT_ARG2(F32) | SLJIT_ARG3(SW)); + sljit_set_target(jump, SLJIT_FUNC_OFFSET(test58_f3)); + /* dbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -6); + jump = sljit_emit_call(compiler, SLJIT_CALL_CDECL, SLJIT_RET(F64) | SLJIT_ARG1(F32) | SLJIT_ARG2(SW)); + sljit_set_target(jump, SLJIT_FUNC_OFFSET(test58_f4)); + /* dbuf[5] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_f64), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test58_f5)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S0), 2 * 
sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 8); + sljit_emit_icall(compiler, SLJIT_CALL_CDECL, SLJIT_RET(F32) | SLJIT_ARG1(F32) | SLJIT_ARG2(F64) | SLJIT_ARG3(S32), SLJIT_MEM1(SLJIT_SP), 0); + /* sbuf[4] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test58_f6)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(F64) | SLJIT_ARG2(SW), SLJIT_R0, 0); + /* wbuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_R0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 319); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test58_f6)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(F64) | SLJIT_ARG2(SW), SLJIT_R1, 0); + /* wbuf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&dbuf, (sljit_sw)&sbuf, (sljit_sw)&wbuf); + + FAILED(dbuf[1] != 8.5, "test58 case 1 failed\n"); + FAILED(dbuf[3] != 0.5, "test58 case 2 failed\n"); + FAILED(sbuf[3] != 17.75, "test58 case 3 failed\n"); + FAILED(dbuf[4] != 11.75, "test58 case 4 failed\n"); + FAILED(dbuf[5] != -9.5, "test58 case 5 failed\n"); + FAILED(sbuf[4] != 12, "test58 case 6 failed\n"); + FAILED(wbuf[0] != SLJIT_FUNC_OFFSET(test58_f6) - 18, "test58 case 7 failed\n"); + FAILED(wbuf[1] != 301, "test58 case 8 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static sljit_sw SLJIT_FUNC 
test59_f1(sljit_sw a, sljit_s32 b, sljit_sw c, sljit_sw d) +{ + return (sljit_sw)(a + b + c + d - SLJIT_FUNC_OFFSET(test59_f1)); +} + +static sljit_sw test59_f2(sljit_sw a, sljit_s32 b, sljit_sw c, sljit_sw d) +{ + return (sljit_sw)(a + b + c + d - SLJIT_FUNC_OFFSET(test59_f2)); +} + +static sljit_s32 SLJIT_FUNC test59_f3(sljit_f64 a, sljit_f32 b, sljit_f64 c, sljit_sw d) +{ + return (sljit_s32)(a + b + c + d); +} + +static sljit_f32 SLJIT_FUNC test59_f4(sljit_f32 a, sljit_s32 b, sljit_f64 c, sljit_sw d) +{ + return (sljit_f32)(a + b + c + d); +} + +static sljit_f32 SLJIT_FUNC test59_f5(sljit_f32 a, sljit_f64 b, sljit_f32 c, sljit_f64 d) +{ + return (sljit_f32)(a + b + c + d); +} + +static void test59(void) +{ + /* Check function calls with four arguments. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump* jump = NULL; + sljit_sw wbuf[6]; + sljit_f64 dbuf[3]; + sljit_f32 sbuf[4]; + + if (verbose) + printf("Run test59\n"); + + wbuf[0] = 0; + wbuf[1] = 0; + wbuf[2] = 0; + wbuf[3] = SLJIT_FUNC_OFFSET(test59_f1); + wbuf[4] = 0; + wbuf[5] = 0; + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + dbuf[0] = 5.125; + dbuf[1] = 6.125; + dbuf[2] = 4.25; + + sbuf[0] = 0.75; + sbuf[1] = -1.5; + sbuf[2] = 0.0; + sbuf[3] = 0.0; + } + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 4, 3, 4, 0, sizeof(sljit_sw)); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 33); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -20); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test59_f1)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -40); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(S32) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_R2, 0); + /* wbuf[0] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 
0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 16); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -30); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 50); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test59_f2)); + sljit_emit_icall(compiler, SLJIT_CALL_CDECL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(S32) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_R3, 0); + /* wbuf[1] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test59_f1)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -25); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 100); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -10); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(S32) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_R0, 0); + /* wbuf[2] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, 231); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 3); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test59_f1) - 100); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(S32) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_MEM2(SLJIT_R0, SLJIT_R2), SLJIT_WORD_SHIFT); + /* wbuf[4] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R0, 0); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 
sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -100); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(F64) | SLJIT_ARG2(F32) | SLJIT_ARG3(F64) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(test59_f3)); + sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_R0, 0); + /* wbuf[5] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R0, 0); + + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R0, 0, SLJIT_IMM, 36); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 41); + jump = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_RET(F32) | SLJIT_ARG1(F32) | SLJIT_ARG2(S32) | SLJIT_ARG3(F64) | SLJIT_ARG4(SW)); + sljit_set_target(jump, SLJIT_FUNC_OFFSET(test59_f4)); + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32), SLJIT_FR0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_FUNC_OFFSET(test59_f5)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S2), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_f64)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(F32) | SLJIT_ARG1(F32) | SLJIT_ARG2(F64) | SLJIT_ARG3(F32) | SLJIT_ARG4(F64), SLJIT_R0, 0); + /* sbuf[2] */ + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 3 * sizeof(sljit_f32), SLJIT_FR0, 0); + } + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + 
code.func3((sljit_sw)&wbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); + + FAILED(wbuf[0] != -27, "test59 case 1 failed\n"); + FAILED(wbuf[1] != 36, "test59 case 2 failed\n"); + FAILED(wbuf[2] != 65, "test59 case 3 failed\n"); + FAILED(wbuf[4] != (sljit_sw)wbuf + 134, "test59 case 4 failed\n"); + + if (sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + FAILED(wbuf[5] != -88, "test59 case 5 failed\n"); + FAILED(sbuf[2] != 79.75, "test59 case 6 failed\n"); + FAILED(sbuf[3] != 8.625, "test59 case 7 failed\n"); + } + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test60(void) +{ + /* Test memory accesses with pre/post updates. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + sljit_s32 supported[10]; + sljit_sw wbuf[18]; + sljit_s8 bbuf[4]; + sljit_s32 ibuf[4]; + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + static sljit_u8 expected[10] = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }; +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + static sljit_u8 expected[10] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + static sljit_u8 expected[10] = { 1, 0, 1, 1, 0, 1, 1, 1, 0, 0 }; +#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + static sljit_u8 expected[10] = { 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }; +#else + static sljit_u8 expected[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +#endif + + if (verbose) + printf("Run test60\n"); + + for (i = 0; i < 18; i++) + wbuf[i] = 0; + wbuf[2] = -887766; + + bbuf[0] = 0; + bbuf[1] = 0; + bbuf[2] = -13; + + ibuf[0] = -5678; + ibuf[1] = 0; + ibuf[2] = 0; + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 4, 3, 4, 0, sizeof(sljit_sw)); + + supported[0] = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R1, SLJIT_MEM1(SLJIT_R0), 2 * 
sizeof(sljit_sw)); + if (supported[0] == SLJIT_SUCCESS) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R1, SLJIT_MEM1(SLJIT_R0), 2 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + } + + supported[1] = sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_R0, SLJIT_MEM1(SLJIT_R2), -2 * (sljit_sw)sizeof(sljit_s8)); + if (supported[1] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S1, 0, SLJIT_IMM, 2 * sizeof(sljit_s8)); + sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_POST, SLJIT_R0, SLJIT_MEM1(SLJIT_R2), -2 * (sljit_sw)sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R2, 0); + } + + supported[2] = sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R1), -2 * (sljit_sw)sizeof(sljit_s32)); + if (supported[2] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S2, 0, SLJIT_IMM, 2 * sizeof(sljit_s32)); + sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R1), -2 * (sljit_sw)sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[3] = sljit_emit_mem(compiler, SLJIT_MOV32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R1, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32)); + if (supported[3] == SLJIT_SUCCESS) { + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -8765); + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_S2, 0, SLJIT_IMM, 
sizeof(sljit_s32)); + sljit_emit_mem(compiler, SLJIT_MOV32 | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R1, SLJIT_MEM1(SLJIT_R2), 2 * sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_R2, 0); + } + + supported[4] = sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_R1, SLJIT_MEM1(SLJIT_R2), -128 * (sljit_sw)sizeof(sljit_s8)); + if (supported[4] == SLJIT_SUCCESS) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -121); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S1, 0); + sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_R1, SLJIT_MEM1(SLJIT_R2), -128 * (sljit_sw)sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 8 * sizeof(sljit_sw), SLJIT_R2, 0); + } + + supported[5] = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R1, SLJIT_MEM1(SLJIT_R0), 1); + if (supported[5] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 9 * sizeof(sljit_sw) - 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, -881199); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R1, SLJIT_MEM1(SLJIT_R0), 1); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 10 * sizeof(sljit_sw), SLJIT_R0, 0); + } + + supported[6] = sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + if (supported[6] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S2, 0, SLJIT_IMM, 213); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -213); + sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 
* sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[7] = sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + if (supported[7] == SLJIT_SUCCESS) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2 * sizeof(sljit_s32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -7890); + sljit_emit_mem(compiler, SLJIT_MOV_S32 | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[8] = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 2); + if (supported[8] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_POST, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 2); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[9] = sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + if (supported[9] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 2 * sizeof(sljit_s8)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, -2 * (sljit_sw)sizeof(sljit_s8)); + sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_POST, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 16 * sizeof(sljit_sw), SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 17 * sizeof(sljit_sw), SLJIT_R1, 
0); + } + + SLJIT_ASSERT(sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 1) == SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(sljit_emit_mem(compiler, SLJIT_MOV_S8 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_R0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 1) == SLJIT_ERR_UNSUPPORTED); + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + /* TODO: at least for ARM (both V5 and V7) the range below needs further fixing */ + SLJIT_ASSERT(sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R1, SLJIT_MEM1(SLJIT_R0), 256) == SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_R1, SLJIT_MEM1(SLJIT_R0), -257) == SLJIT_ERR_UNSUPPORTED); +#endif + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&wbuf, (sljit_sw)&bbuf, (sljit_sw)&ibuf); + + FAILED(sizeof(expected) != sizeof(supported) / sizeof(sljit_s32), "test60 case 1 failed\n"); + + for (i = 0; i < sizeof(expected); i++) { + if (expected[i]) { + if (supported[i] != SLJIT_SUCCESS) { + printf("tast60 case %d should be supported\n", i + 1); + return; + } + } else { + if (supported[i] == SLJIT_SUCCESS) { + printf("test60 case %d should not be supported\n", i + 1); + return; + } + } + } + + FAILED(supported[0] == SLJIT_SUCCESS && wbuf[0] != -887766, "test60 case 2 failed\n"); + FAILED(supported[0] == SLJIT_SUCCESS && wbuf[1] != (sljit_sw)(wbuf + 2), "test60 case 3 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && wbuf[3] != -13, "test60 case 4 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && wbuf[4] != (sljit_sw)(bbuf), "test60 case 5 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && wbuf[5] != -5678, "test60 case 6 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && wbuf[6] != 
(sljit_sw)(ibuf), "test60 case 7 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && ibuf[1] != -8765, "test60 case 8 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && wbuf[7] != (sljit_sw)(ibuf + 1), "test60 case 9 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && bbuf[0] != -121, "test60 case 10 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && wbuf[8] != (sljit_sw)(bbuf) - 128 * sizeof(sljit_s8), "test60 case 11 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && wbuf[9] != -881199, "test60 case 12 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && wbuf[10] != (sljit_sw)(wbuf + 9), "test60 case 13 failed\n"); + FAILED(supported[6] == SLJIT_SUCCESS && wbuf[11] != -5678, "test60 case 14 failed\n"); + FAILED(supported[6] == SLJIT_SUCCESS && wbuf[12] != (sljit_sw)(ibuf), "test60 case 15 failed\n"); + FAILED(supported[7] == SLJIT_SUCCESS && ibuf[2] != -7890, "test60 case 16 failed\n"); + FAILED(supported[7] == SLJIT_SUCCESS && wbuf[13] != (sljit_sw)(ibuf + 2), "test60 case 17 failed\n"); + FAILED(supported[8] == SLJIT_SUCCESS && wbuf[14] != -887766, "test60 case 18 failed\n"); + FAILED(supported[8] == SLJIT_SUCCESS && wbuf[15] != (sljit_sw)(wbuf + 10), "test60 case 19 failed\n"); + FAILED(supported[9] == SLJIT_SUCCESS && wbuf[16] != -13, "test60 case 20 failed\n"); + FAILED(supported[9] == SLJIT_SUCCESS && wbuf[17] != (sljit_sw)(bbuf), "test60 case 21 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test61(void) +{ + /* Test float memory accesses with pre/post updates. 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + sljit_s32 supported[6]; + sljit_sw wbuf[6]; + sljit_f64 dbuf[4]; + sljit_f32 sbuf[4]; +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + static sljit_u8 expected[6] = { 1, 1, 1, 1, 0, 0 }; +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + static sljit_u8 expected[6] = { 1, 0, 1, 0, 1, 1 }; +#else + static sljit_u8 expected[6] = { 0, 0, 0, 0, 0, 0 }; +#endif + + if (!sljit_has_cpu_feature(SLJIT_HAS_FPU)) { + if (verbose) + printf("no fpu available, test61 skipped\n"); + successful_tests++; + if (compiler) + sljit_free_compiler(compiler); + return; + } + + if (verbose) + printf("Run test61\n"); + + for (i = 0; i < 6; i++) + wbuf[i] = 0; + + dbuf[0] = 66.725; + dbuf[1] = 0.0; + dbuf[2] = 0.0; + dbuf[3] = 0.0; + + sbuf[0] = 0.0; + sbuf[1] = -22.125; + sbuf[2] = 0.0; + sbuf[3] = 0.0; + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 4, 3, 4, 0, sizeof(sljit_sw)); + + supported[0] = sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_f64)); + if (supported[0] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 4 * sizeof(sljit_f64)); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_PRE, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 4 * sizeof(sljit_f64)); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + } + + supported[1] = sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR2, SLJIT_MEM1(SLJIT_R0), -(sljit_sw)sizeof(sljit_f64)); + if (supported[1] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, 2 * sizeof(sljit_f64)); + 
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S1), 0); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR2, SLJIT_MEM1(SLJIT_R0), -(sljit_sw)sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw), SLJIT_R0, 0); + } + + supported[2] = sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_FR1, SLJIT_MEM1(SLJIT_R2), -4 * (sljit_sw)sizeof(sljit_f32)); + if (supported[2] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S2, 0, SLJIT_IMM, 4 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR1, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_FR1, SLJIT_MEM1(SLJIT_R2), -4 * (sljit_sw)sizeof(sljit_f32)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_R2, 0); + } + + supported[3] = sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR1, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f32)); + if (supported[3] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S2, 0, SLJIT_IMM, sizeof(sljit_f32)); + sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_POST, SLJIT_FR1, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_S2), 2 * sizeof(sljit_f32), SLJIT_FR1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[4] = sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + if (supported[4] == SLJIT_SUCCESS) { + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S1, 0, SLJIT_IMM, 8 * sizeof(sljit_f64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, -8 * (sljit_sw)sizeof(sljit_f64)); + sljit_emit_fmem(compiler, SLJIT_MOV_F64 | 
SLJIT_MEM_PRE, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_f64), SLJIT_FR0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_R1, 0); + } + + supported[5] = sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_FR2, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 0); + if (supported[5] == SLJIT_SUCCESS) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3 * sizeof(sljit_f32)); + sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_f32)); + sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_FR2, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_R2, 0); + } + + SLJIT_ASSERT(sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0) == SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(sljit_emit_fmem(compiler, SLJIT_MOV_F32 | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0) == SLJIT_ERR_UNSUPPORTED); + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + /* TODO: at least for ARM (both V5 and V7) the range below needs further fixing */ + SLJIT_ASSERT(sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 256) == SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(sljit_emit_fmem(compiler, SLJIT_MOV_F64 | SLJIT_MEM_SUPP | SLJIT_MEM_POST, SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), -257) == SLJIT_ERR_UNSUPPORTED); +#endif + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func3((sljit_sw)&wbuf, (sljit_sw)&dbuf, (sljit_sw)&sbuf); + + FAILED(sizeof(expected) != 
sizeof(supported) / sizeof(sljit_s32), "test61 case 1 failed\n"); + + for (i = 0; i < sizeof(expected); i++) { + if (expected[i]) { + if (supported[i] != SLJIT_SUCCESS) { + printf("tast61 case %d should be supported\n", i + 1); + return; + } + } else { + if (supported[i] == SLJIT_SUCCESS) { + printf("test61 case %d should not be supported\n", i + 1); + return; + } + } + } + + FAILED(supported[0] == SLJIT_SUCCESS && dbuf[1] != 66.725, "test61 case 2 failed\n"); + FAILED(supported[0] == SLJIT_SUCCESS && wbuf[0] != (sljit_sw)(dbuf), "test61 case 3 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && dbuf[2] != 66.725, "test61 case 4 failed\n"); + FAILED(supported[1] == SLJIT_SUCCESS && wbuf[1] != (sljit_sw)(dbuf + 1), "test61 case 5 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && sbuf[0] != -22.125, "test61 case 6 failed\n"); + FAILED(supported[2] == SLJIT_SUCCESS && wbuf[2] != (sljit_sw)(sbuf), "test61 case 7 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && sbuf[2] != -22.125, "test61 case 8 failed\n"); + FAILED(supported[3] == SLJIT_SUCCESS && wbuf[3] != (sljit_sw)(sbuf + 2), "test61 case 9 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && dbuf[3] != 66.725, "test61 case 10 failed\n"); + FAILED(supported[4] == SLJIT_SUCCESS && wbuf[4] != (sljit_sw)(dbuf), "test61 case 11 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && sbuf[3] != -22.125, "test61 case 12 failed\n"); + FAILED(supported[5] == SLJIT_SUCCESS && wbuf[5] != (sljit_sw)(sbuf + 3), "test61 case 13 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test62(void) +{ + /* Test fast calls flag preservation. 
*/ + executable_code code1; + executable_code code2; + struct sljit_compiler* compiler; + + if (verbose) + printf("Run test62\n"); + + /* A */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + sljit_set_context(compiler, 0, SLJIT_ARG1(SW), 1, 1, 0, 0, 0); + + sljit_emit_fast_enter(compiler, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_S0, 0, SLJIT_IMM, 42); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_R0, 0); + + code1.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + /* B */ + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 1, 1, 0, 0, 0); + sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, SLJIT_FUNC_OFFSET(code1.code)); + sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_Z | SLJIT_SET_LESS); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_ZERO); + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_LESS); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1); + sljit_emit_op2(compiler, SLJIT_OR, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_S0, 0); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code2.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + FAILED(code2.func1(88) != 0, "test62 case 1 failed\n"); + FAILED(code2.func1(42) != 1, "test62 case 2 failed\n"); + FAILED(code2.func1(0) != 2, "test62 case 3 failed\n"); + + sljit_free_code(code1.code, NULL); + sljit_free_code(code2.code, NULL); + successful_tests++; +} + +static void test63(void) +{ + /* Test put label. 
*/ + executable_code code; + struct sljit_label *label[2]; + struct sljit_put_label *put_label[5]; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_uw addr[2]; + sljit_uw buf[4]; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_sw offs = SLJIT_W(0x123456789012); +#else + sljit_sw offs = 0x12345678; +#endif + + if (verbose) + printf("Run test63\n"); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 2 * sizeof(sljit_sw)); + + put_label[0] = sljit_emit_put_label(compiler, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + + put_label[1] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_uw), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); + + label[0] = sljit_emit_label(compiler); + sljit_set_put_label(put_label[0], label[0]); + sljit_set_put_label(put_label[1], label[0]); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)(buf + 2) - offs); + put_label[2] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_R0), offs); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (offs + sizeof(sljit_uw)) >> 1); + put_label[3] = sljit_emit_put_label(compiler, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); + + label[1] = sljit_emit_label(compiler); + sljit_set_put_label(put_label[2], label[1]); + sljit_set_put_label(put_label[3], label[1]); + + put_label[4] = sljit_emit_put_label(compiler, SLJIT_RETURN_REG, 0); + sljit_set_put_label(put_label[4], label[0]); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + addr[0] = sljit_get_label_addr(label[0]); + addr[1] = sljit_get_label_addr(label[1]); + + sljit_free_compiler(compiler); + + FAILED(code.func1((sljit_sw)&buf) != addr[0], 
"test63 case 1 failed\n"); + FAILED(buf[0] != addr[0], "test63 case 2 failed\n"); + FAILED(buf[1] != addr[0], "test63 case 3 failed\n"); + FAILED(buf[2] != addr[1], "test63 case 4 failed\n"); + FAILED(buf[3] != addr[1], "test63 case 5 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test64(void) +{ + /* Test put label with absolute label addresses */ + executable_code code; + sljit_uw malloc_addr; + struct sljit_label label[4]; + struct sljit_put_label *put_label[2]; + struct sljit_compiler* compiler; + sljit_uw buf[5]; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + sljit_sw offs1 = SLJIT_W(0x123456781122); + sljit_sw offs2 = SLJIT_W(0x1234567811223344); +#else /* !SLJIT_64BIT_ARCHITECTURE */ + sljit_sw offs1 = 0x12345678; + sljit_sw offs2 = 0x80000000; +#endif /* SLJIT_64BIT_ARCHITECTURE */ + + if (verbose) + printf("Run test64\n"); + + /* lock next allocation; see sljit_test_malloc_exec() */ +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + malloc_addr = (sljit_uw)SLJIT_MALLOC_EXEC(1024, NULL); + + if (!malloc_addr) { + printf("Cannot allocate executable memory\n"); + return; + } + + compiler = sljit_create_compiler(NULL, (void*)malloc_addr); + malloc_addr += SLJIT_EXEC_OFFSET((void*)malloc_addr); +#else /* SLJIT_CONFIG_UNSUPPORTED */ + malloc_addr = 0; + compiler = sljit_create_compiler(NULL, (void*)malloc_addr); +#endif /* !SLJIT_CONFIG_UNSUPPORTED */ + + label[0].addr = 0x1234; + label[0].size = (sljit_uw)(0x1234 - malloc_addr); + + label[1].addr = 0x12345678; + label[1].size = (sljit_uw)(0x12345678 - malloc_addr); + + label[2].addr = offs1; + label[2].size = (sljit_uw)(offs1 - malloc_addr); + + label[3].addr = offs2; + label[3].size = (sljit_uw)(offs2 - malloc_addr); + + FAILED(!compiler, "cannot create compiler\n"); + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + buf[4] = 0; + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 2 * sizeof(sljit_sw)); + + 
put_label[0] = sljit_emit_put_label(compiler, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); + + put_label[1] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_uw), SLJIT_MEM1(SLJIT_SP), sizeof(sljit_uw)); + + sljit_set_put_label(put_label[0], &label[0]); + sljit_set_put_label(put_label[1], &label[0]); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)(buf + 2) - offs1); + put_label[0] = sljit_emit_put_label(compiler, SLJIT_MEM1(SLJIT_R0), offs1); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (offs1 + sizeof(sljit_uw)) >> 1); + put_label[1] = sljit_emit_put_label(compiler, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); + + sljit_set_put_label(put_label[0], &label[1]); + sljit_set_put_label(put_label[1], &label[1]); + + put_label[0] = sljit_emit_put_label(compiler, SLJIT_R2, 0); + sljit_set_put_label(put_label[0], &label[2]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_uw), SLJIT_R2, 0); + + put_label[0] = sljit_emit_put_label(compiler, SLJIT_RETURN_REG, 0); + sljit_set_put_label(put_label[0], &label[3]); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + SLJIT_ASSERT(SLJIT_FUNC_OFFSET(code.code) >= (sljit_sw)malloc_addr && SLJIT_FUNC_OFFSET(code.code) <= (sljit_sw)malloc_addr + 8); + + FAILED(code.func1((sljit_sw)&buf) != label[3].addr, "test64 case 1 failed\n"); + FAILED(buf[0] != label[0].addr, "test64 case 2 failed\n"); + FAILED(buf[1] != label[0].addr, "test64 case 3 failed\n"); + FAILED(buf[2] != label[1].addr, "test64 case 4 failed\n"); + FAILED(buf[3] != label[1].addr, "test64 case 5 failed\n"); + FAILED(buf[4] != label[2].addr, "test64 case 6 failed\n"); + + sljit_free_code(code.code, NULL); + + successful_tests++; +} + +static void test65(void) 
+{ + /* Test jump tables. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + /* Normally this table is allocated on the heap. */ + sljit_uw addr[64]; + struct sljit_label *labels[64]; + struct sljit_jump *jump; + + if (verbose) + printf("Run test65\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 1, 2, 0, 0, 0); + + jump = sljit_emit_cmp(compiler, SLJIT_GREATER_EQUAL, SLJIT_S0, 0, SLJIT_IMM, 64); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)addr); + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM2(SLJIT_R0, SLJIT_S0), SLJIT_WORD_SHIFT); + + for (i = 0; i < 64; i++) { + labels[i] = sljit_emit_label(compiler); + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, i * 2); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + } + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_IMM, -1); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + for (i = 0; i < 64; i++) { + addr[i] = sljit_get_label_addr(labels[i]); + } + + sljit_free_compiler(compiler); + + FAILED(code.func2(64, 0) != -1, "test65 case 1 failed\n"); + + for (i = 0; i < 64; i++) { + FAILED(code.func2(i, i * 2) != i * 4, "test65 case 2 failed\n"); + } + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test66(void) +{ + /* Test direct jumps (computed goto). 
*/ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_s32 i; + sljit_uw addr[64]; + struct sljit_label *labels[64]; + + if (verbose) + printf("Run test66\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 1, 2, 0, 0, 0); + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_S0, 0); + + for (i = 0; i < 64; i++) { + labels[i] = sljit_emit_label(compiler); + sljit_emit_op0(compiler, SLJIT_ENDBR); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S1, 0, SLJIT_IMM, i * 2); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + } + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + for (i = 0; i < 64; i++) { + addr[i] = sljit_get_label_addr(labels[i]); + } + + sljit_free_compiler(compiler); + + for (i = 0; i < 64; i++) { + FAILED(code.func2(addr[i], i) != i * 3, "test66 case 1 failed\n"); + } + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test67(void) +{ + /* Test skipping returns from fast calls (return type is fast). */ + executable_code code; + struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL); + struct sljit_jump *call, *jump; + struct sljit_label *label; + + if (verbose) + printf("Run test67\n"); + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, 0, 3, 1, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + call = sljit_emit_jump(compiler, SLJIT_FAST_CALL); + + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + /* First function, never returns. */ + label = sljit_emit_label(compiler); + sljit_set_label(call, label); + sljit_emit_fast_enter(compiler, SLJIT_R1, 0); + + call = sljit_emit_jump(compiler, SLJIT_FAST_CALL); + + /* Should never return here, marked by a segmentation fault if it does. 
*/ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + /* Second function, skips the first function. */ + sljit_set_label(call, sljit_emit_label(compiler)); + sljit_emit_fast_enter(compiler, SLJIT_R2, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + + jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 1); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S0, 0, SLJIT_R1, 0); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_FAST_CALL), label); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + sljit_emit_op_src(compiler, SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN, SLJIT_S0, 0); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_S0, 0); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_op_src(compiler, SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN, SLJIT_R1, 0); + sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_R1, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + sljit_free_compiler(compiler); + + FAILED(code.func0() != 3, "test67 case 1 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +static void test68(void) +{ + /* Test skipping returns from fast calls (return type is normal). */ + executable_code code; + struct sljit_compiler *compiler; + struct sljit_jump *call, *jump; + struct sljit_label *label; + int i; + + if (verbose) + printf("Run test68\n"); + + for (i = 0; i < 6 * 2; i++) { + compiler = sljit_create_compiler(NULL, NULL); + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, (i >= 6 ? SLJIT_F64_ALIGNMENT : 0), 0, 2 + (i % 6), (i % 6), 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); + call = sljit_emit_jump(compiler, SLJIT_FAST_CALL); + + /* Should never return here, marked by a segmentation fault if it does. */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM0(), 0); + + /* Recursive fast call. 
*/ + label = sljit_emit_label(compiler); + sljit_set_label(call, label); + sljit_emit_fast_enter(compiler, SLJIT_R1, 0); + + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1); + + jump = sljit_emit_cmp(compiler, SLJIT_GREATER_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 4); + + sljit_set_label(sljit_emit_jump(compiler, SLJIT_FAST_CALL), label); + + sljit_set_label(jump, sljit_emit_label(compiler)); + sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); + sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + + sljit_free_compiler(compiler); + + if (SLJIT_UNLIKELY(code.func0() != 4)) { + printf("test68 case %d failed\n", i + 1); + return; + } + sljit_free_code(code.code, NULL); + } + + successful_tests++; +} + +static void test69(void) +{ + /* Test sljit_set_current_flags. */ + executable_code code; + struct sljit_compiler* compiler = sljit_create_compiler(NULL, NULL); + sljit_sw buf[8]; + sljit_s32 i; + + if (verbose) + printf("Run test69\n"); + + for (i = 0; i < 8; i++) + buf[i] = 4; + + FAILED(!compiler, "cannot create compiler\n"); + + sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 3, 1, 0, 0, 0); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)1 << ((sizeof (sljit_sw) * 8) - 2)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW | SLJIT_CURRENT_FLAGS_ADD_SUB); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 5); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW | SLJIT_CURRENT_FLAGS_ADD_SUB); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 
sizeof(sljit_sw), SLJIT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 2 * sizeof(sljit_sw), SLJIT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 5); + sljit_emit_op2(compiler, SLJIT_MUL | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_OVERFLOW); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 6); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 5); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_R2, 0); + sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_GREATER | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_sw), SLJIT_GREATER); + + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_R2, 0); + sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_sw), SLJIT_ZERO); + + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R1, 0, SLJIT_IMM, -1 << 31); + sljit_emit_op2(compiler, SLJIT_ADD32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_R1, 0); + sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_I32_OP | SLJIT_CURRENT_FLAGS_ADD_SUB); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_sw), SLJIT_ZERO); + + sljit_emit_op2(compiler, SLJIT_SHL32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + 
sljit_emit_label(compiler); + sljit_set_current_flags(compiler, SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_I32_OP); + cond_set(compiler, SLJIT_MEM1(SLJIT_S0), 7 * sizeof(sljit_sw), SLJIT_NOT_ZERO); + + sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0); + + code.code = sljit_generate_code(compiler); + CHECK(compiler); + sljit_free_compiler(compiler); + + code.func1((sljit_sw)&buf); + + FAILED(buf[0] != 1, "test69 case 1 failed\n"); + FAILED(buf[1] != 2, "test69 case 2 failed\n"); + FAILED(buf[2] != 1, "test69 case 3 failed\n"); + FAILED(buf[3] != 2, "test69 case 4 failed\n"); + FAILED(buf[4] != 1, "test69 case 5 failed\n"); + FAILED(buf[5] != 2, "test69 case 6 failed\n"); + FAILED(buf[6] != 1, "test69 case 7 failed\n"); + FAILED(buf[7] != 2, "test69 case 8 failed\n"); + + sljit_free_code(code.code, NULL); + successful_tests++; +} + +int sljit_test(int argc, char* argv[]) +{ + sljit_s32 has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); + verbose = has_arg && argv[1][1] == 'v'; + silent = has_arg && argv[1][1] == 's'; + + if (!verbose && !silent) + printf("Pass -v to enable verbose, -s to disable this hint.\n\n"); + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + test_exec_allocator(); +#endif + test1(); + test2(); + test3(); + test4(); + test5(); + test6(); + test7(); + test8(); + test9(); + test10(); + test11(); + test12(); + test13(); + test14(); + test15(); + test16(); + test17(); + test18(); + test19(); + test20(); + test21(); + test22(); + test23(); + test24(); + test25(); + test26(); + test27(); + test28(); + test29(); + test30(); + test31(); + test32(); + test33(); + test34(); + test35(); + test36(); + test37(); + test38(); + test39(); + test40(); + test41(); + test42(); + test43(); + test44(); + test45(); + test46(); + test47(); + test48(); + test49(); + test50(); + test51(); + test52(); + test53(); + test54(); + test55(); + test56(); + test57(); + test58(); + test59(); + test60(); + test61(); + test62(); + test63(); + test64(); + 
test65(); + test66(); + test67(); + test68(); + test69(); + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + sljit_free_unused_memory_exec(); +#endif + +# define TEST_COUNT 69 + + printf("SLJIT tests: "); + if (successful_tests == TEST_COUNT) + printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); + else + printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are " COLOR_RED "FAILED" COLOR_DEFAULT " ", TEST_COUNT - successful_tests, (TEST_COUNT - successful_tests) * 100 / TEST_COUNT); + printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "%s\n", sljit_get_platform_name(), sljit_has_cpu_feature(SLJIT_HAS_FPU) ? " (with fpu)" : " (without fpu)"); + + return TEST_COUNT - successful_tests; + +# undef TEST_COUNT +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/waterbox/ares64/ares/thirdparty/sljitAllocator.cpp b/waterbox/ares64/ares/thirdparty/sljitAllocator.cpp new file mode 100644 index 0000000000..3e5b7e338c --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljitAllocator.cpp @@ -0,0 +1,9 @@ +#include + +#include +#include + +auto sljit_nall_malloc_exec(sljit_uw size, void* exec_allocator_data) -> void* { + auto allocator = (nall::bump_allocator*)exec_allocator_data; + return allocator->acquire(size); +} diff --git a/waterbox/ares64/ares/thirdparty/sljitConfigPost.h b/waterbox/ares64/ares/thirdparty/sljitConfigPost.h new file mode 100644 index 0000000000..6baa6bff03 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljitConfigPost.h @@ -0,0 +1,10 @@ +#ifdef __cplusplus +extern "C" { +#endif + +//custom allocator +void* sljit_nall_malloc_exec(sljit_uw size, void* exec_allocator_data); + +#ifdef __cplusplus +} +#endif diff --git a/waterbox/ares64/ares/thirdparty/sljitConfigPre.h b/waterbox/ares64/ares/thirdparty/sljitConfigPre.h new file mode 100644 index 0000000000..63fed226b3 --- /dev/null +++ b/waterbox/ares64/ares/thirdparty/sljitConfigPre.h @@ -0,0 +1,11 @@ +//custom allocator +#define 
SLJIT_EXECUTABLE_ALLOCATOR 0 +#define SLJIT_MALLOC_EXEC(size, data) sljit_nall_malloc_exec((size), (data)) +#define SLJIT_FREE_EXEC(ptr, data) 0 +#define SLJIT_EXEC_OFFSET(ptr) 0 + +//debug-only options +#if !defined(BUILD_DEBUG) +#define SLJIT_DEBUG 0 +#define SLJIT_VERBOSE 0 +#endif