From d8f1d106f0965e469c84c661e0f80dafd418e0fd Mon Sep 17 00:00:00 2001 From: Nadia Holmquist Pedersen Date: Tue, 5 Nov 2024 07:58:31 +0100 Subject: [PATCH 1/4] flake: remove workaround no longer needed with Darwin SDK changes also add the Qt tools to the dev shell since they're needed for Qt Designer and such --- flake.lock | 6 +++--- flake.nix | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/flake.lock b/flake.lock index be75f57f..d7dd5bc6 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1729665710, - "narHash": "sha256-AlcmCXJZPIlO5dmFzV3V2XF6x/OpNWUV8Y/FMPGd8Z4=", + "lastModified": 1730531603, + "narHash": "sha256-Dqg6si5CqIzm87sp57j5nTaeBbWhHFaVyG7V6L8k3lY=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "2768c7d042a37de65bb1b5b3268fc987e534c49d", + "rev": "7ffd9ae656aec493492b44d0ddfb28e79a1ea25d", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 8d500c03..a7768e4e 100644 --- a/flake.nix +++ b/flake.nix @@ -19,7 +19,7 @@ then sourceInfo.dirtyShortRev else sourceInfo.shortRev; - melonDS = pkgs.qt6.qtbase.stdenv.mkDerivation { + melonDS = pkgs.stdenv.mkDerivation { pname = "melonDS"; version = "0.9.5-${shortRevision}"; src = ./.; @@ -74,8 +74,11 @@ drv = self.packages.${system}.default; }; devShells = { - default = pkgs.mkShell.override { stdenv = pkgs.qt6.qtbase.stdenv; } { + default = pkgs.mkShell { inputsFrom = [ self.packages.${system}.default ]; + packages = with pkgs; [ + qt6.qttools + ]; }; # Shell for building static melonDS release builds with vcpkg From 5959009ebd600a13578a7be6a6f2baffbcf2b436 Mon Sep 17 00:00:00 2001 From: Nadia Holmquist Pedersen Date: Tue, 5 Nov 2024 17:03:07 +0100 Subject: [PATCH 2/4] Use Qt 6 by default on all platforms and update build instructions (#2187) --- .github/workflows/build-ubuntu.yml | 3 +- BUILD.md | 81 ++++++++++++++++++++++++++++++ CMakePresets.json | 4 -- README.md | 70 +------------------------- 
cmake/ConfigureVcpkg.cmake | 6 +-- src/frontend/qt_sdl/CMakeLists.txt | 6 +-- 6 files changed, 85 insertions(+), 85 deletions(-) create mode 100644 BUILD.md diff --git a/.github/workflows/build-ubuntu.yml b/.github/workflows/build-ubuntu.yml index 044d01ee..1104142d 100644 --- a/.github/workflows/build-ubuntu.yml +++ b/.github/workflows/build-ubuntu.yml @@ -28,7 +28,7 @@ jobs: sudo apt install --allow-downgrades cmake ninja-build extra-cmake-modules libpcap0.8-dev libsdl2-dev libenet-dev \ qt6-{base,base-private,multimedia}-dev libqt6svg6-dev libarchive-dev libzstd-dev libfuse2 - name: Configure - run: cmake -B build -G Ninja -DUSE_QT6=ON -DCMAKE_INSTALL_PREFIX=/usr -DMELONDS_EMBED_BUILD_INFO=ON + run: cmake -B build -G Ninja -DCMAKE_INSTALL_PREFIX=/usr -DMELONDS_EMBED_BUILD_INFO=ON - name: Build run: | cmake --build build @@ -79,7 +79,6 @@ jobs: -DPKG_CONFIG_EXECUTABLE=/usr/bin/aarch64-linux-gnu-pkg-config \ -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc-12 \ -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++-12 \ - -DUSE_QT6=ON \ -DMELONDS_EMBED_BUILD_INFO=ON - name: Build shell: bash diff --git a/BUILD.md b/BUILD.md new file mode 100644 index 00000000..51bc4cac --- /dev/null +++ b/BUILD.md @@ -0,0 +1,81 @@ +# Building melonDS + +* [Linux](#linux) +* [Windows](#windows) +* [macOS](#macos) + +## Linux +1. Install dependencies: + * Ubuntu: + * All versions: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev libarchive-dev libenet-dev libzstd-dev` + * 24.04: `sudo apt install qt6-{base,base-private,multimedia,svg}-dev` + * 22.04: `sudo apt install qtbase6-dev qtbase6-private-dev qtmultimedia6-dev libqt6svg6-dev` + * Older versions: `sudo apt install qtbase5-dev qtbase5-private-dev qtmultimedia5-dev libqt5svg5-dev` + Also add `-DUSE_QT6=OFF` to the first CMake command below. 
+ * Fedora: `sudo dnf install gcc-c++ cmake extra-cmake-modules SDL2-devel libarchive-devel enet-devel libzstd-devel qt6-{qtbase,qtmultimedia,qtsvg}-devel wayland-devel` + * Arch Linux: `sudo pacman -S base-devel cmake extra-cmake-modules git libpcap sdl2 qt6-{base,multimedia,svg} libarchive enet zstd` +2. Download the melonDS repository and prepare: + ```bash + git clone https://github.com/melonDS-emu/melonDS + cd melonDS + ``` +3. Compile: + ```bash + cmake -B build + cmake --build build -j$(nproc --all) + ``` + +## Windows +1. Install [MSYS2](https://www.msys2.org/) +2. Open the MSYS2 terminal from the Start menu: + * For x64 systems (most common), use **MSYS2 UCRT64** + * For ARM64 systems, use **MSYS2 CLANGARM64** +3. Update the packages using `pacman -Syu` and reopen the same terminal if it asks you to +4. Install git and clone the repository + ```bash + pacman -S git + git clone https://github.com/melonDS-emu/melonDS + cd melonDS + ``` +5. Install dependencies: + Replace `<prefix>` below with `mingw-w64-ucrt-x86_64` on x64 systems, or `mingw-w64-clang-aarch64` on ARM64 systems. + ```bash + pacman -S <prefix>-{toolchain,cmake,SDL2,libarchive,enet,zstd} + ``` +6. Install Qt and configure the build directory + * Dynamic builds (with DLLs) + 1. Install Qt: `pacman -S <prefix>-{qt6-base,qt6-svg,qt6-multimedia,qt6-tools}` + 2. Set up the build directory with `cmake -B build` + * Static builds (without DLLs, standalone executable) + 1. Install Qt: `pacman -S <prefix>-qt5-static` + (Note: As of writing, the `qt6-static` package does not work.) + 2. Set up the build directory with `cmake -B build -DBUILD_STATIC=ON -DUSE_QT6=OFF -DCMAKE_PREFIX_PATH=$MSYSTEM_PREFIX/qt5-static` +7. Compile: `cmake --build build` + +If everything went well, melonDS should now be in the `build` folder. For dynamic builds, you may need to run melonDS from the MSYS2 terminal in order for it to find the required DLLs. + +## macOS +1. Install the [Homebrew Package Manager](https://brew.sh) +2.
Install dependencies: `brew install git pkg-config cmake sdl2 qt@6 libarchive enet zstd` +3. Download the melonDS repository and prepare: + ```zsh + git clone https://github.com/melonDS-emu/melonDS + cd melonDS + ``` +4. Compile: + ```zsh + cmake -B build -DCMAKE_PREFIX_PATH="$(brew --prefix qt@6);$(brew --prefix libarchive)" + cmake --build build -j$(sysctl -n hw.logicalcpu) + ``` +If everything went well, melonDS.app should now be in the `build` directory. + +### Self-contained app bundle +If you want an app bundle that can be distributed to other computers without needing to install dependencies through Homebrew, you can additionally run ` +../tools/mac-libs.rb .` after the build is completed, or add `-DMACOS_BUNDLE_LIBS=ON` to the first CMake command. + +## Nix (macOS/Linux) + +melonDS provides a Nix flake with support for both macOS and Linux. The [Nix package manager](https://nixos.org) needs to be installed to use it. + +* To run melonDS, just type `nix run github:melonDS-emu/melonDS`. +* To get a shell for development, clone the melonDS repository and type `nix develop` in its directory. \ No newline at end of file diff --git a/CMakePresets.json b/CMakePresets.json index 2144417b..b506b88a 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -27,10 +27,6 @@ "binaryDir": "${sourceDir}/build/release-mingw-x86_64", "generator": "Ninja", "cacheVariables": { - "USE_QT6": { - "type": "BOOL", - "value": "ON" - }, "BUILD_STATIC": { "type": "BOOL", "value": "ON" diff --git a/README.md b/README.md index eb8b1358..2cf42c2d 100644 --- a/README.md +++ b/README.md @@ -32,75 +32,7 @@ DS BIOS dumps from a DSi or 3DS can be used with no compatibility issues. DSi BI As for the rest, the interface should be pretty straightforward. If you have a question, don't hesitate to ask, though! ## How to build - -### Linux -1. 
Install dependencies: - * Ubuntu 22.04: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtbase5-private-dev qtmultimedia5-dev libqt5svg5-dev libarchive-dev libenet-dev libzstd-dev` - * Older Ubuntu: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default qtbase5-private-dev qtmultimedia5-dev libqt5svg5-dev libarchive-dev libenet-dev libzstd-dev` - * Arch Linux: `sudo pacman -S base-devel cmake extra-cmake-modules git libpcap sdl2 qt5-base qt5-multimedia qt5-svg libarchive enet zstd` -3. Download the melonDS repository and prepare: - ```bash - git clone https://github.com/melonDS-emu/melonDS - cd melonDS - ``` - -3. Compile: - ```bash - cmake -B build - cmake --build build -j$(nproc --all) - ``` - -### Windows -1. Install [MSYS2](https://www.msys2.org/) -2. Open the **MSYS2 MinGW 64-bit** terminal -3. Update the packages using `pacman -Syu` and reopen the terminal if it asks you to -4. Install git to clone the repository - ```bash - pacman -S git - ``` -5. Download the melonDS repository and prepare: - ```bash - git clone https://github.com/melonDS-emu/melonDS - cd melonDS - ``` -#### Dynamic builds (with DLLs) -5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-base,qt5-svg,qt5-multimedia,qt5-svg,qt5-tools,libarchive,enet,zstd}` -6. Compile: - ```bash - cmake -B build - cmake --build build - cd build - ../tools/msys-dist.sh - ``` -If everything went well, melonDS and the libraries it needs should now be in the `dist` folder. - -#### Static builds (without DLLs, standalone executable) -5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-static,libarchive,enet,zstd}` -6. Compile: - ```bash - cmake -B build -DBUILD_STATIC=ON -DCMAKE_PREFIX_PATH=/mingw64/qt5-static - cmake --build build - ``` -If everything went well, melonDS should now be in the `build` folder. - -### macOS -1. 
Install the [Homebrew Package Manager](https://brew.sh) -2. Install dependencies: `brew install git pkg-config cmake sdl2 qt@6 libarchive enet zstd` -3. Download the melonDS repository and prepare: - ```zsh - git clone https://github.com/melonDS-emu/melonDS - cd melonDS - ``` -4. Compile: - ```zsh - cmake -B build -DCMAKE_PREFIX_PATH="$(brew --prefix qt@6);$(brew --prefix libarchive)" - cmake --build build -j$(sysctl -n hw.logicalcpu) - ``` -If everything went well, melonDS.app should now be in the `build` directory. - -#### Self-contained app bundle -If you want an app bundle that can be distributed to other computers without needing to install dependencies through Homebrew, you can additionally run ` -../tools/mac-libs.rb .` after the build is completed, or add `-DMACOS_BUNDLE_LIBS=ON` to the first CMake command. +See [BUILD.md](./BUILD.md) for build instructions. ## TODO LIST diff --git a/cmake/ConfigureVcpkg.cmake b/cmake/ConfigureVcpkg.cmake index c1eb522d..3fb0786f 100644 --- a/cmake/ConfigureVcpkg.cmake +++ b/cmake/ConfigureVcpkg.cmake @@ -19,11 +19,7 @@ set(VCPKG_OVERLAY_TRIPLETS "${CMAKE_SOURCE_DIR}/cmake/overlay-triplets") option(USE_RECOMMENDED_TRIPLETS "Use the recommended triplets that are used for official builds" ON) # Duplicated here because it needs to be set before project() -if (NOT WIN32) - option(USE_QT6 "Build using Qt 6 instead of 5" ON) -else() - option(USE_QT6 "Build using Qt 6 instead of 5" OFF) -endif() +option(USE_QT6 "Use Qt 6 instead of Qt 5" ON) # Since the Linux build pulls in glib anyway, we can just use upstream libslirp if (UNIX AND NOT APPLE) diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt index 7dc4a00c..54888c49 100644 --- a/src/frontend/qt_sdl/CMakeLists.txt +++ b/src/frontend/qt_sdl/CMakeLists.txt @@ -59,11 +59,7 @@ set(SOURCES_QT_SDL NetplayDialog.cpp ) -if (APPLE) - option(USE_QT6 "Build using Qt 6 instead of 5" ON) -else() - option(USE_QT6 "Build using Qt 6 instead of 5" OFF) -endif() 
+option(USE_QT6 "Use Qt 6 instead of Qt 5" ON) if (USE_QT6) find_package(Qt6 COMPONENTS Core Gui Widgets Network Multimedia OpenGL OpenGLWidgets Svg REQUIRED) From 3c7db9b21f232f8dece502d20a9dba546fea217c Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 5 Nov 2024 21:56:19 -0500 Subject: [PATCH 3/4] correct thumb multiply timings --- src/ARMInterpreter_ALU.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 83fc1944..504a9c21 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -1583,10 +1583,10 @@ void T_MUL_REG(ARM* cpu) else { cpu->SetC(0); // carry flag destroyed, they say. whatever that means... - if (a & 0xFF000000) cycles += 4; - else if (a & 0x00FF0000) cycles += 3; - else if (a & 0x0000FF00) cycles += 2; - else cycles += 1; + if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; + else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2; + else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3; + else cycles = 4; } cpu->AddCycles_CI(cycles); } From 3d49f5f2560084dbc70b1df780ad12cf24e5b97f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 6 Nov 2024 00:18:29 -0500 Subject: [PATCH 4/4] arm7 muls carry flag emulation. 
--- src/ARMInterpreter_ALU.cpp | 19 ++-- src/ARMInterpreter_MultiplySuperLLE.h | 136 ++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 src/ARMInterpreter_MultiplySuperLLE.h diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 504a9c21..72992f0f 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -19,6 +19,7 @@ #include #include "ARM.h" #include "NDS.h" +#include "ARMInterpreter_MultiplySuperLLE.h" namespace melonDS::ARMInterpreter { @@ -854,7 +855,6 @@ void A_MUL(ARM* cpu) { cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -866,6 +866,7 @@ void A_MUL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; else cycles = 4; + if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, 0, cycles==4)); } cpu->AddCycles_CI(cycles); @@ -886,7 +887,6 @@ void A_MLA(ARM* cpu) { cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -898,6 +898,7 @@ void A_MLA(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, rn, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -919,7 +920,6 @@ void A_UMULL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -931,6 +931,7 @@ void A_UMULL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(0, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -955,7 +956,6 @@ void A_UMLAL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; 
@@ -967,6 +967,7 @@ void A_UMLAL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(rd, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -988,7 +989,6 @@ void A_SMULL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -1000,6 +1000,7 @@ void A_SMULL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -1024,7 +1025,6 @@ void A_SMLAL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -1036,6 +1036,7 @@ void A_SMLAL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -1575,18 +1576,18 @@ void T_MUL_REG(ARM* cpu) cpu->SetNZ(res & 0x80000000, !res); - s32 cycles = 0; + s32 cycles; if (cpu->Num == 0) { - cycles += 3; + cycles = 3; } else { - cpu->SetC(0); // carry flag destroyed, they say. whatever that means... if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2; else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3; else cycles = 4; + cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means... 
} cpu->AddCycles_CI(cycles); } diff --git a/src/ARMInterpreter_MultiplySuperLLE.h b/src/ARMInterpreter_MultiplySuperLLE.h new file mode 100644 index 00000000..21b17bbc --- /dev/null +++ b/src/ARMInterpreter_MultiplySuperLLE.h @@ -0,0 +1,136 @@ +#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H +#define ARMINTERPRETER_MULTIPLYSUPERLLE_H + +#include "types.h" + +using namespace melonDS; + +/* + Copyright (c) 2024 zaydlang + + This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. + If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + + + + +// code taken from: (also features a few alternative implementations that could maybe be worth looking at?) +// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h +// based on research that can be found here: https://bmchtech.github.io/post/multiply/ + +// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI. 
+// booths_multiplication32_opt: feeds Booth-recoded addends into a carry-save pair (output/carry) and returns bit 31 of the final carry word.
+// Used by the *Carry helpers at the bottom of this file whenever lastcycle is false.
+// Takes a multiplier between -0x01000000 and 0x00FFFFFF, cycles between 0 and 2
+static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) {
+    // Set the low bit of the multiplicand to cause negation to invert the upper bits, this bit can't propagate to bit 31
+    multiplicand |= 1;
+
+    // Optimized first iteration
+    u32 booth = (s32)(multiplier << 31) >> 31;
+    u32 carry = booth * multiplicand;
+    // Pre-populate accumulator for output
+    u32 output = accumulator;
+
+    u32 sum = output + carry;
+    int shift = 29; // lowered by 2 per inner iteration; a multiplier outside the documented range would eventually drive it negative
+    do {
+        for (int i = 0; i < 4; i++, shift -= 2) {
+            // Get next booth factor (-2 to 2, shifted left by 30-shift)
+            u32 next_booth = (s32)(multiplier << shift) >> shift;
+            u32 factor = next_booth - booth;
+            booth = next_booth;
+            // Get scaled value of booth addend
+            u32 addend = multiplicand * factor;
+            // Combine the addend with the CSA
+            // Not performing any masking seems to work because the lower carries can't propagate to bit 31
+            output ^= carry ^ addend;
+            sum += addend;
+            carry = sum - output;
+        }
+    } while (booth != multiplier); // done once the sign-extended low bits reproduce the whole multiplier
+
+    return carry >> 31;
+}
+// booths_multiplication64_opt: carry result for the lastcycle case of the long multiplies; returns bit 31 of (sum ^ output).
+// Takes a multiplicand shifted right by 6 and a multiplier shifted right by 26 (zero or sign extended); accum_hi is what the callers below pass as rd >> 32
+static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) {
+    // Skipping the first 14 iterations seems to work because the lower carries can't propagate to bit 63
+    // This means only magic bits 62-61 are needed (which requires decoding 3 booth chunks),
+    // and only the last two booth iterations are needed
+
+    // Set the low bit of the multiplicand to cause negation to invert the upper bits
+    multiplicand |= 1;
+
+    // Pre-populate magic bit 61 for carry
+    u32 carry = ~accum_hi & UINT32_C(0x20000000);
+    // Pre-populate magic bits 63-60 for output (with carry magic pre-added in)
+    u32 output = accum_hi - UINT32_C(0x08000000);
+
+    // Get factors from the top 3 booth chunks
+    u32 booth0 = (s32)(multiplier << 27) >> 27;
+    u32 booth1 = (s32)(multiplier << 29) >> 29;
+    u32 booth2 = (s32)(multiplier << 31) >> 31;
+    u32 factor0 = multiplier - booth0;
+    u32 factor1 = booth0 - booth1;
+    u32 factor2 = booth1 - booth2;
+
+    // Get scaled value of the 3rd top booth addend
+    u32 addend = multiplicand * factor2;
+    // Finalize bits 61-60 of output magic using its sign
+    output -= addend & UINT32_C(0x10000000);
+    // Get scaled value of the 2nd top booth addend
+    addend = multiplicand * factor1;
+    // Finalize bits 63-62 of output magic using its sign
+    output -= addend & UINT32_C(0x40000000);
+
+    // Get the carry from the CSA in bit 61 and propagate it to bit 62, which is not processed in this iteration
+    u32 sum = output + (addend & UINT32_C(0x20000000));
+    // Subtract out the carry magic to get the actual output magic
+    output -= carry;
+
+    // Get scaled value of the 1st top booth addend
+    addend = multiplicand * factor0;
+    // Add to bit 62 and propagate the carry
+    sum += addend & UINT32_C(0x40000000);
+
+    // Cancel out the output magic bit 63 to get the carry bit 63
+    return (sum ^ output) >> 31;
+}
+// Entry points for the interpreter: each returns the value its caller stores in the C flag for an S-variant multiply.
+// lastcycle: the callers pass true when the instruction fell into its longest timing bucket (cycles==4 for MUL, cycles==5 for the others).
+// MULS carry (rm = multiplicand, rs = multiplier, rn = accumulate operand, 0 when there is none); also for MLAS and MUL (thumb ver.)
+inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle)
+{
+    if (lastcycle)
+        return (rs >> 30) == -2; // true when bits 31:30 of rs are 0b10 (relies on an arithmetic right shift of the signed value)
+    else
+        return booths_multiplication32_opt(rm, rs, rn);
+}
+
+// UMULLS carry (rd = 64-bit accumulate operand, 0 when there is none); also for UMLALS
+inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle)
+{
+    if (lastcycle)
+        return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); // unsigned operands: the shifts zero-extend
+    else
+        return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); // only the low word of the accumulator is used here
+}
+
+// SMULLS carry (rd = 64-bit accumulate operand, 0 when there is none); also for SMLALS
+inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle)
+{
+    if (lastcycle)
+        return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); // signed operands: the shifts are expected to sign-extend
+    else
+        return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); // only the low word of the accumulator is used here
+}
+
+#endif