diff --git a/.github/workflows/build-ubuntu.yml b/.github/workflows/build-ubuntu.yml
index 044d01ee..1104142d 100644
--- a/.github/workflows/build-ubuntu.yml
+++ b/.github/workflows/build-ubuntu.yml
@@ -28,7 +28,7 @@ jobs:
         sudo apt install --allow-downgrades cmake ninja-build extra-cmake-modules libpcap0.8-dev libsdl2-dev libenet-dev \
           qt6-{base,base-private,multimedia}-dev libqt6svg6-dev libarchive-dev libzstd-dev libfuse2
     - name: Configure
-      run: cmake -B build -G Ninja -DUSE_QT6=ON -DCMAKE_INSTALL_PREFIX=/usr -DMELONDS_EMBED_BUILD_INFO=ON
+      run: cmake -B build -G Ninja -DCMAKE_INSTALL_PREFIX=/usr -DMELONDS_EMBED_BUILD_INFO=ON
     - name: Build
       run: |
         cmake --build build
@@ -79,7 +79,6 @@ jobs:
             -DPKG_CONFIG_EXECUTABLE=/usr/bin/aarch64-linux-gnu-pkg-config \
             -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc-12 \
             -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++-12 \
-            -DUSE_QT6=ON \
             -DMELONDS_EMBED_BUILD_INFO=ON
       - name: Build
         shell: bash
diff --git a/BUILD.md b/BUILD.md
new file mode 100644
index 00000000..51bc4cac
--- /dev/null
+++ b/BUILD.md
@@ -0,0 +1,81 @@
+# Building melonDS
+
+* [Linux](#linux)
+* [Windows](#windows)
+* [macOS](#macos)
+
+## Linux
+1. Install dependencies:
+   * Ubuntu:
+     * All versions: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev libarchive-dev libenet-dev libzstd-dev`
+     * 24.04: `sudo apt install qt6-{base,base-private,multimedia,svg}-dev`
+     * 22.04: `sudo apt install qt6-base-dev qt6-base-private-dev qt6-multimedia-dev libqt6svg6-dev`
+     * Older versions: `sudo apt install qtbase5-dev qtbase5-private-dev qtmultimedia5-dev libqt5svg5-dev`  
+       Also add `-DUSE_QT6=OFF` to the first CMake command below.
+   * Fedora: `sudo dnf install gcc-c++ cmake extra-cmake-modules SDL2-devel libarchive-devel enet-devel libzstd-devel qt6-{qtbase,qtmultimedia,qtsvg}-devel wayland-devel`
+   * Arch Linux: `sudo pacman -S base-devel cmake extra-cmake-modules git libpcap sdl2 qt6-{base,multimedia,svg} libarchive enet zstd`
+2. Download the melonDS repository and prepare:
+   ```bash
+   git clone https://github.com/melonDS-emu/melonDS
+   cd melonDS
+   ```
+3. Compile:
+   ```bash
+   cmake -B build
+   cmake --build build -j$(nproc --all)
+   ```
+
+## Windows
+1. Install [MSYS2](https://www.msys2.org/)
+2. Open the MSYS2 terminal from the Start menu:
+   * For x64 systems (most common), use **MSYS2 UCRT64**
+   * For ARM64 systems, use **MSYS2 CLANGARM64**
+3. Update the packages using `pacman -Syu` and reopen the same terminal if it asks you to
+4. Install git and clone the repository
+   ```bash
+   pacman -S git
+   git clone https://github.com/melonDS-emu/melonDS
+   cd melonDS
+   ```
+5. Install dependencies:  
+   Replace `<prefix>` below with `mingw-w64-ucrt-x86_64` on x64 systems, or `mingw-w64-clang-aarch64` on ARM64 systems.
+   ```bash
+   pacman -S <prefix>-{toolchain,cmake,SDL2,libarchive,enet,zstd}
+   ```
+6. Install Qt and configure the build directory
+   * Dynamic builds (with DLLs)
+     1. Install Qt: `pacman -S <prefix>-{qt6-base,qt6-svg,qt6-multimedia,qt6-tools}`
+     2. Set up the build directory with `cmake -B build`
+   * Static builds (without DLLs, standalone executable)
+     1. Install Qt: `pacman -S <prefix>-qt5-static`  
+        (Note: As of writing, the `qt6-static` package does not work.)
+     2. Set up the build directory with `cmake -B build -DBUILD_STATIC=ON -DUSE_QT6=OFF -DCMAKE_PREFIX_PATH=$MSYSTEM_PREFIX/qt5-static`
+7. Compile: `cmake --build build`
+
+If everything went well, melonDS should now be in the `build` folder. For dynamic builds, you may need to run melonDS from the MSYS2 terminal in order for it to find the required DLLs.
+
+## macOS
+1. Install the [Homebrew Package Manager](https://brew.sh)
+2. Install dependencies: `brew install git pkg-config cmake sdl2 qt@6 libarchive enet zstd`
+3. Download the melonDS repository and prepare:
+   ```zsh
+   git clone https://github.com/melonDS-emu/melonDS
+   cd melonDS
+   ```
+4. Compile:
+   ```zsh
+   cmake -B build -DCMAKE_PREFIX_PATH="$(brew --prefix qt@6);$(brew --prefix libarchive)"
+   cmake --build build -j$(sysctl -n hw.logicalcpu)
+   ```
+If everything went well, melonDS.app should now be in the `build` directory.
+
+### Self-contained app bundle
+If you want an app bundle that can be distributed to other computers without needing to install dependencies through Homebrew, you can additionally run
+`../tools/mac-libs.rb .` from inside the `build` directory after the build is completed, or add `-DMACOS_BUNDLE_LIBS=ON` to the first CMake command.
+
+## Nix (macOS/Linux)
+
+melonDS provides a Nix flake with support for both macOS and Linux. The [Nix package manager](https://nixos.org) needs to be installed to use it.
+
+* To run melonDS, just type `nix run github:melonDS-emu/melonDS`.
+* To get a shell for development, clone the melonDS repository and type `nix develop` in its directory.
\ No newline at end of file
diff --git a/CMakePresets.json b/CMakePresets.json
index 2144417b..b506b88a 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -27,10 +27,6 @@
       "binaryDir": "${sourceDir}/build/release-mingw-x86_64",
       "generator": "Ninja",
       "cacheVariables": {
-        "USE_QT6": {
-          "type": "BOOL",
-          "value": "ON"
-        },
         "BUILD_STATIC": {
           "type": "BOOL",
           "value": "ON"
diff --git a/README.md b/README.md
index eb8b1358..2cf42c2d 100644
--- a/README.md
+++ b/README.md
@@ -32,75 +32,7 @@ DS BIOS dumps from a DSi or 3DS can be used with no compatibility issues. DSi BI
 As for the rest, the interface should be pretty straightforward. If you have a question, don't hesitate to ask, though!
 
 ## How to build
-
-### Linux
-1. Install dependencies:
-   * Ubuntu 22.04: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtbase5-private-dev qtmultimedia5-dev libqt5svg5-dev libarchive-dev libenet-dev libzstd-dev`
-   * Older Ubuntu: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default qtbase5-private-dev qtmultimedia5-dev libqt5svg5-dev libarchive-dev libenet-dev libzstd-dev`
-   * Arch Linux: `sudo pacman -S base-devel cmake extra-cmake-modules git libpcap sdl2 qt5-base qt5-multimedia qt5-svg libarchive enet zstd`
-3. Download the melonDS repository and prepare:
-   ```bash
-   git clone https://github.com/melonDS-emu/melonDS
-   cd melonDS
-   ```
-
-3. Compile:
-   ```bash
-   cmake -B build
-   cmake --build build -j$(nproc --all)
-   ```
-
-### Windows
-1. Install [MSYS2](https://www.msys2.org/)
-2. Open the **MSYS2 MinGW 64-bit** terminal
-3. Update the packages using `pacman -Syu` and reopen the terminal if it asks you to
-4. Install git to clone the repository
-   ```bash
-   pacman -S git
-   ```
-5. Download the melonDS repository and prepare:
-   ```bash
-   git clone https://github.com/melonDS-emu/melonDS
-   cd melonDS
-   ```
-#### Dynamic builds (with DLLs)
-5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-base,qt5-svg,qt5-multimedia,qt5-svg,qt5-tools,libarchive,enet,zstd}`
-6. Compile:
-   ```bash
-   cmake -B build
-   cmake --build build
-   cd build
-   ../tools/msys-dist.sh
-   ```
-If everything went well, melonDS and the libraries it needs should now be in the `dist` folder.
-
-#### Static builds (without DLLs, standalone executable)
-5. Install dependencies: `pacman -S mingw-w64-x86_64-{cmake,SDL2,toolchain,qt5-static,libarchive,enet,zstd}`
-6. Compile:
-   ```bash
-   cmake -B build -DBUILD_STATIC=ON -DCMAKE_PREFIX_PATH=/mingw64/qt5-static
-   cmake --build build
-   ```
-If everything went well, melonDS should now be in the `build` folder.
-
-### macOS
-1. Install the [Homebrew Package Manager](https://brew.sh)
-2. Install dependencies: `brew install git pkg-config cmake sdl2 qt@6 libarchive enet zstd`
-3. Download the melonDS repository and prepare:
-   ```zsh
-   git clone https://github.com/melonDS-emu/melonDS
-   cd melonDS
-   ```
-4. Compile:
-   ```zsh
-   cmake -B build -DCMAKE_PREFIX_PATH="$(brew --prefix qt@6);$(brew --prefix libarchive)"
-   cmake --build build -j$(sysctl -n hw.logicalcpu)
-   ```
-If everything went well, melonDS.app should now be in the `build` directory.
-
-#### Self-contained app bundle
-If you want an app bundle that can be distributed to other computers without needing to install dependencies through Homebrew, you can additionally run `
-../tools/mac-libs.rb .` after the build is completed, or add `-DMACOS_BUNDLE_LIBS=ON` to the first CMake command.
+See [BUILD.md](./BUILD.md) for build instructions.
 
 ## TODO LIST
 
diff --git a/cmake/ConfigureVcpkg.cmake b/cmake/ConfigureVcpkg.cmake
index c1eb522d..3fb0786f 100644
--- a/cmake/ConfigureVcpkg.cmake
+++ b/cmake/ConfigureVcpkg.cmake
@@ -19,11 +19,7 @@ set(VCPKG_OVERLAY_TRIPLETS "${CMAKE_SOURCE_DIR}/cmake/overlay-triplets")
 option(USE_RECOMMENDED_TRIPLETS "Use the recommended triplets that are used for official builds" ON)
 
 # Duplicated here because it needs to be set before project()
-if (NOT WIN32)
-    option(USE_QT6 "Build using Qt 6 instead of 5" ON)
-else()
-    option(USE_QT6 "Build using Qt 6 instead of 5" OFF)
-endif()
+option(USE_QT6 "Use Qt 6 instead of Qt 5" ON)
 
 # Since the Linux build pulls in glib anyway, we can just use upstream libslirp
 if (UNIX AND NOT APPLE)
diff --git a/flake.lock b/flake.lock
index be75f57f..d7dd5bc6 100644
--- a/flake.lock
+++ b/flake.lock
@@ -20,11 +20,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1729665710,
-        "narHash": "sha256-AlcmCXJZPIlO5dmFzV3V2XF6x/OpNWUV8Y/FMPGd8Z4=",
+        "lastModified": 1730531603,
+        "narHash": "sha256-Dqg6si5CqIzm87sp57j5nTaeBbWhHFaVyG7V6L8k3lY=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "2768c7d042a37de65bb1b5b3268fc987e534c49d",
+        "rev": "7ffd9ae656aec493492b44d0ddfb28e79a1ea25d",
         "type": "github"
       },
       "original": {
diff --git a/flake.nix b/flake.nix
index 8d500c03..a7768e4e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -19,7 +19,7 @@
         then sourceInfo.dirtyShortRev
         else sourceInfo.shortRev;
 
-      melonDS = pkgs.qt6.qtbase.stdenv.mkDerivation {
+      melonDS = pkgs.stdenv.mkDerivation {
         pname = "melonDS";
         version = "0.9.5-${shortRevision}";
         src = ./.;
@@ -74,8 +74,11 @@
         drv = self.packages.${system}.default;
       };
       devShells = {
-        default = pkgs.mkShell.override { stdenv = pkgs.qt6.qtbase.stdenv; } {
+        default = pkgs.mkShell {
           inputsFrom = [ self.packages.${system}.default ];
+          packages = with pkgs; [
+            qt6.qttools
+          ];
         };
 
         # Shell for building static melonDS release builds with vcpkg
diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp
index c493b9ac..410a78e1 100644
--- a/src/ARMInterpreter_ALU.cpp
+++ b/src/ARMInterpreter_ALU.cpp
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include "ARM.h"
 #include "NDS.h"
+#include "ARMInterpreter_MultiplySuperLLE.h"
 
 namespace melonDS::ARMInterpreter
 {
@@ -914,7 +915,6 @@ void A_MUL(ARM* cpu)
     {
         cpu->SetNZ(res & 0x80000000,
                    !res);
-        if (cpu->Num==1) cpu->SetC(0);
     }
 
     if (cpu->Num == 0)
@@ -939,6 +939,7 @@ void A_MUL(ARM* cpu)
         else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
         else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3;
         else cycles = 4;
+        if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, 0, cycles==4));
         cpu->AddCycles_CI(cycles);
     }
 
@@ -959,7 +960,6 @@ void A_MLA(ARM* cpu)
     {
         cpu->SetNZ(res & 0x80000000,
                    !res);
-        if (cpu->Num==1) cpu->SetC(0);
     }
 
     if (cpu->Num == 0)
@@ -987,6 +987,7 @@ void A_MLA(ARM* cpu)
         else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
         else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
         else cycles = 5;
+        if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, rn, cycles==5));
         cpu->AddCycles_CI(cycles);
     }
 
@@ -1008,7 +1009,6 @@ void A_UMULL(ARM* cpu)
     {
         cpu->SetNZ((u32)(res >> 63ULL),
                    !res);
-        if (cpu->Num==1) cpu->SetC(0);
     }
 
     if (cpu->Num == 0)
@@ -1033,6 +1033,7 @@ void A_UMULL(ARM* cpu)
         else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
         else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
         else cycles = 5;
+        if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(0, rm, rs, cycles==5));
         cpu->AddCycles_CI(cycles);
     }
 
@@ -1057,7 +1058,6 @@ void A_UMLAL(ARM* cpu)
     {
         cpu->SetNZ((u32)(res >> 63ULL),
                    !res);
-        if (cpu->Num==1) cpu->SetC(0);
     }
 
     if (cpu->Num == 0)
@@ -1086,6 +1086,7 @@ void A_UMLAL(ARM* cpu)
         else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
         else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
         else cycles = 5;
+        if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(rd, rm, rs, cycles==5));
         cpu->AddCycles_CI(cycles);
     }
 
@@ -1107,7 +1108,6 @@ void A_SMULL(ARM* cpu)
     {
         cpu->SetNZ((u32)(res >> 63ULL),
                    !res);
-        if (cpu->Num==1) cpu->SetC(0);
     }
 
     if (cpu->Num == 0)
@@ -1132,6 +1132,7 @@ void A_SMULL(ARM* cpu)
         else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
         else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
         else cycles = 5;
+        if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5));
         cpu->AddCycles_CI(cycles);
     }
 }
@@ -1155,7 +1156,6 @@ void A_SMLAL(ARM* cpu)
     {
         cpu->SetNZ((u32)(res >> 63ULL),
                    !res);
-        if (cpu->Num==1) cpu->SetC(0);
     }
 
     if (cpu->Num == 0)
@@ -1184,6 +1184,7 @@ void A_SMLAL(ARM* cpu)
         else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
         else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
         else cycles = 5;
+        if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5));
         cpu->AddCycles_CI(cycles);
     }
 }
@@ -1822,19 +1823,19 @@ void T_MUL_REG(ARM* cpu)
     cpu->SetNZ(res & 0x80000000,
                !res);
 
-    s32 cycles = 0;
+    s32 cycles;
     if (cpu->Num == 0)
     {
-        cycles += 3;
+        cycles = 3;
         ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
     }
     else
     {
-        cpu->SetC(0); // carry flag destroyed, they say. whatever that means...
-        if      (a & 0xFF000000) cycles += 4;
-        else if (a & 0x00FF0000) cycles += 3;
-        else if (a & 0x0000FF00) cycles += 2;
-        else                     cycles += 1;
+        if      ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1;
+        else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
+        else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3;
+        else cycles = 4;
+        cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means...
     }
     cpu->AddCycles_CI(cycles); // implemented as S variant, doesn't interlock
 }
diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp
index bd6a4e8d..685f67ce 100644
--- a/src/ARMInterpreter_LoadStore.cpp
+++ b/src/ARMInterpreter_LoadStore.cpp
@@ -638,6 +638,9 @@ void A_LDM(ARM* cpu)
     // handle data aborts
     if (dabort) [[unlikely]]
     {
+        if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
+            cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
+
         ((ARMv5*)cpu)->DataAbort();
         return;
     }
diff --git a/src/ARMInterpreter_MultiplySuperLLE.h b/src/ARMInterpreter_MultiplySuperLLE.h
new file mode 100644
index 00000000..21b17bbc
--- /dev/null
+++ b/src/ARMInterpreter_MultiplySuperLLE.h
@@ -0,0 +1,136 @@
+#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H
+#define ARMINTERPRETER_MULTIPLYSUPERLLE_H
+
+#include "types.h"
+
+using namespace melonDS;
+
+/*
+    Copyright (c) 2024 zaydlang
+
+    This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
+
+    Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
+
+        1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software.
+           If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+        2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+        3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+
+// code taken from: (also features a few alternative implementations that could maybe be worth looking at?)
+// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h
+// based on research that can be found here: https://bmchtech.github.io/post/multiply/
+
+// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI.
+
+
+// Computes the carry bit for the short multiply timings. Needs a multiplier that fits in 25 signed bits (-0x01000000 to 0x00FFFFFF); anything wider would push `shift` below zero before the loop exits
+static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) {
+    // Set the low bit of the multiplicand to cause negation to invert the upper bits, this bit can't propagate to bit 31
+    multiplicand |= 1;
+
+    // Optimized first iteration
+    u32 booth = (s32)(multiplier << 31) >> 31; // 0 or 0xFFFFFFFF taken from bit 0 of the multiplier (relies on arithmetic right shift)
+    u32 carry = booth * multiplicand;
+    // Pre-populate accumulator for output
+    u32 output = accumulator;
+
+    u32 sum = output + carry;
+    int shift = 29; // lowered by 2 per step, so the sign-extended window of `multiplier` grows by 2 bits each time
+    do {
+        for (int i = 0; i < 4; i++, shift -= 2) {
+            // Get next booth factor (-2 to 2, shifted left by 30-shift)
+            u32 next_booth = (s32)(multiplier << shift) >> shift;
+            u32 factor = next_booth - booth;
+            booth = next_booth;
+            // Get scaled value of booth addend
+            u32 addend = multiplicand * factor;
+            // Combine the addend with the CSA
+            // Not performing any masking seems to work because the lower carries can't propagate to bit 31
+            output ^= carry ^ addend;
+            sum += addend;
+            carry = sum - output;
+        }
+    } while (booth != multiplier); // finished once the sign-extended window equals the full multiplier
+
+    return carry >> 31; // the result is bit 31 of the final carry term
+}
+
+// Takes a multiplicand shifted right by 6 and a multiplier shifted right by 26 (zero or sign extended), plus accum_hi, for which the callers in this file pass the upper 32 bits of the 64-bit accumulator
+static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) {
+    // Skipping the first 14 iterations seems to work because the lower carries can't propagate to bit 63
+    // This means only magic bits 62-61 are needed (which requires decoding 3 booth chunks),
+    // and only the last two booth iterations are needed
+
+    // Set the low bit of the multiplicand to cause negation to invert the upper bits
+    multiplicand |= 1;
+
+    // Pre-populate magic bit 61 for carry
+    u32 carry = ~accum_hi & UINT32_C(0x20000000);
+    // Pre-populate magic bits 63-60 for output (with carry magic pre-added in)
+    u32 output = accum_hi - UINT32_C(0x08000000);
+
+    // Get factors from the top 3 booth chunks
+    u32 booth0 = (s32)(multiplier << 27) >> 27;
+    u32 booth1 = (s32)(multiplier << 29) >> 29;
+    u32 booth2 = (s32)(multiplier << 31) >> 31;
+    u32 factor0 = multiplier - booth0;
+    u32 factor1 = booth0 - booth1;
+    u32 factor2 = booth1 - booth2;
+
+    // Get scaled value of the 3rd top booth addend
+    u32 addend = multiplicand * factor2;
+    // Finalize bits 61-60 of output magic using its sign
+    output -= addend & UINT32_C(0x10000000);
+    // Get scaled value of the 2nd top booth addend
+    addend = multiplicand * factor1;
+    // Finalize bits 63-62 of output magic using its sign
+    output -= addend & UINT32_C(0x40000000);
+
+    // Get the carry from the CSA in bit 61 and propagate it to bit 62, which is not processed in this iteration
+    u32 sum = output + (addend & UINT32_C(0x20000000));
+    // Subtract out the carry magic to get the actual output magic
+    output -= carry;
+
+    // Get scaled value of the 1st top booth addend
+    addend = multiplicand * factor0;
+    // Add to bit 62 and propagate the carry
+    sum += addend & UINT32_C(0x40000000);
+
+    // Cancel out the output magic bit 63 to get the carry bit 63
+    return (sum ^ output) >> 31; // bit 31 of this 32-bit value stands in for bit 63 of the 64-bit result
+}
+
+
+// Carry flag for MULS; also for MLAS and MUL (thumb ver.). rn is the accumulator (callers pass 0 when there is none); lastcycle is true when the multiply took its longest timing
+inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle)
+{
+    if (lastcycle)
+        return (rs >> 30) == -2; // true when the top two bits of rs are 0b10 (assumes arithmetic shift of the signed value)
+    else
+        return booths_multiplication32_opt(rm, rs, rn); // here the callers' cycle checks have already confirmed the top 8 bits of rs are all 0 or all 1
+}
+
+// Carry flag for UMULLS; also for UMLALS. rd is the 64-bit accumulator (callers pass 0 for plain UMULLS); lastcycle is true when the multiply took its longest timing
+inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle)
+{
+    if (lastcycle)
+        return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); // unsigned operands, so the shifts zero-extend; only the high accumulator word is passed
+    else
+        return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); // short multiplier: only the low accumulator word is passed
+}
+
+// Carry flag for SMULLS; also for SMLALS. rd is the 64-bit accumulator (callers pass 0 for plain SMULLS); lastcycle is true when the multiply took its longest timing
+inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle)
+{
+    if (lastcycle)
+        return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); // signed operands, so the shifts sign-extend (arithmetic shift assumed); only the high accumulator word is passed
+    else
+        return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); // short multiplier: only the low accumulator word is passed
+}
+
+#endif
diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt
index 7dc4a00c..54888c49 100644
--- a/src/frontend/qt_sdl/CMakeLists.txt
+++ b/src/frontend/qt_sdl/CMakeLists.txt
@@ -59,11 +59,7 @@ set(SOURCES_QT_SDL
     NetplayDialog.cpp
 )
 
-if (APPLE)
-    option(USE_QT6 "Build using Qt 6 instead of 5" ON)
-else()
-    option(USE_QT6 "Build using Qt 6 instead of 5" OFF)
-endif()
+option(USE_QT6 "Use Qt 6 instead of Qt 5" ON)
 
 if (USE_QT6)
     find_package(Qt6 COMPONENTS Core Gui Widgets Network Multimedia OpenGL OpenGLWidgets Svg REQUIRED)