Merge branch 'master' into master

2023-09-04 13:45:15 +09:00 · 2023-09-04 13:45:15 +09:00 · 4695e9c1f5
parent 328d73c1a9 f16d4f0523
commit 4695e9c1f5
271 changed files with 6739 additions and 3757 deletions
--- a/.ci/build-linux.sh
+++ b/.ci/build-linux.sh
@ -1,14 +1,11 @@
 #!/bin/sh -ex

-# Setup Qt variables
-export QT_BASE_DIR=/opt/qt"${QTVERMIN}"
-export PATH="$QT_BASE_DIR"/bin:"$PATH"
-export LD_LIBRARY_PATH="$QT_BASE_DIR"/lib/x86_64-linux-gnu:"$QT_BASE_DIR"/lib
-
 if [ -z "$CIRRUS_CI" ]; then
   cd rpcs3 || exit 1
 fi

+git config --global --add safe.directory '*'
+
 # Pull all the submodules except llvm
 # Note: Tried to use git submodule status, but it takes over 20 seconds
 # shellcheck disable=SC2046
@ -45,6 +42,7 @@ cmake ..                                               \
    -DCMAKE_RANLIB="$RANLIB"                           \
    -DUSE_SYSTEM_CURL=ON                               \
    -DUSE_SDL=ON                                       \
+    -DUSE_SYSTEM_FFMPEG=OFF                            \
    -DOpenGL_GL_PREFERENCE=LEGACY                      \
    -DLLVM_DIR=/opt/llvm/lib/cmake/llvm                \
    -DSTATIC_LINK_LLVM=ON                              \
--- a/.ci/build-mac.sh
+++ b/.ci/build-mac.sh
@ -6,11 +6,12 @@ brew install -f --overwrite nasm ninja git p7zip create-dmg ccache pipenv
 #/usr/sbin/softwareupdate --install-rosetta --agree-to-license
 arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
 arch -x86_64 /usr/local/bin/brew update
-arch -x86_64 /usr/local/bin/brew install -f --overwrite llvm@16 sdl2 glew cmake faudio vulkan-headers
+arch -x86_64 /usr/local/bin/brew install --build-from-source ffmpeg gnutls freetype
+arch -x86_64 /usr/local/bin/brew install -f --overwrite llvm@16 glew cmake sdl2 vulkan-headers coreutils
 arch -x86_64 /usr/local/bin/brew link -f llvm@16

-# moltenvk based on commit for 1.2.4 release
-wget https://raw.githubusercontent.com/Homebrew/homebrew-core/b233d4f9f40f26d81da11140defbfd578cfe4a69/Formula/molten-vk.rb
+# moltenvk based on commit for 1.2.5 release
+wget https://raw.githubusercontent.com/Homebrew/homebrew-core/b0bba05b617ef0fd796b3727be46addfd098a491/Formula/m/molten-vk.rb
 arch -x86_64 /usr/local/bin/brew install -f --overwrite ./molten-vk.rb
 #export MACOSX_DEPLOYMENT_TARGET=12.0
 export CXX=clang++
@ -28,23 +29,24 @@ export WORKDIR;
 WORKDIR="$(pwd)"

 # Get Qt
-if [ ! -d "/tmp/Qt/5.15.2" ]; then
+if [ ! -d "/tmp/Qt/$QT_VER" ]; then
  mkdir -p "/tmp/Qt"
  git clone https://github.com/engnr/qt-downloader.git
  cd qt-downloader
  git checkout f52efee0f18668c6d6de2dec0234b8c4bc54c597
  cd "/tmp/Qt"
  "/opt/homebrew/bin/pipenv" run pip3 install py7zr requests semantic_version lxml
-  "/opt/homebrew/bin/pipenv" run "$WORKDIR/qt-downloader/qt-downloader" macos desktop 5.15.2 clang_64 --opensource
+  mkdir -p "$QT_VER/macos" ; ln -s "macos" "$QT_VER/clang_64"
+  "/opt/homebrew/bin/pipenv" run "$WORKDIR/qt-downloader/qt-downloader" macos desktop "$QT_VER" clang_64 --opensource --addons qtmultimedia
 fi

 cd "$WORKDIR"
-ditto "/tmp/Qt/5.15.2" "qt-downloader/5.15.2"
+ditto "/tmp/Qt/$QT_VER" "qt-downloader/$QT_VER"

-export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5"
+export Qt6_DIR="$WORKDIR/qt-downloader/$QT_VER/clang_64/lib/cmake/Qt$QT_VER_MAIN"
 export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2"

-export PATH="$BREW_X64_PATH/opt/llvm@16/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
+export PATH="$BREW_X64_PATH/opt/llvm@16/bin:$WORKDIR/qt-downloader/$QT_VER/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
 export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib"
 export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie"
 export LIBRARY_PATH="$BREW_X64_PATH/lib"
@ -57,9 +59,9 @@ export VK_ICD_FILENAMES="$VULKAN_SDK/share/vulkan/icd.d/MoltenVK_icd.json"

 # Export the llvm@16 directory that lives under $BREW_X64_PATH.
 # The "$" is required: without it the value is the literal relative path
 # "BREW_X64_PATH/opt/llvm@16" rather than the expanded Homebrew location.
 export LLVM_DIR
 LLVM_DIR="$BREW_X64_PATH/opt/llvm@16"
-# exclude FAudio, SPIRV and LLVM, and sdl from submodule update
+# exclude ffmpeg, SPIRV and LLVM, and sdl from submodule update
 # shellcheck disable=SC2046
-git submodule -q update --init --depth=1 --jobs=8 $(awk '/path/ && !/FAudio/ && !/llvm/ && !/SPIRV/ && !/SDL/ { print $3 }' .gitmodules)
+git submodule -q update --init --depth=1 --jobs=8 $(awk '/path/ && !/ffmpeg/ && !/llvm/ && !/SPIRV/ && !/SDL/ { print $3 }' .gitmodules)

 # 3rdparty fixes
 sed -i '' "s/extern const double NSAppKitVersionNumber;/const double NSAppKitVersionNumber = 1343;/g" 3rdparty/hidapi/hidapi/mac/hid.c
@ -67,16 +69,31 @@ sed -i '' "s/extern const double NSAppKitVersionNumber;/const double NSAppKitVer
 mkdir build && cd build || exit 1

 "$BREW_X64_PATH/bin/cmake" .. \
-    -DUSE_SDL=ON -DUSE_DISCORD_RPC=ON -DUSE_VULKAN=ON -DUSE_ALSA=OFF -DUSE_PULSE=OFF -DUSE_AUDIOUNIT=ON \
-    -DLLVM_CCACHE_BUILD=OFF -DLLVM_BUILD_RUNTIME=OFF -DLLVM_BUILD_TOOLS=OFF \
-    -DLLVM_INCLUDE_DOCS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_TOOLS=OFF \
-    -DLLVM_INCLUDE_UTILS=OFF -DLLVM_USE_PERF=OFF -DLLVM_ENABLE_Z3_SOLVER=OFF \
+    -DUSE_SDL=ON \
+    -DUSE_DISCORD_RPC=ON \
+    -DUSE_VULKAN=ON \
+    -DUSE_ALSA=OFF \
+    -DUSE_PULSE=OFF \
+    -DUSE_AUDIOUNIT=ON \
+    -DUSE_SYSTEM_FFMPEG=ON \
+    -DLLVM_CCACHE_BUILD=OFF \
+    -DLLVM_BUILD_RUNTIME=OFF \
+    -DLLVM_BUILD_TOOLS=OFF \
+    -DLLVM_INCLUDE_DOCS=OFF \
+    -DLLVM_INCLUDE_EXAMPLES=OFF \
+    -DLLVM_INCLUDE_TESTS=OFF \
+    -DLLVM_INCLUDE_TOOLS=OFF \
+    -DLLVM_INCLUDE_UTILS=OFF \
+    -DLLVM_USE_PERF=OFF \
+    -DLLVM_ENABLE_Z3_SOLVER=OFF \
    -DUSE_NATIVE_INSTRUCTIONS=OFF \
    -DUSE_SYSTEM_MVK=ON \
-    -DUSE_SYSTEM_FAUDIO=ON \
+    -DUSE_SYSTEM_FAUDIO=OFF \
    -DUSE_SYSTEM_SDL=ON \
    $CMAKE_EXTRA_OPTS \
-    -DLLVM_TARGET_ARCH=X86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_IGNORE_PATH="$BREW_PATH/lib" \
+    -DLLVM_TARGET_ARCH=X86_64 \
+    -DCMAKE_OSX_ARCHITECTURES=x86_64 \
+    -DCMAKE_IGNORE_PATH="$BREW_PATH/lib" \
    -G Ninja

 "$BREW_PATH/bin/ninja"; build_status=$?;
--- a/.ci/deploy-linux.sh
+++ b/.ci/deploy-linux.sh
@ -5,11 +5,11 @@ cd build || exit 1
 if [ "$DEPLOY_APPIMAGE" = "true" ]; then
    DESTDIR=AppDir ninja install

-    curl -sL -o /usr/bin/linuxdeploy https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage
+    curl -fsSLo /usr/bin/linuxdeploy https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage
    chmod +x /usr/bin/linuxdeploy
-    curl -sL -o /usr/bin/linuxdeploy-plugin-qt https://github.com/linuxdeploy/linuxdeploy-plugin-qt/releases/download/continuous/linuxdeploy-plugin-qt-x86_64.AppImage
+    curl -fsSLo /usr/bin/linuxdeploy-plugin-qt https://github.com/linuxdeploy/linuxdeploy-plugin-qt/releases/download/continuous/linuxdeploy-plugin-qt-x86_64.AppImage
    chmod +x /usr/bin/linuxdeploy-plugin-qt
-    curl -sL -o linuxdeploy-plugin-checkrt.sh https://github.com/linuxdeploy/linuxdeploy-plugin-checkrt/releases/download/continuous/linuxdeploy-plugin-checkrt-x86_64.sh
+    curl -fsSLo linuxdeploy-plugin-checkrt.sh https://github.com/linuxdeploy/linuxdeploy-plugin-checkrt/releases/download/continuous/linuxdeploy-plugin-checkrt-x86_64.sh
    chmod +x ./linuxdeploy-plugin-checkrt.sh

    EXTRA_QT_PLUGINS="svg;" APPIMAGE_EXTRACT_AND_RUN=1 linuxdeploy --appdir AppDir --plugin qt
--- a/.ci/deploy-mac.sh
+++ b/.ci/deploy-mac.sh
@ -16,6 +16,8 @@ cd bin
 mkdir "rpcs3.app/Contents/lib/"

 cp "/usr/local/opt/llvm@16/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
+cp "$(realpath /usr/local/lib/libsharpyuv.0.dylib)" "rpcs3.app/Contents/lib/libsharpyuv.0.dylib"
+cp "$(realpath /usr/local/lib/libintl.8.dylib)" "rpcs3.app/Contents/lib/libintl.8.dylib"

 rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \
 "rpcs3.app/Contents/Frameworks/QtQml.framework" \
@ -45,6 +47,7 @@ DMG_FILEPATH="$BUILD_ARTIFACTSTAGINGDIRECTORY/rpcs3-v${COMM_TAG}-${COMM_COUNT}-$
 --hide-extension Quickstart.url \
 --app-drop-link 600 185 \
 --skip-jenkins \
+--format ULMO \
 "$DMG_FILEPATH" \
 RPCS3.app

--- a/.ci/deploy-windows.sh
+++ b/.ci/deploy-windows.sh
@ -7,10 +7,13 @@ ARTIFACT_DIR="$BUILD_ARTIFACTSTAGINGDIRECTORY"
 rm -f ./bin/rpcs3.exp ./bin/rpcs3.lib ./bin/rpcs3.pdb ./bin/vc_redist.x64.exe
 rm -rf ./bin/git

-# Prepare compatibility database for packaging, as well as
+# Prepare compatibility and SDL database for packaging, as well as
 # certificate for ssl (auto-updater)
-curl -sL 'https://rpcs3.net/compatibility?api=v1&export' | iconv -t UTF-8 > ./bin/GuiConfigs/compat_database.dat
-curl -sL 'https://curl.haxx.se/ca/cacert.pem' > ./bin/cacert.pem
+mkdir ./bin/config
+mkdir ./bin/config/input_configs
+curl -fsSL 'https://raw.githubusercontent.com/gabomdq/SDL_GameControllerDB/master/gamecontrollerdb.txt' 1> ./bin/config/input_configs/gamecontrollerdb.txt
+curl -fsSL 'https://rpcs3.net/compatibility?api=v1&export' | iconv -t UTF-8 1> ./bin/GuiConfigs/compat_database.dat
+curl -fsSL 'https://curl.haxx.se/ca/cacert.pem' 1> ./bin/cacert.pem

 # Package artifacts
 7z a -m0=LZMA2 -mx9 "$BUILD" ./bin/*
--- a/.ci/get_keys-windows.sh
+++ b/.ci/get_keys-windows.sh
@ -1,4 +1,4 @@
 #!/bin/sh -ex

-curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z.sha256"
-curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256"
+curl -fLo "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z.sha256"
+curl -fLo "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256"
--- a/.ci/github-upload.sh
+++ b/.ci/github-upload.sh
@ -16,7 +16,7 @@ generate_post_data()
 EOF
 }

-curl -s \
+curl -fsS \
    -H "Authorization: token ${RPCS3_TOKEN}" \
    -H "Accept: application/vnd.github.v3+json" \
    --data "$(generate_post_data)" "https://api.github.com/repos/$UPLOAD_REPO_FULL_NAME/releases" >> release.json
@ -28,7 +28,7 @@ echo "${id:?}"

 upload_file()
 {
-    curl -s \
+    curl -fsS \
        -H "Authorization: token ${RPCS3_TOKEN}" \
        -H "Accept: application/vnd.github.v3+json" \
        -H "Content-Type: application/octet-stream" \
--- a/.ci/install-freebsd.sh
+++ b/.ci/install-freebsd.sh
@ -2,7 +2,7 @@
 # NOTE: this script is run under root permissions
 # shellcheck shell=sh disable=SC2096

-# RPCS3 often needs recent Qt5 and Vulkan-Headers
+# RPCS3 often needs recent Qt and Vulkan-Headers
 sed -i '' 's/quarterly/latest/' /etc/pkg/FreeBSD.conf

 export ASSUME_ALWAYS_YES=true
@ -11,8 +11,8 @@ pkg info # debug
 # Prefer newer Clang than in base system (see also .ci/build-freebsd.sh)
 pkg install llvm16

-# Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets)
-pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg
+# Mandatory dependencies (qt6-base and qt6-svg are pulled via qt6-multimedia)
+pkg install git ccache cmake ninja qt6-multimedia glew openal-soft ffmpeg

-# Optional dependencies (libevdev is pulled by qt5-gui)
+# Optional dependencies (libevdev is pulled by qt6-base)
 pkg install pkgconf alsa-lib pulseaudio sdl2 evdev-proto vulkan-headers vulkan-loader
--- a/.ci/setup-windows.sh
+++ b/.ci/setup-windows.sh
@ -11,25 +11,26 @@ PR_NUMBER="$SYSTEM_PULLREQUEST_PULLREQUESTID"
 QT_HOST="http://qt.mirror.constant.com/"
 QT_URL_VER=$(echo "$QT_VER" | sed "s/\.//g")
 QT_VER_MSVC_UP=$(echo "${QT_VER_MSVC}" | tr '[:lower:]' '[:upper:]')
-QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt.qt${QT_VER_MAIN}.${QT_URL_VER}.win64_${QT_VER_MSVC}_64/${QT_VER}-0-${QT_DATE}"
-QT_SUFFIX="-Windows-Windows_10-${QT_VER_MSVC_UP}-Windows-Windows_10-X86_64.7z"
-QT_BASE_URL="${QT_HOST}${QT_PREFIX}qtbase${QT_SUFFIX}"
-QT_WINE_URL="${QT_HOST}${QT_PREFIX}qtwinextras${QT_SUFFIX}"
-QT_DECL_URL="${QT_HOST}${QT_PREFIX}qtdeclarative${QT_SUFFIX}"
-QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}"
-QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}"
-QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}"
+QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt.qt${QT_VER_MAIN}.${QT_URL_VER}."
+QT_PREFIX_2="win64_${QT_VER_MSVC}_64/${QT_VER}-0-${QT_DATE}"
+QT_SUFFIX="-Windows-Windows_10_22H2-${QT_VER_MSVC_UP}-Windows-Windows_10_22H2-X86_64.7z"
+QT_BASE_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qtbase${QT_SUFFIX}"
+QT_DECL_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qtdeclarative${QT_SUFFIX}"
+QT_TOOL_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qttools${QT_SUFFIX}"
+QT_MM_URL="${QT_HOST}${QT_PREFIX}addons.qtmultimedia.${QT_PREFIX_2}qtmultimedia${QT_SUFFIX}"
+QT_SVG_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qtsvg${QT_SUFFIX}"
+QT_5CMP_URL="${QT_HOST}${QT_PREFIX}qt5compat.${QT_PREFIX_2}qt5compat${QT_SUFFIX}"
 LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z'
 GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z'
 VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe"

 DEP_URLS="         \
    $QT_BASE_URL   \
-    $QT_WINE_URL   \
    $QT_DECL_URL   \
    $QT_TOOL_URL   \
    $QT_MM_URL     \
    $QT_SVG_URL    \
+    $QT_5CMP_URL   \
    $LLVMLIBS_URL  \
    $GLSLANG_URL   \
    $VULKAN_SDK_URL"
@ -59,7 +60,7 @@ download_and_verify()
    fileName="$4"

    for _ in 1 2 3; do
-        [ -e "$CACHE_DIR/$fileName" ] || curl -L -o "$CACHE_DIR/$fileName" "$url"
+        [ -e "$CACHE_DIR/$fileName" ] || curl -fLo "$CACHE_DIR/$fileName" "$url"
        fileChecksum=$("${algo}sum" "$CACHE_DIR/$fileName" | awk '{ print $1 }')
        [ "$fileChecksum" = "$correctChecksum" ] && return 0
        rm "$CACHE_DIR/$fileName"
@ -78,9 +79,9 @@ for url in $DEP_URLS; do

    # shellcheck disable=SC1003
    case "$url" in
-    *qt*) checksum=$(curl -L "${url}.sha1"); algo="sha1"; outDir='C:\Qt\' ;;
-    *llvm*) checksum=$(curl -L "${url}.sha256"); algo="sha256"; outDir="./3rdparty/llvm" ;;
-    *glslang*) checksum=$(curl -L "${url}.sha256"); algo="sha256"; outDir="./lib/Release-x64" ;;
+    *qt*) checksum=$(curl -fL "${url}.sha1"); algo="sha1"; outDir='C:\Qt\' ;;
+    *llvm*) checksum=$(curl -fL "${url}.sha256"); algo="sha256"; outDir="./3rdparty/llvm" ;;
+    *glslang*) checksum=$(curl -fL "${url}.sha256"); algo="sha256"; outDir="./lib/Release-x64" ;;
    *Vulkan*)
        # Vulkan setup needs to be run in batch environment
        # Need to subshell this or else it doesn't wait
--- a/.cirrus.yml
+++ b/.cirrus.yml
@ -6,103 +6,98 @@ env:
  BUILD_SOURCEVERSION: $CIRRUS_CHANGE_IN_REPO
  BUILD_SOURCEBRANCHNAME: $CIRRUS_BRANCH
  RPCS3_TOKEN: ENCRYPTED[!a4c3850e29ab150692286a74bec29819d25971a7ec431b86de2a35f7ed90c5b2ab3c93469f9298e30924d843599110e9!]
+  QT_VER_MAIN: '6'
+  QT_VER: '6.5.2'

-windows_task:
-  matrix:
-    - name: Cirrus Windows
-      windows_container:
-        image: cirrusci/windowsservercore:visualstudio2019
-        cpu: 8
-        memory: 16G
-  env:
-    CIRRUS_SHELL: "bash"
-    COMPILER: msvc
-    QT_VER_MAIN: '5'
-    BUILD_ARTIFACTSTAGINGDIRECTORY: ${CIRRUS_WORKING_DIR}\artifacts\
-    QT_VER: '5.15.2'
-    QT_VER_MSVC: 'msvc2019'
-    QT_DATE: '202011130602'
-    QTDIR: C:\Qt\${QT_VER}\${QT_VER_MSVC}_64
-    VULKAN_VER: '1.3.224.1'
-    VULKAN_SDK_SHA: '2029e652e39ee6a6036cff3765da31e1e6c595fd2413d3cd111dfab7855621ea'
-    VULKAN_SDK: C:\VulkanSDK\${VULKAN_VER}
-    CACHE_DIR: "./cache"
-    UPLOAD_COMMIT_HASH: 7d09e3be30805911226241afbb14f8cdc2eb054e
-    UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-win"
-  deps_cache:
-    folder: "./cache"
-  #obj_cache:
-  #  folder: "./tmp"
-  #obj2_cache:
-  #  folder: "./rpcs3/x64"
-  setup_script:
-    - './.ci/get_keys-windows.sh'
-    - './.ci/setup-windows.sh'
-#   - choco install -y python # Needed for SPIRV, use either this or make a new Docker image
-#  spirv_script:
-#   - export PATH=${PATH}:"C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin"
-#   - cd "${CIRRUS_WORKING_DIR}/3rdparty/SPIRV"
-#   - msbuild.exe spirv.vcxproj //p:Configuration=Release //m
-  rpcs3_script:
-    - export PATH=${PATH}:"C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin"
-    - msbuild.exe rpcs3.sln //p:Configuration=Release //m
-  deploy_script:
-    - mkdir artifacts
-    - source './.ci/export-cirrus-vars.sh'
-    - './.ci/deploy-windows.sh'
-  artifacts:
-    name: Artifact
-    path: "*.7z*"
-  push_script: |
-    if [ "$CIRRUS_REPO_OWNER" = "RPCS3" ] && [ -z "$CIRRUS_PR" ] && [ "$CIRRUS_BRANCH" = "master" ]; then
-      source './.ci/export-cirrus-vars.sh'
-      './.ci/github-upload.sh'
-    fi;
+# windows_task:
+#   matrix:
+#     - name: Cirrus Windows
+#       windows_container:
+#         image: cirrusci/windowsservercore:visualstudio2019
+#         cpu: 8
+#         memory: 16G
+#   env:
+#     CIRRUS_SHELL: "bash"
+#     COMPILER: msvc
+#     BUILD_ARTIFACTSTAGINGDIRECTORY: ${CIRRUS_WORKING_DIR}\artifacts\
+#     QT_VER_MSVC: 'msvc2019'
+#     QT_DATE: '202307080351'
+#     QTDIR: C:\Qt\${QT_VER}\${QT_VER_MSVC}_64
+#     VULKAN_VER: '1.3.224.1'
+#     VULKAN_SDK_SHA: '2029e652e39ee6a6036cff3765da31e1e6c595fd2413d3cd111dfab7855621ea'
+#     VULKAN_SDK: C:\VulkanSDK\${VULKAN_VER}
+#     CACHE_DIR: "./cache"
+#     UPLOAD_COMMIT_HASH: 7d09e3be30805911226241afbb14f8cdc2eb054e
+#     UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-win"
+#   deps_cache:
+#     folder: "./cache"
+#   #obj_cache:
+#   #  folder: "./tmp"
+#   #obj2_cache:
+#   #  folder: "./rpcs3/x64"
+#   setup_script:
+#     - './.ci/get_keys-windows.sh'
+#     - './.ci/setup-windows.sh'
+#   rpcs3_script:
+#     - export PATH=${PATH}:"C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin"
+#     - msbuild.exe rpcs3.sln //p:Configuration=Release //m
+#   deploy_script:
+#     - mkdir artifacts
+#     - source './.ci/export-cirrus-vars.sh'
+#     - './.ci/deploy-windows.sh'
+#   artifacts:
+#     name: Artifact
+#     path: "*.7z*"
+#   push_script: |
+#     if [ "$CIRRUS_REPO_OWNER" = "RPCS3" ] && [ -z "$CIRRUS_PR" ] && [ "$CIRRUS_BRANCH" = "master" ]; then
+#       source './.ci/export-cirrus-vars.sh'
+#       './.ci/github-upload.sh'
+#     fi;

-linux_task:
-  container:
-    image: rpcs3/rpcs3-ci-bionic:1.8
-    cpu: 4
-    memory: 16G
-  env:
-    BUILD_ARTIFACTSTAGINGDIRECTORY: ${CIRRUS_WORKING_DIR}/artifacts
-    ARTDIR: ${CIRRUS_WORKING_DIR}/artifacts/
-    CCACHE_DIR: "/tmp/ccache_dir"
-    CCACHE_MAXSIZE: 300M
-    CI_HAS_ARTIFACTS: true
-    UPLOAD_COMMIT_HASH: d812f1254a1157c80fd402f94446310560f54e5f
-    UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux"
-    DEPLOY_APPIMAGE: true
-    APPDIR: "./appdir"
-    RELEASE_MESSAGE: "../GitHubReleaseMessage.txt"
-  ccache_cache:
-    folder: "/tmp/ccache_dir"
-  matrix:
-    - name: Cirrus Linux GCC
-      env:
-        COMPILER: gcc
-      gcc_script:
-        - mkdir artifacts
-        - ".ci/build-linux.sh"
-    - name: Cirrus Linux Clang
-      env:
-        COMPILER: clang
-      clang_script:
-        - mkdir artifacts
-        - ".ci/build-linux.sh"
-  artifacts:
-    name: Artifact
-    path: "artifacts/*"
-  push_script: |
-    if [ "$CIRRUS_REPO_OWNER" = "RPCS3" ] && [ -z "$CIRRUS_PR" ] && [ "$CIRRUS_BRANCH" = "master" ] && [ "$COMPILER" = "gcc" ]; then
-      COMM_TAG=$(awk '/version{.*}/ { printf("%d.%d.%d", $5, $6, $7) }' ./rpcs3/rpcs3_version.cpp)
-      COMM_COUNT=$(git rev-list --count HEAD)
-      COMM_HASH=$(git rev-parse --short=8 HEAD)
+# linux_task:
+#   container:
+#     image: rpcs3/rpcs3-ci-focal:1.1
+#     cpu: 4
+#     memory: 16G
+#   env:
+#     BUILD_ARTIFACTSTAGINGDIRECTORY: ${CIRRUS_WORKING_DIR}/artifacts
+#     ARTDIR: ${CIRRUS_WORKING_DIR}/artifacts/
+#     CCACHE_DIR: "/tmp/ccache_dir"
+#     CCACHE_MAXSIZE: 300M
+#     CI_HAS_ARTIFACTS: true
+#     UPLOAD_COMMIT_HASH: d812f1254a1157c80fd402f94446310560f54e5f
+#     UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux"
+#     DEPLOY_APPIMAGE: true
+#     APPDIR: "./appdir"
+#     RELEASE_MESSAGE: "../GitHubReleaseMessage.txt"
+#   ccache_cache:
+#     folder: "/tmp/ccache_dir"
+#   matrix:
+#     - name: Cirrus Linux GCC
+#       env:
+#         COMPILER: gcc
+#       gcc_script:
+#         - mkdir artifacts
+#         - ".ci/build-linux.sh"
+#     - name: Cirrus Linux Clang
+#       env:
+#         COMPILER: clang
+#       clang_script:
+#         - mkdir artifacts
+#         - ".ci/build-linux.sh"
+#   artifacts:
+#     name: Artifact
+#     path: "artifacts/*"
+#   push_script: |
+#     if [ "$CIRRUS_REPO_OWNER" = "RPCS3" ] && [ -z "$CIRRUS_PR" ] && [ "$CIRRUS_BRANCH" = "master" ] && [ "$COMPILER" = "gcc" ]; then
+#       COMM_TAG=$(awk '/version{.*}/ { printf("%d.%d.%d", $5, $6, $7) }' ./rpcs3/rpcs3_version.cpp)
+#       COMM_COUNT=$(git rev-list --count HEAD)
+#       COMM_HASH=$(git rev-parse --short=8 HEAD)

-      export AVVER="${COMM_TAG}-${COMM_COUNT}"
+#       export AVVER="${COMM_TAG}-${COMM_COUNT}"

-      .ci/github-upload.sh
-    fi;
+#       .ci/github-upload.sh
+#     fi;

 freebsd_task:
  matrix:
--- a/.gitmodules
+++ b/.gitmodules
@ -87,3 +87,4 @@
 [submodule "3rdparty/rtmidi/rtmidi"]
 	path = 3rdparty/rtmidi/rtmidi
 	url = ../../thestk/rtmidi
+	ignore = dirty
--- a/3rdparty/CMakeLists.txt
+++ b/3rdparty/CMakeLists.txt
@ -267,32 +267,17 @@ if(USE_SYSTEM_FFMPEG)
 	target_link_libraries(3rdparty_ffmpeg INTERFACE ${FFMPEG_LIBRARIES})
 else()
 	if (NOT MSVC AND WIN32)
-		message(STATUS "RPCS3: building ffmpeg submodule")
-
-		include(ProcessorCount)
-		ProcessorCount(N)
-		
-		ExternalProject_Add(ffmpeg-mingw
-			DOWNLOAD_COMMAND ""
-			SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ffmpeg
-			BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/ffmpeg
-			CONFIGURE_COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/ffmpeg/configure --prefix=./windows/x86_64 --arch=x86_64 --disable-avdevice --disable-programs --disable-avfilter --disable-postproc --disable-doc --disable-pthreads --enable-w32threads --disable-network --disable-everything --disable-encoders --disable-muxers --disable-hwaccels --disable-parsers --disable-protocols --enable-dxva2 --enable-static --disable-shared --enable-decoder=aac --enable-decoder=aac_latm --enable-decoder=atrac3 --enable-decoder=atrac3p --enable-decoder=mp3 --enable-decoder=pcm_s16le --enable-decoder=pcm_s8 --enable-decoder=h264 --enable-decoder=mpeg4 --enable-decoder=mpeg2video --enable-decoder=mjpeg --enable-decoder=mjpegb --enable-encoder=pcm_s16le --enable-encoder=ffv1 --enable-encoder=mpeg4 --enable-parser=h264 --enable-parser=mpeg4video --enable-parser=mpegaudio --enable-parser=mpegvideo --enable-parser=mjpeg --enable-parser=aac --enable-parser=aac_latm --enable-muxer=avi --enable-demuxer=h264 --enable-demuxer=m4v --enable-demuxer=mp3 --enable-demuxer=mpegvideo --enable-demuxer=mpegps --enable-demuxer=mjpeg --enable-demuxer=avi --enable-demuxer=aac --enable-demuxer=pmp --enable-demuxer=oma --enable-demuxer=pcm_s16le --enable-demuxer=pcm_s8 --enable-demuxer=wav --enable-hwaccel=h264_dxva2 --enable-indev=dshow --enable-protocol=file
-			BUILD_COMMAND make -j ${N}
-			INSTALL_COMMAND make install
-		)
-
-		target_link_directories(3rdparty_ffmpeg INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/ffmpeg/windows/x86_64/lib")
-		target_link_libraries(3rdparty_ffmpeg INTERFACE avformat avcodec avutil swscale swresample iconv)
+		message(FATAL_ERROR "-- RPCS3: building ffmpeg submodule is currently not supported")
 	else()
 		message(STATUS "RPCS3: using builtin ffmpeg")

 		if (WIN32)
-			set(FFMPEG_LIB_DIR "ffmpeg/windows/x86_64")
+			set(FFMPEG_LIB_DIR "ffmpeg/lib/windows/x86_64")
 			target_link_libraries(3rdparty_ffmpeg INTERFACE "Bcrypt.lib")
 		elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
-			set(FFMPEG_LIB_DIR "ffmpeg/linux/x86_64")
+			set(FFMPEG_LIB_DIR "ffmpeg/lib/linux/ubuntu-20.04/x86_64")
 		elseif(APPLE)
-			set(FFMPEG_LIB_DIR "ffmpeg/macos/x86_64")
+			set(FFMPEG_LIB_DIR "ffmpeg/lib/macos/x86_64")
 		else()
 			message(FATAL_ERROR "Prebuilt ffmpeg is not available on this platform! Try USE_SYSTEM_FFMPEG=ON.")
 		endif()
--- a/3rdparty/FAudio
+++ b/3rdparty/FAudio
@ -1 +1 @@
-Subproject commit 58cf606b5f718883e5dffbafdec44859d4e304ec
+Subproject commit 29a7d3a726383a3907baf4930d2c4d4da773b023
--- a/3rdparty/MoltenVK/CMakeLists.txt
+++ b/3rdparty/MoltenVK/CMakeLists.txt
@ -4,7 +4,7 @@ include(ExternalProject)

 ExternalProject_Add(moltenvk
 	GIT_REPOSITORY https://github.com/KhronosGroup/MoltenVK.git
-	GIT_TAG 4c6bfbe
+	GIT_TAG 02a8c01
 	BUILD_IN_SOURCE 1
 	SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK
 	CONFIGURE_COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK/fetchDependencies" --macos
--- a/3rdparty/curl/curl
+++ b/3rdparty/curl/curl
@ -1 +1 @@
-Subproject commit 7ab9d43720bc34d9aa351c7ca683c1668ebf8335
+Subproject commit 50490c0679fcd0e50bb3a8fbf2d9244845652cf0
--- a/3rdparty/ffmpeg
+++ b/3rdparty/ffmpeg
@ -1 +1 @@
-Subproject commit bf019f8c88bc64638fccef62840e935ab2689a4a
+Subproject commit 4f8bcb1555767522c2b21bfb08e5dcadad99ff62
--- a/3rdparty/libsdl-org/SDL
+++ b/3rdparty/libsdl-org/SDL
@ -1 +1 @@
-Subproject commit ffa78e6bead23e2ba3adf8ec2367ff2218d4343c
+Subproject commit 031912c4b6c5db80b443f04aa56fec3e4e645153
--- a/3rdparty/llvm/CMakeLists.txt
+++ b/3rdparty/llvm/CMakeLists.txt
@ -14,8 +14,8 @@ if(WITH_LLVM)
 		option(LLVM_INCLUDE_TESTS OFF)
 		option(LLVM_INCLUDE_TOOLS OFF)
 		option(LLVM_INCLUDE_UTILS OFF)
-		# we globally enable ccache
-		set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
+		option(LLVM_CCACHE_BUILD ON)
+		set(LLVM_ENABLE_WARNINGS OFF CACHE BOOL "Enable compiler warnings.")

 		if(WIN32)
 			set(LLVM_USE_INTEL_JITEVENTS ON)
--- a/3rdparty/qt5.cmake
+++ b/3rdparty/qt5.cmake
@ -1,45 +0,0 @@
-add_library(3rdparty_qt5 INTERFACE)
-
-set(QT_MIN_VER 5.15.2)
-
-find_package(Qt5 ${QT_MIN_VER} CONFIG COMPONENTS Widgets Concurrent Multimedia MultimediaWidgets Svg)
-if(WIN32)
-	find_package(Qt5 ${QT_MIN_VER} COMPONENTS WinExtras REQUIRED)
-	target_link_libraries(3rdparty_qt5 INTERFACE Qt5::Widgets Qt5::WinExtras Qt5::Concurrent Qt5::Multimedia Qt5::MultimediaWidgets Qt5::Svg)
-else()
-	find_package(Qt5 ${QT_MIN_VER} COMPONENTS DBus Gui)
-	if(Qt5DBus_FOUND)
-		target_link_libraries(3rdparty_qt5 INTERFACE Qt5::Widgets Qt5::DBus Qt5::Concurrent Qt5::Multimedia Qt5::MultimediaWidgets Qt5::Svg)
-		target_compile_definitions(3rdparty_qt5 INTERFACE -DHAVE_QTDBUS)
-	else()
-		target_link_libraries(3rdparty_qt5 INTERFACE Qt5::Widgets Qt5::Concurrent Qt5::Multimedia Qt5::MultimediaWidgets Qt5::Svg)
-	endif()
-	target_include_directories(3rdparty_qt5 INTERFACE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
-endif()
-
-if(NOT Qt5Widgets_FOUND)
-	if(Qt5Widgets_VERSION VERSION_LESS ${QT_MIN_VER})
-		message("Minimum supported Qt5 version is ${QT_MIN_VER}! You have version ${Qt5Widgets_VERSION} installed, please upgrade!")
-		if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
-			message(FATAL_ERROR "Most distros do not provide an up-to-date version of Qt.
-If you're on Ubuntu or Linux Mint, there are PPAs you can use to install one of the latest qt5 versions.
-Find the correct ppa at https://launchpad.net/~beineri and follow the instructions.")
-		elseif(WIN32)
-			message(FATAL_ERROR "You can download the latest version of Qt5 here: https://www.qt.io/download-open-source/")
-		else()
-			message(FATAL_ERROR "Look online for instructions on installing an up-to-date Qt5 on ${CMAKE_SYSTEM}.")
-		endif()
-	endif()
-
-	message("CMake was unable to find Qt5!")
-	if(WIN32)
-		message(FATAL_ERROR "Make sure the QTDIR env variable has been set properly. (for example C:\\Qt\\${QT_MIN_VER}\\msvc2019_64\\)
-You can also try setting the Qt5_DIR preprocessor definiton.")
-	elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
-		message(FATAL_ERROR "Make sure to install your distro's qt5 package!")
-	else()
-		message(FATAL_ERROR "You need to have Qt5 installed, look online for instructions on installing Qt5 on ${CMAKE_SYSTEM}.")
-	endif()
-endif()
-
-add_library(3rdparty::qt5 ALIAS 3rdparty_qt5)
--- a/3rdparty/qt6.cmake
+++ b/3rdparty/qt6.cmake
@ -0,0 +1,44 @@
+add_library(3rdparty_qt6 INTERFACE)
+
+set(QT_MIN_VER 6.2.4)
+
+find_package(Qt6 ${QT_MIN_VER} CONFIG COMPONENTS Widgets Concurrent Multimedia MultimediaWidgets Svg SvgWidgets)
+if(WIN32)
+	target_link_libraries(3rdparty_qt6 INTERFACE Qt6::Widgets Qt6::Concurrent Qt6::Multimedia Qt6::MultimediaWidgets Qt6::Svg Qt6::SvgWidgets)
+else()
+	find_package(Qt6 ${QT_MIN_VER} COMPONENTS DBus Gui)
+	if(Qt6DBus_FOUND)
+		target_link_libraries(3rdparty_qt6 INTERFACE Qt6::Widgets Qt6::DBus Qt6::Concurrent Qt6::Multimedia Qt6::MultimediaWidgets Qt6::Svg Qt6::SvgWidgets)
+		target_compile_definitions(3rdparty_qt6 INTERFACE -DHAVE_QTDBUS)
+	else()
+		target_link_libraries(3rdparty_qt6 INTERFACE Qt6::Widgets Qt6::Concurrent Qt6::Multimedia Qt6::MultimediaWidgets Qt6::Svg Qt6::SvgWidgets)
+	endif()
+	target_include_directories(3rdparty_qt6 INTERFACE ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
+endif()
+
+if(Qt6Widgets_FOUND)
+	if(Qt6Widgets_VERSION VERSION_LESS ${QT_MIN_VER})
+		message("Minimum supported Qt version is ${QT_MIN_VER}! You have version ${Qt6Widgets_VERSION} installed, please upgrade!")
+		if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+			message(FATAL_ERROR "Most distros do not provide an up-to-date version of Qt.
+If you're on Ubuntu or Linux Mint, there are PPAs you can use to install one of the latest qt6 versions.
+Find the correct ppa at https://launchpad.net/~beineri and follow the instructions.")
+		elseif(WIN32)
+			message(FATAL_ERROR "You can download the latest version of Qt6 here: https://www.qt.io/download-open-source/")
+		else()
+			message(FATAL_ERROR "Look online for instructions on installing an up-to-date Qt6 on ${CMAKE_SYSTEM}.")
+		endif()
+	endif()
+else()
+	message("CMake was unable to find Qt6!")
+	if(WIN32)
+		message(FATAL_ERROR "Make sure the QTDIR env variable has been set properly. (for example C:\\Qt\\${QT_MIN_VER}\\msvc2019_64\\)
+You can also try setting the Qt6_DIR preprocessor definiton.")
+	elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+		message(FATAL_ERROR "Make sure to install your distro's qt6 package!")
+	else()
+		message(FATAL_ERROR "You need to have Qt6 installed, look online for instructions on installing Qt6 on ${CMAKE_SYSTEM}.")
+	endif()
+endif()
+
+add_library(3rdparty::qt6 ALIAS 3rdparty_qt6)
--- a/3rdparty/rtmidi/rtmidi
+++ b/3rdparty/rtmidi/rtmidi
@ -1 +1 @@
-Subproject commit 84a99422a3faf1ab417fe71c0903a48debb9376a
+Subproject commit 1e5b49925aa60065db52de44c366d446a902547b
--- a/3rdparty/xxHash
+++ b/3rdparty/xxHash
@ -1 +1 @@
-Subproject commit 35b0373c697b5f160d3db26b1cbb45a0d5ba788c
+Subproject commit bbb27a5efb85b92a0486cf361a8635715a53f6ba
--- a/3rdparty/zlib/zlib
+++ b/3rdparty/zlib/zlib
@ -1 +1 @@
-Subproject commit 04f42ceca40f73e2978b50e93806c2a18c1281fc
+Subproject commit 09155eaa2f9270dc4ed1fa13e2b4b2613e6e4851
--- a/BUILDING.md
+++ b/BUILDING.md
@ -9,11 +9,11 @@ Other instructions may be found [here](https://wiki.rpcs3.net/index.php?title=Bu

 * [CMake 3.16.9+](https://www.cmake.org/download/) (add to PATH)
 * [Python 3.6+](https://www.python.org/downloads/) (add to PATH)
-* [Qt 5.15.2](https://www.qt.io/download-qt-installer)
+* [Qt 6.5.2](https://www.qt.io/download-qt-installer)
 * [Visual Studio 2019](https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community)
-* [Vulkan SDK 1.3.224+](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html))
+* [Vulkan SDK 1.3.224](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html)). Newer SDKs currently do not work; you need exactly 1.3.224.

-**Either add the** `QTDIR` **environment variable, e.g.** `<QtInstallFolder>\5.15.2\msvc2019_64\` **, or use the [Visual Studio Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2019)**
+**Either add the** `QTDIR` **environment variable, e.g.** `<QtInstallFolder>\6.5.2\msvc2019_64\` **, or use the [Visual Studio Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2019)**

 ### Linux

@ -21,15 +21,15 @@ These are the essentials tools to build RPCS3 on Linux. Some of them can be inst

 * Clang 12+ or GCC 11+
 * [CMake 3.16.9+](https://www.cmake.org/download/)
-* [Qt 5.15.2](https://www.qt.io/download-qt-installer)
-* [Vulkan SDK 1.3.224+](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html))
+* [Qt 6.5.2](https://www.qt.io/download-qt-installer)
+* [Vulkan SDK 1.3.224](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html)). Newer SDK versions do not currently work, so you need exactly version 1.3.224.
 * [SDL2](https://github.com/libsdl-org/SDL/releases) (for the FAudio backend)

 **If you have an NVIDIA GPU, you may need to install the libglvnd package.**

 #### Arch Linux

-    sudo pacman -S glew openal cmake vulkan-validation-layers qt5-base qt5-declarative qt5-multimedia sdl2 sndio jack2 base-devel
+    sudo pacman -S glew openal cmake vulkan-validation-layers qt6-base qt6-declarative qt6-multimedia sdl2 sndio jack2 base-devel

 #### Debian & Ubuntu

@ -38,14 +38,7 @@ These are the essentials tools to build RPCS3 on Linux. Some of them can be inst
 Ubuntu is usually horrendously out of date, and some packages need to be downloaded by hand. This part is for Qt, GCC, Vulkan, and CMake
 ##### Qt PPA

-Ubuntu usually does not have a new enough Qt package to suit rpcs3's needs. There is a PPA available to work around this. Run the following:
-```
-. /etc/os-release
-sudo add-apt-repository ppa:beineri/opt-qt-5.15.2-$UBUNTU_CODENAME
-sudo apt-get update
-sudo apt-get install qt515base qt515svg
-. /opt/qt515/bin/qt515-env.sh >/dev/null 2>&1
-```
+Ubuntu usually does not have a new enough Qt package to suit rpcs3's needs. There is currently no PPA available to work around this.

 ##### GCC 11.x installation

@ -64,7 +57,7 @@ For Ubuntu systems, it is strongly recommended to use the PPA from [LunarG](http
 ```
 . /etc/os-release
 wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
-sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.2.198-$UBUNTU_CODENAME.list https://packages.lunarg.com/vulkan/1.2.198/lunarg-vulkan-1.2.198-$UBUNTU_CODENAME.list
+sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-1.3.224-$UBUNTU_CODENAME.list https://packages.lunarg.com/vulkan/1.3.224/lunarg-vulkan-1.3.224-$UBUNTU_CODENAME.list
 sudo apt update
 sudo apt install vulkan-sdk
 ```
@ -82,11 +75,11 @@ sudo apt-get install cmake

 #### Fedora

-    sudo dnf install alsa-lib-devel cmake glew glew-devel libatomic libevdev-devel libudev-devel openal-devel qt5-qtbase-devel qt5-qtbase-private-devel vulkan-devel pipewire-jack-audio-connection-kit-devel qt5-qtmultimedia-devel qt5-qtsvg-devel
+    sudo dnf install alsa-lib-devel cmake glew glew-devel libatomic libevdev-devel libudev-devel openal-devel qt6-qtbase-devel qt6-qtbase-private-devel vulkan-devel pipewire-jack-audio-connection-kit-devel qt6-qtmultimedia-devel qt6-qtsvg-devel

 #### OpenSUSE

-    sudo zypper install git cmake libasound2 libpulse-devel openal-soft-devel glew-devel zlib-devel libedit-devel vulkan-devel libudev-devel libqt5-qtbase-devel libqt5-qtmultimedia-devel libqt5-qtsvg-devel libQt5Gui-private-headers-devel libevdev-devel libsndio7_1 libjack-devel
+    sudo zypper install git cmake libasound2 libpulse-devel openal-soft-devel glew-devel zlib-devel libedit-devel vulkan-devel libudev-devel libqt6-qtbase-devel libqt6-qtmultimedia-devel libqt6-qtsvg-devel libQt6Gui-private-headers-devel libevdev-devel libsndio7_1 libjack-devel

 ## Setup the project

@ -103,7 +96,7 @@ git submodule update --init
 #### Configuring the Qt plugin (if used)

 1) Go to `Extensions->Qt VS Tools->Qt Versions`.
-2) Add the path to your Qt installation with compiler e.g. `<QtInstallFolder>\5.15.2\msvc2019_64`, version will fill in automatically.
+2) Add the path to your Qt installation with compiler e.g. `<QtInstallFolder>\6.5.2\msvc2019_64`, version will fill in automatically.
 3) Go to `Extensions->Qt VS Tools->Options->Legacy Project Format`.
 4) Set `Build: Run pre-build setup` to `true`.

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -26,6 +26,7 @@ option(USE_VULKAN "Vulkan render backend" ON)
 option(USE_PRECOMPILED_HEADERS "Use precompiled headers" OFF)
 option(USE_SDL "Enables SDL input handler" OFF)
 option(USE_SYSTEM_SDL "Prefer system SDL instead of the builtin one" OFF)
+option(USE_SYSTEM_FFMPEG "Prefer system ffmpeg instead of the prebuild one" OFF)

 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/buildfiles/cmake")

@ -104,18 +105,11 @@ if(CCACHE_FOUND)
    set(CMAKE_CXX_COMPILER_LAUNCHER ccache)
 endif()

-if(WIN32)
-    add_compile_definitions(UNICODE)
-    add_compile_definitions(_WIN32_WINNT=0x0602)
-endif()
-
 if(APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
    include_directories(/opt/homebrew/include)
    link_directories(/opt/homebrew/lib)
 endif()

-set(LLVM_ENABLE_WARNINGS OFF CACHE BOOL "Enable compiler warnings.")
-
 if(MSVC)
    add_compile_options(/wd4530 /utf-8) # C++ exception handler used, but unwind semantics are not enabled
 endif()
--- a/Utilities/Config.h
+++ b/Utilities/Config.h
@ -6,6 +6,7 @@
 #include "util/atomic.hpp"
 #include "util/shared_ptr.hpp"

+#include <algorithm>
 #include <utility>
 #include <string>
 #include <vector>
--- a/Utilities/File.cpp
+++ b/Utilities/File.cpp
@ -1303,12 +1303,12 @@ fs::file::file(const std::string& path, bs_t<open_mode> mode)

 				DWORD nwritten = 0;
 				OVERLAPPED ovl{};
-				const u64 pos = m_pos;
+				const u64 pos = m_pos.fetch_add(size);
 				ovl.Offset = DWORD(pos);
 				ovl.OffsetHigh = DWORD(pos >> 32);
 				ensure(WriteFile(m_handle, data, size, &nwritten, &ovl)); // "file::write"
+				ensure(nwritten == size);
 				nwritten_sum += nwritten;
-				m_pos += nwritten;

 				if (nwritten < size)
 				{
--- a/Utilities/Thread.cpp
+++ b/Utilities/Thread.cpp
@ -1637,7 +1637,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe
 			if (!g_tls_access_violation_recovered)
 			{
 				vm_log.notice("\n%s", dump_useful_thread_info());
-				vm_log.error("[%s] Access violation %s location 0x%x (%s)", is_writing ? "writing" : "reading", cpu->get_name(), addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory");
+				vm_log.error("[%s] Access violation %s location 0x%x (%s)", cpu->get_name(), is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory");
 			}

 			// TODO:
@ -1663,6 +1663,11 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe
 		}
 	}

+	if (cpu)
+	{
+		cpu->state += cpu_flag::wait;
+	}
+
 	Emu.Pause(true);

 	if (!g_tls_access_violation_recovered)
@ -2010,37 +2015,8 @@ thread_local DECLARE(thread_ctrl::g_tls_error_callback) = nullptr;

 DECLARE(thread_ctrl::g_native_core_layout) { native_core_arrangement::undefined };

-static atomic_t<u128, 64> s_thread_bits{0};
-
-static atomic_t<thread_base**> s_thread_pool[128]{};
-
 void thread_base::start()
 {
-	for (u128 bits = s_thread_bits.load(); bits; bits &= bits - 1)
-	{
-		const u32 pos = utils::ctz128(bits);
-
-		if (!s_thread_pool[pos])
-		{
-			continue;
-		}
-
-		thread_base** tls = s_thread_pool[pos].exchange(nullptr);
-
-		if (!tls)
-		{
-			continue;
-		}
-
-		// Receive "that" native thread handle, sent "this" thread_base
-		const u64 _self = reinterpret_cast<u64>(atomic_storage<thread_base*>::load(*tls));
-		m_thread.release(_self);
-		ensure(_self != reinterpret_cast<u64>(this));
-		atomic_storage<thread_base*>::store(*tls, this);
-		s_thread_pool[pos].notify_one();
-		return;
-	}
-
 #ifdef _WIN32
 	m_thread = ::_beginthreadex(nullptr, 0, entry_point, this, CREATE_SUSPENDED, nullptr);
 	ensure(m_thread);
@ -2203,14 +2179,14 @@ u64 thread_base::finalize(thread_state result_state) noexcept
 	const u64 _self = m_thread;

 	// Set result state (errored or finalized)
-	m_sync.fetch_op([&](u64& v)
+	m_sync.fetch_op([&](u32& v)
 	{
 		v &= -4;
 		v |= static_cast<u32>(result_state);
 	});

 	// Signal waiting threads
-	m_sync.notify_all(2);
+	m_sync.notify_all();

 	return _self;
 }
@ -2234,112 +2210,13 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept
 		return nullptr;
 	}

-	// Try to add self to thread pool
-	set_name("..pool");
-
-	thread_ctrl::set_native_priority(0);
-
-	thread_ctrl::set_thread_affinity_mask(0);
-
-	std::fesetround(FE_TONEAREST);
-
-	gv_unset_zeroing_denormals();
-
-	static constexpr u64 s_stop_bit = 0x8000'0000'0000'0000ull;
-
-	static atomic_t<u64> s_pool_ctr = []
-	{
-		std::atexit([]
-		{
-			s_pool_ctr |= s_stop_bit;
-
-			while (/*u64 remains = */s_pool_ctr & ~s_stop_bit)
-			{
-				for (u32 i = 0; i < std::size(s_thread_pool); i++)
-				{
-					if (thread_base** ptls = s_thread_pool[i].exchange(nullptr))
-					{
-						// Extract thread handle
-						const u64 _self = reinterpret_cast<u64>(*ptls);
-
-						// Wake up a thread and make sure it's joined
-						s_thread_pool[i].notify_one();
-
 #ifdef _WIN32
-						const HANDLE handle = reinterpret_cast<HANDLE>(_self);
-						WaitForSingleObject(handle, INFINITE);
-						CloseHandle(handle);
+	_endthreadex(0);
 #else
-						pthread_join(reinterpret_cast<pthread_t>(_self), nullptr);
+	pthread_exit(nullptr);
 #endif
-					}
-				}
-			}
-		});

-		return 0;
-	}();
-
-	s_pool_ctr++;
-
-	u32 pos = -1;
-
-	while (true)
-	{
-		const auto [bits, ok] = s_thread_bits.fetch_op([](u128& bits)
-		{
-			if (~bits) [[likely]]
-			{
-				// Set lowest clear bit
-				bits |= bits + 1;
-				return true;
-			}
-
-			return false;
-		});
-
-		if (ok) [[likely]]
-		{
-			pos = utils::ctz128(~bits);
-			break;
-		}
-
-		s_thread_bits.wait(bits);
-	}
-
-	const auto tls = &thread_ctrl::g_tls_this_thread;
-	s_thread_pool[pos] = tls;
-
-	atomic_wait::list<2> list{};
-	list.set<0>(s_pool_ctr, 0, s_stop_bit);
-	list.set<1>(s_thread_pool[pos], tls);
-
-	while (s_thread_pool[pos] == tls || atomic_storage<thread_base*>::load(*tls) == fake_self)
-	{
-		list.wait();
-
-		if (s_pool_ctr & s_stop_bit)
-		{
-			break;
-		}
-	}
-
-	// Free thread pool slot
-	s_thread_bits.atomic_op([pos](u128& val)
-	{
-		val &= ~(u128(1) << pos);
-	});
-
-	s_thread_bits.notify_one();
-
-	if (--s_pool_ctr & s_stop_bit)
-	{
-		return nullptr;
-	}
-
-	// Return new entry point
-	utils::prefetch_exec((*tls)->entry_point);
-	return (*tls)->entry_point;
+	return nullptr;
 }

 thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* _base))
@ -2396,8 +2273,18 @@ thread_state thread_ctrl::state()

 void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 {
+	if (!usec)
+	{
+		return;
+	}
+
 	auto _this = g_tls_this_thread;

+	if (!alert && usec > 50000)
+	{
+		usec = 50000;
+	}
+
 #ifdef __linux__
 	static thread_local struct linux_timer_handle_t
 	{
@ -2426,13 +2313,13 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 		}
 	} fd_timer;

-	if (!alert && usec > 0 && usec <= 1000 && fd_timer != -1)
+	if (!alert && fd_timer != -1)
 	{
 		struct itimerspec timeout;
 		u64 missed;

-		timeout.it_value.tv_nsec = usec * 1'000ull;
-		timeout.it_value.tv_sec = 0;
+		timeout.it_value.tv_nsec = usec % 1'000'000 * 1'000ull;
+		timeout.it_value.tv_sec = usec / 1'000'000;
 		timeout.it_interval.tv_sec = 0;
 		timeout.it_interval.tv_nsec = 0;
 		timerfd_settime(fd_timer, 0, &timeout, NULL);
@ -2442,15 +2329,27 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 	}
 #endif

-	if (_this->m_sync.bit_test_reset(2) || _this->m_taskq)
+	if (alert)
 	{
-		return;
+		if (_this->m_sync.bit_test_reset(2) || _this->m_taskq)
+		{
+			return;
+		}
 	}

 	// Wait for signal and thread state abort
 	atomic_wait::list<2> list{};
-	list.set<0>(_this->m_sync, 0, 4 + 1);
-	list.set<1>(_this->m_taskq, nullptr);
+
+	if (alert)
+	{
+		list.set<0>(_this->m_sync, 0);
+		list.set<1>(utils::bless<atomic_t<u32>>(&_this->m_taskq)[1], 0);
+	}
+	else
+	{
+		list.set<0>(_this->m_dummy, 0);
+	}
+
 	list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
 }

@ -2461,29 +2360,27 @@ void thread_ctrl::wait_for_accurate(u64 usec)
 		return;
 	}

+	if (usec > 50000)
+	{
+		fmt::throw_exception("thread_ctrl::wait_for_accurate: unsupported amount");
+	}
+
+#ifdef __linux__
+	return wait_for(usec, false);
+#else
 	using namespace std::chrono_literals;

 	const auto until = std::chrono::steady_clock::now() + 1us * usec;

 	while (true)
 	{
-#ifdef __linux__
-		// NOTE: Assumption that timer initialization has succeeded
-		u64 host_min_quantum = usec <= 1000 ? 10 : 50;
-#else
 		// Host scheduler quantum for windows (worst case)
-		// NOTE: On ps3 this function has very high accuracy
 		constexpr u64 host_min_quantum = 500;
-#endif
+
 		if (usec >= host_min_quantum)
 		{
-#ifdef __linux__
-			// Do not wait for the last quantum to avoid loss of accuracy
-			wait_for(usec - ((usec % host_min_quantum) + host_min_quantum), false);
-#else
 			// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
 			wait_for(usec - (usec % host_min_quantum), false);
-#endif
 		}
 		// TODO: Determine best value for yield delay
 		else if (usec >= host_min_quantum / 2)
@ -2504,6 +2401,7 @@ void thread_ctrl::wait_for_accurate(u64 usec)

 		usec = (until - current).count();
 	}
+#endif
 }

 std::string thread_ctrl::get_name_cached()
@ -2570,7 +2468,7 @@ bool thread_base::join(bool dtor) const

 	for (u64 i = 0; (m_sync & 3) <= 1; i++)
 	{
-		m_sync.wait(0, 2, timeout);
+		m_sync.wait(m_sync & ~2, timeout);

 		if (m_sync & 2)
 		{
@ -2590,7 +2488,7 @@ void thread_base::notify()
 {
 	// Set notification
 	m_sync |= 4;
-	m_sync.notify_one(4);
+	m_sync.notify_all();
 }

 u64 thread_base::get_native_id() const
@ -2627,7 +2525,7 @@ u64 thread_base::get_cycles()
 	{
 		cycles = static_cast<u64>(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec;
 #endif
-		if (const u64 old_cycles = m_sync.fetch_op([&](u64& v){ v &= 7; v |= (cycles << 3); }) >> 3)
+		if (const u64 old_cycles = m_cycles.exchange(cycles))
 		{
 			return cycles - old_cycles;
 		}
@ -2637,7 +2535,7 @@ u64 thread_base::get_cycles()
 	}
 	else
 	{
-		return m_sync >> 3;
+		return m_cycles;
 	}
 }

@ -2690,8 +2588,8 @@ void thread_base::exec()
 				}

 				// Notify waiters
-				ptr->exec.release(nullptr);
-				ptr->exec.notify_all();
+				ptr->done.release(1);
+				ptr->done.notify_all();
 			}

 			if (ptr->next)
--- a/Utilities/Thread.h
+++ b/Utilities/Thread.h
@ -100,17 +100,19 @@ class thread_future
 protected:
 	atomic_t<void(*)(thread_base*, thread_future*)> exec{};

+	atomic_t<u32> done{0};
+
 public:
 	// Get reference to the atomic variable for inspection and waiting for
 	const auto& get_wait() const
 	{
-		return exec;
+		return done;
 	}

 	// Wait (preset)
 	void wait() const
 	{
-		exec.wait<atomic_wait::op_ne>(nullptr);
+		done.wait(0);
 	}
 };

@ -131,8 +133,13 @@ private:
 	// Thread handle (platform-specific)
 	atomic_t<u64> m_thread{0};

-	// Thread state and cycles
-	atomic_t<u64> m_sync{0};
+	// Thread cycles
+	atomic_t<u64> m_cycles{0};
+
+	atomic_t<u32> m_dummy{0};
+
+	// Thread state
+	atomic_t<u32> m_sync{0};

 	// Thread name
 	atomic_ptr<std::string> m_tname;
@ -284,16 +291,22 @@ public:
 		}

 		atomic_wait::list<Max + 2> list{};
-		list.template set<Max>(_this->m_sync, 0, 4 + 1);
-		list.template set<Max + 1>(_this->m_taskq, nullptr);
+		list.template set<Max>(_this->m_sync, 0);
+		list.template set<Max + 1>(_this->m_taskq);
 		setter(list);
 		list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
 	}

-	template <atomic_wait::op Op = atomic_wait::op::eq, typename T, typename U>
+	template <typename T, typename U>
 	static inline void wait_on(T& wait, U old, u64 usec = -1)
 	{
-		wait_on_custom<1>([&](atomic_wait::list<3>& list){ list.set<0, Op>(wait, old); }, usec);
+		wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait, old); }, usec);
+	}
+
+	template <typename T>
+	static inline void wait_on(T& wait)
+	{
+		wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait); });
 	}

 	// Exit.
@ -637,7 +650,7 @@ public:
 	{
 		bool notify_sync = false;

-		if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u64& v){ return !(v & 3) && (v |= 1); }).second)
+		if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u32& v) { return !(v & 3) && (v |= 1); }).second)
 		{
 			notify_sync = true;
 		}
@ -650,7 +663,7 @@ public:
 		if (notify_sync)
 		{
 			// Notify after context abortion has been made so all conditions for wake-up be satisfied by the time of notification
-			thread::m_sync.notify_one(1);
+			thread::m_sync.notify_all();
 		}

 		if (s == thread_state::finished)
--- a/Utilities/bin_patch.cpp
+++ b/Utilities/bin_patch.cpp
@ -378,13 +378,21 @@ bool patch_engine::load(patch_map& patches_map, const std::string& path, std::st
 				{
 					for (const auto note : notes_node)
 					{
-						if (note && note.IsScalar())
+						if (note)
 						{
-							info.notes += note.Scalar();
+							if (note.IsScalar())
+							{
+								info.notes += note.Scalar();
+							}
+							else
+							{
+								append_log_message(log_messages, fmt::format("Error: Skipping sequenced Note (patch: %s, key: %s, location: %s, file: %s)", description, main_key, get_yaml_node_location(note), path), &patch_log.error);
+								is_valid = false;
+							}
 						}
 						else
 						{
-							append_log_message(log_messages, fmt::format("Error: Skipping sequenced Note (patch: %s, key: %s, location: %s, file: %s)", description, main_key, get_yaml_node_location(note), path), &patch_log.error);
+							append_log_message(log_messages, fmt::format("Error: Skipping sequenced Note (patch: %s, key: %s, location: %s, file: %s)", description, main_key, get_yaml_node_location(notes_node), path), &patch_log.error);
 							is_valid = false;
 						}
 					}
@ -726,7 +734,7 @@ bool patch_engine::add_patch_data(YAML::Node node, patch_info& info, u32 modifie
 		break;
 	default:
 	{
-		get_yaml_node_value<u32>(addr_node, error_message);
+		[[maybe_unused]] const u32 offset = get_yaml_node_value<u32>(addr_node, error_message);
 		if (!error_message.empty())
 		{
 			error_message = fmt::format("Skipping patch data entry: [ %s, 0x%.8x, %s ] (key: %s, location: %s) Invalid patch offset '%s' (not a valid u32 or overflow)",
@ -1752,9 +1760,9 @@ bool patch_engine::save_patches(const patch_map& patches, const std::string& pat
 				{
 					out << serial << YAML::BeginSeq;

-					for (const auto& app_version : app_versions)
+					for (const auto& [app_version, patch_config] : app_versions)
 					{
-						out << app_version.first;
+						out << app_version;
 					}

 					out << YAML::EndSeq;
@ -1891,7 +1899,7 @@ patch_engine::patch_map patch_engine::load_config()

 		for (const auto pair : root)
 		{
-			const auto& hash = pair.first.Scalar();
+			const std::string& hash = pair.first.Scalar();

 			if (const auto yml_type = pair.second.Type(); yml_type != YAML::NodeType::Map)
 			{
@ -1901,7 +1909,7 @@ patch_engine::patch_map patch_engine::load_config()

 			for (const auto patch : pair.second)
 			{
-				const auto& description = patch.first.Scalar();
+				const std::string& description = patch.first.Scalar();

 				if (const auto yml_type = patch.second.Type(); yml_type != YAML::NodeType::Map)
 				{
@ -1911,7 +1919,7 @@ patch_engine::patch_map patch_engine::load_config()

 				for (const auto title_node : patch.second)
 				{
-					const auto& title = title_node.first.Scalar();
+					const std::string& title = title_node.first.Scalar();

 					if (const auto yml_type = title_node.second.Type(); yml_type != YAML::NodeType::Map)
 					{
@ -1921,7 +1929,7 @@ patch_engine::patch_map patch_engine::load_config()

 					for (const auto serial_node : title_node.second)
 					{
-						const auto& serial = serial_node.first.Scalar();
+						const std::string& serial = serial_node.first.Scalar();

 						if (const auto yml_type = serial_node.second.Type(); yml_type != YAML::NodeType::Map)
 						{
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@ -9,7 +9,7 @@ void cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
 	ensure(_old);

 	// Wait with timeout
-	m_value.wait(_old, c_signal_mask, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000});
+	m_value.wait(_old, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000});

 	// Cleanup
 	m_value.atomic_op([](u32& value)
@ -47,10 +47,10 @@ void cond_variable::imp_wake(u32 _count) noexcept
 	if (_count > 1 || ((_old + (c_signal_mask & (0 - c_signal_mask))) & c_signal_mask) == c_signal_mask)
 	{
 		// Resort to notify_all if signal count reached max
-		m_value.notify_all(c_signal_mask);
+		m_value.notify_all();
 	}
 	else
 	{
-		m_value.notify_one(c_signal_mask);
+		m_value.notify_one();
 	}
 }
--- a/Utilities/git-version-gen.cmd
+++ b/Utilities/git-version-gen.cmd
@ -90,8 +90,7 @@ rem // Get commit count from (unshallowed) HEAD
 for /F %%I IN ('call %GIT% rev-list HEAD --count') do set COMMIT_COUNT=%%I

 rem // Check if the current build system sets the git branch and version.
-rem // The name is misleading. This is also used for master builds.
-if defined SYSTEM_PULLREQUEST_SOURCEBRANCH (
+if defined BUILD_SOURCEBRANCHNAME (

 	rem // This must be a CI build

@ -125,6 +124,7 @@ if defined SYSTEM_PULLREQUEST_SOURCEBRANCH (
 		for /F %%I IN ('call %GIT% rev-parse --short^=8 HEAD') do set GIT_VERSION=%COMMIT_COUNT%-%%I
 		for /F %%I IN ('call %GIT% rev-parse --abbrev-ref HEAD') do set GIT_BRANCH=%%I

+		set GIT_BRANCH=%BUILD_SOURCEBRANCHNAME%
 	) else (
 		rem // This must be a pull request or a build from a fork.
 		echo Assuming pull request build
@ -142,8 +142,8 @@ if defined SYSTEM_PULLREQUEST_SOURCEBRANCH (
 	)

 ) else (
-	rem // The name is misleading. This is also used for master builds.
-	echo SYSTEM_PULLREQUEST_SOURCEBRANCH undefined
+	echo BUILD_SOURCEBRANCHNAME undefined
+	echo Assuming local build

 	rem // Make GIT_VERSION the last commit (shortened); Don't include commit count on non-release builds
 	for /F %%I IN ('call %GIT% rev-parse --short^=8 HEAD') do set GIT_VERSION=%%I
--- a/Utilities/lockless.h
+++ b/Utilities/lockless.h
@ -2,6 +2,7 @@

 #include "util/types.hpp"
 #include "util/atomic.hpp"
+#include "util/bless.hpp"

 //! Simple unshrinkable array base for concurrent access. Only growths automatically.
 //! There is no way to know the current size. The smaller index is, the faster it's accessed.
@ -269,6 +270,30 @@ public:
 		return {};
 	}

+	const T& operator[](usz index) const noexcept
+	{
+		lf_queue_iterator<T> result = begin();
+
+		while (--index != umax)
+		{
+			result++;
+		}
+
+		return *result;
+	}
+
+	T& operator[](usz index) noexcept
+	{
+		lf_queue_iterator<T> result = begin();
+
+		while (--index != umax)
+		{
+			result++;
+		}
+
+		return *result;
+	}
+
 	lf_queue_slice& pop_front()
 	{
 		delete std::exchange(m_head, std::exchange(m_head->m_link, nullptr));
@ -280,12 +305,17 @@ public:
 template <typename T>
 class lf_queue final
 {
-	atomic_t<lf_queue_item<T>*> m_head{nullptr};
+	atomic_t<u64> m_head{0};
+
+	lf_queue_item<T>* load(u64 value) const noexcept
+	{
+		return reinterpret_cast<lf_queue_item<T>*>(value >> 16);
+	}

 	// Extract all elements and reverse element order (FILO to FIFO)
 	lf_queue_item<T>* reverse() noexcept
 	{
-		if (auto* head = m_head.load() ? m_head.exchange(nullptr) : nullptr)
+		if (auto* head = load(m_head) ? load(m_head.exchange(0)) : nullptr)
 		{
 			if (auto* prev = head->m_link)
 			{
@ -309,45 +339,61 @@ class lf_queue final
 public:
 	constexpr lf_queue() = default;

-	~lf_queue()
+	lf_queue(lf_queue&& other) noexcept
 	{
-		delete m_head.load();
+		m_head.release(other.m_head.exchange(0));
+	}
+
+	lf_queue& operator=(lf_queue&& other) noexcept
+	{
+		if (this == std::addressof(other))
+		{
+			return *this;
+		}
+
+		delete load(m_head);
+		m_head.release(other.m_head.exchange(0));
+		return *this;
+	}
+
+	~lf_queue()
+	{
+		delete load(m_head);
 	}

-	template <atomic_wait::op Flags = atomic_wait::op::eq>
 	void wait(std::nullptr_t /*null*/ = nullptr) noexcept
 	{
-		if (m_head == nullptr)
+		if (m_head == 0)
 		{
-			m_head.template wait<Flags>(nullptr);
+			utils::bless<atomic_t<u32>>(&m_head)[1].wait(0);
 		}
 	}

 	const volatile void* observe() const noexcept
 	{
-		return m_head.load();
+		return load(m_head);
 	}

 	explicit operator bool() const noexcept
 	{
-		return m_head != nullptr;
+		return m_head != 0;
 	}

 	template <typename... Args>
 	void push(Args&&... args)
 	{
-		auto _old = m_head.load();
-		auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...);
+		auto oldv = m_head.load();
+		auto item = new lf_queue_item<T>(load(oldv), std::forward<Args>(args)...);

-		while (!m_head.compare_exchange(_old, item))
+		while (!m_head.compare_exchange(oldv, reinterpret_cast<u64>(item) << 16))
 		{
-			item->m_link = _old;
+			item->m_link = load(oldv);
 		}

-		if (!_old)
+		if (!oldv)
 		{
 			// Notify only if queue was empty
-			m_head.notify_one();
+			utils::bless<atomic_t<u32>>(&m_head)[1].notify_one();
 		}
 	}

@ -363,7 +409,7 @@ public:
 	lf_queue_slice<T> pop_all_reversed()
 	{
 		lf_queue_slice<T> result;
-		result.m_head = m_head.exchange(nullptr);
+		result.m_head = load(m_head.exchange(0));
 		return result;
 	}

--- a/Utilities/mutex.cpp
+++ b/Utilities/mutex.cpp
@ -74,14 +74,14 @@ void shared_mutex::imp_wait()
 			break;
 		}

-		m_value.wait(old, c_sig);
+		m_value.wait(old);
 	}
 }

 void shared_mutex::imp_signal()
 {
 	m_value += c_sig;
-	m_value.notify_one(c_sig);
+	m_value.notify_one();
 }

 void shared_mutex::imp_lock(u32 val)
--- a/Utilities/sync.h
+++ b/Utilities/sync.h
@ -38,7 +38,28 @@ constexpr NTSTATUS NTSTATUS_ALERTED = 0x101;
 constexpr NTSTATUS NTSTATUS_TIMEOUT = 0x102;
 #endif

-#ifndef __linux__
+#ifdef __linux__
+#ifndef SYS_futex_waitv
+#if defined(ARCH_X64) || defined(ARCH_ARM64)
+#define SYS_futex_waitv 449
+#endif
+#endif
+
+#ifndef FUTEX_32
+#define FUTEX_32 2
+#endif
+
+#ifndef FUTEX_WAITV_MAX
+#define FUTEX_WAITV_MAX 128
+struct futex_waitv
+{
+	__u64 val;
+	__u64 uaddr;
+	__u32 flags;
+	__u32 __reserved;
+};
+#endif
+#else
 enum
 {
 	FUTEX_PRIVATE_FLAG = 0,
@ -113,7 +134,7 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
 				}
 				else
 				{
-					// TODO
+					// TODO: absolute timeout
 				}

 				map.erase(std::find(map.find(uaddr), map.end(), ref));
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@ -5,7 +5,10 @@ trigger:
  tags:
    exclude:
      - '*'
-pr: none
+pr:
+  branches:
+    include:
+      - master
 jobs:
 - job: Linux_Build
  strategy:
@ -35,13 +38,13 @@ jobs:
    displayName: ccache

  - bash: |
-      docker pull --quiet rpcs3/rpcs3-ci-bionic:1.3
+      docker pull --quiet rpcs3/rpcs3-ci-focal:1.1
      docker run                      \
        -v $(pwd):/rpcs3              \
        --env-file .ci/docker.env \
        -v $CCACHE_DIR:/root/.ccache  \
        -v $BUILD_ARTIFACTSTAGINGDIRECTORY:/root/artifacts \
-        rpcs3/rpcs3-ci-bionic:1.3 \
+        rpcs3/rpcs3-ci-focal:1.1 \
        /rpcs3/.ci/build-linux.sh
    displayName: Docker setup and build

@ -49,26 +52,26 @@ jobs:
    condition: succeeded()
    artifact: RPCS3 for Linux ($(COMPILER))

-  # - bash: |
-  #     COMM_TAG=$(awk '/version{.*}/ { printf("%d.%d.%d", $5, $6, $7) }' ./rpcs3/rpcs3_version.cpp)
-  #     COMM_COUNT=$(git rev-list --count HEAD)
-  #     COMM_HASH=$(git rev-parse --short=8 HEAD)
+  - bash: |
+      COMM_TAG=$(awk '/version{.*}/ { printf("%d.%d.%d", $5, $6, $7) }' ./rpcs3/rpcs3_version.cpp)
+      COMM_COUNT=$(git rev-list --count HEAD)
+      COMM_HASH=$(git rev-parse --short=8 HEAD)

-  #     export AVVER="${COMM_TAG}-${COMM_COUNT}"
+      export AVVER="${COMM_TAG}-${COMM_COUNT}"

-  #     .ci/github-upload.sh
-  #   condition: and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.Repository.Name'], 'RPCS3/rpcs3'), eq(variables['Build.SourceBranch'], 'refs/heads/master'), eq(variables['COMPILER'], 'gcc'))
-  #   displayName: Push build to GitHub
-  #   env:
-  #     RPCS3_TOKEN: $(RPCS3-Token)
+      .ci/github-upload.sh
+    condition: and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.Repository.Name'], 'RPCS3/rpcs3'), eq(variables['Build.SourceBranch'], 'refs/heads/master'), eq(variables['COMPILER'], 'gcc'))
+    displayName: Push build to GitHub
+    env:
+      RPCS3_TOKEN: $(RPCS3-Token)

 - job: Windows_Build
  variables:
    COMPILER: msvc
-    QT_VER_MAIN: '5'
-    QT_VER: '5.15.2'
+    QT_VER_MAIN: '6'
+    QT_VER: '6.5.2'
    QT_VER_MSVC: 'msvc2019'
-    QT_DATE: '202011130602'
+    QT_DATE: '202307080351'
    QTDIR: C:\Qt\$(QT_VER)\$(QT_VER_MSVC)_64
    VULKAN_VER: '1.3.224.1'
    VULKAN_SDK_SHA: '2029e652e39ee6a6036cff3765da31e1e6c595fd2413d3cd111dfab7855621ea'
@ -98,14 +101,6 @@ jobs:
    - bash: .ci/export-azure-vars.sh
      displayName: Export Variables

-    - task: MSBuild@1
-      inputs:
-        solution: './3rdparty/SPIRV/spirv.vcxproj'
-        maximumCpuCount: true
-        platform: x64
-        configuration: 'Release'
-      displayName: Compile SPIRV-Tools
-
    - task: VSBuild@1
      inputs:
        solution: 'rpcs3.sln'
@ -121,8 +116,8 @@ jobs:
      condition: succeeded()
      artifact: RPCS3 for Windows

-    # - bash: .ci/github-upload.sh
-    #   condition: and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.Repository.Name'], 'RPCS3/rpcs3'), eq(variables['Build.SourceBranch'], 'refs/heads/master'))
-    #   displayName: Push build to GitHub
-    #   env:
-    #     RPCS3_TOKEN: $(RPCS3-Token)
+    - bash: .ci/github-upload.sh
+      condition: and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.Repository.Name'], 'RPCS3/rpcs3'), eq(variables['Build.SourceBranch'], 'refs/heads/master'))
+      displayName: Push build to GitHub
+      env:
+        RPCS3_TOKEN: $(RPCS3-Token)
--- a/bin/GuiConfigs/Classic
+++ b/bin/GuiConfigs/Classic
@ -44,13 +44,13 @@ QLabel#gamelist_icon_background_color {
 }

 /* log stylesheet */
-QTextEdit#tty_frame {
+QPlainTextEdit#tty_frame {
 	background-color:#ffffff;
 }
 QLabel#tty_text {
 	color:#000000;
 }
-QTextEdit#log_frame {
+QPlainTextEdit#log_frame {
 	background-color:#ffffff;
 }
 QLabel#log_level_always {
--- a/bin/GuiConfigs/Darker
+++ b/bin/GuiConfigs/Darker
@ -253,7 +253,7 @@ QLabel#thumbnail_icon_color {
 }

 /* Set Log colors */
-QTextEdit#log_frame {
+QPlainTextEdit#log_frame {
 	background-color: #000; /* Black */
 }
 QLabel#log_level_always {
@ -285,7 +285,7 @@ QLabel#log_stack {
 }

 /* Set TTY colors */
-QTextEdit#tty_frame {
+QPlainTextEdit#tty_frame {
 	background-color: #000; /* Black */
 }
 QLabel#tty_text {
--- a/bin/GuiConfigs/Envy.qss
+++ b/bin/GuiConfigs/Envy.qss
@ -579,7 +579,7 @@ QLabel#thumbnail_icon_color {
 }

 /* Log colors */
-QTextEdit#log_frame {
+QPlainTextEdit#log_frame {
 	background-color: #23262d;
 }

@ -620,7 +620,7 @@ QLabel#log_stack {
 }

 /* TTY colors */
-QTextEdit#tty_frame {
+QPlainTextEdit#tty_frame {
 	background-color: #23262d;
 }

--- a/bin/GuiConfigs/Kuroi
+++ b/bin/GuiConfigs/Kuroi
@ -292,7 +292,7 @@ QLabel#debugger_frame_pc {
 }

 /* Set Log colors */
-QTextEdit#log_frame {
+QPlainTextEdit#log_frame {
 	background-color: #000000; /* Black */
 }
 QLabel#log_level_always {
@ -323,7 +323,7 @@ QLabel#log_stack {
 	color: #ffffff; /* White */
 }
 /* Set TTY colors */
-QTextEdit#tty_frame {
+QPlainTextEdit#tty_frame {
 	background-color: #000000; /* Black */
 }
 QLabel#tty_text {
--- a/bin/GuiConfigs/ModernBlue
+++ b/bin/GuiConfigs/ModernBlue
@ -250,7 +250,7 @@ QLabel#thumbnail_icon_color {
 }

 /* Set Log colors */
-QTextEdit#log_frame {
+QPlainTextEdit#log_frame {
 	background-color: #181d24; /* Black */
 }
 QLabel#log_level_always {
@ -282,7 +282,7 @@ QLabel#log_stack {
 }

 /* Set TTY colors */
-QTextEdit#tty_frame {
+QPlainTextEdit#tty_frame {
 	background-color: #181d24; /* Black */
 }
 QLabel#tty_text {
--- a/bin/GuiConfigs/Skyline
+++ b/bin/GuiConfigs/Skyline
@ -611,7 +611,7 @@ QLineEdit:focus {
 }

 /* Log colors */
-QTextEdit#log_frame {
+QPlainTextEdit#log_frame {
 	background-color: #111525;
 }

--- a/bin/GuiConfigs/Skyline.qss
+++ b/bin/GuiConfigs/Skyline.qss
@ -619,7 +619,7 @@ QLineEdit:focus {
 }

 /* Log colors */
-QTextEdit#log_frame {
+QPlainTextEdit#log_frame {
 	background-color: #FFFFFF;
 }

--- a/buildfiles/msvc/rpcs3_default.props
+++ b/buildfiles/msvc/rpcs3_default.props
@ -24,7 +24,7 @@
    </ClCompile>
    <Link>
      <AdditionalDependencies>xxhash.lib;ws2_32.lib;Iphlpapi.lib;Bcrypt.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\3rdparty\ffmpeg\windows\x86_64</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\3rdparty\ffmpeg\lib\windows\x86_64</AdditionalLibraryDirectories>
      <StackReserveSize>8388608</StackReserveSize>
      <StackCommitSize>1048576</StackCommitSize>
    </Link>
--- a/rpcs3/CMakeLists.txt
+++ b/rpcs3/CMakeLists.txt
@ -29,11 +29,11 @@ if(UNIX AND NOT APPLE)
    endif()
 endif()

-# Qt5
+# Qt
 # finds Qt libraries and setups custom commands for MOC and UIC
 # Must be done here because generated MOC and UIC targets cant
 # be found otherwise
-include(${CMAKE_SOURCE_DIR}/3rdparty/qt5.cmake)
+include(${CMAKE_SOURCE_DIR}/3rdparty/qt6.cmake)

 # subdirectories
 add_subdirectory(Emu)
@ -42,6 +42,7 @@ add_subdirectory(rpcs3qt)
 if(WIN32)
    add_executable(rpcs3 WIN32)
    target_sources(rpcs3 PRIVATE rpcs3.rc)
+    target_compile_definitions(rpcs3 PRIVATE UNICODE _UNICODE)
 elseif(APPLE)
    add_executable(rpcs3 MACOSX_BUNDLE)
    target_sources(rpcs3 PRIVATE rpcs3.icns)
@ -84,7 +85,7 @@ set_target_properties(rpcs3
        AUTOUIC ON)

 target_link_libraries(rpcs3 PRIVATE rpcs3_emu rpcs3_ui)
-target_link_libraries(rpcs3 PRIVATE 3rdparty::discordRPC 3rdparty::qt5 3rdparty::hidapi 3rdparty::libusb 3rdparty::wolfssl 3rdparty::libcurl 3rdparty::zlib)
+target_link_libraries(rpcs3 PRIVATE 3rdparty::discordRPC 3rdparty::qt6 3rdparty::hidapi 3rdparty::libusb 3rdparty::wolfssl 3rdparty::libcurl 3rdparty::zlib)
 target_link_libraries(rpcs3 PRIVATE ${ADDITIONAL_LIBS})

 # Unix display manager
@ -112,14 +113,6 @@ if(USE_PRECOMPILED_HEADERS)
    target_precompile_headers(rpcs3 PRIVATE stdafx.h)
 endif()

-get_target_property(_qmake_executable Qt5::qmake IMPORTED_LOCATION)
-get_filename_component(_qt_bin_dir "${_qmake_executable}" DIRECTORY)
-if(APPLE)
-    find_program(MACDEPLOYQT_EXECUTABLE macdeployqt HINTS "${_qt_bin_dir}")
-elseif(WIN32)
-    find_program(WINDEPLOYQT_EXECUTABLE windeployqt HINTS "${_qt_bin_dir}")
-endif()
-
 # Copy icons to executable directory
 if(APPLE)
    if (CMAKE_BUILD_TYPE MATCHES "Debug" OR CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo")
@ -148,35 +141,9 @@ elseif(UNIX)
            COMMAND ${CMAKE_COMMAND} -E copy_directory
            ${CMAKE_SOURCE_DIR}/bin/git $<TARGET_FILE_DIR:rpcs3>/git)
 elseif(WIN32)
-    # TODO(cjj19970505@live.cn)
-    # offical Qt binaries are built with -MD(d) only as stated in offical wiki
-    # https://wiki.qt.io/Technical_FAQ#Why_does_a_statically_built_Qt_use_the_dynamic_Visual_Studio_runtime_libraries_.3F_Do_I_need_to_deploy_those_with_my_application_.3F
-    # If we build our libs with /MT(d), we might encounter some issues.
-    # https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-160#what-problems-exist-if-an-application-uses-more-than-one-crt-version
-
-    # Qt installed from Qt installer has following hierarchy:
-    # bin/ for release and debug dlls and windeployqt tools
-    # lib/cmake/Qt5/ for Qt5_Dir
-    # Qt installed from vcpkg has following hierarchy:
-    # bin/ for release dlls
-    # debug/bin/ for debug dlls
-    # tools/qt5/bin/ for tools including windeployqt
-    # tools/qt5/debug/bin/ for tools with debug build including windeployqt
-    # share/cmake/Qt5/ for Qt5_Dir
-
-    # If Qt5 is installed from official Qt installer
-    # list(APPEND _QT5_TOOLS_PATHS "${Qt5_DIR}/../../../bin/")
-
-    # If Qt5 is installed from vcpkg
-    # list(APPEND _QT5_TOOLS_PATHS "${Qt5_DIR}/../../../tools/qt5$<$<CONFIG:Debug>:/debug>/bin/")
-
    add_custom_command(TARGET rpcs3 POST_BUILD
-        # COMMAND set PATH=${_QT5_TOOLS_PATHS}$<SEMICOLON>%PATH%
        COMMAND "${CMAKE_COMMAND}" -E copy_directory "${CMAKE_SOURCE_DIR}/bin" "$<TARGET_FILE_DIR:rpcs3>"
-        # If Qt5 is installed from vcpkg, add binary path to PATH
-        # otherwise windeployqt tool won't be able to locate necessary dlls
-        # COMMAND set PATH=${Qt5_DIR}/../../../$<$<CONFIG:Debug>:debug/>bin/$<SEMICOLON>%PATH% 
-        COMMAND "${WINDEPLOYQT_EXECUTABLE}" --no-angle --no-compiler-runtime --no-opengl-sw --no-patchqt --no-translations --no-quick --plugindir "$<TARGET_FILE_DIR:rpcs3>/qt/plugins" --verbose 0 "$<TARGET_FILE:rpcs3>")
+        COMMAND "${WINDEPLOYQT_EXECUTABLE}" --no-compiler-runtime --no-opengl-sw --no-patchqt --no-translations --no-quick --no-system-d3d-compiler --no-quick-import --plugindir "$<IF:$<CXX_COMPILER_ID:MSVC>,$<TARGET_FILE_DIR:rpcs3>/plugins,$<TARGET_FILE_DIR:rpcs3>/share/qt6/plugins>" --verbose 0 "$<TARGET_FILE:rpcs3>")
 endif()

 # Unix installation
--- a/rpcs3/Crypto/decrypt_binaries.h
+++ b/rpcs3/Crypto/decrypt_binaries.h
@ -16,7 +16,7 @@ public:

    bool done() const
    {
-        return m_index >= m_klics.size();
+        return m_index >= m_modules.size();
    }

    const std::string& operator[](usz index) const
--- a/rpcs3/Crypto/unedat.cpp
+++ b/rpcs3/Crypto/unedat.cpp
@ -135,9 +135,9 @@ std::tuple<u64, s32, s32> dec_section(unsigned char* metadata)
 	dec[0x0E] = (metadata[0x6] ^ metadata[0x2] ^ metadata[0x1E]);
 	dec[0x0F] = (metadata[0x7] ^ metadata[0x3] ^ metadata[0x1F]);

-	u64 offset = swap64(*reinterpret_cast<u64*>(&dec[0]));
-	s32 length = swap32(*reinterpret_cast<s32*>(&dec[8]));
-	s32 compression_end = swap32(*reinterpret_cast<s32*>(&dec[12]));
+	u64 offset = read_from_ptr<be_t<u64>>(dec, 0);
+	s32 length = read_from_ptr<be_t<s32>>(dec, 8);
+	s32 compression_end = read_from_ptr<be_t<s32>>(dec, 12);

 	return std::make_tuple(offset, length, compression_end);
 }
@ -149,7 +149,7 @@ u128 get_block_key(int block, NPD_HEADER *npd)
 	u128 dest_key{};
 	std::memcpy(&dest_key, src_key, 0xC);

-	s32 swappedBlock = swap32(block);
+	s32 swappedBlock = std::bit_cast<be_t<s32>>(block);
 	std::memcpy(reinterpret_cast<uchar*>(&dest_key) + 0xC, &swappedBlock, sizeof(swappedBlock));
 	return dest_key;
 }
@ -193,9 +193,9 @@ s64 decrypt_block(const fs::file* in, u8* out, EDAT_HEADER *edat, NPD_HEADER *np
 		// NOTE: For NPD version 1 the metadata is not encrypted.
 		if (npd->version <= 1)
 		{
-			offset = swap64(*reinterpret_cast<u64*>(&metadata[0x10]));
-			length = swap32(*reinterpret_cast<s32*>(&metadata[0x18]));
-			compression_end = swap32(*reinterpret_cast<s32*>(&metadata[0x1C]));
+			offset = read_from_ptr<be_t<u64>>(metadata, 0x10);
+			length = read_from_ptr<be_t<s32>>(metadata, 0x18);
+			compression_end = read_from_ptr<be_t<s32>>(metadata, 0x1C);
 		}
 		else
 		{
@ -433,17 +433,26 @@ int check_data(unsigned char *key, EDAT_HEADER *edat, NPD_HEADER *npd, const fs:
 			edat_log.warning("COMPRESSED data detected!");
 	}

-	const int block_num = static_cast<int>((edat->file_size + edat->block_size - 1) / edat->block_size);
-	const int metadata_offset = 0x100;
-	const int metadata_size = metadata_section_size * block_num;
+	if (!edat->block_size)
+	{
+		return 1;
+	}
+
+	const usz block_num = utils::aligned_div<u64>(edat->file_size, edat->block_size);
+	constexpr usz metadata_offset = 0x100;
+	const usz metadata_size = utils::mul_saturate<u64>(metadata_section_size, block_num);
 	u64 metadata_section_offset = metadata_offset;

-	long bytes_read = 0;
-	long bytes_to_read = metadata_size;
-	std::unique_ptr<u8[]> metadata(new u8[metadata_size]);
-	std::unique_ptr<u8[]> empty_metadata(new u8[metadata_size]);
+	if (utils::add_saturate<u64>(utils::add_saturate<u64>(file_offset, metadata_section_offset), metadata_size) > f->size())
+	{
+		return 1;
+	}

-	while (bytes_to_read > 0)
+	u64 bytes_read = 0;
+	const auto metadata = std::make_unique<u8[]>(metadata_size);
+	const auto empty_metadata = std::make_unique<u8[]>(metadata_size);
+
+	while (bytes_read < metadata_size)
 	{
 		// Locate the metadata blocks.
 		f->seek(file_offset + metadata_section_offset);
@ -453,7 +462,6 @@ int check_data(unsigned char *key, EDAT_HEADER *edat, NPD_HEADER *npd, const fs:

 		// Adjust sizes.
 		bytes_read += metadata_section_size;
-		bytes_to_read -= metadata_section_size;

 		if (((edat->flags & EDAT_FLAG_0x20) != 0)) // Metadata block before each data block.
 			metadata_section_offset += (metadata_section_size + edat->block_size);
@ -553,18 +561,18 @@ bool validate_dev_klic(const u8* klicensee, NPD_HEADER *npd)
 		return true;
 	}

-	unsigned char dev[0x60] = { 0 };
+	unsigned char dev[0x60]{};

 	// Build the dev buffer (first 0x60 bytes of NPD header in big-endian).
-	memcpy(dev, npd, 0x60);
+	std::memcpy(dev, npd, 0x60);

 	// Fix endianness.
-	int version = swap32(npd->version);
-	int license = swap32(npd->license);
-	int type = swap32(npd->type);
-	memcpy(dev + 0x4, &version, 4);
-	memcpy(dev + 0x8, &license, 4);
-	memcpy(dev + 0xC, &type, 4);
+	s32 version = std::bit_cast<be_t<s32>>(npd->version);
+	s32 license = std::bit_cast<be_t<s32>>(npd->license);
+	s32 type = std::bit_cast<be_t<s32>>(npd->type);
+	std::memcpy(dev + 0x4, &version, 4);
+	std::memcpy(dev + 0x8, &license, 4);
+	std::memcpy(dev + 0xC, &type, 4);

 	// Check for an empty dev_hash (can't validate if devklic is NULL);
 	u128 klic;
@ -638,20 +646,20 @@ void read_npd_edat_header(const fs::file* input, NPD_HEADER& NPD, EDAT_HEADER& E
 	input->read(npd_header, sizeof(npd_header));
 	input->read(edat_header, sizeof(edat_header));

-	memcpy(&NPD.magic, npd_header, 4);
-	NPD.version = swap32(*reinterpret_cast<s32*>(&npd_header[4]));
-	NPD.license = swap32(*reinterpret_cast<s32*>(&npd_header[8]));
-	NPD.type = swap32(*reinterpret_cast<s32*>(&npd_header[12]));
-	memcpy(NPD.content_id, &npd_header[16], 0x30);
-	memcpy(NPD.digest, &npd_header[64], 0x10);
-	memcpy(NPD.title_hash, &npd_header[80], 0x10);
-	memcpy(NPD.dev_hash, &npd_header[96], 0x10);
-	NPD.activate_time = swap64(*reinterpret_cast<s64*>(&npd_header[112]));
-	NPD.expire_time = swap64(*reinterpret_cast<s64*>(&npd_header[120]));
+	std::memcpy(&NPD.magic, npd_header, 4);
+	NPD.version = read_from_ptr<be_t<s32>>(npd_header, 4);
+	NPD.license = read_from_ptr<be_t<s32>>(npd_header, 8);
+	NPD.type = read_from_ptr<be_t<s32>>(npd_header, 12);
+	std::memcpy(NPD.content_id, &npd_header[16], 0x30);
+	std::memcpy(NPD.digest, &npd_header[64], 0x10);
+	std::memcpy(NPD.title_hash, &npd_header[80], 0x10);
+	std::memcpy(NPD.dev_hash, &npd_header[96], 0x10);
+	NPD.activate_time = read_from_ptr<be_t<s64>>(npd_header, 112);
+	NPD.expire_time = read_from_ptr<be_t<s64>>(npd_header, 120);

-	EDAT.flags = swap32(*reinterpret_cast<s32*>(&edat_header[0]));
-	EDAT.block_size = swap32(*reinterpret_cast<s32*>(&edat_header[4]));
-	EDAT.file_size = swap64(*reinterpret_cast<u64*>(&edat_header[8]));
+	EDAT.flags = read_from_ptr<be_t<s32>>(edat_header, 0);
+	EDAT.block_size = read_from_ptr<be_t<s32>>(edat_header, 4);
+	EDAT.file_size = read_from_ptr<be_t<u64>>(edat_header, 8);
 }

 bool extract_all_data(const fs::file* input, const fs::file* output, const char* input_file_name, unsigned char* devklic, bool verbose)
--- a/rpcs3/Crypto/unself.cpp
+++ b/rpcs3/Crypto/unself.cpp
@ -365,22 +365,14 @@ void MetadataInfo::Show() const

 void MetadataHeader::Load(u8* in)
 {
-	memcpy(&signature_input_length, in, 8);
-	memcpy(&unknown1, in + 8, 4);
-	memcpy(&section_count, in + 12, 4);
-	memcpy(&key_count, in + 16, 4);
-	memcpy(&opt_header_size, in + 20, 4);
-	memcpy(&unknown2, in + 24, 4);
-	memcpy(&unknown3, in + 28, 4);
-
 	// Endian swap.
-	signature_input_length = swap64(signature_input_length);
-	unknown1 = swap32(unknown1);
-	section_count = swap32(section_count);
-	key_count = swap32(key_count);
-	opt_header_size = swap32(opt_header_size);
-	unknown2 = swap32(unknown2);
-	unknown3 = swap32(unknown3);
+	signature_input_length = read_from_ptr<be_t<u64>>(in);
+	unknown1               = read_from_ptr<be_t<u32>>(in, 8);
+	section_count          = read_from_ptr<be_t<u32>>(in, 12);
+	key_count              = read_from_ptr<be_t<u32>>(in, 16);
+	opt_header_size        = read_from_ptr<be_t<u32>>(in, 20);
+	unknown2               = read_from_ptr<be_t<u32>>(in, 24);
+	unknown3               = read_from_ptr<be_t<u32>>(in, 28);
 }

 void MetadataHeader::Show() const
@ -396,28 +388,17 @@ void MetadataHeader::Show() const

 void MetadataSectionHeader::Load(u8* in)
 {
-	memcpy(&data_offset, in, 8);
-	memcpy(&data_size, in + 8, 8);
-	memcpy(&type, in + 16, 4);
-	memcpy(&program_idx, in + 20, 4);
-	memcpy(&hashed, in + 24, 4);
-	memcpy(&sha1_idx, in + 28, 4);
-	memcpy(&encrypted, in + 32, 4);
-	memcpy(&key_idx, in + 36, 4);
-	memcpy(&iv_idx, in + 40, 4);
-	memcpy(&compressed, in + 44, 4);
-
 	// Endian swap.
-	data_offset = swap64(data_offset);
-	data_size = swap64(data_size);
-	type = swap32(type);
-	program_idx = swap32(program_idx);
-	hashed = swap32(hashed);
-	sha1_idx = swap32(sha1_idx);
-	encrypted = swap32(encrypted);
-	key_idx = swap32(key_idx);
-	iv_idx = swap32(iv_idx);
-	compressed = swap32(compressed);
+	data_offset = read_from_ptr<be_t<u64>>(in);
+	data_size   = read_from_ptr<be_t<u64>>(in, 8);
+	type        = read_from_ptr<be_t<u32>>(in, 16);
+	program_idx = read_from_ptr<be_t<u32>>(in, 20);
+	hashed      = read_from_ptr<be_t<u32>>(in, 24);
+	sha1_idx    = read_from_ptr<be_t<u32>>(in, 28);
+	encrypted   = read_from_ptr<be_t<u32>>(in, 32);
+	key_idx     = read_from_ptr<be_t<u32>>(in, 36);
+	iv_idx      = read_from_ptr<be_t<u32>>(in, 40);
+	compressed  = read_from_ptr<be_t<u32>>(in, 44);
 }

 void MetadataSectionHeader::Show() const
@ -936,19 +917,29 @@ bool SELFDecrypter::LoadHeaders(bool isElf32, SelfAdditionalInfo* out_info)
 		}
 	}

-
 	// Read section info.
 	m_seg_ext_hdr.clear();
 	self_f.seek(m_ext_hdr.segment_ext_hdr_offset);

-	for(u32 i = 0; i < ((isElf32) ? elf32_hdr.e_phnum : elf64_hdr.e_phnum); ++i)
+	for(u32 i = 0; i < (isElf32 ? elf32_hdr.e_phnum : elf64_hdr.e_phnum); ++i)
 	{
+		if (self_f.pos() >= self_f.size())
+		{
+			return false;
+		}
+
 		m_seg_ext_hdr.emplace_back();
 		m_seg_ext_hdr.back().Load(self_f);
 	}

+	if (m_ext_hdr.version_hdr_offset == 0 || utils::add_saturate<u64>(m_ext_hdr.version_hdr_offset, sizeof(version_header)) > self_f.size())
+	{
+		return false;
+	}
+
 	// Read SCE version info.
 	self_f.seek(m_ext_hdr.version_hdr_offset);
+
 	m_version_hdr.Load(self_f);

 	// Read control info.
@ -957,6 +948,11 @@ bool SELFDecrypter::LoadHeaders(bool isElf32, SelfAdditionalInfo* out_info)

 	for (u64 i = 0; i < m_ext_hdr.supplemental_hdr_size;)
 	{
+		if (self_f.pos() >= self_f.size())
+		{
+			return false;
+		}
+
 		m_supplemental_hdr_arr.emplace_back();
 		supplemental_header& cinfo = m_supplemental_hdr_arr.back();
 		cinfo.Load(self_f);
@ -1407,7 +1403,7 @@ static bool CheckDebugSelf(fs::file& s)
 	return false;
 }

-fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* out_info)
+fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* out_info, bool require_encrypted)
 {
 	if (out_info)
 	{
@ -1422,8 +1418,14 @@ fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* ou
 	elf_or_self.seek(0);

 	// Check SELF header first. Check for a debug SELF.
-	if (elf_or_self.size() >= 4 && elf_or_self.read<u32>() == "SCE\0"_u32 && !CheckDebugSelf(elf_or_self))
+	if (elf_or_self.size() >= 4 && elf_or_self.read<u32>() == "SCE\0"_u32)
 	{
+		if (CheckDebugSelf(elf_or_self))
+		{
+			// TODO: Decrypt
+			return elf_or_self;
+		}
+
 		// Check the ELF file class (32 or 64 bit).
 		const bool isElf32 = IsSelfElf32(elf_or_self);

@ -1455,6 +1457,11 @@ fs::file decrypt_self(fs::file elf_or_self, u8* klic_key, SelfAdditionalInfo* ou
 		return self_dec.MakeElf(isElf32);
 	}

+	if (require_encrypted)
+	{
+		return {};
+	}
+
 	return elf_or_self;
 }

--- a/rpcs3/Crypto/unself.h
+++ b/rpcs3/Crypto/unself.h
@ -559,7 +559,7 @@ private:
 	}
 };

-fs::file decrypt_self(fs::file elf_or_self, u8* klic_key = nullptr, SelfAdditionalInfo* additional_info = nullptr);
+fs::file decrypt_self(fs::file elf_or_self, u8* klic_key = nullptr, SelfAdditionalInfo* additional_info = nullptr, bool require_encrypted = false);
 bool verify_npdrm_self_headers(const fs::file& self, u8* klic_key = nullptr, NPD_HEADER* npd_out = nullptr);
 bool get_npdrm_self_header(const fs::file& self, NPD_HEADER& npd);

--- a/rpcs3/Crypto/utils.h
+++ b/rpcs3/Crypto/utils.h
@ -5,39 +5,12 @@
 // http://www.gnu.org/licenses/gpl-2.0.txt

 #include "util/types.hpp"
+#include "util/asm.hpp"

 #include <stdlib.h>

 enum { CRYPTO_MAX_PATH = 4096 };

-// Auxiliary functions (endian swap, xor, and file name).
-inline u16 swap16(u16 i)
-{
-#if defined(__GNUG__)
-	return __builtin_bswap16(i);
-#else
-	return _byteswap_ushort(i);
-#endif
-}
-
-inline u32 swap32(u32 i)
-{
-#if defined(__GNUG__)
-	return __builtin_bswap32(i);
-#else
-	return _byteswap_ulong(i);
-#endif
-}
-
-inline u64 swap64(u64 i)
-{
-#if defined(__GNUG__)
-	return __builtin_bswap64(i);
-#else
-	return _byteswap_uint64(i);
-#endif
-}
-
 char* extract_file_name(const char* file_path, char real_file_name[CRYPTO_MAX_PATH]);

 std::string sha256_get_hash(const char* data, usz size, bool lower_case);
--- a/rpcs3/Emu/Audio/AudioBackend.cpp
+++ b/rpcs3/Emu/Audio/AudioBackend.cpp
@ -158,6 +158,6 @@ AudioChannelCnt AudioBackend::convert_channel_count(u64 raw)
 	case 1:
 		return AudioChannelCnt::STEREO;
 	case 0:
-		fmt::throw_exception("Usupported channel count");
+		fmt::throw_exception("Unsupported channel count");
 	}
 }
--- a/rpcs3/Emu/Audio/Cubeb/CubebBackend.cpp
+++ b/rpcs3/Emu/Audio/Cubeb/CubebBackend.cpp
@ -543,7 +543,7 @@ void CubebBackend::device_collection_changed_cb(cubeb* context, void* user_ptr)

 	if (context != cubeb->m_ctx)
 	{
-		Cubeb.error("device_collection_changed_cb called with unkown context");
+		Cubeb.error("device_collection_changed_cb called with unknown context");
 		return;
 	}

--- a/rpcs3/Emu/Audio/XAudio2/XAudio2Backend.cpp
+++ b/rpcs3/Emu/Audio/XAudio2/XAudio2Backend.cpp
@ -317,12 +317,9 @@ f64 XAudio2Backend::GetCallbackFrameLen()
 		return _10ms;
 	}

-#if _MSC_VER
 	Microsoft::WRL::ComPtr<IXAudio2Extension> xaudio_ext{};
-#endif
 	f64 min_latency{};

-#if _MSC_VER
 	if (HRESULT hr = m_xaudio2_instance->QueryInterface(IID_IXAudio2Extension, std::bit_cast<void**>(xaudio_ext.GetAddressOf())); FAILED(hr))
 	{
 		XAudio.error("QueryInterface() failed: %s (0x%08x)", std::system_category().message(hr), static_cast<u32>(hr));
@ -337,7 +334,6 @@ f64 XAudio2Backend::GetCallbackFrameLen()
 			min_latency = static_cast<f64>(samples_per_q) / freq;
 		}
 	}
-#endif

 	return std::max<f64>(min_latency, _10ms); // 10ms is the minimum for XAudio
 }
--- a/rpcs3/Emu/Audio/XAudio2/XAudio2Backend.h
+++ b/rpcs3/Emu/Audio/XAudio2/XAudio2Backend.h
@ -11,6 +11,7 @@
 #ifdef _MSC_VER
 #include <xaudio2redist.h>
 #else
+#include <initguid.h>
 #include <xaudio2.h>
 #endif
 #include <wrl/client.h>
--- a/rpcs3/Emu/CMakeLists.txt
+++ b/rpcs3/Emu/CMakeLists.txt
@ -163,6 +163,7 @@ if(WIN32)
        Audio/XAudio2/XAudio2Backend.cpp
        Audio/XAudio2/xaudio2_enumerator.cpp
    )
+    target_compile_definitions(rpcs3_emu PRIVATE UNICODE _UNICODE _WIN32_WINNT=0x0602)
 endif()

 target_link_libraries(rpcs3_emu
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@ -261,7 +261,7 @@ struct cpu_prof
 			if (threads.empty())
 			{
 				// Wait for messages if no work (don't waste CPU)
-				thread_ctrl::wait_on(registered, nullptr);
+				thread_ctrl::wait_on(registered);
 				continue;
 			}

@ -939,7 +939,7 @@ bool cpu_thread::check_state() noexcept
 						else
 						{
 							// TODO: fix the workaround
-							g_suspend_counter.wait(ctr, -4, atomic_wait_timeout{100});
+							g_suspend_counter.wait(ctr, atomic_wait_timeout{10'000});
 						}
 					}
 					else
@ -972,8 +972,7 @@ bool cpu_thread::check_state() noexcept
 				}

 				// Short sleep when yield flag is present alone (makes no sense when other methods which can stop thread execution have been done)
-				// Pass a mask of a single bit which is often unused to avoid notifications
-				s_dummy_atomic.wait(0, 1u << 30, atomic_wait_timeout{80'000});
+				s_dummy_atomic.wait(0, atomic_wait_timeout{80'000});
 			}
 		}
 	}
@ -1010,13 +1009,13 @@ cpu_thread& cpu_thread::operator=(thread_state)

 	if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit))
 	{
-		state.notify_one(cpu_flag::exit);
+		state.notify_one();

 		if (auto thread = try_get<spu_thread>())
 		{
 			if (u32 resv = atomic_storage<u32>::load(thread->raddr))
 			{
-				vm::reservation_notifier(resv).notify_all(-128);
+				vm::reservation_notifier(resv).notify_all();
 			}
 		}
 	}
--- a/rpcs3/Emu/Cell/Modules/cellAdec.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAdec.cpp
@ -588,10 +588,10 @@ public:
 						frame.auAddr = task.au.addr;
 						frame.auSize = task.au.size;
 						frame.userdata = task.au.userdata;
-						frame.size = frame.data->nb_samples * frame.data->channels * nbps;
+						frame.size = frame.data->nb_samples * frame.data->ch_layout.nb_channels * nbps;

 						//cellAdec.notice("got audio frame (pts=0x%llx, nb_samples=%d, ch=%d, sample_rate=%d, nbps=%d)",
-							//frame.pts, frame.data->nb_samples, frame.data->channels, frame.data->sample_rate, nbps);
+							//frame.pts, frame.data->nb_samples, frame.data->ch_layout.nb_channels, frame.data->sample_rate, nbps);

 						if (frames.push(frame, &is_closed))
 						{
@ -944,7 +944,7 @@ error_code cellAdecGetPcm(u32 handle, vm::ptr<float> outBuffer)
 	if (outBuffer)
 	{
 		// reverse byte order:
-		if (frame->format == AV_SAMPLE_FMT_FLTP && frame->channels == 1)
+		if (frame->format == AV_SAMPLE_FMT_FLTP && frame->ch_layout.nb_channels == 1)
 		{
 			float* in_f = reinterpret_cast<float*>(frame->extended_data[0]);
 			for (u32 i = 0; i < af.size / 4; i++)
@ -952,7 +952,7 @@ error_code cellAdecGetPcm(u32 handle, vm::ptr<float> outBuffer)
 				outBuffer[i] = in_f[i];
 			}
 		}
-		else if (frame->format == AV_SAMPLE_FMT_FLTP && frame->channels == 2)
+		else if (frame->format == AV_SAMPLE_FMT_FLTP && frame->ch_layout.nb_channels == 2)
 		{
 			float* in_f[2];
 			in_f[0] = reinterpret_cast<float*>(frame->extended_data[0]);
@ -963,7 +963,7 @@ error_code cellAdecGetPcm(u32 handle, vm::ptr<float> outBuffer)
 				outBuffer[i * 2 + 1] = in_f[1][i];
 			}
 		}
-		else if (frame->format == AV_SAMPLE_FMT_FLTP && frame->channels == 6)
+		else if (frame->format == AV_SAMPLE_FMT_FLTP && frame->ch_layout.nb_channels == 6)
 		{
 			float* in_f[6];
 			in_f[0] = reinterpret_cast<float*>(frame->extended_data[0]);
@ -982,7 +982,7 @@ error_code cellAdecGetPcm(u32 handle, vm::ptr<float> outBuffer)
 				outBuffer[i * 6 + 5] = in_f[5][i];
 			}
 		}
-		else if (frame->format == AV_SAMPLE_FMT_FLTP && frame->channels == 8)
+		else if (frame->format == AV_SAMPLE_FMT_FLTP && frame->ch_layout.nb_channels == 8)
 		{
 			float* in_f[8];
 			in_f[0] = reinterpret_cast<float*>(frame->extended_data[0]);
@ -1005,7 +1005,7 @@ error_code cellAdecGetPcm(u32 handle, vm::ptr<float> outBuffer)
 				outBuffer[i * 8 + 7] = in_f[7][i];
 			}
 		}
-		else if (frame->format == AV_SAMPLE_FMT_S16P && frame->channels == 1)
+		else if (frame->format == AV_SAMPLE_FMT_S16P && frame->ch_layout.nb_channels == 1)
 		{
 			s16* in_i = reinterpret_cast<s16*>(frame->extended_data[0]);
 			for (u32 i = 0; i < af.size / 2; i++)
@ -1013,7 +1013,7 @@ error_code cellAdecGetPcm(u32 handle, vm::ptr<float> outBuffer)
 				outBuffer[i] = in_i[i] / 32768.f;
 			}
 		}
-		else if (frame->format == AV_SAMPLE_FMT_S16P && frame->channels == 2)
+		else if (frame->format == AV_SAMPLE_FMT_S16P && frame->ch_layout.nb_channels == 2)
 		{
 			s16* in_i[2];
 			in_i[0] = reinterpret_cast<s16*>(frame->extended_data[0]);
@ -1026,7 +1026,7 @@ error_code cellAdecGetPcm(u32 handle, vm::ptr<float> outBuffer)
 		}
 		else
 		{
-			fmt::throw_exception("Unsupported frame format (channels=%d, format=%d)", frame->channels, frame->format);
+			fmt::throw_exception("Unsupported frame format (channels=%d, format=%d)", frame->ch_layout.nb_channels, frame->format);
 		}
 	}

@ -1078,25 +1078,26 @@ error_code cellAdecGetPcmItem(u32 handle, vm::pptr<CellAdecPcmItem> pcmItem)

 		atx->samplingFreq = frame->sample_rate;
 		atx->nbytes = frame->nb_samples * u32{sizeof(float)};
-		if (frame->channels == 1)
+
+		switch (frame->ch_layout.nb_channels)
 		{
-			atx->channelConfigIndex = 1;
-		}
-		else if (frame->channels == 2)
+		case 1:
+		case 2:
+		case 6:
 		{
-			atx->channelConfigIndex = 2;
+			atx->channelConfigIndex = frame->ch_layout.nb_channels;
+			break;
 		}
-		else if (frame->channels == 6)
-		{
-			atx->channelConfigIndex = 6;
-		}
-		else if (frame->channels == 8)
+		case 8:
 		{
 			atx->channelConfigIndex = 7;
+			break;
 		}
-		else
+		default:
 		{
-			cellAdec.fatal("cellAdecGetPcmItem(): unsupported channel count (%d)", frame->channels);
+			cellAdec.fatal("cellAdecGetPcmItem(): unsupported channel count (%d)", frame->ch_layout.nb_channels);
+			break;
+		}
 		}
 	}
 	else if (adec->type == CELL_ADEC_TYPE_MP3)
--- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp
@ -10,6 +10,8 @@

 LOG_CHANNEL(cellAudio);

+extern void lv2_sleep(u64 timeout, ppu_thread* ppu = nullptr);
+
 vm::gvar<char, AUDIO_PORT_OFFSET * AUDIO_PORT_COUNT> g_audio_buffer;

 struct alignas(16) aligned_index_t
@ -1252,8 +1254,6 @@ error_code cellAudioPortOpen(vm::ptr<CellAudioPortParam> audioParam, vm::ptr<u32

 	auto& g_audio = g_fxo->get<cell_audio>();

-	std::lock_guard lock(g_audio.mutex);
-
 	if (!g_audio.init)
 	{
 		return CELL_AUDIO_ERROR_NOT_INIT;
@ -1319,6 +1319,16 @@ error_code cellAudioPortOpen(vm::ptr<CellAudioPortParam> audioParam, vm::ptr<u32
 		cellAudio.todo("cellAudioPortOpen(): unknown attributes (0x%llx)", attr);
 	}

+	// Waiting for VSH and doing some more things
+	lv2_sleep(200);
+
+	std::lock_guard lock(g_audio.mutex);
+
+	if (!g_audio.init)
+	{
+		return CELL_AUDIO_ERROR_NOT_INIT;
+	}
+
 	// Open audio port
 	audio_port* port = g_audio.open_port();

@ -1410,8 +1420,6 @@ error_code cellAudioPortStart(u32 portNum)

 	auto& g_audio = g_fxo->get<cell_audio>();

-	std::lock_guard lock(g_audio.mutex);
-
 	if (!g_audio.init)
 	{
 		return CELL_AUDIO_ERROR_NOT_INIT;
@ -1422,6 +1430,16 @@ error_code cellAudioPortStart(u32 portNum)
 		return CELL_AUDIO_ERROR_PARAM;
 	}

+	// Waiting for VSH
+	lv2_sleep(30);
+
+	std::lock_guard lock(g_audio.mutex);
+
+	if (!g_audio.init)
+	{
+		return CELL_AUDIO_ERROR_NOT_INIT;
+	}
+
 	switch (audio_port_state state = g_audio.ports[portNum].state.compare_and_swap(audio_port_state::opened, audio_port_state::started))
 	{
 	case audio_port_state::closed: return CELL_AUDIO_ERROR_PORT_NOT_OPEN;
@ -1650,10 +1668,69 @@ error_code cellAudioCreateNotifyEventQueueEx(ppu_thread& ppu, vm::ptr<u32> id, v
 	return AudioCreateNotifyEventQueue(ppu, id, key, queue_type);
 }

-error_code AudioSetNotifyEventQueue(u64 key, u32 iFlags)
+error_code AudioSetNotifyEventQueue(ppu_thread& ppu, u64 key, u32 iFlags)
 {
 	auto& g_audio = g_fxo->get<cell_audio>();

+	if (!g_audio.init)
+	{
+		return CELL_AUDIO_ERROR_NOT_INIT;
+	}
+
+	// Waiting for VSH
+	lv2_sleep(20, &ppu);
+
+	// Dirty hack for sound: confirm the creation of _mxr000 event queue by _cellsurMixerMain thread
+	constexpr u64 c_mxr000 = 0x8000cafe0246030;
+
+	if (key == c_mxr000 || key == 0)
+	{
+		bool has_sur_mixer_thread = false;
+
+		for (usz count = 0; !lv2_event_queue::find(c_mxr000) && count < 100; count++)
+		{
+			if (has_sur_mixer_thread || idm::select<named_thread<ppu_thread>>([&](u32 id, named_thread<ppu_thread>& test_ppu)
+			{
+				// Confirm thread existence
+				if (id == ppu.id)
+				{
+					return false;
+				}
+
+				const auto ptr = test_ppu.ppu_tname.load();
+
+				if (!ptr)
+				{
+					return false;
+				}
+
+				return *ptr == "_cellsurMixerMain"sv;
+			}).ret)
+			{
+				has_sur_mixer_thread = true;
+			}
+			else
+			{
+				break;
+			}
+
+			if (ppu.is_stopped())
+			{
+				ppu.state += cpu_flag::again;
+				return {};
+			}
+
+			cellAudio.error("AudioSetNotifyEventQueue(): Waiting for _mxr000. x%d", count);
+
+			lv2_sleep(50'000, &ppu);
+		}
+
+		if (has_sur_mixer_thread && lv2_event_queue::find(c_mxr000))
+		{
+			key = c_mxr000;
+		}
+	}
+
 	std::lock_guard lock(g_audio.mutex);

 	if (!g_audio.init)
@ -1687,27 +1764,33 @@ error_code AudioSetNotifyEventQueue(u64 key, u32 iFlags)
 	}

 	// Set unique source associated with the key
-	g_audio.keys.push_back({
+	g_audio.keys.push_back
+	({
 		.start_period = g_audio.event_period,
 		.flags = iFlags,
 		.source = ((process_getpid() + u64{}) << 32) + lv2_event_port::id_base + (g_audio.key_count++ * lv2_event_port::id_step),
 		.ack_timestamp = 0,
 		.port = std::move(q)
 	});
+
 	g_audio.key_count %= lv2_event_port::id_count;

 	return CELL_OK;
 }

-error_code cellAudioSetNotifyEventQueue(u64 key)
+error_code cellAudioSetNotifyEventQueue(ppu_thread& ppu, u64 key)
 {
+	ppu.state += cpu_flag::wait;
+
 	cellAudio.warning("cellAudioSetNotifyEventQueue(key=0x%llx)", key);

-	return AudioSetNotifyEventQueue(key, 0);
+	return AudioSetNotifyEventQueue(ppu, key, 0);
 }

-error_code cellAudioSetNotifyEventQueueEx(u64 key, u32 iFlags)
+error_code cellAudioSetNotifyEventQueueEx(ppu_thread& ppu, u64 key, u32 iFlags)
 {
+	ppu.state += cpu_flag::wait;
+
 	cellAudio.todo("cellAudioSetNotifyEventQueueEx(key=0x%llx, iFlags=0x%x)", key, iFlags);

 	if (iFlags & (~0u >> 5))
@ -1715,7 +1798,7 @@ error_code cellAudioSetNotifyEventQueueEx(u64 key, u32 iFlags)
 		return CELL_AUDIO_ERROR_PARAM;
 	}

-	return AudioSetNotifyEventQueue(key, iFlags);
+	return AudioSetNotifyEventQueue(ppu, key, iFlags);
 }

 error_code AudioRemoveNotifyEventQueue(u64 key, u32 iFlags)
--- a/rpcs3/Emu/Cell/Modules/cellAvconfExt.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAvconfExt.cpp
@ -202,7 +202,7 @@ error_code cellVideoOutSetupDisplay(u32 videoOut)

 error_code cellAudioInGetDeviceInfo(u32 deviceNumber, u32 deviceIndex, vm::ptr<CellAudioInDeviceInfo> info)
 {
-	cellAvconfExt.todo("cellAudioInGetDeviceInfo(deviceNumber=0x%x, deviceIndex=0x%x, info=*0x%x)", deviceNumber, deviceIndex, info);
+	cellAvconfExt.trace("cellAudioInGetDeviceInfo(deviceNumber=0x%x, deviceIndex=0x%x, info=*0x%x)", deviceNumber, deviceIndex, info);

 	if (deviceIndex != 0 || !info)
 	{
@ -277,7 +277,7 @@ error_code cellVideoOutGetGamma(u32 videoOut, vm::ptr<f32> gamma)

 error_code cellAudioInGetAvailableDeviceInfo(u32 count, vm::ptr<CellAudioInDeviceInfo> device_info)
 {
-	cellAvconfExt.todo("cellAudioInGetAvailableDeviceInfo(count=%d, info=*0x%x)", count, device_info);
+	cellAvconfExt.trace("cellAudioInGetAvailableDeviceInfo(count=%d, info=*0x%x)", count, device_info);

 	if (count > 16 || !device_info)
 	{
--- a/rpcs3/Emu/Cell/Modules/cellCamera.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellCamera.cpp
@ -1891,7 +1891,6 @@ bool camera_context::on_handler_state(camera_handler_base::camera_handler_state
 {
 	switch (state)
 	{
-	case camera_handler_base::camera_handler_state::not_available:
 	case camera_handler_base::camera_handler_state::closed:
 	{
 		if (is_attached)
--- a/rpcs3/Emu/Cell/Modules/cellGame.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellGame.cpp
@ -349,6 +349,29 @@ void disc_change_manager::insert_disc(u32 disc_type, std::string title_id)
 	});
 }

+extern void lv2_sleep(u64 timeout, ppu_thread* ppu = nullptr) // Puts the PPU thread through lv2_obj::sleep + lv2_obj::wait_timeout(timeout); returns immediately when timeout is 0
+{
+	if (!ppu)
+	{
+		ppu = ensure(cpu_thread::get_current<ppu_thread>()); // No thread supplied: use the one returned by cpu_thread::get_current<ppu_thread>(), passed through ensure()
+	}
+
+	if (!timeout)
+	{
+		return; // Zero timeout: nothing to wait for, thread state is left untouched
+	}
+
+	const bool had_wait = ppu->state.test_and_set(cpu_flag::wait); // Sets cpu_flag::wait and keeps test_and_set's result (presumably the previous flag value - confirm)
+
+	lv2_obj::sleep(*ppu);
+	lv2_obj::wait_timeout(timeout); // NOTE(review): timeout unit is presumably microseconds of guest time - confirm against lv2_obj::wait_timeout
+	ppu->check_state();
+
+	if (had_wait)
+	{
+		ppu->state += cpu_flag::wait; // Re-apply cpu_flag::wait after check_state() when had_wait reported it as set before the sleep
+	}
+}

 error_code cellHddGameCheck(ppu_thread& ppu, u32 version, vm::cptr<char> dirName, u32 errDialog, vm::ptr<CellHddGameStatCallback> funcStat, u32 container)
 {
@ -434,6 +457,8 @@ error_code cellHddGameCheck(ppu_thread& ppu, u32 version, vm::cptr<char> dirName

 	// TODO ?

+	lv2_sleep(5000, &ppu);
+
 	funcStat(ppu, result, get, set);

 	std::string error_msg;
@ -445,6 +470,8 @@ error_code cellHddGameCheck(ppu_thread& ppu, u32 version, vm::cptr<char> dirName
 		// Game confirmed that it wants to create directory
 		const auto setParam = set->setParam;

+		lv2_sleep(2000, &ppu);
+
 		if (new_data)
 		{
 			if (!setParam)
@ -536,6 +563,10 @@ error_code cellHddGameCheck(ppu_thread& ppu, u32 version, vm::cptr<char> dirName
 			return CELL_GAMEDATA_ERROR_INTERNAL;
 		}
 	}
+	else
+	{
+		lv2_sleep(2000, &ppu);
+	}

 	return CELL_HDDGAME_ERROR_CBRESULT;
 }
@ -548,14 +579,24 @@ error_code cellHddGameCheck2(ppu_thread& ppu, u32 version, vm::cptr<char> dirNam
 	return cellHddGameCheck(ppu, version, dirName, errDialog, funcStat, container);
 }

-error_code cellHddGameGetSizeKB(vm::ptr<u32> size)
+error_code cellHddGameGetSizeKB(ppu_thread& ppu, vm::ptr<u32> size)
 {
+	ppu.state += cpu_flag::wait;
+
 	cellGame.warning("cellHddGameGetSizeKB(size=*0x%x)", size);

+	lv2_obj::sleep(ppu);
+
+	const u64 start_sleep = ppu.start_time;
+
 	const std::string local_dir = vfs::get(Emu.GetDir());

 	const auto dirsz = fs::get_dir_size(local_dir, 1024);

+	// This function is very slow by nature
+	// TODO: Check if after first use the result is being cached so the sleep can be reduced in this case
+	lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 2000 : 200000, get_guest_system_time() - start_sleep), &ppu);
+
 	if (dirsz == umax)
 	{
 		const auto error = fs::g_tls_error;
@ -568,7 +609,8 @@ error_code cellHddGameGetSizeKB(vm::ptr<u32> size)
 		return CELL_HDDGAME_ERROR_FAILURE;
 	}

-	*size = ::narrow<u32>(dirsz / 1024);
+	ppu.check_state();
+	*size = ::narrow<s32>(dirsz / 1024);

 	return CELL_OK;
 }
@ -591,8 +633,10 @@ error_code cellHddGameExitBroken()
 	return open_exit_dialog(get_localized_string(localized_string_id::CELL_HDD_GAME_EXIT_BROKEN), true);
 }

-error_code cellGameDataGetSizeKB(vm::ptr<u32> size)
+error_code cellGameDataGetSizeKB(ppu_thread& ppu, vm::ptr<u32> size)
 {
+	ppu.state += cpu_flag::wait;
+
 	cellGame.warning("cellGameDataGetSizeKB(size=*0x%x)", size);

 	if (!size)
@ -600,10 +644,18 @@ error_code cellGameDataGetSizeKB(vm::ptr<u32> size)
 		return CELL_GAMEDATA_ERROR_PARAM;
 	}

+	lv2_obj::sleep(ppu);
+
+	const u64 start_sleep = ppu.start_time;
+
 	const std::string local_dir = vfs::get(Emu.GetDir());

 	const auto dirsz = fs::get_dir_size(local_dir, 1024);

+	// This function is very slow by nature
+	// TODO: Check if after first use the result is being cached so the sleep can be reduced in this case
+	lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 2000 : 200000, get_guest_system_time() - start_sleep), &ppu);
+
 	if (dirsz == umax)
 	{
 		const auto error = fs::g_tls_error;
@ -616,7 +668,8 @@ error_code cellGameDataGetSizeKB(vm::ptr<u32> size)
 		return CELL_GAMEDATA_ERROR_FAILURE;
 	}

-	*size = ::narrow<u32>(dirsz / 1024);
+	ppu.check_state();
+	*size = ::narrow<s32>(dirsz / 1024);

 	return CELL_OK;
 }
@ -650,6 +703,8 @@ error_code cellGameBootCheck(vm::ptr<u32> type, vm::ptr<u32> attributes, vm::ptr

 	auto& perm = g_fxo->get<content_permission>();

+	lv2_sleep(500);
+
 	const auto init = perm.init.init();

 	if (!init)
@ -662,11 +717,13 @@ error_code cellGameBootCheck(vm::ptr<u32> type, vm::ptr<u32> attributes, vm::ptr

 	const std::string& cat = Emu.GetFakeCat();

+	u32 _type{};
+
 	if (cat == "DG")
 	{
 		perm.mode = content_permission::check_mode::disc_game;

-		*type = CELL_GAME_GAMETYPE_DISC;
+		_type = CELL_GAME_GAMETYPE_DISC;
 		*attributes = 0; // TODO
 		// TODO: dirName might be a read only string when BootCheck is called on a disc game. (e.g. Ben 10 Ultimate Alien: Cosmic Destruction)

@ -676,7 +733,7 @@ error_code cellGameBootCheck(vm::ptr<u32> type, vm::ptr<u32> attributes, vm::ptr
 	{
 		perm.mode = content_permission::check_mode::patch;

-		*type = CELL_GAME_GAMETYPE_DISC;
+		_type = CELL_GAME_GAMETYPE_DISC;
 		*attributes = CELL_GAME_ATTRIBUTE_PATCH; // TODO

 		sfo = psf::load_object(vfs::get(Emu.GetDir() + "PARAM.SFO"));
@ -685,13 +742,15 @@ error_code cellGameBootCheck(vm::ptr<u32> type, vm::ptr<u32> attributes, vm::ptr
 	{
 		perm.mode = content_permission::check_mode::hdd_game;

-		*type = CELL_GAME_GAMETYPE_HDD;
+		_type = CELL_GAME_GAMETYPE_HDD;
 		*attributes = 0; // TODO

 		sfo = psf::load_object(vfs::get(Emu.GetDir() + "PARAM.SFO"));
 		dir = Emu.GetTitleID();
 	}

+	*type = _type;
+
 	if (size)
 	{
 		// TODO: Use the free space of the computer's HDD where RPCS3 is being run.
@ -702,7 +761,7 @@ error_code cellGameBootCheck(vm::ptr<u32> type, vm::ptr<u32> attributes, vm::ptr
 		size->sysSizeKB = 4;
 	}

-	if (*type == u32{CELL_GAME_GAMETYPE_HDD} && dirName)
+	if (_type == u32{CELL_GAME_GAMETYPE_HDD} && dirName)
 	{
 		strcpy_trunc(*dirName, Emu.GetTitleID());
 	}
@ -718,6 +777,8 @@ error_code cellGamePatchCheck(vm::ptr<CellGameContentSize> size, vm::ptr<void> r
 {
 	cellGame.warning("cellGamePatchCheck(size=*0x%x, reserved=*0x%x)", size, reserved);

+	lv2_sleep(5000);
+
 	if (Emu.GetCat() != "GD")
 	{
 		return CELL_GAME_ERROR_NOTPATCH;
@ -778,9 +839,14 @@ error_code cellGameDataCheck(u32 type, vm::cptr<char> dirName, vm::ptr<CellGameC

 	if (!init)
 	{
+		lv2_sleep(300);
 		return CELL_GAME_ERROR_BUSY;
 	}

+	// This function is incredibly slow, slower for DISC type and even if the game/disc data does not exist
+	// Null size does not change it
+	lv2_sleep(type == CELL_GAME_GAMETYPE_DISC ? 300000 : 120000);
+
 	auto [sfo, psf_error] = psf::load(vfs::get(dir + "/PARAM.SFO"));

 	if (const std::string_view cat = psf::get_string(sfo, "CATEGORY"); [&]()
@ -807,7 +873,7 @@ error_code cellGameDataCheck(u32 type, vm::cptr<char> dirName, vm::ptr<CellGameC
 		size->hddFreeSizeKB = 40 * 1024 * 1024 - 1; // Read explanation in cellHddGameCheck

 		// TODO: Calculate data size for game data, if necessary.
-		size->sizeKB = CELL_GAME_SIZEKB_NOTCALC;
+		size->sizeKB = sfo.empty() ? 0 : CELL_GAME_SIZEKB_NOTCALC;
 		size->sysSizeKB = 0; // TODO
 	}

@ -826,7 +892,7 @@ error_code cellGameDataCheck(u32 type, vm::cptr<char> dirName, vm::ptr<CellGameC
 	return CELL_OK;
 }

-error_code cellGameContentPermit(vm::ptr<char[CELL_GAME_PATH_MAX]> contentInfoPath, vm::ptr<char[CELL_GAME_PATH_MAX]> usrdirPath)
+error_code cellGameContentPermit(ppu_thread& ppu, vm::ptr<char[CELL_GAME_PATH_MAX]> contentInfoPath, vm::ptr<char[CELL_GAME_PATH_MAX]> usrdirPath)
 {
 	cellGame.warning("cellGameContentPermit(contentInfoPath=*0x%x, usrdirPath=*0x%x)", contentInfoPath, usrdirPath);

@ -854,6 +920,10 @@ error_code cellGameContentPermit(vm::ptr<char[CELL_GAME_PATH_MAX]> contentInfoPa
 		return CELL_OK;
 	}

+	lv2_obj::sleep(ppu);
+
+	const u64 start_sleep = ppu.start_time;
+
 	if (!perm.temp.empty())
 	{
 		// Create PARAM.SFO
@ -882,6 +952,9 @@ error_code cellGameContentPermit(vm::ptr<char[CELL_GAME_PATH_MAX]> contentInfoPa
 		ensure(temp.commit());
 	}

+	// This function is very slow by nature
+	lv2_sleep(utils::sub_saturate<u64>(!perm.temp.empty() || perm.can_create ? 200000 : 2000, get_guest_system_time() - start_sleep), &ppu);
+
 	// Cleanup
 	perm.reset();

@ -892,7 +965,7 @@ error_code cellGameContentPermit(vm::ptr<char[CELL_GAME_PATH_MAX]> contentInfoPa

 error_code cellGameDataCheckCreate2(ppu_thread& ppu, u32 version, vm::cptr<char> dirName, u32 errDialog, vm::ptr<CellGameDataStatCallback> funcStat, u32 container)
 {
-	cellGame.error("cellGameDataCheckCreate2(version=0x%x, dirName=%s, errDialog=0x%x, funcStat=*0x%x, container=%d)", version, dirName, errDialog, funcStat, container);
+	cellGame.success("cellGameDataCheckCreate2(version=0x%x, dirName=%s, errDialog=0x%x, funcStat=*0x%x, container=%d)", version, dirName, errDialog, funcStat, container);

 	//older sdk. it might not care about game type.

@ -954,6 +1027,8 @@ error_code cellGameDataCheckCreate2(ppu_thread& ppu, u32 version, vm::cptr<char>
 		strcpy_trunc(cbGet->getParam.titleLang[i], psf::get_string(sfo, fmt::format("TITLE_%02d", i)));
 	}

+	lv2_sleep(5000, &ppu);
+
 	funcStat(ppu, cbResult, cbGet, cbSet);

 	std::string error_msg;
@ -970,6 +1045,8 @@ error_code cellGameDataCheckCreate2(ppu_thread& ppu, u32 version, vm::cptr<char>
 		// Game confirmed that it wants to create directory
 		const auto setParam = cbSet->setParam;

+		lv2_sleep(2000, &ppu);
+
 		if (new_data)
 		{
 			if (!setParam)
@ -1065,6 +1142,10 @@ error_code cellGameDataCheckCreate2(ppu_thread& ppu, u32 version, vm::cptr<char>
 			return CELL_GAMEDATA_ERROR_INTERNAL;
 		}
 	}
+	else
+	{
+		lv2_sleep(2000, &ppu);
+	}

 	return CELL_GAMEDATA_ERROR_CBRESULT;
 }
@ -1079,7 +1160,7 @@ error_code cellGameDataCheckCreate(ppu_thread& ppu, u32 version, vm::cptr<char>

 error_code cellGameCreateGameData(vm::ptr<CellGameSetInitParams> init, vm::ptr<char[CELL_GAME_PATH_MAX]> tmp_contentInfoPath, vm::ptr<char[CELL_GAME_PATH_MAX]> tmp_usrdirPath)
 {
-	cellGame.error("cellGameCreateGameData(init=*0x%x, tmp_contentInfoPath=*0x%x, tmp_usrdirPath=*0x%x)", init, tmp_contentInfoPath, tmp_usrdirPath);
+	cellGame.success("cellGameCreateGameData(init=*0x%x, tmp_contentInfoPath=*0x%x, tmp_usrdirPath=*0x%x)", init, tmp_contentInfoPath, tmp_usrdirPath);

 	if (!init)
 	{
@ -1090,6 +1171,8 @@ error_code cellGameCreateGameData(vm::ptr<CellGameSetInitParams> init, vm::ptr<c

 	const auto _init = perm.init.access();

+	lv2_sleep(2000);
+
 	if (!_init || perm.dir.empty())
 	{
 		return CELL_GAME_ERROR_FAILURE;
@ -1105,6 +1188,9 @@ error_code cellGameCreateGameData(vm::ptr<CellGameSetInitParams> init, vm::ptr<c
 		return CELL_GAME_ERROR_EXIST;
 	}

+	// Account for filesystem operations
+	lv2_sleep(50'000);
+
 	std::string dirname = "_GDATA_" + std::to_string(steady_clock::now().time_since_epoch().count());
 	std::string tmp_contentInfo = "/dev_hdd0/game/" + dirname;
 	std::string tmp_usrdir = "/dev_hdd0/game/" + dirname + "/USRDIR";
@ -1231,6 +1317,8 @@ error_code cellGameGetParamInt(s32 id, vm::ptr<s32> value)
 		return CELL_GAME_ERROR_PARAM;
 	}

+	lv2_sleep(2000);
+
 	auto& perm = g_fxo->get<content_permission>();

 	const auto init = perm.init.access();
@ -1359,6 +1447,8 @@ error_code cellGameGetParamString(s32 id, vm::ptr<char> buf, u32 bufsize)

 	auto& perm = g_fxo->get<content_permission>();

+	lv2_sleep(2000);
+
 	const auto init = perm.init.access();

 	if (!init || perm.mode == content_permission::check_mode::not_set)
@ -1401,6 +1491,8 @@ error_code cellGameSetParamString(s32 id, vm::cptr<char> buf)
 		return CELL_GAME_ERROR_PARAM;
 	}

+	lv2_sleep(2000);
+
 	auto& perm = g_fxo->get<content_permission>();

 	const auto init = perm.init.access();
@ -1427,7 +1519,7 @@ error_code cellGameSetParamString(s32 id, vm::cptr<char> buf)
 	return CELL_OK;
 }

-error_code cellGameGetSizeKB(vm::ptr<s32> size)
+error_code cellGameGetSizeKB(ppu_thread& ppu, vm::ptr<s32> size)
 {
 	cellGame.warning("cellGameGetSizeKB(size=*0x%x)", size);

@ -1438,6 +1530,7 @@ error_code cellGameGetSizeKB(vm::ptr<s32> size)

 	// Always reset to 0 at start
 	*size = 0;
+	ppu.state += cpu_flag::wait;

 	auto& perm = g_fxo->get<content_permission>();

@ -1448,10 +1541,18 @@ error_code cellGameGetSizeKB(vm::ptr<s32> size)
 		return CELL_GAME_ERROR_FAILURE;
 	}

+	lv2_obj::sleep(ppu);
+
+	const u64 start_sleep = ppu.start_time;
+
 	const std::string local_dir = !perm.temp.empty() ? perm.temp : vfs::get("/dev_hdd0/game/" + perm.dir);

 	const auto dirsz = fs::get_dir_size(local_dir, 1024);

+	// This function is very slow by nature
+	// TODO: Check if after first use the result is being cached so the sleep can be reduced in this case
+	lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 1000 : 200000, get_guest_system_time() - start_sleep), &ppu);
+
 	if (dirsz == umax)
 	{
 		const auto error = fs::g_tls_error;
@ -1467,7 +1568,8 @@ error_code cellGameGetSizeKB(vm::ptr<s32> size)
 		}
 	}

-	*size = ::narrow<u32>(dirsz / 1024);
+	ppu.check_state();
+	*size = ::narrow<s32>(dirsz / 1024);

 	return CELL_OK;
 }
@ -1708,6 +1810,8 @@ error_code cellDiscGameGetBootDiscInfo(vm::ptr<CellDiscGameSystemFileParam> getP
 	// Always sets 0 at first dword
 	write_to_ptr<u32>(getParam->titleId, 0);

+	lv2_sleep(2000);
+
 	// This is also called by non-disc games, see NPUB90029
 	static const std::string dir = "/dev_bdvd/PS3_GAME"s;

--- a/rpcs3/Emu/Cell/Modules/cellHttpUtil.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellHttpUtil.cpp
@ -132,7 +132,7 @@ error_code cellHttpUtilParseUri(vm::ptr<CellHttpUri> uri, vm::cptr<char> str, vm
 				parseError = "Error, URI didn't contain a slash";
 				break;
 			default:
-				parseError = "Error, unkown error #" + std::to_string(static_cast<int>(URL.m_ErrorCode));
+				parseError = "Error, unknown error #" + std::to_string(static_cast<int>(URL.m_ErrorCode));
 				break;
 		}
 		cellHttpUtil.error("%s, while parsing URI, %s.", parseError, str.get_ptr());
--- a/rpcs3/Emu/Cell/Modules/cellKb.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellKb.cpp
@ -6,8 +6,8 @@
 #include "Emu/Io/KeyboardHandler.h"
 #include "cellKb.h"

-extern void libio_sys_config_init();
-extern void libio_sys_config_end();
+error_code sys_config_start(ppu_thread& ppu);
+error_code sys_config_stop(ppu_thread& ppu);

 extern bool is_input_allowed();

@ -61,7 +61,7 @@ void KeyboardHandlerBase::save(utils::serial& ar)
 	ar(inited ? m_info.max_connect : 0);
 }

-error_code cellKbInit(u32 max_connect)
+error_code cellKbInit(ppu_thread& ppu, u32 max_connect)
 {
 	sys_io.warning("cellKbInit(max_connect=%d)", max_connect);

@ -78,13 +78,13 @@ error_code cellKbInit(u32 max_connect)
 		return CELL_KB_ERROR_INVALID_PARAMETER;
 	}

-	libio_sys_config_init();
+	sys_config_start(ppu);
 	handler.Init(std::min(max_connect, 7u));

 	return CELL_OK;
 }

-error_code cellKbEnd()
+error_code cellKbEnd(ppu_thread& ppu)
 {
 	sys_io.notice("cellKbEnd()");

@ -96,7 +96,7 @@ error_code cellKbEnd()
 		return CELL_KB_ERROR_UNINITIALIZED;

 	// TODO
-	libio_sys_config_end();
+	sys_config_stop(ppu);
 	return CELL_OK;
 }

--- a/rpcs3/Emu/Cell/Modules/cellMic.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellMic.cpp
@ -73,7 +73,7 @@ void mic_context::operator()()
 	// Timestep in microseconds
 	constexpr u64 TIMESTEP = 256ull * 1'000'000ull / 48000ull;
 	u64 timeout = 0;
-	u64 oldvalue = 0;
+	u32 oldvalue = 0;

 	while (thread_ctrl::state() != thread_state::aborting)
 	{
--- a/rpcs3/Emu/Cell/Modules/cellMic.h
+++ b/rpcs3/Emu/Cell/Modules/cellMic.h
@ -374,7 +374,7 @@ public:
 	static constexpr auto thread_name = "Microphone Thread"sv;

 protected:
-	atomic_t<u64> wakey = 0;
+	atomic_t<u32> wakey = 0;

 	//	u32 signalStateLocalTalk = 9; // value is in range 0-10. 10 indicates talking, 0 indicating none.
 	//	u32 signalStateFarTalk = 0; // value is in range 0-10. 10 indicates talking from far away, 0 indicating none.
--- a/rpcs3/Emu/Cell/Modules/cellMouse.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellMouse.cpp
@ -7,8 +7,8 @@

 #include "cellMouse.h"

-extern void libio_sys_config_init();
-extern void libio_sys_config_end();
+error_code sys_config_start(ppu_thread& ppu);
+error_code sys_config_stop(ppu_thread& ppu);

 extern bool is_input_allowed();

@ -61,7 +61,7 @@ void MouseHandlerBase::save(utils::serial& ar)
 	ar(inited ? m_info.max_connect : 0);
 }

-error_code cellMouseInit(u32 max_connect)
+error_code cellMouseInit(ppu_thread& ppu, u32 max_connect)
 {
 	sys_io.notice("cellMouseInit(max_connect=%d)", max_connect);

@ -78,7 +78,7 @@ error_code cellMouseInit(u32 max_connect)
 		return CELL_MOUSE_ERROR_INVALID_PARAMETER;
 	}

-	libio_sys_config_init();
+	sys_config_start(ppu);
 	handler.Init(std::min(max_connect, 7u));

 	return CELL_OK;
@ -121,7 +121,7 @@ error_code cellMouseClearBuf(u32 port_no)
 	return CELL_OK;
 }

-error_code cellMouseEnd()
+error_code cellMouseEnd(ppu_thread& ppu)
 {
 	sys_io.notice("cellMouseEnd()");

@ -133,7 +133,7 @@ error_code cellMouseEnd()
 		return CELL_MOUSE_ERROR_UNINITIALIZED;

 	// TODO
-	libio_sys_config_end();
+	sys_config_stop(ppu);
 	return CELL_OK;
 }

--- a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp
@ -164,7 +164,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr<char> msgString,
 			return CellSysutilError{ret + 0u};
 		}

-		const auto notify = std::make_shared<atomic_t<bool>>(false);
+		const auto notify = std::make_shared<atomic_t<u32>>(0);

 		const auto res = manager->create<rsx::overlays::message_dialog>()->show(is_blocking, msgString.get_ptr(), _type, [callback, userData, &return_code, is_blocking, notify](s32 status)
 		{
@ -186,7 +186,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr<char> msgString,

 			if (is_blocking && notify)
 			{
-				*notify = true;
+				*notify = 1;
 				notify->notify_one();
 			}
 		});
@ -533,13 +533,15 @@ error_code cellMsgDialogAbort()
 			sysutil_send_system_cmd(CELL_SYSUTIL_DRAWING_END, 0);
 			return CELL_OK;
 		}
+
+		return CELL_OK; // Not CELL_MSGDIALOG_ERROR_DIALOG_NOT_OPENED, tested on HW.
 	}

 	const auto dlg = g_fxo->get<msg_info>().get();

 	if (!dlg)
 	{
-		return CELL_MSGDIALOG_ERROR_DIALOG_NOT_OPENED;
+		return CELL_OK; // Not CELL_MSGDIALOG_ERROR_DIALOG_NOT_OPENED, tested on HW.
 	}

 	if (!dlg->state.compare_and_swap_test(MsgDialogState::Open, MsgDialogState::Abort))
--- a/rpcs3/Emu/Cell/Modules/cellMusicDecode.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellMusicDecode.cpp
@ -256,7 +256,7 @@ error_code cell_music_decode_read(vm::ptr<void> buf, vm::ptr<u32> startTime, u64
 	{
 		dec.read_pos = 0;
 		dec.decoder.clear();
-		dec.decoder.track_fully_consumed = true;
+		dec.decoder.track_fully_consumed = 1;
 		dec.decoder.track_fully_consumed.notify_one();
 		break;
 	}
--- a/rpcs3/Emu/Cell/Modules/cellPad.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellPad.cpp
@ -3,13 +3,14 @@
 #include "Emu/system_config.h"
 #include "Emu/Cell/PPUModule.h"
 #include "Emu/Cell/lv2/sys_process.h"
+#include "Emu/Cell/lv2/sys_sync.h"
 #include "Emu/Io/pad_types.h"
 #include "Input/pad_thread.h"
 #include "Input/product_info.h"
 #include "cellPad.h"

-extern void libio_sys_config_init();
-extern void libio_sys_config_end();
+error_code sys_config_start(ppu_thread& ppu);
+error_code sys_config_stop(ppu_thread& ppu);

 extern bool is_input_allowed();

@ -52,19 +53,112 @@ void fmt_class_string<CellPadFilterError>::format(std::string& out, u64 arg)
 	});
 }

+extern void sys_io_serialize(utils::serial& ar);
+
 pad_info::pad_info(utils::serial& ar) // Builds pad_info from a serial archive: max_connect and port_setting are initialized from ar
 	: max_connect(ar)
 	, port_setting(ar)
 {
+	sys_io_serialize(ar); // Then hands the same archive to sys_io_serialize (declared extern above; body not visible here)
 }

 void pad_info::save(utils::serial& ar) // Writes max_connect and port_setting to ar, then calls sys_io_serialize on the same archive
 {
+	USING_SERIALIZATION_VERSION(sys_io); // Macro defined elsewhere; presumably selects the sys_io serialization version for this scope - confirm
+
 	ar(max_connect, port_setting);
+
+	sys_io_serialize(ar); // Same call as in the deserializing constructor, so both directions pass through sys_io_serialize
 }

+extern void send_sys_io_connect_event(usz index, u32 state);

-error_code cellPadInit(u32 max_connect)
+void cellPad_NotifyStateChange(usz index, u32 /*state*/) // Refreshes pad_info::reported_info[index] from the handler's pad when the CONNECTED bit changed; the state argument is unused
+{
+	auto info = g_fxo->try_get<pad_info>();
+
+	if (!info)
+	{
+		return; // try_get yielded no pad_info: nothing to update
+	}
+
+	std::lock_guard lock(pad::g_pad_mutex); // Held for the rest of the function
+
+	if (index >= info->get_max_connect())
+	{
+		return; // Index is outside the range reported by get_max_connect()
+	}
+
+	const auto handler = pad::get_current_handler();
+	const auto& pads = handler->GetPads();
+	const auto& pad = pads[index]; // NOTE(review): assumes GetPads() holds at least index + 1 entries - confirm
+
+	pad_data_internal& reported_info = info->reported_info[index];
+	const u32 old_status = reported_info.port_status; // Status as last stored in reported_info
+
+	// Ignore sent status for now, use the latest instead
+	// NOTE 1: The state's CONNECTED bit should currently be identical to the current
+	//         m_port_status CONNECTED bit when called from our pad handlers.
+	// NOTE 2: Make sure to propagate all other status bits to the reported status.
+	const u32 new_status = pads[index]->m_port_status;
+
+	if (~(old_status ^ new_status) & CELL_PAD_STATUS_CONNECTED) // XOR is 0 in a bit where both agree, so the inverted bit is set exactly when CONNECTED is unchanged
+	{
+		// old and new have the same connection status
+		return;
+	}
+
+	reported_info.port_status = new_status | CELL_PAD_STATUS_ASSIGN_CHANGES; // Store the new status with the ASSIGN_CHANGES bit added
+	reported_info.device_capability = pad->m_device_capability;
+	reported_info.device_type = pad->m_device_type;
+	reported_info.pclass_type = pad->m_class_type;
+	reported_info.pclass_profile = pad->m_class_profile;
+
+	if (pad->m_vendor_id == 0 || pad->m_product_id == 0) // Either id missing: report ids from input::get_product_info instead
+	{
+		// Fallback to defaults
+
+		input::product_info product;
+
+		switch (pad->m_class_type) // Pick the default product by the pad's class type
+		{
+		case CELL_PAD_PCLASS_TYPE_GUITAR:
+			product = input::get_product_info(input::product_type::red_octane_gh_guitar);
+			break;
+		case CELL_PAD_PCLASS_TYPE_DRUM:
+			product = input::get_product_info(input::product_type::red_octane_gh_drum_kit);
+			break;
+		case CELL_PAD_PCLASS_TYPE_DJ:
+			product = input::get_product_info(input::product_type::dj_hero_turntable);
+			break;
+		case CELL_PAD_PCLASS_TYPE_DANCEMAT:
+			product = input::get_product_info(input::product_type::dance_dance_revolution_mat);
+			break;
+		case CELL_PAD_PCLASS_TYPE_NAVIGATION:
+			product = input::get_product_info(input::product_type::ps_move_navigation);
+			break;
+		case CELL_PAD_PCLASS_TYPE_STANDARD:
+		default: // Unknown class types share the standard controller default
+			product = input::get_product_info(input::product_type::playstation_3_controller);
+			break;
+		}
+
+		reported_info.vendor_id = product.vendor_id;
+		reported_info.product_id = product.product_id;
+	}
+	else
+	{
+		reported_info.vendor_id = pad->m_vendor_id; // Both ids are non-zero: report them as-is
+		reported_info.product_id = pad->m_product_id;
+	}
+}
+
+extern void pad_state_notify_state_change(usz index, u32 state) // Externally linked entry point; does nothing beyond forwarding
+{
+	cellPad_NotifyStateChange(index, state); // Forwards both arguments unchanged
+}
+
+error_code cellPadInit(ppu_thread& ppu, u32 max_connect)
 {
 	sys_io.warning("cellPadInit(max_connect=%d)", max_connect);

@ -78,13 +172,33 @@ error_code cellPadInit(u32 max_connect)
 	if (max_connect == 0 || max_connect > CELL_MAX_PADS)
 		return CELL_PAD_ERROR_INVALID_PARAMETER;

-	libio_sys_config_init();
-	config.max_connect = std::min<u32>(max_connect, CELL_PAD_MAX_PORT_NUM);
+	sys_config_start(ppu);
+
+	config.max_connect = max_connect;
 	config.port_setting.fill(CELL_PAD_SETTING_PRESS_OFF | CELL_PAD_SETTING_SENSOR_OFF);
+	config.reported_info = {};
+
+	std::array<s32, CELL_MAX_PADS> statuses{};
+
+	const auto handler = pad::get_current_handler();
+
+	const auto& pads = handler->GetPads();
+
+	for (usz i = 0; i < statuses.size(); ++i)
+	{
+		if (i >= config.get_max_connect())
+			break;
+
+		if (pads[i]->m_port_status & CELL_PAD_STATUS_CONNECTED)
+		{
+			send_sys_io_connect_event(i, CELL_PAD_STATUS_CONNECTED);
+		}
+	}
+
 	return CELL_OK;
 }

-error_code cellPadEnd()
+error_code cellPadEnd(ppu_thread& ppu)
 {
 	sys_io.notice("cellPadEnd()");

@ -95,7 +209,7 @@ error_code cellPadEnd()
 	if (!config.max_connect.exchange(0))
 		return CELL_PAD_ERROR_UNINITIALIZED;

-	libio_sys_config_end();
+	sys_config_stop(ppu);
 	return CELL_OK;
 }

@ -138,12 +252,12 @@ error_code cellPadClearBuf(u32 port_no)

 	const auto& pads = handler->GetPads();

-	if (port_no >= config.max_connect)
+	if (port_no >= config.get_max_connect())
 		return CELL_PAD_ERROR_NO_DEVICE;

 	const auto& pad = pads[port_no];

-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
 		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);

 	clear_pad_buffer(pad);
@ -151,39 +265,7 @@ error_code cellPadClearBuf(u32 port_no)
 	return CELL_OK;
 }

-void pad_get_data(u32 port_no, CellPadData* data);
-
-error_code cellPadGetData(u32 port_no, vm::ptr<CellPadData> data)
-{
-	sys_io.trace("cellPadGetData(port_no=%d, data=*0x%x)", port_no, data);
-
-	std::lock_guard lock(pad::g_pad_mutex);
-
-	auto& config = g_fxo->get<pad_info>();
-
-	if (!config.max_connect)
-		return CELL_PAD_ERROR_UNINITIALIZED;
-
-	const auto handler = pad::get_current_handler();
-
-	if (port_no >= CELL_MAX_PADS || !data)
-		return CELL_PAD_ERROR_INVALID_PARAMETER;
-
-	const auto& pads = handler->GetPads();
-
-	if (port_no >= config.max_connect)
-		return CELL_PAD_ERROR_NO_DEVICE;
-
-	const auto& pad = pads[port_no];
-
-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
-		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);
-
-	pad_get_data(port_no, data.get_ptr());
-	return CELL_OK;
-}
-
-void pad_get_data(u32 port_no, CellPadData* data)
+void pad_get_data(u32 port_no, CellPadData* data, bool get_periph_data = false)
 {
 	auto& config = g_fxo->get<pad_info>();
 	const auto handler = pad::get_current_handler();
@ -196,7 +278,7 @@ void pad_get_data(u32 port_no, CellPadData* data)
 		return;
 	}

-	const auto setting = config.port_setting[port_no];
+	const u32 setting = config.port_setting[port_no];
 	bool btnChanged = false;

 	if (rinfo.ignore_input || !is_input_allowed())
@ -383,6 +465,131 @@ void pad_get_data(u32 port_no, CellPadData* data)
 			data->button[CELL_PAD_BTN_OFFSET_SENSOR_G] = pad->m_sensor_g;
 		}
 	}
+
+	if (!get_periph_data || data->len <= CELL_PAD_LEN_CHANGE_SENSOR_ON)
+	{
+		return;
+	}
+
+	const auto get_pressure_value = [setting](u16 val, u16 min, u16 max) -> u16
+	{
+		if (setting & CELL_PAD_SETTING_PRESS_ON)
+		{
+			return std::clamp(val, min, max);
+		}
+
+		if (val > 0)
+		{
+			return max;
+		}
+
+		return 0;
+	};
+
+	// TODO: support for 'unique' controllers, which goes in offsets 24+ in padData (CELL_PAD_PCLASS_BTN_OFFSET)
+	// TODO: update data->len accordingly
+
+	switch (pad->m_class_profile)
+	{
+	default:
+	case CELL_PAD_PCLASS_TYPE_STANDARD:
+	case CELL_PAD_PCLASS_TYPE_NAVIGATION:
+	{
+		break;
+	}
+	case CELL_PAD_PCLASS_TYPE_GUITAR:
+	{
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_1]      = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_2]      = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_3]      = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_4]      = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_5]      = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_STRUM_UP]    = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_STRUM_DOWN]  = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_WHAMMYBAR]   = 0x80; // 0x80 – 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_H1]     = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_H2]     = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_H3]     = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_H4]     = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_FRET_H5]     = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_5WAY_EFFECT] = 0x0019; // One of 5 values: 0x0019, 0x004C, 0x007F (or 0x0096), 0x00B2, 0x00E5 (or 0x00E2)
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_GUITAR_TILT_SENS]   = get_pressure_value(0, 0x0, 0xFF);
+		break;
+	}
+	case CELL_PAD_PCLASS_TYPE_DRUM:
+	{
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_SNARE]     = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_TOM]       = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_TOM2]      = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_TOM_FLOOR] = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_KICK]      = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_CYM_HiHAT] = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_CYM_CRASH] = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_CYM_RIDE]  = get_pressure_value(0, 0x0, 0xFF);
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DRUM_KICK2]     = get_pressure_value(0, 0x0, 0xFF);
+		break;
+	}
+	case CELL_PAD_PCLASS_TYPE_DJ:
+	{
+		// First deck
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_MIXER_ATTACK]     = 0;    // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_MIXER_CROSSFADER] = 0;    // 0x0 - 0x3FF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_MIXER_DSP_DIAL]   = 0;    // 0x0 - 0x3FF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK1_STREAM1]    = 0;    // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK1_STREAM2]    = 0;    // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK1_STREAM3]    = 0;    // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK1_PLATTER]    = 0x80; // 0x0 - 0xFF (neutral: 0x80)
+
+		// Second deck
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK2_STREAM1]    = 0;    // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK2_STREAM2]    = 0;    // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK2_STREAM3]    = 0;    // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DJ_DECK2_PLATTER]    = 0x80; // 0x0 - 0xFF (neutral: 0x80)
+		break;
+	}
+	case CELL_PAD_PCLASS_TYPE_DANCEMAT:
+	{
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_CIRCLE]   = 0; // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_CROSS]    = 0; // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_TRIANGLE] = 0; // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_SQUARE]   = 0; // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_RIGHT]    = 0; // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_LEFT]     = 0; // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_UP]       = 0; // 0x0 or 0xFF
+		data->button[CELL_PAD_PCLASS_BTN_OFFSET_DANCEMAT_DOWN]     = 0; // 0x0 or 0xFF
+		break;
+	}
+	}
+}
+
+error_code cellPadGetData(u32 port_no, vm::ptr<CellPadData> data) // Validates module state and arguments, then fills *data through pad_get_data; returns CELL_OK or a CELL_PAD_ERROR_* code
+{
+	sys_io.trace("cellPadGetData(port_no=%d, data=*0x%x)", port_no, data);
+
+	std::lock_guard lock(pad::g_pad_mutex); // Held for the whole call, including pad_get_data
+
+	auto& config = g_fxo->get<pad_info>();
+
+	if (!config.max_connect) // Zero max_connect is reported as "uninitialized"
+		return CELL_PAD_ERROR_UNINITIALIZED;
+
+	const auto handler = pad::get_current_handler();
+
+	if (port_no >= CELL_MAX_PADS || !data) // Port beyond the hard limit, or null output pointer
+		return CELL_PAD_ERROR_INVALID_PARAMETER;
+
+	const auto& pads = handler->GetPads();
+
+	if (port_no >= config.get_max_connect()) // Port is valid in general but outside get_max_connect()
+		return CELL_PAD_ERROR_NO_DEVICE;
+
+	const auto& pad = pads[port_no];
+
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED)) // Both is_reportedly_connected() and the pad's own CONNECTED bit must agree that the pad is present
+		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);
+
+	pad_get_data(port_no, data.get_ptr()); // get_periph_data keeps its default (false)
+	return CELL_OK;
 }

 error_code cellPadPeriphGetInfo(vm::ptr<CellPadPeriphInfo> info)
@ -406,25 +613,37 @@ error_code cellPadPeriphGetInfo(vm::ptr<CellPadPeriphInfo> info)
 	std::memset(info.get_ptr(), 0, sizeof(CellPadPeriphInfo));

 	info->max_connect = config.max_connect;
-	info->now_connect = rinfo.now_connect;
 	info->system_info = rinfo.system_info;

-	const auto& pads = handler->GetPads();
+	u32 now_connect = 0;

 	for (u32 i = 0; i < CELL_PAD_MAX_PORT_NUM; ++i)
 	{
-		if (i >= config.max_connect)
+		if (i >= config.get_max_connect())
 			break;

-		info->port_status[i] = pads[i]->m_port_status;
-		pads[i]->m_port_status &= ~CELL_PAD_STATUS_ASSIGN_CHANGES;
+		pad_data_internal& reported_info = config.reported_info[i];
+
+		info->port_status[i] = reported_info.port_status;
 		info->port_setting[i] = config.port_setting[i];
-		info->device_capability[i] = pads[i]->m_device_capability;
-		info->device_type[i] = pads[i]->m_device_type;
-		info->pclass_type[i] = pads[i]->m_class_type;
-		info->pclass_profile[i] = pads[i]->m_class_profile;
+
+		reported_info.port_status &= ~CELL_PAD_STATUS_ASSIGN_CHANGES;
+
+		if (~reported_info.port_status & CELL_PAD_STATUS_CONNECTED)
+		{
+			continue;
+		}
+
+		info->device_capability[i] = reported_info.device_capability;
+		info->device_type[i] = reported_info.device_type;
+		info->pclass_type[i] = reported_info.pclass_type;
+		info->pclass_profile[i] = reported_info.pclass_profile;
+
+		now_connect++;
 	}

+	info->now_connect = now_connect;
+
 	return CELL_OK;
 }

@ -447,20 +666,19 @@ error_code cellPadPeriphGetData(u32 port_no, vm::ptr<CellPadPeriphData> data)

 	const auto& pads = handler->GetPads();

-	if (port_no >= config.max_connect)
+	if (port_no >= config.get_max_connect())
 		return CELL_PAD_ERROR_NO_DEVICE;

 	const auto& pad = pads[port_no];

-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
 		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);

-	pad_get_data(port_no, &data->cellpad_data);
+	pad_get_data(port_no, &data->cellpad_data, true);

 	data->pclass_type = pad->m_class_type;
 	data->pclass_profile = pad->m_class_profile;

-	// TODO: support for 'unique' controllers, which goes in offsets 24+ in padData (CELL_PAD_PCLASS_BTN_OFFSET)
 	return CELL_OK;
 }

@ -482,12 +700,12 @@ error_code cellPadGetRawData(u32 port_no, vm::ptr<CellPadData> data)

 	const auto& pads = handler->GetPads();

-	if (port_no >= config.max_connect)
+	if (port_no >= config.get_max_connect())
 		return CELL_PAD_ERROR_NO_DEVICE;

 	const auto& pad = pads[port_no];

-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
 		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);

 	// ?
@ -548,12 +766,12 @@ error_code cellPadSetActDirect(u32 port_no, vm::ptr<CellPadActParam> param)

 	const auto& pads = handler->GetPads();

-	if (port_no >= config.max_connect)
+	if (port_no >= config.get_max_connect())
 		return CELL_PAD_ERROR_NO_DEVICE;

 	const auto& pad = pads[port_no];

-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
 		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);

 	// TODO: find out if this is checked here or later or at all
@ -585,56 +803,32 @@ error_code cellPadGetInfo(vm::ptr<CellPadInfo> info)

 	const PadInfo& rinfo = handler->GetInfo();
 	info->max_connect = config.max_connect;
-	info->now_connect = rinfo.now_connect;
 	info->system_info = rinfo.system_info;

-	const auto& pads = handler->GetPads();
+	u32 now_connect = 0;

 	for (u32 i = 0; i < CELL_MAX_PADS; ++i)
 	{
-		if (i >= config.max_connect)
+		if (i >= config.get_max_connect())
 			break;

-		pads[i]->m_port_status &= ~CELL_PAD_STATUS_ASSIGN_CHANGES; // TODO: should ASSIGN flags be cleared here?
-		info->status[i] = pads[i]->m_port_status;
+		pad_data_internal& reported_info = config.reported_info[i];
+		reported_info.port_status &= ~CELL_PAD_STATUS_ASSIGN_CHANGES; // TODO: should ASSIGN flags be cleared here?

-		if (pads[i]->m_vendor_id == 0 || pads[i]->m_product_id == 0)
+		info->status[i] = reported_info.port_status;
+
+		if (~reported_info.port_status & CELL_PAD_STATUS_CONNECTED)
 		{
-			// Fallback to defaults
-
-			input::product_info product;
-
-			switch (pads[i]->m_class_type)
-			{
-			case CELL_PAD_PCLASS_TYPE_GUITAR:
-				product = input::get_product_info(input::product_type::red_octane_gh_guitar);
-				break;
-			case CELL_PAD_PCLASS_TYPE_DRUM:
-				product = input::get_product_info(input::product_type::red_octane_gh_drum_kit);
-				break;
-			case CELL_PAD_PCLASS_TYPE_DJ:
-				product = input::get_product_info(input::product_type::dj_hero_turntable);
-				break;
-			case CELL_PAD_PCLASS_TYPE_DANCEMAT:
-				product = input::get_product_info(input::product_type::dance_dance_revolution_mat);
-				break;
-			case CELL_PAD_PCLASS_TYPE_NAVIGATION:
-			case CELL_PAD_PCLASS_TYPE_STANDARD:
-			default:
-				product = input::get_product_info(input::product_type::playstation_3_controller);
-				break;
-			}
-
-			info->vendor_id[i] = product.vendor_id;
-			info->product_id[i] = product.product_id;
-		}
-		else
-		{
-			info->vendor_id[i] = pads[i]->m_vendor_id;
-			info->product_id[i] = pads[i]->m_product_id;
+			continue;
 		}
+
+		info->vendor_id[i] = reported_info.vendor_id;
+		info->product_id[i] = reported_info.product_id;
+
+		now_connect++;
 	}

+	info->now_connect = now_connect;
 	return CELL_OK;
 }

@ -657,24 +851,37 @@ error_code cellPadGetInfo2(vm::ptr<CellPadInfo2> info)
 	std::memset(info.get_ptr(), 0, sizeof(CellPadInfo2));

 	const PadInfo& rinfo = handler->GetInfo();
-	info->max_connect = config.max_connect;
-	info->now_connect = rinfo.now_connect;
+	info->max_connect = config.get_max_connect(); // Here it is forcibly clamped
 	info->system_info = rinfo.system_info;

+	u32 now_connect = 0;
+
 	const auto& pads = handler->GetPads();

 	for (u32 i = 0; i < CELL_PAD_MAX_PORT_NUM; ++i)
 	{
-		if (i >= config.max_connect)
+		if (i >= config.get_max_connect())
 			break;

-		info->port_status[i] = pads[i]->m_port_status;
-		pads[i]->m_port_status &= ~CELL_PAD_STATUS_ASSIGN_CHANGES;
+		pad_data_internal& reported_info = config.reported_info[i];
+
+		info->port_status[i] = reported_info.port_status;
 		info->port_setting[i] = config.port_setting[i];
+
+		reported_info.port_status &= ~CELL_PAD_STATUS_ASSIGN_CHANGES;
+
+		if (~reported_info.port_status & CELL_PAD_STATUS_CONNECTED)
+		{
+			continue;
+		}
+
 		info->device_capability[i] = pads[i]->m_device_capability;
 		info->device_type[i] = pads[i]->m_device_type;
+
+		now_connect++;
 	}

+	info->now_connect = now_connect;
 	return CELL_OK;
 }

@ -696,12 +903,12 @@ error_code cellPadGetCapabilityInfo(u32 port_no, vm::ptr<CellPadCapabilityInfo>

 	const auto& pads = handler->GetPads();

-	if (port_no >= config.max_connect)
+	if (port_no >= config.get_max_connect())
 		return CELL_PAD_ERROR_NO_DEVICE;

 	const auto& pad = pads[port_no];

-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
 		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);

 	// Should return the same as device capability mask, psl1ght has it backwards in pad->h
@ -754,12 +961,12 @@ error_code cellPadInfoPressMode(u32 port_no)

 	const auto& pads = handler->GetPads();

-	if (port_no >= config.max_connect)
+	if (port_no >= config.get_max_connect())
 		return CELL_PAD_ERROR_NO_DEVICE;

 	const auto& pad = pads[port_no];

-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
 		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);

 	return not_an_error((pad->m_device_capability & CELL_PAD_CAPABILITY_PRESS_MODE) ? 1 : 0);
@ -783,12 +990,12 @@ error_code cellPadInfoSensorMode(u32 port_no)

 	const auto& pads = handler->GetPads();

-	if (port_no >= config.max_connect)
+	if (port_no >= config.get_max_connect())
 		return CELL_PAD_ERROR_NO_DEVICE;

 	const auto& pad = pads[port_no];

-	if (!(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
+	if (!config.is_reportedly_connected(port_no) || !(pad->m_port_status & CELL_PAD_STATUS_CONNECTED))
 		return not_an_error(CELL_PAD_ERROR_NO_DEVICE);

 	return not_an_error((pad->m_device_capability & CELL_PAD_CAPABILITY_SENSOR_MODE) ? 1 : 0);
--- a/rpcs3/Emu/Cell/Modules/cellPad.h
+++ b/rpcs3/Emu/Cell/Modules/cellPad.h
@ -24,92 +24,6 @@ enum CellPadFilterError : u32
 	CELL_PADFILTER_ERROR_INVALID_PARAMETER = 0x80121401,
 };

-// Controller types
-enum
-{
-	CELL_PAD_PCLASS_TYPE_STANDARD   = 0x00,
-	CELL_PAD_PCLASS_TYPE_GUITAR     = 0x01,
-	CELL_PAD_PCLASS_TYPE_DRUM       = 0x02,
-	CELL_PAD_PCLASS_TYPE_DJ         = 0x03,
-	CELL_PAD_PCLASS_TYPE_DANCEMAT   = 0x04,
-	CELL_PAD_PCLASS_TYPE_NAVIGATION = 0x05,
-};
-
-// Profile of a Standard Type Controller
-// Profile of a Navigation Type Controller
-// Bits 0 – 31 All 0s
-
-// Profile of a Guitar Type Controller
-enum
-{
-	// Basic
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_1       = 0x00000001,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_2       = 0x00000002,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_3       = 0x00000004,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_4       = 0x00000008,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_5       = 0x00000010,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_STRUM_UP     = 0x00000020,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_STRUM_DOWN   = 0x00000040,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_WHAMMYBAR    = 0x00000080,
-	// All Basic                                = 0x000000FF
-
-	// Optional
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_H1      = 0x00000100,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_H2      = 0x00000200,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_H3      = 0x00000400,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_H4      = 0x00000800,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_FRET_H5      = 0x00001000,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_5WAY_EFFECT  = 0x00002000,
-	CELL_PAD_PCLASS_PROFILE_GUITAR_TILT_SENS    = 0x00004000,
-	// All                                      = 0x00007FFF
-};
-
-// Profile of a Drum Type Controller
-enum
-{
-	CELL_PAD_PCLASS_PROFILE_DRUM_SNARE     = 0x00000001,
-	CELL_PAD_PCLASS_PROFILE_DRUM_TOM       = 0x00000002,
-	CELL_PAD_PCLASS_PROFILE_DRUM_TOM2      = 0x00000004,
-	CELL_PAD_PCLASS_PROFILE_DRUM_TOM_FLOOR = 0x00000008,
-	CELL_PAD_PCLASS_PROFILE_DRUM_KICK      = 0x00000010,
-	CELL_PAD_PCLASS_PROFILE_DRUM_CYM_HiHAT = 0x00000020,
-	CELL_PAD_PCLASS_PROFILE_DRUM_CYM_CRASH = 0x00000040,
-	CELL_PAD_PCLASS_PROFILE_DRUM_CYM_RIDE  = 0x00000080,
-	CELL_PAD_PCLASS_PROFILE_DRUM_KICK2     = 0x00000100,
-	// All                                 = 0x000001FF
-};
-
-// Profile of a DJ Deck Type Controller
-enum
-{
-	CELL_PAD_PCLASS_PROFILE_DJ_MIXER_ATTACK     = 0x00000001,
-	CELL_PAD_PCLASS_PROFILE_DJ_MIXER_CROSSFADER = 0x00000002,
-	CELL_PAD_PCLASS_PROFILE_DJ_MIXER_DSP_DIAL   = 0x00000004,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK1_STREAM1    = 0x00000008,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK1_STREAM2    = 0x00000010,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK1_STREAM3    = 0x00000020,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK1_PLATTER    = 0x00000040,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK2_STREAM1    = 0x00000080,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK2_STREAM2    = 0x00000100,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK2_STREAM3    = 0x00000200,
-	CELL_PAD_PCLASS_PROFILE_DJ_DECK2_PLATTER    = 0x00000400,
-	// All                                      = 0x000007FF
-};
-
-// Profile of a Dance Mat Type Controller
-enum
-{
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_CIRCLE   = 0x00000001,
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_CROSS    = 0x00000002,
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_TRIANGLE = 0x00000004,
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_SQUARE   = 0x00000008,
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_RIGHT    = 0x00000010,
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_LEFT     = 0x00000020,
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_UP       = 0x00000040,
-	CELL_PAD_PCLASS_PROFILE_DANCEMAT_DOWN     = 0x00000080,
-	// All                                    = 0x000000FF
-};
-
 // Length returned in CellPadData struct
 enum
 {
@ -126,6 +40,17 @@ enum
 	CELL_PADFILTER_IIR_CUTOFF_2ND_LPF_BT_010 = 2, // 10% Nyquist frequency
 };

+struct pad_data_internal // Per-port pad info snapshot stored in pad_info::reported_info and read by the cellPad info getters
+{
+	u16 vendor_id; // copied to CellPadInfo::vendor_id by cellPadGetInfo
+	u16 product_id; // copied to CellPadInfo::product_id by cellPadGetInfo
+	u32 port_status; // CELL_PAD_STATUS_* bit flags (CONNECTED is tested, ASSIGN_CHANGES is cleared by the info getters)
+	u32 device_capability; // copied to CellPadPeriphInfo::device_capability
+	u32 device_type; // copied to CellPadPeriphInfo::device_type
+	u32 pclass_type; // copied to CellPadPeriphInfo::pclass_type
+	u32 pclass_profile; // copied to CellPadPeriphInfo::pclass_profile
+};
+
 struct CellPadInfo
 {
 	be_t<u32> max_connect;
@ -192,14 +117,27 @@ struct pad_info
 {
 	atomic_t<u32> max_connect = 0;
 	std::array<u32, CELL_PAD_MAX_PORT_NUM> port_setting{ 0 };
+	std::array<pad_data_internal, CELL_PAD_MAX_PORT_NUM> reported_info{};

 	SAVESTATE_INIT_POS(11);

 	pad_info() = default;
 	pad_info(utils::serial& ar);
 	void save(utils::serial& ar);
+
+	u32 get_max_connect() const // max_connect clamped to at most CELL_PAD_MAX_PORT_NUM
+	{
+		return std::min<u32>(max_connect, CELL_PAD_MAX_PORT_NUM); // keeps indices into the CELL_PAD_MAX_PORT_NUM-sized arrays in range
+	}
+
+	// Unreliable check the firmware uses to skip pad calls for disconnected pads.
+	// The result depends on reported_info being kept up to date by config events, which are received on a dedicated thread.
+	bool is_reportedly_connected(u32 port_no) const
+	{
+		return port_no < get_max_connect() && !!(reported_info[port_no].port_status & CELL_PAD_STATUS_CONNECTED); // bounds check first, then the CONNECTED bit of the reported status
+	}
 };

 error_code cellPadGetData(u32 port_no, vm::ptr<CellPadData> data);
-error_code cellPadInit(u32 max_connect);
+error_code cellPadInit(ppu_thread& ppu, u32 max_connect);
 error_code cellPadSetPortSetting(u32 port_no, u32 port_setting);
--- a/rpcs3/Emu/Cell/Modules/cellRec.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellRec.cpp
@ -158,14 +158,14 @@ public:
 		has_error = false;
 	}

-	void add_frame(std::vector<u8>& frame, const u32 width, const u32 height, s32 pixel_format, usz timestamp_ms) override
+	void add_frame(std::vector<u8>& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) override // Queues one frame in m_frames_to_encode under m_mtx; the frame is dropped if m_flush is set
 	{
 		std::lock_guard lock(m_mtx);

 		if (m_flush)
 			return;

-		m_frames_to_encode.emplace_back(timestamp_ms, width, height, pixel_format, std::move(frame));
+		m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame)); // moves the caller's buffer into the queued entry
 	}

 	encoder_frame get_frame()
@ -445,6 +445,7 @@ void rec_info::set_audio_params(s32 audio_format)
 	case CELL_REC_PARAM_AUDIO_FMT_PCM_1536K:
 		audio_codec_id = 65556; // AV_CODEC_ID_PCM_F32BE
 		//audio_codec_id = 65557; // AV_CODEC_ID_PCM_F32LE // TODO: maybe this one?
+		break;
 	default:
 		audio_codec_id = 86018; // AV_CODEC_ID_AAC
 		break;
@ -587,7 +588,7 @@ void rec_info::start_image_provider()
 						{
 							std::vector<u8> frame(frame_size);
 							std::memcpy(frame.data(), video_input_buffer.get_ptr(), frame.size());
-							encoder->add_frame(frame, input_format.pitch, input_format.height, input_format.av_pixel_format, timestamp_ms);
+							encoder->add_frame(frame, input_format.pitch, input_format.width, input_format.height, input_format.av_pixel_format, timestamp_ms);
 						}
 					}

@ -680,7 +681,7 @@ void rec_info::stop_image_provider(bool flush)
 		{
 			const usz pos = (start_offset + i) % video_ringbuffer.size();
 			utils::image_sink::encoder_frame& frame_data = video_ringbuffer[pos];
-			encoder->add_frame(frame_data.data, frame_data.width, frame_data.height, frame_data.av_pixel_format, encoder->get_timestamp_ms(frame_data.pts - start_pts));
+			encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, encoder->get_timestamp_ms(frame_data.pts - start_pts));

 			// TODO: add audio data to encoder
 		}
--- a/rpcs3/Emu/Cell/Modules/cellSaveData.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSaveData.cpp
@ -1547,21 +1547,34 @@ static NEVER_INLINE error_code savedata_op(ppu_thread& ppu, u32 operation, u32 v
 				return {CELL_SAVEDATA_ERROR_PARAM, "57 (attribute=0x%x)", statSet->setParam->attribute};
 			}

-			if (g_ps3_process_info.sdk_ver > 0x36FFFF)
+			if (statSet->setParam->parental_level > 11)
 			{
-				// In firmware 3.70 or higher parental_level was changed to reserved2 and has to zeroes
-				if (statSet->setParam->parental_level)
-				{
-					// ****** sysutil savedata parameter error : 58 ******
-					return {CELL_SAVEDATA_ERROR_PARAM, "58 (sdk_ver=0x%x, parental_level=%d)", g_ps3_process_info.sdk_ver, statSet->setParam->parental_level};
-				}
+				// ****** sysutil savedata parameter error : 58 ******
+				return {CELL_SAVEDATA_ERROR_PARAM, "58 (sdk_ver=0x%x, parental_level=%d)", g_ps3_process_info.sdk_ver, statSet->setParam->parental_level};
 			}
-			else
+
+			// Note: in firmware 3.70 or higher parental_level was changed to reserved2
+
+			for (usz index = 0;; index++)
 			{
-				if (statSet->setParam->parental_level > 11)
+				// Convert to pointer to avoid UB when accessing out of range
+				const u8 c = (+statSet->setParam->listParam)[index];
+
+				if (c == 0 || index >= (g_ps3_process_info.sdk_ver > 0x36FFFF ? std::size(statSet->setParam->listParam) - 1 : std::size(statSet->setParam->listParam)))
 				{
-					// ****** sysutil savedata parameter error : 58 ******
-					return {CELL_SAVEDATA_ERROR_PARAM, "58 (sdk_ver=0x%x, parental_level=%d)", g_ps3_process_info.sdk_ver, statSet->setParam->parental_level};
+					if (c)
+					{
+						// ****** sysutil savedata parameter error : 76 ******
+						return {CELL_SAVEDATA_ERROR_PARAM, "76 (listParam=0x%016x)", std::bit_cast<be_t<u64>>(statSet->setParam->listParam)};
+					}
+
+					break;
+				}
+
+				if ((c < 'A' || c > 'Z') && (c < '0' || c > '9') && c != '-' && c != '_')
+				{
+					// ****** sysutil savedata parameter error : 77 ******
+					return {CELL_SAVEDATA_ERROR_PARAM, "77 (listParam=0x%016x)", std::bit_cast<be_t<u64>>(statSet->setParam->listParam)};
 				}
 			}

--- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
@ -2477,8 +2477,8 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32>
 	spurs_res += 127;
 	spurs_res2 += 127;

-	spurs_res.notify_all(-128);
-	spurs_res2.notify_all(-128);
+	spurs_res.notify_all();
+	spurs_res2.notify_all();

 	u32 res_wkl;
 	const auto wkl = &spurs->wklInfo(wnum);
--- a/rpcs3/Emu/Cell/Modules/cellVdec.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellVdec.cpp
@ -303,7 +303,7 @@ struct vdec_context final
 				return;
 			}

-			thread_ctrl::wait_on(in_cmd, nullptr);
+			thread_ctrl::wait_on(in_cmd);
 			slice = in_cmd.pop_all(); // Pop new command list
 		}())
 		{
@ -921,7 +921,7 @@ static error_code vdecOpen(ppu_thread& ppu, T type, U res, vm::cptr<CellVdecCb>
 	});

 	thrd->state -= cpu_flag::stop;
-	thrd->state.notify_one(cpu_flag::stop);
+	thrd->state.notify_one();

 	return CELL_OK;
 }
--- a/rpcs3/Emu/Cell/Modules/sceNp.cpp
+++ b/rpcs3/Emu/Cell/Modules/sceNp.cpp
@ -17,6 +17,7 @@

 #include "Emu/Cell/lv2/sys_time.h"
 #include "Emu/Cell/lv2/sys_fs.h"
+#include "Emu/Cell/lv2/sys_sync.h"
 #include "Emu/NP/np_handler.h"
 #include "Emu/NP/np_contexts.h"
 #include "Emu/NP/np_helpers.h"
@ -408,6 +409,8 @@ void message_data::print() const
 	sceNp.notice("commId: %s, msgId: %d, mainType: %d, subType: %d, subject: %s, body: %s, data_size: %d", static_cast<const char *>(commId.data), msgId, mainType, subType, subject, body, data.size());
 }

+extern void lv2_sleep(u64 timeout, ppu_thread* ppu = nullptr);
+
 error_code sceNpInit(u32 poolsize, vm::ptr<void> poolptr)
 {
 	sceNp.warning("sceNpInit(poolsize=0x%x, poolptr=*0x%x)", poolsize, poolptr);
@ -553,18 +556,40 @@ error_code npDrmIsAvailable(vm::cptr<u8> k_licensee_addr, vm::cptr<char> drm_pat
 	return CELL_OK;
 }

-error_code sceNpDrmIsAvailable(vm::cptr<u8> k_licensee_addr, vm::cptr<char> drm_path)
+error_code sceNpDrmIsAvailable(ppu_thread& ppu, vm::cptr<u8> k_licensee_addr, vm::cptr<char> drm_path) // Validates drm_path, then forwards to npDrmIsAvailable and returns its result after a fixed delay
 {
 	sceNp.warning("sceNpDrmIsAvailable(k_licensee=*0x%x, drm_path=*0x%x)", k_licensee_addr, drm_path);

-	return npDrmIsAvailable(k_licensee_addr, drm_path);
+	if (!drm_path) // a null path is rejected before any other work
+	{
+		return SCE_NP_DRM_ERROR_INVALID_PARAM;
+	}
+
+	lv2_obj::sleep(ppu); // NOTE(review): presumably marks the PPU thread as sleeping while the check runs - confirm against lv2_obj::sleep
+
+	const auto ret = npDrmIsAvailable(k_licensee_addr, drm_path);
+	lv2_sleep(100000, &ppu); // fixed delay after the check; unit is presumably microseconds - TODO confirm
+
+	return ret;
 }

-error_code sceNpDrmIsAvailable2(vm::cptr<u8> k_licensee_addr, vm::cptr<char> drm_path)
+error_code sceNpDrmIsAvailable2(ppu_thread& ppu, vm::cptr<u8> k_licensee_addr, vm::cptr<char> drm_path) // Validates drm_path, then forwards to npDrmIsAvailable and returns its result (no extra delay yet)
 {
 	sceNp.warning("sceNpDrmIsAvailable2(k_licensee=*0x%x, drm_path=*0x%x)", k_licensee_addr, drm_path);

-	return npDrmIsAvailable(k_licensee_addr, drm_path);
+	if (!drm_path) // a null path is rejected before any other work
+	{
+		return SCE_NP_DRM_ERROR_INVALID_PARAM;
+	}
+
+	lv2_obj::sleep(ppu); // NOTE(review): presumably marks the PPU thread as sleeping while the check runs - confirm against lv2_obj::sleep
+
+	const auto ret = npDrmIsAvailable(k_licensee_addr, drm_path);
+
+	// TODO: Accurate sleep time
+	//lv2_sleep(20000, &ppu);
+
+	return ret;
 }

 error_code npDrmVerifyUpgradeLicense(vm::cptr<char> content_id)
--- a/rpcs3/Emu/Cell/Modules/sys_io_.cpp
+++ b/rpcs3/Emu/Cell/Modules/sys_io_.cpp
@ -3,6 +3,10 @@
 #include "Emu/IdManager.h"
 #include "Emu/Cell/PPUModule.h"

+#include "Emu/Cell/lv2/sys_event.h"
+#include "Emu/Cell/lv2/sys_ppu_thread.h"
+#include "Emu/Cell/Modules/sysPrxForUser.h"
+
 LOG_CHANNEL(sys_io);

 extern void cellPad_init();
@ -13,53 +17,157 @@ struct libio_sys_config
 {
 	shared_mutex mtx;
 	s32 init_ctr = 0;
-	u32 stack_addr = 0;
+	u32 ppu_id = 0;
+	u32 queue_id = 0;

 	~libio_sys_config() noexcept
 	{
-		if (stack_addr)
-		{
-			ensure(vm::dealloc(stack_addr, vm::stack));
-		}
+	} // body is empty after this change: the stack_addr deallocation was removed together with the member
+
+	void save_or_load(utils::serial& ar) // Passes the three state fields through the serializer (same call for both directions, per the name)
+	{
+		ar(init_ctr, ppu_id, queue_id); // mtx is not serialized
 	}
 };

-// Only exists internally (has no name)
-extern void libio_sys_config_init()
+extern void sys_io_serialize(utils::serial& ar) // Serializes the global libio_sys_config instance through ar
+{
+	// Do not assign a serialization tag for now, call it from cellPad serialization
+	g_fxo->get<libio_sys_config>().save_or_load(ar);
+}
+
+extern void cellPad_NotifyStateChange(usz index, u32 state);
+
+void config_event_entry(ppu_thread& ppu) // Thread entry registered via REG_HIDDEN_FUNC; sys_config_start creates the "_cfg_evt_hndlr" PPU thread on it
 {
 	auto& cfg = g_fxo->get<libio_sys_config>();

-	std::lock_guard lock(cfg.mtx);
+	if (!ppu.loaded_from_savestate)
+	{
+		// Ensure awake
+		ppu.check_state();
+	}
+
+	while (!sys_event_queue_receive(ppu, cfg.queue_id, vm::null, 0)) // loop for as long as receiving from the config queue returns 0
+	{
+		if (ppu.is_stopped())
+		{
+			return;
+		}
+
+		// Some delay
+		thread_ctrl::wait_for(10000);
+
+		// Wakeup
+		ppu.check_state();
+
+		const u64 arg1 = ppu.gpr[5]; // event payload is read back from r5..r7 after the receive
+		const u64 arg2 = ppu.gpr[6];
+		const u64 arg3 = ppu.gpr[7];
+
+		// TODO: Reverse-engineer proper event system
+
+		if (arg1 == 1) // matches the (1, index, state) payload sent by send_sys_io_connect_event
+		{
+			cellPad_NotifyStateChange(arg2, arg3);
+		}
+	}
+
+	ppu_execute<&sys_ppu_thread_exit>(ppu, 0); // reached when the receive returns non-zero (presumably after sys_config_stop destroys the queue - confirm)
+}
+
+std::unique_lock<shared_mutex> lock_lv2_mutex_alike(shared_mutex& mtx, ppu_thread* ppu) // Acquires mtx and returns the owning lock; a non-null ppu is put to sleep while waiting and has check_state() called after each wait
+{
+	std::unique_lock<shared_mutex> lock(mtx, std::defer_lock);
+
+	while (!lock.try_lock()) // retry until the non-blocking acquire succeeds
+	{
+		if (ppu)
+		{
+			// Could not be acquired, put PPU to sleep
+			lv2_obj::sleep(*ppu);
+		}
+
+		// Wait for unlock without owning the lock
+		mtx.lock_unlock();
+
+		if (ppu)
+		{
+			// Awake, still not owning
+			ppu->check_state();
+		}
+	}
+
+	return lock;
+}
+
+extern void send_sys_io_connect_event(usz index, u32 state) // Sends a (1, index, state) event to the config event queue; does nothing while init_ctr is 0
+{
+	auto& cfg = g_fxo->get<libio_sys_config>();
+
+	auto lock = lock_lv2_mutex_alike(cfg.mtx, cpu_thread::get_current<ppu_thread>()); // NOTE(review): pointer is presumably null when not called from a PPU thread; the helper handles null
+
+	if (cfg.init_ctr)
+	{
+		if (auto port = idm::get<lv2_obj, lv2_event_queue>(cfg.queue_id)) // despite the local name, this looks up the event queue object by id
+		{
+			port->send(0, 1, index, state);
+		}
+	}
+}
+
+error_code sys_config_start(ppu_thread& ppu) // Increments init_ctr; the first start creates the config event queue and the handler thread. Always returns CELL_OK
+{
+	sys_io.warning("sys_config_start()");
+
+	auto& cfg = g_fxo->get<libio_sys_config>();
+
+	auto lock = lock_lv2_mutex_alike(cfg.mtx, &ppu);

 	if (cfg.init_ctr++ == 0)
 	{
-		// Belongs to "_cfg_evt_hndlr" thread (8k stack)
-		cfg.stack_addr = ensure(vm::alloc(0x2000, vm::stack, 4096));
+		// Run thread
+		vm::var<u64> _tid;
+		vm::var<u32> queue_id;
+		vm::var<char[]> _name = vm::make_str("_cfg_evt_hndlr");
+
+		vm::var<sys_event_queue_attribute_t> attr;
+		attr->protocol = SYS_SYNC_PRIORITY;
+		attr->type = SYS_PPU_QUEUE;
+		attr->name_u64 = 0;
+
+		ensure(CELL_OK == sys_event_queue_create(ppu, queue_id, attr, 0, 0x20)); // NOTE(review): last argument (0x20) is presumably the queue size - confirm
+		ppu.check_state();
+		cfg.queue_id = *queue_id; // stored so the handler thread, the sender and sys_config_stop can find the queue
+
+		ensure(CELL_OK == ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, g_fxo->get<ppu_function_manager>().func_addr(FIND_FUNC(config_event_entry)), 0, 512, 0x2000, SYS_PPU_THREAD_CREATE_JOINABLE, +_name)); // joinable: sys_config_stop joins this thread; 0x2000 is presumably the stack size - confirm
+		ppu.check_state();
+
+		cfg.ppu_id = static_cast<u32>(*_tid);
 	}
-}
-
-extern void libio_sys_config_end()
-{
-	auto& cfg = g_fxo->get<libio_sys_config>();
-
-	std::lock_guard lock(cfg.mtx);
-
-	if (cfg.init_ctr-- == 1)
-	{
-		ensure(vm::dealloc(std::exchange(cfg.stack_addr, 0), vm::stack));
-	}
-}
-
-error_code sys_config_start()
-{
-	sys_io.todo("sys_config_start()");

 	return CELL_OK;
 }

-error_code sys_config_stop()
+error_code sys_config_stop(ppu_thread& ppu) // Decrements init_ctr; the stop that brings it from 1 to 0 destroys the event queue and joins the handler thread. Always returns CELL_OK
 {
-	sys_io.todo("sys_config_stop()");
+	sys_io.warning("sys_config_stop()");
+
+	auto& cfg = g_fxo->get<libio_sys_config>();
+
+	auto lock = lock_lv2_mutex_alike(cfg.mtx, &ppu);
+
+	if (cfg.init_ctr && cfg.init_ctr-- == 1) // no decrement at 0; true only when the counter drops from 1 to 0
+	{
+		ensure(CELL_OK == sys_event_queue_destroy(ppu, cfg.queue_id, SYS_EVENT_QUEUE_DESTROY_FORCE)); // NOTE(review): presumably makes the handler's receive fail so the thread exits - confirm
+		ppu.check_state();
+		ensure(CELL_OK == sys_ppu_thread_join(ppu, cfg.ppu_id, +vm::var<u64>{})); // exit value is written to a temporary and ignored
+	}
+	else
+	{
+		// TODO: Unknown error
+	}
+
 	return CELL_OK;
 }

@ -114,4 +222,6 @@ DECLARE(ppu_module_manager::sys_io)("sys_io", []()
 	REG_FUNC(sys_io, sys_config_register_service);
 	REG_FUNC(sys_io, sys_config_unregister_io_error_handler);
 	REG_FUNC(sys_io, sys_config_unregister_service);
+
+	REG_HIDDEN_FUNC(config_event_entry);
 });
--- a/rpcs3/Emu/Cell/PPUAnalyser.cpp
+++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp
@ -532,6 +532,11 @@ namespace ppu_patterns

 bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::basic_string<u32>& applied, std::function<bool()> check_aborted)
 {
+	if (segs.empty())
+	{
+		return false;
+	}
+
 	// Assume first segment is executable
 	const u32 start = segs[0].addr;

@ -549,7 +554,38 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 	std::vector<std::reference_wrapper<ppu_function>> func_queue;

 	// Known references (within segs, addr and value alignment = 4)
-	std::set<u32> addr_heap{entry};
+	std::set<u32> addr_heap;
+
+	if (entry)
+	{
+		addr_heap.emplace(entry);
+	}
+
+	auto verify_func = [&](u32 addr)
+	{
+		if (entry)
+		{
+			// Fixed addresses
+			return true;
+		}
+
+		// Check if the storage address exists within relocations
+
+		for (auto& rel : this->relocs)
+		{
+			if ((rel.addr & -8) == (addr & -8))
+			{
+				if (rel.type != 38 && rel.type != 44 && (rel.addr & -4) != (addr & -4))
+				{
+					continue;
+				}
+
+				return true;
+			}
+		}
+
+		return false;
+	};

 	// Register new function
 	auto add_func = [&](u32 addr, u32 toc, u32 caller) -> ppu_function&
@ -612,7 +648,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b

 			for (; _ptr <= seg_end;)
 			{
-				if (ptr[1] == toc && ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0)
+				if (ptr[1] == toc && FN(x >= start && x < end && x % 4 == 0)(ptr[0]) && verify_func(_ptr.addr()))
 				{
 					// New function
 					ppu_log.trace("OPD*: [0x%x] 0x%x (TOC=0x%x)", _ptr, ptr[0], ptr[1]);
@ -668,6 +704,11 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 	// Find OPD section
 	for (const auto& sec : secs)
 	{
+		if (sec.size % 8)
+		{
+			continue;
+		}
+
 		vm::cptr<void> sec_end = vm::cast(sec.addr + sec.size);

 		// Probe
@ -691,17 +732,17 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 			const u32 _toc = ptr[1];

 			// Rough Table of Contents borders
-			//const u32 _toc_begin = _toc - 0x8000;
-			//const u32 _toc_end = _toc + 0x8000;
+			const u32 toc_begin = _toc - 0x8000;
+			//const u32 toc_end = _toc + 0x7ffc;

 			// TODO: improve TOC constraints
-			if (_toc % 4 || !get_ptr<u32>(_toc) || _toc >= 0x40000000 || (_toc >= start && _toc < end))
+			if (toc_begin % 4 || !get_ptr<u8>(toc_begin) || toc_begin >= 0x40000000 || (toc_begin >= start && toc_begin < end))
 			{
 				sec_end.set(0);
 				break;
 			}

-			if (addr % 4 || addr < start || addr >= end || addr == _toc)
+			if (addr % 4 || addr < start || addr >= end || !verify_func(_ptr.addr()))
 			{
 				sec_end.set(0);
 				break;
@ -754,6 +795,11 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 	// Find .eh_frame section
 	for (const auto& sec : secs)
 	{
+		if (sec.size % 4)
+		{
+			continue;
+		}
+
 		vm::cptr<void> sec_end = vm::cast(sec.addr + sec.size);

 		// Probe
@ -923,7 +969,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 					continue;
 				}

-				if (target >= start && target < end)
+				if (target >= start && target < end && (~ptr[0] & 0x2 || verify_func(_ptr.addr())))
 				{
 					auto& new_func = add_func(target, func.toc, func.addr);

@ -951,7 +997,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 				// Simple trampoline
 				const u32 target = (ptr[0] << 16) + ppu_opcode_t{ptr[1]}.simm16;

-				if (target >= start && target < end)
+				if (target >= start && target < end && verify_func(_ptr.addr()))
 				{
 					auto& new_func = add_func(target, func.toc, func.addr);

@ -1022,7 +1068,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 				const u32 target = (ptr[3] << 16) + s16(ptr[4]);
 				const u32 toc_add = (ptr[1] << 16) + s16(ptr[2]);

-				if (target >= start && target < end)
+				if (target >= start && target < end && verify_func((_ptr + 3).addr()))
 				{
 					auto& new_func = add_func(target, 0, func.addr);

@ -1069,7 +1115,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 				const u32 toc_add = (ptr[1] << 16) + s16(ptr[2]);
 				const u32 target = (ptr[3] & 0x2 ? 0 : (_ptr + 3).addr()) + ppu_opcode_t{ptr[3]}.bt24;

-				if (target >= start && target < end)
+				if (target >= start && target < end && (~ptr[3] & 0x2 || verify_func((_ptr + 3).addr())))
 				{
 					auto& new_func = add_func(target, 0, func.addr);

@ -1349,9 +1395,9 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 					block.second = _ptr.addr() - block.first;
 					break;
 				}
-				else if (type == ppu_itype::TW || type == ppu_itype::TWI || type == ppu_itype::TD || type == ppu_itype::TDI)
+				else if (type & ppu_itype::trap)
 				{
-					if (op.opcode != ppu_instructions::TRAP())
+					if (op.bo != 31)
 					{
 						add_block(_ptr.addr());
 					}
@ -1432,7 +1478,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 				{
 					const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);

-					if (target >= start && target < end)
+					if (target >= start && target < end && (!op.aa || verify_func(iaddr)))
 					{
 						if (target < func.addr || target >= func.addr + func.size)
 						{
@ -1577,6 +1623,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 		end = 0;
 	}

+	u32 per_instruction_bytes = 0;
+
 	for (auto&& [_, func] : as_rvalue(fmap))
 	{
 		if (func.attr & ppu_attr::no_size && entry)
@ -1595,6 +1643,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 				block.attr = ppu_attr::no_size;
 			}

+			per_instruction_bytes += utils::sub_saturate<u32>(lim, func.addr);
 			continue;
 		}

@ -1675,11 +1724,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 	u32 exp = start;
 	u32 lim = end;

-	// Start with full scan (disabled for PRX for now)
-	if (entry)
-	{
-		block_queue.emplace_back(exp, lim);
-	}
+	// Start with full scan
+	block_queue.emplace_back(exp, lim);

 	// Add entries from patches (on per-instruction basis)
 	for (u32 addr : applied)
@ -1713,14 +1759,17 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 		{
 			u32 i_pos = exp;

+			u32 block_edges[16];
+			u32 edge_count = 0;
+
 			bool is_good = true;
 			bool is_fallback = true;

 			for (; i_pos < lim; i_pos += 4)
 			{
-				const u32 opc = get_ref<u32>(i_pos);
+				const ppu_opcode_t op{get_ref<u32>(i_pos)};

-				switch (auto type = s_ppu_itype.decode(opc))
+				switch (auto type = s_ppu_itype.decode(op.opcode))
 				{
 				case ppu_itype::UNK:
 				case ppu_itype::ECIWX:
@ -1730,10 +1779,20 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 					is_good = false;
 					break;
 				}
-				case ppu_itype::TD:
 				case ppu_itype::TDI:
-				case ppu_itype::TW:
 				case ppu_itype::TWI:
+				{
+					if (op.ra == 1u || op.ra == 13u || op.ra == 2u)
+					{
+						// Non-user registers, checking them against a constant value makes no sense
+						is_good = false;
+						break;
+					}
+
+					[[fallthrough]];
+				}
+				case ppu_itype::TD:
+				case ppu_itype::TW:
 				case ppu_itype::B:
 				case ppu_itype::BC:
 				{
@ -1744,14 +1803,14 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b

 					if (type == ppu_itype::B || type == ppu_itype::BC)
 					{
-						if (entry == 0 && ppu_opcode_t{opc}.aa)
+						if (entry == 0 && op.aa)
 						{
 							// Ignore absolute branches in PIC (PRX)
 							is_good = false;
 							break;
 						}

-						const u32 target = (opc & 2 ? 0 : i_pos) + (type == ppu_itype::B ? +ppu_opcode_t{opc}.bt24 : +ppu_opcode_t{opc}.bt14);
+						const u32 target = (op.aa ? 0 : i_pos) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);

 						if (target < segs[0].addr || target >= segs[0].addr + segs[0].size)
 						{
@ -1760,9 +1819,43 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 							break;
 						}

+						const ppu_opcode_t test_op{get_ref<u32>(target)};
+						const auto type0 = s_ppu_itype.decode(test_op.opcode);
+
+						if (type0 == ppu_itype::UNK)
+						{
+							is_good = false;
+							break;
+						}
+
+						// Test another instruction just in case (testing more is unlikely to improve results by much)
+						if (!(type0 & ppu_itype::branch))
+						{
+							if (target + 4 >= segs[0].addr + segs[0].size)
+							{
+								is_good = false;
+								break;
+							}
+
+							const auto type1 = s_ppu_itype.decode(get_ref<u32>(target + 4));
+
+							if (type1 == ppu_itype::UNK)
+							{
+								is_good = false;
+								break;
+							}
+						}
+						else if (u32 target0 = (test_op.aa ? 0 : target) + (type == ppu_itype::B ? +test_op.bt24 : +test_op.bt14);
+							target0 < segs[0].addr || target0 >= segs[0].addr + segs[0].size)
+						{
+							// Sanity check
+							is_good = false;
+							break;
+						}
+
 						if (target != i_pos && !fmap.contains(target))
 						{
-							if (block_set.count(target) == 0)
+							if (block_set.count(target) == 0 && std::count(block_edges, block_edges + edge_count, target) == 0)
 							{
 								ppu_log.trace("Block target found: 0x%x (i_pos=0x%x)", target, i_pos);
 								block_queue.emplace_back(target, 0);
@ -1777,27 +1870,38 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 				case ppu_itype::BCLR:
 				case ppu_itype::SC:
 				{
-					if (type == ppu_itype::SC && opc != ppu_instructions::SC(0))
+					if (type == ppu_itype::SC && op.opcode != ppu_instructions::SC(0))
 					{
 						// Strict garbage filter
 						is_good = false;
 						break;
 					}

-					if (type == ppu_itype::BCCTR && opc & 0xe000)
+					if (type == ppu_itype::BCCTR && op.opcode & 0xe000)
 					{
 						// Garbage filter
 						is_good = false;
 						break;
 					}

-					if (type == ppu_itype::BCLR && opc & 0xe000)
+					if (type == ppu_itype::BCLR && op.opcode & 0xe000)
 					{
 						// Garbage filter
 						is_good = false;
 						break;
 					}

+					if ((type & ppu_itype::branch && op.lk) || type & ppu_itype::trap || type == ppu_itype::BC)
+					{
+						// If further instructions are valid: register all blocks
+						// Otherwise, register none (all or nothing)
+						if (edge_count < std::size(block_edges))
+						{
+							block_edges[edge_count++] = i_pos + 4;
+							continue;
+						}
+					}
+
 					// Good block terminator found, add single block
 					break;
 				}
@ -1828,17 +1932,23 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b

 			if (is_good)
 			{
-				auto& block = fmap[exp];
-
-				if (!block.addr)
+				for (u32 it = 0, prev_addr = exp; it <= edge_count; it++)
 				{
-					block.addr = exp;
-					block.size = i_pos - exp;
-					ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
+					const u32 block_end = it < edge_count ? block_edges[it] : i_pos;
+					const u32 block_begin = std::exchange(prev_addr, block_end);

-					if (get_limit(exp) == end)
+					auto& block = fmap[block_begin];
+
+					if (!block.addr)
 					{
-						block.attr += ppu_attr::no_size;
+						block.addr = block_begin;
+						block.size = block_end - block_begin;
+						ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
+
+						if (get_limit(block_begin) == end)
+						{
+							block.attr += ppu_attr::no_size;
+						}
 					}
 				}
 			}
@ -1861,9 +1971,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 	// Convert map to vector (destructive)
 	for (auto&& [_, block] : as_rvalue(std::move(fmap)))
 	{
-		if (block.attr & ppu_attr::no_size && block.size > 4 && entry)
+		if (block.attr & ppu_attr::no_size && block.size > 4)
 		{
-			// Disabled for PRX for now
 			ppu_log.warning("Block 0x%x will be compiled on per-instruction basis (size=0x%x)", block.addr, block.size);

 			for (u32 addr = block.addr; addr < block.addr + block.size; addr += 4)
@ -1875,12 +1984,19 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 				i.attr = ppu_attr::no_size;
 			}

+			per_instruction_bytes += block.size;
 			continue;
 		}

 		funcs.emplace_back(std::move(block));
 	}

+	if (per_instruction_bytes)
+	{
+		const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= utils::aligned_div<u32>(funcs.size(), 128);
+		(error ? ppu_log.error : ppu_log.notice)("%d instructions will be compiled on per-instruction basis in total", per_instruction_bytes / 4);
+	}
+
 	ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
 	return true;
 }
--- a/rpcs3/Emu/Cell/PPUAnalyser.h
+++ b/rpcs3/Emu/Cell/PPUAnalyser.h
@ -168,19 +168,19 @@ struct ppu_module
 	}

 	template <typename T, typename U> requires requires (const U& obj) { +obj.size() * 0; }
-	to_be_t<T>& get_ref(U&& addr,
+	to_be_t<T>& get_ref(U&& addr, u32 index = 0,
 		u32 line = __builtin_LINE(),
 		u32 col = __builtin_COLUMN(),
 		const char* file = __builtin_FILE(),
 		const char* func = __builtin_FUNCTION()) const
 	{
 		constexpr usz size_element = std::is_void_v<T> ? 0 : sizeof(std::conditional_t<std::is_void_v<T>, char, T>);
-		if (auto ptr = get_ptr<T>(addr.addr(), u32{size_element}))
+		if (auto ptr = get_ptr<T>((addr + index).addr(), u32{size_element}))
 		{
 			return *ptr;
 		}

-		fmt::throw_exception("get_ref(): Failure! (addr=0x%x)%s", addr.addr(), src_loc{line, col, file, func});
+		fmt::throw_exception("get_ref(): Failure! (addr=0x%x)%s", (addr + index).addr(), src_loc{line, col, file, func});
 		return *std::add_pointer_t<to_be_t<T>>{};
 	}
 };
@ -262,6 +262,9 @@ struct ppu_pattern_matrix
 // PPU Instruction Type
 struct ppu_itype
 {
+	static constexpr struct branch_tag{} branch{}; // Branch Instructions
+	static constexpr struct trap_tag{} trap{}; // Trap Instructions
+
 	enum type
 	{
 		UNK = 0,
@ -423,8 +426,6 @@ struct ppu_itype
 		VUPKLSB,
 		VUPKLSH,
 		VXOR,
-		TDI,
-		TWI,
 		MULLI,
 		SUBFIC,
 		CMPLI,
@ -432,11 +433,8 @@ struct ppu_itype
 		ADDIC,
 		ADDI,
 		ADDIS,
-		BC,
 		SC,
-		B,
 		MCRF,
-		BCLR,
 		CRNOR,
 		CRANDC,
 		ISYNC,
@ -446,7 +444,6 @@ struct ppu_itype
 		CREQV,
 		CRORC,
 		CROR,
-		BCCTR,
 		RLWIMI,
 		RLWINM,
 		RLWNM,
@ -463,7 +460,6 @@ struct ppu_itype
 		RLDCL,
 		RLDCR,
 		CMP,
-		TW,
 		LVSL,
 		LVEBX,
 		SUBFC,
@ -490,7 +486,6 @@ struct ppu_itype
 		LWZUX,
 		CNTLZD,
 		ANDC,
-		TD,
 		LVEWX,
 		MULHD,
 		MULHW,
@ -781,6 +776,16 @@ struct ppu_itype
 		FCTID_,
 		FCTIDZ_,
 		FCFID_,
+
+		B, // branch_tag first
+		BC,
+		BCLR,
+		BCCTR, // branch_tag last
+
+		TD, // trap_tag first
+		TW,
+		TDI,
+		TWI, // trap_tag last
 	};

 	// Enable address-of operator for ppu_decoder<>
@ -788,6 +793,16 @@ struct ppu_itype
 	{
 		return value;
 	}
+
+	friend constexpr bool operator &(type value, branch_tag)
+	{
+		return value >= B && value <= BCCTR;
+	}
+
+	friend constexpr bool operator &(type value, trap_tag)
+	{
+		return value >= TD && value <= TWI;
+	}
 };

 struct ppu_iname
--- a/rpcs3/Emu/Cell/PPUDisAsm.cpp
+++ b/rpcs3/Emu/Cell/PPUDisAsm.cpp
@ -106,12 +106,7 @@ std::pair<PPUDisAsm::const_op, u64> PPUDisAsm::try_get_const_op_gpr_value(u32 re

 		const auto type = s_ppu_itype.decode(opcode);

-		auto is_branch = [](enum ppu_itype::type itype)
-		{
-			return itype == ppu_itype::BC || itype == ppu_itype::B || itype == ppu_itype::BCLR || itype == ppu_itype::BCCTR;
-		};
-
-		if (is_branch(type) || type == ppu_itype::UNK)
+		if (type & ppu_itype::branch || type == ppu_itype::UNK)
 		{
 			// TODO: Detect calls, ignore them if reg is a non-volatile register
 			return {};
--- a/rpcs3/Emu/Cell/PPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp
@ -5204,7 +5204,9 @@ auto LVRX()
 	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
 	{
 		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
-		const u128 data = ppu_feed_data<u128, Flags...>(ppu, addr & -16);
+
+		// Read from the instruction address if the offset is 0. This prevents accessing potentially bad memory at addr (because no actual memory is dereferenced)
+		const u128 data = ppu_feed_data<u128, Flags...>(ppu, ((addr & 15) == 0 ? ppu.cia : addr) & -16);
 		ppu.vr[op.vd] = data >> ((~addr & 15) * 8) >> 8;
 	};

--- a/rpcs3/Emu/Cell/PPUModule.cpp
+++ b/rpcs3/Emu/Cell/PPUModule.cpp
@ -13,6 +13,7 @@
 #include "Emu/VFS.h"

 #include "Emu/Cell/PPUOpcodes.h"
+#include "Emu/Cell/SPUThread.h"
 #include "Emu/Cell/PPUAnalyser.h"

 #include "Emu/Cell/lv2/sys_process.h"
@ -552,7 +553,7 @@ extern const std::unordered_map<u32, std::string_view>& get_exported_function_na
 }

 // Resolve relocations for variable/function linkage.
-static void ppu_patch_refs(std::vector<ppu_reloc>* out_relocs, u32 fref, u32 faddr)
+static void ppu_patch_refs(const ppu_module& _module, std::vector<ppu_reloc>* out_relocs, u32 fref, u32 faddr)
 {
 	struct ref_t
 	{
@ -561,7 +562,7 @@ static void ppu_patch_refs(std::vector<ppu_reloc>* out_relocs, u32 fref, u32 fad
 		be_t<u32> addend; // Note: Treating it as addend seems to be correct for now, but still unknown if theres more in this variable
 	};

-	for (auto ref = vm::ptr<ref_t>::make(fref); ref->type; ref++)
+	for (const ref_t* ref = &_module.get_ref<ref_t>(fref); ref->type; fref += sizeof(ref_t), ref = &_module.get_ref<ref_t>(fref))
 	{
 		if (ref->addend) ppu_loader.warning("**** REF(%u): Addend value(0x%x, 0x%x)", ref->type, ref->addr, ref->addend);

@ -584,28 +585,28 @@ static void ppu_patch_refs(std::vector<ppu_reloc>* out_relocs, u32 fref, u32 fad
 		{
 		case 1:
 		{
-			const u32 value = vm::_ref<u32>(ref->addr) = rdata;
+			const u32 value = _module.get_ref<u32>(ref->addr) = rdata;
 			ppu_loader.trace("**** REF(1): 0x%x <- 0x%x", ref->addr, value);
 			break;
 		}

 		case 4:
 		{
-			const u16 value = vm::_ref<u16>(ref->addr) = static_cast<u16>(rdata);
+			const u16 value = _module.get_ref<u16>(ref->addr) = static_cast<u16>(rdata);
 			ppu_loader.trace("**** REF(4): 0x%x <- 0x%04x (0x%llx)", ref->addr, value, faddr);
 			break;
 		}

 		case 6:
 		{
-			const u16 value = vm::_ref<u16>(ref->addr) = static_cast<u16>(rdata >> 16) + (rdata & 0x8000 ? 1 : 0);
+			const u16 value = _module.get_ref<u16>(ref->addr) = static_cast<u16>(rdata >> 16) + (rdata & 0x8000 ? 1 : 0);
 			ppu_loader.trace("**** REF(6): 0x%x <- 0x%04x (0x%llx)", ref->addr, value, faddr);
 			break;
 		}

 		case 57:
 		{
-			const u16 value = vm::_ref<ppu_bf_t<be_t<u16>, 0, 14>>(ref->addr) = static_cast<u16>(rdata) >> 2;
+			const u16 value = _module.get_ref<ppu_bf_t<be_t<u16>, 0, 14>>(ref->addr) = static_cast<u16>(rdata) >> 2;
 			ppu_loader.trace("**** REF(57): 0x%x <- 0x%04x (0x%llx)", ref->addr, value, faddr);
 			break;
 		}
@ -680,7 +681,7 @@ extern bool ppu_register_library_lock(std::string_view libname, bool lock_lib)
 }

 // Load and register exports; return special exports found (nameless module)
-static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 exports_end, bool for_observing_callbacks = false, std::basic_string<bool>* loaded_flags = nullptr)
+static auto ppu_load_exports(const ppu_module& _module, ppu_linkage_info* link, u32 exports_start, u32 exports_end, bool for_observing_callbacks = false, std::basic_string<bool>* loaded_flags = nullptr)
 {
 	std::unordered_map<u32, u32> result;

@ -694,7 +695,7 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo

 	for (u32 addr = exports_start; addr < exports_end; unload_index++, addr += lib.size ? lib.size : sizeof(ppu_prx_module_info))
 	{
-		std::memcpy(&lib, vm::base(addr), sizeof(lib));
+		std::memcpy(&lib, &_module.get_ref<ppu_prx_module_info>(addr), sizeof(lib));

 		const bool is_library = !!(lib.attributes & PRX_EXPORT_LIBRARY_FLAG);
 		const bool is_management = !is_library && !!(lib.attributes & PRX_EXPORT_PRX_MANAGEMENT_FUNCTIONS_FLAG);
@ -709,12 +710,12 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 			// Set special exports
 			for (u32 i = 0, end = lib.num_func + lib.num_var; i < end; i++)
 			{
-				const u32 nid = lib.nids[i];
-				const u32 addr = lib.addrs[i];
+				const u32 nid = _module.get_ref<u32>(lib.nids, i);
+				const u32 addr = _module.get_ref<u32>(lib.addrs, i);

 				if (i < lib.num_func)
 				{
-					ppu_loader.notice("** Special: [%s] at 0x%x [0x%x, 0x%x]", ppu_get_function_name({}, nid), addr, vm::_ref<u32>(addr), vm::_ref<u32>(addr + 4));
+					ppu_loader.notice("** Special: [%s] at 0x%x [0x%x, 0x%x]", ppu_get_function_name({}, nid), addr, _module.get_ref<u32>(addr), _module.get_ref<u32>(addr + 4));
 				}
 				else
 				{
@ -738,7 +739,7 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 			continue;
 		}

-		const std::string module_name(lib.name.get_ptr());
+		const std::string module_name(&_module.get_ref<const char>(lib.name));

 		if (unload_exports)
 		{
@ -782,9 +783,9 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 		// Get functions
 		for (u32 i = 0, end = lib.num_func; i < end; i++)
 		{
-			const u32 fnid = fnids[i];
-			const u32 faddr = faddrs[i];
-			ppu_loader.notice("**** %s export: [%s] (0x%08x) at 0x%x [at:0x%x]", module_name, ppu_get_function_name(module_name, fnid), fnid, faddr, vm::read32(faddr));
+			const u32 fnid = _module.get_ref<u32>(fnids, i);
+			const u32 faddr = _module.get_ref<u32>(faddrs, i);
+			ppu_loader.notice("**** %s export: [%s] (0x%08x) at 0x%x [at:0x%x]", module_name, ppu_get_function_name(module_name, fnid), fnid, faddr, _module.get_ref<u32>(faddr));

 			// Function linkage info
 			auto& flink = mlink.functions[fnid];
@ -811,7 +812,10 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 					// Set exported function
 					flink.export_addr = target - 4;

-					ppu_form_branch_to_code(vm::read32(faddr), target);
+					if (auto ptr = _module.get_ptr<u32>(faddr); vm::try_get_addr(ptr).first)
+					{
+						ppu_form_branch_to_code(*ptr, target);
+					}
 				}
 				else
 				{
@ -821,13 +825,13 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 					// Fix imports
 					for (const u32 addr : flink.imports)
 					{
-						vm::write32(addr, faddr);
+						_module.get_ref<u32>(addr) = faddr;
 						//ppu_loader.warning("Exported function '%s' in module '%s'", ppu_get_function_name(module_name, fnid), module_name);
 					}

 					for (const u32 fref : flink.frefss)
 					{
-						ppu_patch_refs(nullptr, fref, faddr);
+						ppu_patch_refs(_module, nullptr, fref, faddr);
 					}
 				}
 			}
@ -839,8 +843,8 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 		// Get variables
 		for (u32 i = 0, end = lib.num_var; i < end; i++)
 		{
-			const u32 vnid = vnids[i];
-			const u32 vaddr = vaddrs[i];
+			const u32 vnid = _module.get_ref<u32>(vnids, i);
+			const u32 vaddr = _module.get_ref<u32>(vaddrs, i);
 			ppu_loader.notice("**** %s export: &[%s] at 0x%x", module_name, ppu_get_variable_name(module_name, vnid), vaddr);

 			// Variable linkage info
@ -863,7 +867,7 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 				// Fix imports
 				for (const auto vref : vlink.imports)
 				{
-					ppu_patch_refs(nullptr, vref, vaddr);
+					ppu_patch_refs(_module, nullptr, vref, vaddr);
 					//ppu_loader.warning("Exported variable '%s' in module '%s'", ppu_get_variable_name(module_name, vnid), module_name);
 				}
 			}
@ -873,17 +877,17 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo
 	return result;
 }

-static auto ppu_load_imports(std::vector<ppu_reloc>& relocs, ppu_linkage_info* link, u32 imports_start, u32 imports_end)
+static auto ppu_load_imports(const ppu_module& _module, std::vector<ppu_reloc>& relocs, ppu_linkage_info* link, u32 imports_start, u32 imports_end)
 {
 	std::unordered_map<u32, void*> result;

-	reader_lock lock(link->mutex);
+	std::lock_guard lock(link->mutex);

 	for (u32 addr = imports_start; addr < imports_end;)
 	{
-		const auto& lib = vm::_ref<const ppu_prx_module_info>(addr);
+		const auto& lib = _module.get_ref<const ppu_prx_module_info>(addr);

-		const std::string module_name(lib.name.get_ptr());
+		const std::string module_name(&_module.get_ref<const char>(lib.name));

 		ppu_loader.notice("** Imported module '%s' (ver=0x%x, attr=0x%x, 0x%x, 0x%x) [0x%x]", module_name, lib.version, lib.attributes, lib.unk4, lib.unk5, addr);

@ -903,8 +907,8 @@ static auto ppu_load_imports(std::vector<ppu_reloc>& relocs, ppu_linkage_info* l

 		for (u32 i = 0, end = lib.num_func; i < end; i++)
 		{
-			const u32 fnid = fnids[i];
-			const u32 fstub = faddrs[i];
+			const u32 fnid = _module.get_ref<u32>(fnids, i);
+			const u32 fstub = _module.get_ref<u32>(faddrs, i);
 			const u32 faddr = (faddrs + i).addr();
 			ppu_loader.notice("**** %s import: [%s] (0x%08x) -> 0x%x", module_name, ppu_get_function_name(module_name, fnid), fnid, fstub);

@ -920,14 +924,14 @@ static auto ppu_load_imports(std::vector<ppu_reloc>& relocs, ppu_linkage_info* l
 			const u32 link_addr = flink.export_addr ? flink.export_addr : g_fxo->get<ppu_function_manager>().addr;

 			// Write import table
-			vm::write32(faddr, link_addr);
+			_module.get_ref<u32>(faddr) = link_addr;

 			// Patch refs if necessary (0x2000 seems to be correct flag indicating the presence of additional info)
-			if (const u32 frefs = (lib.attributes & 0x2000) ? +fnids[i + lib.num_func] : 0)
+			if (const u32 frefs = (lib.attributes & 0x2000) ? +_module.get_ref<u32>(fnids, i + lib.num_func) : 0)
 			{
 				result.emplace(frefs, &flink);
 				flink.frefss.emplace(frefs);
-				ppu_patch_refs(&relocs, frefs, link_addr);
+				ppu_patch_refs(_module, &relocs, frefs, link_addr);
 			}

 			//ppu_loader.warning("Imported function '%s' in module '%s' (0x%x)", ppu_get_function_name(module_name, fnid), module_name, faddr);
@ -938,8 +942,8 @@ static auto ppu_load_imports(std::vector<ppu_reloc>& relocs, ppu_linkage_info* l

 		for (u32 i = 0, end = lib.num_var; i < end; i++)
 		{
-			const u32 vnid = vnids[i];
-			const u32 vref = vstubs[i];
+			const u32 vnid = _module.get_ref<u32>(vnids, i);
+			const u32 vref = _module.get_ref<u32>(vstubs, i);
 			ppu_loader.notice("**** %s import: &[%s] (ref=*0x%x)", module_name, ppu_get_variable_name(module_name, vnid), vref);

 			// Variable linkage info
@ -951,7 +955,7 @@ static auto ppu_load_imports(std::vector<ppu_reloc>& relocs, ppu_linkage_info* l
 			mlink.imported = true;

 			// Link if available
-			ppu_patch_refs(&relocs, vref, vlink.export_addr);
+			ppu_patch_refs(_module, &relocs, vref, vlink.export_addr);

 			//ppu_loader.warning("Imported variable '%s' in module '%s' (0x%x)", ppu_get_variable_name(module_name, vnid), module_name, vlink.first);
 		}
@ -968,14 +972,18 @@ void ppu_manual_load_imports_exports(u32 imports_start, u32 imports_size, u32 ex
 	auto& _main = g_fxo->get<main_ppu_module>();
 	auto& link = g_fxo->get<ppu_linkage_info>();

-	ppu_load_exports(&link, exports_start, exports_start + exports_size, false, &loaded_flags);
+	ppu_module vm_all_fake_module{};
+	vm_all_fake_module.segs.emplace_back(ppu_segment{0x10000, -0x10000u, 1 /*LOAD*/, 0, -0x1000u, vm::base(0x10000)});
+	vm_all_fake_module.addr_to_seg_index.emplace(0x10000, 0);
+
+	ppu_load_exports(vm_all_fake_module, &link, exports_start, exports_start + exports_size, false, &loaded_flags);

 	if (!imports_size)
 	{
 		return;
 	}

-	ppu_load_imports(_main.relocs, &link, imports_start, imports_start + imports_size);
+	ppu_load_imports(vm_all_fake_module, _main.relocs, &link, imports_start, imports_start + imports_size);
 }

 // For savestates
@ -1063,12 +1071,188 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&
 		return;
 	}

+	const bool is_firmware = mod.path.starts_with(vfs::get("/dev_flash/"));
+
+	const auto _main = g_fxo->try_get<main_ppu_module>();
+
 	const std::string_view seg_view{ensure(mod.get_ptr<char>(seg.addr)), seg.size};

-	for (usz i = seg_view.find("\177ELF"); i < seg.size; i = seg_view.find("\177ELF", i + 4))
+	auto find_first_of_multiple = [](std::string_view data, std::initializer_list<std::string_view> values, usz index)
+	{
+		usz pos = umax;
+
+		for (std::string_view value : values)
+		{
+			if (usz pos0 = data.substr(index, pos - index).find(value); pos0 != umax && pos0 + index < pos)
+			{
+				pos = pos0 + index;
+			}
+		}
+
+		return pos;
+	};
+
+	extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size);
+
+	// Search for [stqd lr,0x10(sp)] instruction or ELF file signature, whichever comes first
+	const std::initializer_list<std::string_view> prefixes = {"\177ELF"sv, "\x24\0\x40\x80"sv};
+
+	usz prev_bound = 0;
+
+	for (usz i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, utils::align<u32>(i + 1, 4)))
 	{
 		const auto elf_header = ensure(mod.get_ptr<u8>(seg.addr + i));

+		if (i % 4 == 0 && std::memcmp(elf_header, "\x24\0\x40\x80", 4) == 0)
+		{
+			bool next = true;
+			const u32 old_i = i;
+
+			for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries--, search = utils::sub_saturate<u32>(search, 128))
+			{
+				if (seg_view[search] != 0x42 && seg_view[search] != 0x43)
+				{
+					continue;
+				}
+
+				const u32 inst1 = read_from_ptr<be_t<u32>>(seg_view, search);
+				const u32 inst2 = read_from_ptr<be_t<u32>>(seg_view, search + 4);
+				const u32 inst3 = read_from_ptr<be_t<u32>>(seg_view, search + 8);
+				const u32 inst4 = read_from_ptr<be_t<u32>>(seg_view, search + 12);
+
+				if ((inst1 & 0xfe'00'00'7f) != 0x42000002 || (inst2 & 0xfe'00'00'7f) != 0x42000002 || (inst3 & 0xfe'00'00'7f) != 0x42000002 || (inst4 & 0xfe'00'00'7f) != 0x42000002)
+				{
+					continue;
+				}
+
+				ppu_log.success("Found SPURS GUID Pattern at 0x%05x", search + seg.addr);
+				i = search;
+				next = false;
+				break;
+			}
+
+			if (next)
+			{
+				continue;
+			}
+
+			std::string_view ls_segment = seg_view.substr(i);
+
+			// Bound to a bit less than LS size
+			ls_segment = ls_segment.substr(0, 0x38000);
+
+			for (usz addr_last = 0, valid_count = 0, invalid_count = 0;;)
+			{
+				usz instruction = ls_segment.find("\x24\0\x40\x80"sv, addr_last);
+
+				if (instruction != umax)
+				{
+					if (instruction % 4 != i % 4)
+					{
+						// Unaligned, continue
+						addr_last = instruction + (i % 4 - instruction % 4) % 4;
+						continue;
+					}
+
+					// FIXME: This seems to terminate SPU code prematurely in some cases
+					// Likely due to absolute branches
+					if (spu_thread::is_exec_code(instruction, {reinterpret_cast<const u8*>(ls_segment.data()), ls_segment.size()}, 0))
+					{
+						addr_last = instruction + 4;
+						valid_count++;
+						invalid_count = 0;
+						continue;
+					}
+
+					if (invalid_count == 0)
+					{
+						// Allow a single case of invalid data
+						addr_last = instruction + 4;
+						invalid_count++;
+						continue;
+					}
+
+					addr_last = instruction;
+				}
+
+				if (addr_last >= 0x80 && valid_count >= 2)
+				{
+					const u32 begin = i & -128;
+					u32 end = std::min<u32>(seg.size, utils::align<u32>(i + addr_last + 256, 128));
+
+					u32 guessed_ls_addr = 0;
+
+					// Try to guess LS address by observing the pattern for disable/enable interrupts
+					// ILA R2, PC + 8
+					// BIE/BID R2
+
+					for (u32 found = 0, last_vaddr = 0, it = begin + 16; it < end - 16; it += 4)
+					{
+						const u32 inst1 = read_from_ptr<be_t<u32>>(seg_view, it);
+						const u32 inst2 = read_from_ptr<be_t<u32>>(seg_view, it + 4);
+						const u32 inst3 = read_from_ptr<be_t<u32>>(seg_view, it + 8);
+						const u32 inst4 = read_from_ptr<be_t<u32>>(seg_view, it + 12);
+
+						if ((inst1 & 0xfe'00'00'7f) == 0x42000002 && (inst2 & 0xfe'00'00'7f) == 0x42000002 && (inst3 & 0xfe'00'00'7f) == 0x42000002 && (inst4 & 0xfe'00'00'7f) == 0x42000002)
+						{
+							// SPURS GUID pattern
+							end = it;
+							ppu_log.success("Found SPURS GUID Pattern for terminator at 0x%05x", end + seg.addr);
+							break;
+						}
+
+						if ((inst1 >> 7) % 4 == 0 && (inst1 & 0xfe'00'00'7f) == 0x42000002 && (inst2 == 0x35040100 || inst2 == 0x35080100))
+						{
+							const u32 addr_inst = (inst1 >> 7) % 0x40000;
+
+							if (u32 addr_seg = addr_inst - std::min<u32>(it + 8 - begin, addr_inst))
+							{
+								if (last_vaddr != addr_seg)
+								{
+									guessed_ls_addr = 0;
+									found = 0;
+								}
+
+								found++;
+								last_vaddr = addr_seg;
+
+								if (found >= 2)
+								{
+									// Good segment address
+									guessed_ls_addr = last_vaddr;
+									ppu_log.notice("Found IENABLE/IDSIABLE Pattern at 0x%05x", it + seg.addr);
+								}
+							}
+						}
+					}
+
+					if (guessed_ls_addr)
+					{
+						end = begin + std::min<u32>(end - begin, SPU_LS_SIZE - guessed_ls_addr);
+					}
+
+					ppu_log.success("Found valid roaming SPU code at 0x%x..0x%x (guessed_ls_addr=0x%x)", seg.addr + begin, seg.addr + end, guessed_ls_addr);
+
+					if (!is_firmware && _main == &mod)
+					{
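+						// Signify that the base address is unknown by passing 0 (NOTE(review): the call below actually falls back to 0x4000 when no address was guessed - confirm intent)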
+						utilize_spu_data_segment(guessed_ls_addr ? guessed_ls_addr : 0x4000, seg_view.data() + begin, end - begin);
+					}
+
+					i = std::max<u32>(end, i + 4) - 4;
+					prev_bound = i + 4;
+				}
+				else
+				{
+					i = old_i;
+				}
+
+				break;
+			}
+
+			continue;
+		}
+
 		// Try to load SPU image
 		const spu_exec_object obj(fs::file(elf_header, seg.size - i));

@ -1100,6 +1284,13 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&

 			if (prog.p_type == 0x1u /* LOAD */ && prog.p_filesz > 0u)
 			{
+				if (prog.p_vaddr && !is_firmware && _main == &mod)
+				{
+					extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size);
+
+					utilize_spu_data_segment(prog.p_vaddr, (elf_header + prog.p_offset), prog.p_filesz);
+				}
+
 				sha1_update(&sha2, (elf_header + prog.p_offset), prog.p_filesz);
 			}

@ -1117,6 +1308,8 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&
 			}
 		}

+		fmt::append(dump, " (image addr: 0x%x, size: 0x%x)", seg.addr + i, obj.highest_offset);
+
 		sha1_finish(&sha2, sha1_hash);

 		// Format patch name
@ -1159,6 +1352,9 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&
 		{
 			ppu_loader.success("SPU executable hash: %s (<- %u)%s", hash, applied.size(), dump);
 		}
+
+		i += obj.highest_offset - 4;
+		prev_bound = i + 4;
 	}
 }

@ -1349,12 +1545,6 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo
 				// Initialize executable code if necessary
 				if (prog.p_flags & 0x1 && !virtual_load)
 				{
-					if (ar)
-					{
-						// Disable analysis optimization for savestates (it's not compatible with savestate with patches applied)
-						end = std::max(end, utils::align<u32>(addr + mem_size, 0x10000));
-					}
-
 					ppu_register_range(addr, mem_size);
 				}
 			}
@ -1569,10 +1759,14 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo

 		ppu_loader.warning("Library %s (rtoc=0x%x):", lib_name, lib_info->toc);

-		if (!virtual_load)
+		ppu_linkage_info dummy{};
+
+		prx->specials = ppu_load_exports(*prx, virtual_load ? &dummy : &link, prx->exports_start, prx->exports_end, true);
+		prx->imports = ppu_load_imports(*prx, prx->relocs, virtual_load ? &dummy : &link, lib_info->imports_start, lib_info->imports_end);
+
+		if (virtual_load)
 		{
-			prx->specials = ppu_load_exports(&link, prx->exports_start, prx->exports_end, true);
-			prx->imports = ppu_load_imports(prx->relocs, &link, lib_info->imports_start, lib_info->imports_end);
+			prx->imports.clear();
 		}

 		std::stable_sort(prx->relocs.begin(), prx->relocs.end());
@ -1640,6 +1834,36 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo
 		}
 	}

+	// Disabled for PRX for now (problematic and does not seem to have any benefit)
+	end = 0;
+
+	if (!applied.empty() || ar)
+	{
+		// Compare memory after the end of the executable code sections against the original file contents
+		if (end >= prx->segs[0].addr && end < prx->segs[0].addr + prx->segs[0].size)
+		{
+			for (const auto& prog : elf.progs)
+			{
+				// Find the first segment
+				if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
+				{
+					std::basic_string_view<uchar> elf_memory{prog.bin.data(), prog.bin.size()};
+					elf_memory.remove_prefix(end - prx->segs[0].addr);
+
+					if (elf_memory != std::basic_string_view<uchar>{&prx->get_ref<uchar>(end), elf_memory.size()})
+					{
+						// There are changes, disable analysis optimization
+						ppu_loader.notice("Disabling analysis optimization due to memory changes from original file");
+
+						end = 0;
+					}
+
+					break;
+				}
+			}
+		}
+	}
+
 	// Embedded SPU elf patching
 	for (const auto& seg : prx->segs)
 	{
@ -1658,6 +1882,11 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo

 void ppu_unload_prx(const lv2_prx& prx)
 {
+	if (prx.segs.empty() || prx.segs[0].ptr != vm::base(prx.segs[0].addr))
+	{
+		return;
+	}
+
 	std::unique_lock lock(g_fxo->get<ppu_linkage_info>().mutex, std::defer_lock);

 	// Clean linkage info
@ -1708,10 +1937,7 @@ void ppu_unload_prx(const lv2_prx& prx)
 	{
 		if (!seg.size) continue;

-		if (seg.ptr == vm::base(seg.addr))
-		{
-			vm::dealloc(seg.addr, vm::main);
-		}
+		vm::dealloc(seg.addr, vm::main);

 		const std::string hash_seg = fmt::format("%s-%u", hash, &seg - prx.segs.data());

@ -1847,15 +2073,30 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
 			else if (already_loaded)
 			{
 			}
-			else if (!vm::falloc(addr, size, vm::main))
+			else if (![&]() -> bool
 			{
-				ppu_loader.error("vm::falloc(vm::main) failed (addr=0x%x, memsz=0x%x)", addr, size); // TODO
+				// 1M pages if it is RSX shared
+				const u32 area_flags = (_seg.flags >> 28) ? vm::page_size_1m : vm::page_size_64k;
+				const u32 alloc_at = std::max<u32>(addr & -0x10000000, 0x10000);

-				if (!vm::falloc(addr, size))
+				const auto area = vm::reserve_map(vm::any, std::max<u32>(addr & -0x10000000, 0x10000), 0x10000000, area_flags);
+
+				if (!area)
 				{
-					ppu_loader.error("ppu_load_exec(): vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size);
 					return false;
 				}
+
+				if (area->addr != alloc_at || (area->flags & 0xf00) != area_flags)
+				{
+					ppu_loader.error("Failed to allocate memory at 0x%x - conflicting memory area exists: area->addr=0x%x, area->flags=0x%x", addr, area->addr, area->flags);
+					return false;
+				}
+
+				return area->falloc(addr, size);
+			}())
+			{
+				ppu_loader.error("ppu_load_exec(): vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size);
+				return false;
 			}

 			// Store only LOAD segments (TODO)
@ -1882,12 +2123,6 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
 			// Initialize executable code if necessary
 			if (prog.p_flags & 0x1 && !virtual_load)
 			{
-				if (already_loaded && ar)
-				{
-					// Disable analysis optimization for savestates (it's not compatible with savestate with patches applied)
-					end = std::max(end, utils::align<u32>(addr + size, 0x10000));
-				}
-
 				ppu_register_range(addr, size);
 			}
 		}
@ -1941,6 +2176,33 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
 		applied += g_fxo->get<patch_engine>().apply(Emu.GetTitleID() + '-' + hash, [&](u32 addr, u32 size) { return _main.get_ptr<u8>(addr, size); });
 	}

+	if (!applied.empty() || ar)
+	{
+		// Compare memory past the end of the executable code sections against the original file contents
+		if (end >= _main.segs[0].addr && end < _main.segs[0].addr + _main.segs[0].size)
+		{
+			for (const auto& prog : elf.progs)
+			{
+				// Find the first non-empty LOAD segment
+				if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
+				{
+					std::basic_string_view<uchar> elf_memory{prog.bin.data(), prog.bin.size()};
+					elf_memory.remove_prefix(end - _main.segs[0].addr);
+
+					if (elf_memory != std::basic_string_view<uchar>{&_main.get_ref<u8>(end), elf_memory.size()})
+					{
+						// There are changes, disable analysis optimization
+						ppu_loader.notice("Disabling analysis optimization due to memory changes from original file");
+
+						end = 0;
+					}
+
+					break;
+				}
+			}
+		}
+	}
+
 	if (applied.empty())
 	{
 		ppu_loader.warning("PPU executable hash: %s", hash);
@ -2103,11 +2365,10 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
 					return false;
 				}

-				if (!virtual_load)
-				{
-					ppu_load_exports(&link, proc_prx_param.libent_start, proc_prx_param.libent_end);
-					ppu_load_imports(_main.relocs, &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end);
-				}
+				ppu_linkage_info dummy{};
+
+				ppu_load_exports(_main, virtual_load ? &dummy : &link, proc_prx_param.libent_start, proc_prx_param.libent_end);
+				ppu_load_imports(_main, _main.relocs, virtual_load ? &dummy : &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end);

 				std::stable_sort(_main.relocs.begin(), _main.relocs.end());
 			}
@ -2224,16 +2485,15 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str

 		void init_fxo_for_exec(utils::serial* ar, bool full);
 		init_fxo_for_exec(ar, false);
+
+		liblv2_begin = 0;
+		liblv2_end = 0;
 	}
 	else
 	{
 		g_ps3_process_info = old_process_info;
-		Emu.ConfigurePPUCache();
 	}

-	liblv2_begin = 0;
-	liblv2_end = 0;
-
 	if (!load_libs.empty())
 	{
 		for (const auto& name : load_libs)
@ -2295,33 +2555,6 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
 		}
 	}

-	// Initialize process arguments
-	auto args = vm::ptr<u64>::make(vm::alloc(u32{sizeof(u64)} * (::size32(Emu.argv) + ::size32(Emu.envp) + 2), vm::main));
-	auto argv = args;
-
-	for (const auto& arg : Emu.argv)
-	{
-		const u32 arg_size = ::size32(arg) + 1;
-		const u32 arg_addr = vm::alloc(arg_size, vm::main);
-
-		std::memcpy(vm::base(arg_addr), arg.data(), arg_size);
-
-		*args++ = arg_addr;
-	}
-
-	*args++ = 0;
-	auto envp = args;
-
-	for (const auto& arg : Emu.envp)
-	{
-		const u32 arg_size = ::size32(arg) + 1;
-		const u32 arg_addr = vm::alloc(arg_size, vm::main);
-
-		std::memcpy(vm::base(arg_addr), arg.data(), arg_size);
-
-		*args++ = arg_addr;
-	}
-
 	// Fix primary stack size
 	switch (u32 sz = primary_stacksize)
 	{
@ -2352,9 +2585,62 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
 	if (!Emu.data.empty())
 	{
 		std::memcpy(vm::base(ppu->stack_addr + ppu->stack_size - ::size32(Emu.data)), Emu.data.data(), Emu.data.size());
-		ppu->gpr[1] -= Emu.data.size();
+		ppu->gpr[1] -= utils::align<u32>(::size32(Emu.data), 0x10);
 	}

+	// Initialize process arguments
+
+	// Calculate storage requirements on the stack
+	const u32 pointers_storage_size = u32{sizeof(u64)} * utils::align<u32>(::size32(Emu.envp) + ::size32(Emu.argv) + 2, 2);
+
+	u32 stack_alloc_size = pointers_storage_size;
+
+	for (const auto& arg : Emu.argv)
+	{
+		stack_alloc_size += utils::align<u32>(::size32(arg) + 1, 0x10);
+	}
+
+	for (const auto& arg : Emu.envp)
+	{
+		stack_alloc_size += utils::align<u32>(::size32(arg) + 1, 0x10);
+	}
+
+	ensure(ppu->stack_size > stack_alloc_size);
+
+	vm::ptr<u64> args = vm::cast(static_cast<u32>(ppu->stack_addr + ppu->stack_size - stack_alloc_size - utils::align<u32>(Emu.data.size(), 0x10)));
+	vm::ptr<u8> args_data = vm::cast(args.addr() + pointers_storage_size);
+
+	const vm::ptr<u64> argv = args;
+
+	for (const auto& arg : Emu.argv)
+	{
+		const u32 arg_size = ::size32(arg) + 1;
+
+		std::memcpy(args_data.get_ptr(), arg.data(), arg_size);
+
+		*args++ = args_data.addr();
+		args_data = vm::cast(args_data.addr() + utils::align<u32>(arg_size, 0x10));
+	}
+
+	*args++ = 0;
+
+	const vm::ptr<u64> envp = args;
+	args = envp;
+
+	for (const auto& arg : Emu.envp)
+	{
+		const u32 arg_size = ::size32(arg) + 1;
+
+		std::memcpy(args_data.get_ptr(), arg.data(), arg_size);
+
+		*args++ = args_data.addr();
+		args_data = vm::cast(args_data.addr() + utils::align<u32>(arg_size, 0x10));
+	}
+
+	*args++ = 0;
+
+	ppu->gpr[1] -= stack_alloc_size;
+
 	ensure(g_fxo->get<lv2_memory_container>().take(primary_stacksize));

 	ppu->cmd_push({ppu_cmd::initialize, 0});
@ -2388,7 +2674,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
 	// Set command line arguments, run entry function
 	ppu->cmd_list
 	({
-		{ ppu_cmd::set_args, 8 }, u64{Emu.argv.size()}, u64{argv.addr()}, u64{envp.addr()}, u64{0}, u64{ppu->id}, u64{tls_vaddr}, u64{tls_fsize}, u64{tls_vsize},
+		{ ppu_cmd::set_args, 8 }, u64{Emu.argv.size()}, u64{argv.addr()}, u64{envp.addr()}, u64{Emu.envp.size()}, u64{ppu->id}, u64{tls_vaddr}, u64{tls_fsize}, u64{tls_vsize},
 		{ ppu_cmd::set_gpr, 11 }, u64{elf.header.e_entry},
 		{ ppu_cmd::set_gpr, 12 }, u64{malloc_pagesize},
 		{ ppu_cmd::entry_call, 0 },
@ -2442,7 +2728,7 @@ std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_ex
 		}
 	}

-	const auto ovlm = std::make_shared<lv2_overlay>();
+	std::shared_ptr<lv2_overlay> ovlm = std::make_shared<lv2_overlay>();

 	// Set path (TODO)
 	ovlm->name = path.substr(path.find_last_of('/') + 1);
@ -2522,12 +2808,6 @@ std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_ex
 			// Initialize executable code if necessary
 			if (prog.p_flags & 0x1 && !virtual_load)
 			{
-				if (ar)
-				{
-					// Disable analysis optimization for savestates (it's not compatible with savestate with patches applied)
-					end = std::max(end, utils::align<u32>(addr + size, 0x10000));
-				}
-
 				ppu_register_range(addr, size);
 			}
 		}
@ -2579,6 +2859,33 @@ std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_ex
 		applied += g_fxo->get<patch_engine>().apply(Emu.GetTitleID() + '-' + hash, [ovlm](u32 addr, u32 size) { return ovlm->get_ptr<u8>(addr, size); });
 	}

+	if (!applied.empty() || ar)
+	{
+		// Compare memory past the end of the executable code sections against the original file contents
+		if (end >= ovlm->segs[0].addr && end < ovlm->segs[0].addr + ovlm->segs[0].size)
+		{
+			for (const auto& prog : elf.progs)
+			{
+				// Find the first non-empty LOAD segment
+				if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
+				{
+					std::basic_string_view<uchar> elf_memory{prog.bin.data(), prog.bin.size()};
+					elf_memory.remove_prefix(end - ovlm->segs[0].addr);
+
+					if (elf_memory != std::basic_string_view<uchar>{&ovlm->get_ref<u8>(end), elf_memory.size()})
+					{
+						// There are changes, disable analysis optimization
+						ppu_loader.notice("Disabling analysis optimization due to memory changes from original file");
+
+						end = 0;
+					}
+
+					break;
+				}
+			}
+		}
+	}
+
 	// Embedded SPU elf patching
 	for (const auto& seg : ovlm->segs)
 	{
@ -2665,11 +2972,10 @@ std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_ex
 					fmt::throw_exception("Bad magic! (0x%x)", proc_prx_param.magic);
 				}

-				if (!virtual_load)
-				{
-					ppu_load_exports(&link, proc_prx_param.libent_start, proc_prx_param.libent_end);
-					ppu_load_imports(ovlm->relocs, &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end);
-				}
+				ppu_linkage_info dummy{};
+
+				ppu_load_exports(*ovlm, virtual_load ? &dummy : &link, proc_prx_param.libent_start, proc_prx_param.libent_end);
+				ppu_load_imports(*ovlm, ovlm->relocs, virtual_load ? &dummy : &link, proc_prx_param.libstub_start, proc_prx_param.libstub_end);
 			}
 			break;
 		}
@ -2783,7 +3089,7 @@ bool ppu_load_rel_exec(const ppu_rel_object& elf)
 			_sec.addr = addr;
 			relm.secs.emplace_back(_sec);

-			std::memcpy(vm::base(addr), s.bin.data(), size);
+			std::memcpy(vm::base(addr), s.get_bin().data(), size);
 			addr = utils::align<u32>(addr + size, 128);
 		}
 	}
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@ -11,6 +11,7 @@
 #include "Emu/Memory/vm_locking.h"
 #include "Emu/RSX/Core/RSXReservationLock.hpp"
 #include "Emu/VFS.h"
+#include "Emu/vfs_config.h"
 #include "Emu/system_progress.hpp"
 #include "Emu/system_utils.hpp"
 #include "PPUThread.h"
@ -1206,9 +1207,7 @@ std::array<u32, 2> op_branch_targets(u32 pc, ppu_opcode_t op)
 {
 	std::array<u32, 2> res{pc + 4, umax};

-	g_fxo->need<ppu_far_jumps_t>();
-
-	if (u32 target = g_fxo->get<ppu_far_jumps_t>().get_target(pc))
+	if (u32 target = g_fxo->is_init<ppu_far_jumps_t>() ? g_fxo->get<ppu_far_jumps_t>().get_target(pc) : 0)
 	{
 		res[0] = target;
 		return res;
@ -1564,16 +1563,19 @@ std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const

 	std::vector<std::pair<u32, u32>> call_stack_list;

-	bool first = true;
+	bool is_first = true;
+	bool skip_single_frame = false;
+
+	const u64 _lr = this->lr;
+	const u32 _cia = this->cia;
+	const u64 gpr0 = this->gpr[0];

 	for (
 		u64 sp = r1;
 		sp % 0x10 == 0u && sp >= stack_min && sp <= stack_max - ppu_stack_start_offset;
-		first = false
+		is_first = false
 		)
 	{
-		u64 addr = *vm::get_super_ptr<u64>(static_cast<u32>(sp + 16));
-
 		auto is_invalid = [](u64 addr)
 		{
 			if (addr > u32{umax} || addr % 4 || !vm::check_addr(static_cast<u32>(addr), vm::page_executable))
@ -1585,28 +1587,328 @@ std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const
 			return addr == g_fxo->get<ppu_function_manager>().func_addr(1, true);
 		};

-		if (is_invalid(addr))
+		if (is_first && !is_invalid(_lr))
 		{
-			if (first)
-			{
-				// Function hasn't saved LR, could be because it's a leaf function
-				// Use LR directly instead
-				addr = lr;
+			// Detect functions with no stack or before LR has been stored

-				if (is_invalid(addr))
+			// Tracking if instruction has already been passed through
+			// Instead of using map or set, use two vectors relative to CIA and resize as needed
+			std::vector<be_t<u32>> inst_neg;
+			std::vector<be_t<u32>> inst_pos;
+
+			auto get_inst = [&](u32 pos) -> be_t<u32>&
+			{
+				static be_t<u32> s_inst_empty{};
+
+				if (pos < _cia)
 				{
-					// Skip it, workaround
-					continue;
+					const u32 neg_dist = (_cia - pos - 4) / 4;
+
+					if (neg_dist >= inst_neg.size())
+					{
+						const u32 inst_bound = pos & -256;
+
+						const usz old_size = inst_neg.size();
+						const usz new_size = neg_dist + (pos - inst_bound) / 4 + 1;
+
+						if (new_size >= 0x8000)
+						{
+							// Gross lower limit for the function (if it is that size it is unlikely that it is even a leaf function)
+							return s_inst_empty;
+						}
+
+						inst_neg.resize(new_size);
+
+						if (!vm::try_access(inst_bound, &inst_neg[old_size], (new_size - old_size) * sizeof(be_t<u32>), false))
+						{
+							// Read failure: the entries remain zero, which the caller treats as a failed read
+						}
+
+						// Reverse the newly read range (because this buffer is indexed backwards in address)
+
+						for (usz start = old_size, end = new_size - 1; start < end; start++, end--)
+						{
+							std::swap(inst_neg[start], inst_neg[end]);
+						}
+					}
+
+					return inst_neg[neg_dist];
+				}
+
+				const u32 pos_dist = (pos - _cia) / 4;
+
+				if (pos_dist >= inst_pos.size())
+				{
+					const u32 inst_bound = utils::align<u32>(pos, 256);
+
+					const usz old_size = inst_pos.size();
+					const usz new_size = pos_dist + (inst_bound - pos) / 4 + 1;
+
+					if (new_size >= 0x8000)
+					{
+						// Gross upper limit for the function (if it is that size it is unlikely that it is even a leaf function)
+						return s_inst_empty;
+					}
+
+					inst_pos.resize(new_size);
+
+					if (!vm::try_access(pos, &inst_pos[old_size], (new_size - old_size) * sizeof(be_t<u32>), false))
+					{
+						// Failure (this would be detected as failure by zeroes)
+					}
+				}
+
+				return inst_pos[pos_dist];
+			};
+
+			bool upper_abort = false;
+
+			struct context_t
+			{
+				u32 start_point;
+				bool maybe_leaf = false; // True if the function is leaf or at the very end/start of non-leaf
+				bool non_leaf = false; // Absolutely not a leaf
+				bool about_to_push_frame = false; // STDU incoming
+				bool about_to_store_lr = false; // Link is about to be stored on stack
+				bool about_to_pop_frame = false; // ADDI R1 is about to be issued
+				bool about_to_load_link = false; // MTLR is about to be issued
+				bool maybe_use_reg0_instead_of_lr = false; // Use R0 at the end of a non-leaf function if ADDI has been issued before MTLR
+			};
+
+			// Start with CIA
+			std::deque<context_t> workload{context_t{_cia}};
+
+			usz start = 0;
+
+			for (; start < workload.size(); start++)
+			{
+				for (u32 wa = workload[start].start_point; vm::check_addr(wa, vm::page_executable);)
+				{
+					be_t<u32>& opcode = get_inst(wa);
+
+					auto& [_, maybe_leaf, non_leaf, about_to_push_frame, about_to_store_lr,
+						about_to_pop_frame, about_to_load_link, maybe_use_reg0_instead_of_lr] = workload[start];
+
+					if (!opcode)
+					{
+						// Already passed or failure of reading
+						break;
+					}
+
+					const ppu_opcode_t op{opcode};
+
+					// Mark as passed through
+					opcode = 0;
+
+					const auto type = g_ppu_itype.decode(op.opcode);
+
+					if (workload.size() == 1 && type == ppu_itype::STDU && op.rs == 1u && op.ra == 1u)
+					{
+						if (op.simm16 >= 0)
+						{
+							// Against ABI
+							non_leaf = true;
+							upper_abort = true;
+							break;
+						}
+
+						// Pushing a new stack frame (STDU on R1): this is indeed a new function (ok because LR has not been saved yet)
+						maybe_leaf = true;
+						about_to_push_frame = true;
+						about_to_pop_frame = false;
+						upper_abort = true;
+						break;
+					}
+
+					if (workload.size() == 1 && type == ppu_itype::STD && op.ra == 1u && op.rs == 0u)
+					{
+						bool found_matching_stdu = false;
+
+						for (u32 back = 1; back < 20; back++)
+						{
+							be_t<u32>& opcode = get_inst(utils::sub_saturate<u32>(_cia, back * 4));
+
+							if (!opcode)
+							{
+								// Already passed or failure of reading
+								break;
+							}
+
+							const ppu_opcode_t test_op{opcode};
+
+							const auto type = g_ppu_itype.decode(test_op.opcode);
+
+							if (type == ppu_itype::BCLR)
+							{
+								break;
+							}
+
+							if (type == ppu_itype::STDU && test_op.rs == 1u && test_op.ra == 1u)
+							{
+								if (0 - (test_op.ds << 2) == (op.ds << 2) - 0x10)
+								{
+									found_matching_stdu = true;
+								}
+
+								break;
+							}
+						}
+
+						if (found_matching_stdu)
+						{
+							// Saving LR to stack: this is indeed a new function (ok because LR has not been saved yet)
+							maybe_leaf = true;
+							about_to_store_lr = true;
+							about_to_pop_frame = true;
+							upper_abort = true;
+							break;
+						}
+					}
+
+					const u32 spr = ((op.spr >> 5) | ((op.spr & 0x1f) << 5));
+
+					// It can be placed before or after STDU, ignore for now
+					// if (workload.size() == 1 && type == ppu_itype::MFSPR && op.rs == 0u && spr == 0x8)
+					// {
+					// 	// Saving LR to register: this is indeed a new function (ok because LR has not been saved yet)
+					// 	maybe_leaf = true;
+					// 	about_to_store_lr = true;
+					// 	about_to_pop_frame = true;
+					// }
+
+					if (type == ppu_itype::MTSPR && spr == 0x8 && op.rs == 0u)
+					{
+						// Test for special case: if ADDI R1 is not found later in code, it means that LR is not restored and R0 should be used instead
+						// Can also search for ADDI R1 backwards and pull the value from stack (needs more research if it is more reliable)
+						maybe_use_reg0_instead_of_lr = true;
+					}
+
+					if (type == ppu_itype::UNK)
+					{
+						// Ignore for now
+						break;
+					}
+
+					if ((type & ppu_itype::branch) && op.lk)
+					{
+						// Gave up on LR before saving
+						non_leaf = true;
+						about_to_pop_frame = true;
+						upper_abort = true;
+						break;
+					}
+
+					// Even if BCLR is conditional, it still counts because LR value is ready for return
+					if (type == ppu_itype::BCLR)
+					{
+						// Returned
+						maybe_leaf = true;
+						upper_abort = true;
+						break;
+					}
+
+					if (type == ppu_itype::ADDI && op.ra == 1u && op.rd == 1u)
+					{
+						if (op.simm16 < 0)
+						{
+							// Against ABI
+							non_leaf = true;
+							upper_abort = true;
+							break;
+						}
+						else if (op.simm16 > 0)
+						{
+							// Remember that SP is about to be restored
+							about_to_pop_frame = true;
+							non_leaf = true;
+							upper_abort = true;
+							break;
+						}
+					}
+
+					const auto results = op_branch_targets(wa, op);
+
+					bool proceeded = false;
+
+					for (usz res_i = 0; res_i < results.size(); res_i++)
+					{
+						const u32 route_pc = results[res_i];
+
+						if (route_pc == umax)
+						{
+							continue;
+						}
+
+						if (vm::check_addr(route_pc, vm::page_executable) && get_inst(route_pc))
+						{
+							if (proceeded)
+							{
+								// Remember next route start point
+								workload.push_back(context_t{route_pc});
+							}
+							else
+							{
+								// Next PC
+								wa = route_pc;
+								proceeded = true;
+							}
+						}
+					}
+				}
+
+				if (upper_abort)
+				{
+					break;
 				}
 			}
-			else
+
+			const context_t& res = workload[std::min<usz>(start, workload.size() - 1)];
+
+			if (res.maybe_leaf && !res.non_leaf)
 			{
-				break;
+				const u32 result = res.maybe_use_reg0_instead_of_lr ? static_cast<u32>(gpr0) : static_cast<u32>(_lr);
+
+				// Same stack as far as we know
+				call_stack_list.emplace_back(result, static_cast<u32>(sp));
+
+				if (res.about_to_store_lr)
+				{
+					// LR has yet to be stored on stack, ignore the stack value
+					skip_single_frame = true;
+				}
+			}
+
+			if (res.about_to_pop_frame || (res.maybe_leaf && !res.non_leaf))
+			{
+				const u64 temp_sp = *vm::get_super_ptr<u64>(static_cast<u32>(sp));
+
+				if (temp_sp <= sp)
+				{
+					// Ensure inequality and that the old stack pointer is higher than current
+					break;
+				}
+
+				// Read the first stack frame so caller addresses can be obtained
+				sp = temp_sp;
+				continue;
 			}
 		}

-		// TODO: function addresses too
-		call_stack_list.emplace_back(static_cast<u32>(addr), static_cast<u32>(sp));
+		u64 addr = *vm::get_super_ptr<u64>(static_cast<u32>(sp + 16));
+
+		if (skip_single_frame)
+		{
+			skip_single_frame = false;
+		}
+		else if (!is_invalid(addr))
+		{
+			// TODO: function addresses too
+			call_stack_list.emplace_back(static_cast<u32>(addr), static_cast<u32>(sp));
+		}
+		else if (!is_first)
+		{
+			break;
+		}

 		const u64 temp_sp = *vm::get_super_ptr<u64>(static_cast<u32>(sp));

@ -1617,6 +1919,8 @@ std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const
 		}

 		sp = temp_sp;
+
+		is_first = false;
 	}

 	return call_stack_list;
@ -1808,7 +2112,14 @@ void ppu_thread::cpu_task()
 #endif
 			cmd_pop();

-			ppu_initialize(), spu_cache::initialize();
+			ppu_initialize();
+
+			if (Emu.IsStopped())
+			{
+				return;
+			}
+
+			spu_cache::initialize();

 #ifdef __APPLE__
 			pthread_jit_write_protect_np(true);
@ -1821,7 +2132,16 @@ void ppu_thread::cpu_task()

 			// Wait until the progress dialog is closed.
 			// We don't want to open a cell dialog while a native progress dialog is still open.
-			thread_ctrl::wait_on<atomic_wait::op_ne>(g_progr_ptotal, 0);
+			while (u32 v = g_progr_ptotal)
+			{
+				if (Emu.IsStopped())
+				{
+					return;
+				}
+
+				g_progr_ptotal.wait(v);
+			}
+
 			g_fxo->get<progress_dialog_workaround>().show_overlay_message_only = true;

 			// Sadly we can't postpone initializing guest time because we need to run PPU threads
@ -1839,7 +2159,7 @@ void ppu_thread::cpu_task()
 					}

 					ensure(spu.state.test_and_reset(cpu_flag::stop));
-					spu.state.notify_one(cpu_flag::stop);
+					spu.state.notify_one();
 				}
 			});

@ -1952,7 +2272,7 @@ void ppu_thread::exec_task()

 ppu_thread::~ppu_thread()
 {
-	perf_log.notice("Perf stats for STCX reload: successs %u, failure %u", last_succ, last_fail);
+	perf_log.notice("Perf stats for STCX reload: success %u, failure %u", last_succ, last_fail);
 	perf_log.notice("Perf stats for instructions: total %u", exec_bytes / 4);
 }

@ -2051,7 +2371,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
 	struct init_pushed
 	{
 		bool pushed = false;
-		atomic_t<bool> inited = false;
+		atomic_t<u32> inited = false;
 	};

 	call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1);
@ -2100,7 +2420,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
 				{
 					while (!Emu.IsStopped() && !g_fxo->get<init_pushed>().inited)
 					{
-						thread_ctrl::wait_on(g_fxo->get<init_pushed>().inited, false);
+						thread_ctrl::wait_on(g_fxo->get<init_pushed>().inited, 0);
 					}
 					return false;
 				}
@ -2117,7 +2437,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
 				{ppu_cmd::ptr_call, 0}, +[](ppu_thread&) -> bool
 				{
 					auto& inited = g_fxo->get<init_pushed>().inited;
-					inited = true;
+					inited = 1;
 					inited.notify_all();
 					return true;
 				}
@ -2453,10 +2773,10 @@ static void ppu_check(ppu_thread& ppu, u64 addr)
 {
 	ppu.cia = ::narrow<u32>(addr);

-	// ppu_check() shall not return directly
 	if (ppu.test_stopped())
-		{}
-	ppu_escape(&ppu);
+	{
+		return;
+	}
 }

 static void ppu_trace(u64 addr)
@ -3041,12 +3361,29 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 		return false;
 	}())
 	{
-		// Test a common pattern in lwmutex
 		extern atomic_t<u32> liblv2_begin, liblv2_end;

+		const u32 notify = ppu.res_notify;
+
+		if (notify)
+		{
+			vm::reservation_notifier(notify).notify_all();
+			ppu.res_notify = 0;
+		}
+
+		// Avoid notifications from lwmutex or sys_spinlock
 		if (ppu.cia < liblv2_begin || ppu.cia >= liblv2_end)
 		{
-			res.notify_all(-128);
+			if (!notify)
+			{
+				// Try to postpone notification to when PPU is asleep or join notifications on the same address
+				// This also optimizes a mutex - won't notify after lock is acquired (prolonging the critical section duration), only notifies on unlock
+				ppu.res_notify = addr;
+			}
+			else if ((addr ^ notify) & -128)
+			{
+				res.notify_all();
+			}
 		}

 		if (addr == ppu.last_faddr)
@ -3058,6 +3395,16 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 		return true;
 	}

+	const u32 notify = ppu.res_notify;
+
+	// Do not risk postponing too much (because this is probably an indefinite loop)
+	// And on failure it has some time to do something else
+	if (notify && ((addr ^ notify) & -128))
+	{
+		vm::reservation_notifier(notify).notify_all();
+		ppu.res_notify = 0;
+	}
+
 	return false;
 }

@ -3222,7 +3569,7 @@ extern void ppu_finalize(const ppu_module& info)
 	fmt::append(cache_path, "ppu-%s-%s/", fmt::base57(info.sha1), info.path.substr(info.path.find_last_of('/') + 1));

 #ifdef LLVM_AVAILABLE
-	g_fxo->get<jit_module_manager>().remove(cache_path + info.name + "_" + std::to_string(info.segs[0].addr));
+	g_fxo->get<jit_module_manager>().remove(cache_path + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));
 #endif
 }

@ -3320,8 +3667,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 				return g_prx_list.count(entry.name) && ::at32(g_prx_list, entry.name) != 0;
 			};

-			// Check .sprx filename
-			if (upper.ends_with(".SPRX") && entry.name != "libfs_utility_init.sprx"sv)
+			// Check PRX filename
+			if (upper.ends_with(".PRX") || (upper.ends_with(".SPRX") && entry.name != "libfs_utility_init.sprx"sv))
 			{
 				if (is_ignored(0))
 				{
@ -3333,8 +3680,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 				continue;
 			}

-			// Check .self filename
-			if (upper.ends_with(".SELF") && Emu.GetBoot() != dir_queue[i] + entry.name)
+			// Check ELF filename
+			if ((upper.ends_with(".ELF") || upper.ends_with(".SELF")) && Emu.GetBoot() != dir_queue[i] + entry.name)
 			{
 				// Get full path
 				file_queue.emplace_back(dir_queue[i] + entry.name,  0);
@ -3402,14 +3749,14 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 		// Set low priority
 		thread_ctrl::scoped_priority low_prio(-1);

-		for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone++)
+		for (usz func_i = fnext++, inc_fdone = 1; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone += std::exchange(inc_fdone, 1))
 		{
 			if (Emu.IsStopped())
 			{
 				continue;
 			}

-			auto [path, offset] = std::as_const(file_queue)[func_i];
+			auto& [path, offset] = file_queue[func_i];

 			ppu_log.notice("Trying to load: %s", path);

@ -3448,7 +3795,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 				{
 					obj.clear(), src.close(); // Clear decrypted file and elf object memory
 					ppu_initialize(*prx);
-					ppu_unload_prx(*prx);
 					ppu_finalize(*prx);
 					continue;
 				}
@ -3461,7 +3807,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 			{
 				while (ovl_err == elf_error::ok)
 				{
-					auto [ovlm, error] = ppu_load_overlay(obj, true, path, offset);
+					const auto [ovlm, error] = ppu_load_overlay(obj, true, path, offset);

 					if (error)
 					{
@ -3497,7 +3843,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_

 			ppu_log.notice("Failed to precompile '%s' (prx: %s, ovl: %s): Attempting tratment as executable file", path, prx_err, ovl_err);
 			possible_exec_file_paths.push(path);
-			continue;
+			inc_fdone = 0;
 		}
 	});

@ -3523,8 +3869,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_

 		for (; slice; slice.pop_front(), g_progr_fdone++)
 		{
-			g_progr_ftotal++;
-
 			if (Emu.IsStopped())
 			{
 				continue;
@ -3544,7 +3888,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 			}

 			// Some files may fail to decrypt due to the lack of klic
-			src = decrypt_self(std::move(src));
+			src = decrypt_self(std::move(src), nullptr, nullptr, true);

 			if (!src)
 			{
@ -3561,6 +3905,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 					main_ppu_module& _main = g_fxo->get<main_ppu_module>();
 					_main = {};

+					auto current_cache = std::move(g_fxo->get<spu_cache>());
+
 					if (!ppu_load_exec(obj, true, path))
 					{
 						// Abort
@ -3568,16 +3914,27 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 						break;
 					}

+					if (std::memcmp(main_module.sha1, _main.sha1, sizeof(_main.sha1)) == 0)
+					{
+						g_fxo->get<spu_cache>() = std::move(current_cache);
+						break;
+					}
+
 					if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_pathes, [](){ return Emu.IsStopped(); }))
 					{
+						g_fxo->get<spu_cache>() = std::move(current_cache);
 						break;
 					}

 					obj.clear(), src.close(); // Clear decrypted file and elf object memory

+					_main.name = ' '; // Make ppu_finalize work
+					Emu.ConfigurePPUCache(!Emu.IsPathInsideDir(_main.path, g_cfg_vfs.get_dev_flash()));
 					ppu_initialize(_main);
+					spu_cache::initialize(false);
 					ppu_finalize(_main);
 					_main = {};
+					g_fxo->get<spu_cache>() = std::move(current_cache);
 					break;
 				}

@ -3588,10 +3945,11 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
 			}

 			ppu_log.notice("Failed to precompile '%s' as executable (%s)", path, exec_err);
-			continue;
 		}

 		g_fxo->get<main_ppu_module>() = std::move(main_module);
+		g_fxo->get<spu_cache>().collect_funcs_to_precompile = true;
+		Emu.ConfigurePPUCache();
 	});

 	exec_worker();
@ -3622,7 +3980,7 @@ extern void ppu_initialize()
 	// Validate analyser results (not required)
 	_main.validate(0);

-	g_progr = "Scanning PPU Modules...";
+	progr = "Scanning PPU Modules...";

 	bool compile_main = false;

@ -3637,7 +3995,7 @@ extern void ppu_initialize()
 	const std::string firmware_sprx_path = vfs::get("/dev_flash/sys/external/");

 	// If empty we have no indication for firmware cache state, check everything
-	bool compile_fw = true;
+	bool compile_fw = !Emu.IsVsh();

 	idm::select<lv2_obj, lv2_prx>([&](u32, lv2_prx& _module)
 	{
@ -3683,7 +4041,7 @@ extern void ppu_initialize()

 	const std::string mount_point = vfs::get("/dev_flash/");

-	bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point);
+	bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point) && g_cfg.core.llvm_precompilation;

 	if (compile_fw || dev_flash_located)
 	{
@ -3695,8 +4053,6 @@ extern void ppu_initialize()
 			{
 				// Check if cache exists for this infinitesimally small prx
 				dev_flash_located = ppu_initialize(*prx, true);
-				idm::remove<lv2_obj, lv2_prx>(idm::last_id());
-				ppu_unload_prx(*prx);
 			}
 		}

@ -3705,7 +4061,7 @@ extern void ppu_initialize()
 	}

 	// Avoid compilation if main's cache exists or it is a standalone SELF with no PARAM.SFO
-	if (compile_main && g_cfg.core.ppu_llvm_precompilation && !Emu.GetTitleID().empty() && !Emu.IsChildProcess())
+	if (compile_main && g_cfg.core.llvm_precompilation && !Emu.GetTitleID().empty() && !Emu.IsChildProcess())
 	{
 		// Try to add all related directories
 		const std::set<std::string> dirs = Emu.GetGameDirs();
@ -3756,10 +4112,16 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
 		{
 			for (auto& block : func.blocks)
 			{
+				if (g_fxo->is_init<ppu_far_jumps_t>() && !g_fxo->get<ppu_far_jumps_t>().get_targets(block.first, block.second).empty())
+				{
+					// Replace the block with ppu_far_jump
+					continue;
+				}
+
 				ppu_register_function_at(block.first, block.second);
 			}

-			if (g_cfg.core.ppu_debug && func.size && func.toc != umax)
+			if (g_cfg.core.ppu_debug && func.size && func.toc != umax && !ppu_get_far_jump(func.addr))
 			{
 				ppu_toc[func.addr] = func.toc;
 				ppu_ref(func.addr) = &ppu_check_toc;
@ -3808,7 +4170,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
 	// Get cache path for this executable
 	std::string cache_path;

-	if (info.name.empty())
+	if (!info.cache.empty())
 	{
 		cache_path = info.cache;
 	}
@ -3858,7 +4220,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
 	};

 	// Permanently loaded compiled PPU modules (name -> data)
-	jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + info.name + "_" + std::to_string(info.segs[0].addr));
+	jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));

 	// Compiler instance (deferred initialization)
 	std::shared_ptr<jit_compiler>& jit = jit_mod.pjit;
@ -4203,7 +4565,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)

 	if (!workload.empty())
 	{
-		g_progr = "Compiling PPU modules...";
+		*progr = "Compiling PPU modules...";
 	}

 	// Create worker threads for compilation (TODO: how many threads)
@ -4271,10 +4633,10 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
 		if (workload.size() < link_workload.size())
 		{
 			// Only show this message if this task is relevant
-			g_progr = "Linking PPU modules...";
+			*progr = "Linking PPU modules...";
 		}

-		for (auto [obj_name, is_compiled] : link_workload)
+		for (const auto& [obj_name, is_compiled] : link_workload)
 		{
 			if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
 			{
--- a/rpcs3/Emu/Cell/PPUThread.h
+++ b/rpcs3/Emu/Cell/PPUThread.h
@ -262,6 +262,7 @@ public:
 	u64 rtime{0};
 	alignas(64) std::byte rdata[128]{}; // Reservation data
 	bool use_full_rdata{};
+	u32 res_notify{};

 	union ppu_prio_t
 	{
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@ -201,8 +201,12 @@ Function* PPUTranslator::Translate(const ppu_function& info)
 		const auto vcheck = BasicBlock::Create(m_context, "__test", m_function);
 		m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely);

-		// Create tail call to the check function
 		m_ir->SetInsertPoint(vcheck);
+
+		// Raise wait flag as soon as possible
+		m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, ptr, m_ir->getInt32((+cpu_flag::wait).operator u32()), llvm::MaybeAlign{4}, llvm::AtomicOrdering::AcquireRelease);
+
+		// Create tail call to the check function
 		Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
 		m_ir->CreateRetVoid();
 	}
@ -3306,8 +3310,11 @@ void PPUTranslator::SRD(ppu_opcode_t op)
 void PPUTranslator::LVRX(ppu_opcode_t op)
 {
 	const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
-	const auto data = ReadMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>(), m_is_be, 16);
-	set_vr(op.vd, pshufb(value<u8[16]>(data), build<u8[16]>(255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240) + vsplat<u8[16]>(trunc<u8>(value<u64>(addr) & 0xf))));
+	const auto offset = eval(trunc<u8>(value<u64>(addr) & 0xf));
+
+	// Read from instruction address if offset is 0, this prevents accessing potentially bad memory from addr (because no actual memory is dereferenced)
+	const auto data = ReadMemory(m_ir->CreateAnd(m_ir->CreateSelect(m_ir->CreateIsNull(offset.value), m_reloc ? m_seg0 : GetAddr(0), addr), ~0xfull), GetType<u8[16]>(), m_is_be, 16);
+	set_vr(op.vd, pshufb(value<u8[16]>(data), build<u8[16]>(255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240) + vsplat<u8[16]>(offset)));
 }

 void PPUTranslator::LSWI(ppu_opcode_t op)
@ -3613,11 +3620,11 @@ void PPUTranslator::LWZ(ppu_opcode_t op)
 	m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
 	m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset

-	if (m_may_be_mmio && !op.simm16)
+	if (m_may_be_mmio)
 	{
 		struct instructions_data
 		{
-			be_t<u32> insts[2];
+			be_t<u32> insts[3];
 		};

 		// Quick invalidation: expect exact MMIO address, so if the register is being reused with different offset than it's likely not MMIO
@ -3633,6 +3640,12 @@ void PPUTranslator::LWZ(ppu_opcode_t op)
 					continue;
 				}

+				if (op.simm16 && spu_thread::test_is_problem_state_register_offset(test_op.uimm16, true, false))
+				{
+					// Found register reuse with different MMIO offset
+					continue;
+				}
+
 				switch (g_ppu_itype.decode(inst))
 				{
 				case ppu_itype::LWZ:
@ -3710,7 +3723,7 @@ void PPUTranslator::STW(ppu_opcode_t op)
 	m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
 	m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset

-	if (m_may_be_mmio && !op.simm16)
+	if (m_may_be_mmio)
 	{
 		struct instructions_data
 		{
@ -3730,6 +3743,12 @@ void PPUTranslator::STW(ppu_opcode_t op)
 					continue;
 				}

+				if (op.simm16 && spu_thread::test_is_problem_state_register_offset(test_op.uimm16, false, true))
+				{
+					// Found register reuse with different MMIO offset
+					continue;
+				}
+
 				switch (g_ppu_itype.decode(inst))
 				{
 				case ppu_itype::LWZ:
--- a/rpcs3/Emu/Cell/RawSPUThread.cpp
+++ b/rpcs3/Emu/Cell/RawSPUThread.cpp
@ -21,7 +21,7 @@ inline void try_start(spu_thread& spu)
 	}).second)
 	{
 		spu.state -= cpu_flag::stop;
-		spu.state.notify_one(cpu_flag::stop);
+		spu.state.notify_one();
 	}
 };

@ -273,7 +273,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)

 				for (status_npc_sync_var old; (old = status_npc).status & SPU_STATUS_RUNNING;)
 				{
-					status_npc.wait(old);
+					utils::bless<atomic_t<u32>>(&status_npc)[0].wait(old.status);
 				}
 			}
 		}
@ -382,6 +382,18 @@ void spu_load_exec(const spu_exec_object& elf)

 	spu->status_npc = {SPU_STATUS_RUNNING, elf.header.e_entry};
 	atomic_storage<u32>::release(spu->pc, elf.header.e_entry);
+
+	const auto funcs = spu->discover_functions(0, { spu->ls , SPU_LS_SIZE }, true, umax);
+
+	for (u32 addr : funcs)
+	{
+		spu_log.success("Found SPU function at: 0x%08x", addr);
+	}
+
+	if (!funcs.empty())
+	{
+		spu_log.success("Found %u SPU functions", funcs.size());
+	}
 }

 void spu_load_rel_exec(const spu_rel_object& elf)
@ -410,7 +422,7 @@ void spu_load_rel_exec(const spu_rel_object& elf)
 	{
 		if (shdr.sh_type == sec_type::sht_progbits && shdr.sh_flags().all_of(sh_flag::shf_alloc))
 		{
-			std::memcpy(spu->_ptr<void>(offs), shdr.bin.data(), shdr.sh_size);
+			std::memcpy(spu->_ptr<void>(offs), shdr.get_bin().data(), shdr.sh_size);
 			offs = utils::align<u32>(offs + shdr.sh_size, 4);
 		}
 	}
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@ -2607,6 +2607,12 @@ void spu_recompiler::BI(spu_opcode_t op)
 	{
 		spu_log.error("[0x%x] BI: no targets", m_pos);
 	}
+	else if (op.d && found->second.size() == 1 && found->second[0] == spu_branch_target(m_pos, 1))
+	{
+		// Interrupts-disable pattern
+		c->mov(SPU_OFF_8(interrupts_enabled), 0);
+		return;
+	}

 	c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
 	c->and_(*addr, 0x3fffc);
--- a/rpcs3/Emu/Cell/SPUAnalyser.h
+++ b/rpcs3/Emu/Cell/SPUAnalyser.h
@ -14,12 +14,13 @@ struct spu_itype
 	static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions
 	static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions
 	static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values
+	static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR

 	enum type : unsigned char
 	{
 		UNK = 0,

-		HEQ,
+		HEQ, // zregmod_tag first
 		HEQI,
 		HGT,
 		HGTI,
@ -36,11 +37,21 @@ struct spu_itype
 		NOP,
 		SYNC,
 		DSYNC,
-		MFSPR,
 		MTSPR,
+		WRCH,
+
+		STQD, // memory_tag first
+		STQX,
+		STQA,
+		STQR, // zregmod_tag last
+		LQD,
+		LQX,
+		LQA,
+		LQR, // memory_tag last
+
+		MFSPR,
 		RDCH,
 		RCHCNT,
-		WRCH,

 		BR, // branch_tag first
 		BRA,
@ -59,15 +70,6 @@ struct spu_itype
 		BIHZ,
 		BIHNZ, // branch_tag last

-		LQD, // memory_tag first
-		LQX,
-		LQA,
-		LQR,
-		STQD,
-		STQX,
-		STQA,
-		STQR, // memory_tag last
-
 		ILH, // constant_tag_first
 		ILHU,
 		IL,
@ -267,7 +269,7 @@ struct spu_itype
 	// Test for memory instruction
 	friend constexpr bool operator &(type value, memory_tag)
 	{
-		return value >= LQD && value <= STQR;
+		return value >= STQD && value <= LQR;
 	}

 	// Test for compare instruction
@ -293,6 +295,12 @@ struct spu_itype
 	{
 		return value >= ILH && value <= FSMBI;
 	}
+
+	// True when the instruction type lies in the HEQ..STQR enumerator range (tagged "zregmod": no GPR is modified)
+	friend constexpr bool operator &(type value, zregmod_tag)
+	{
+		return !(value < HEQ || value > STQR);
+	}
 };

 struct spu_iflag
--- a/rpcs3/Emu/Cell/SPUDisAsm.h
+++ b/rpcs3/Emu/Cell/SPUDisAsm.h
@ -851,6 +851,13 @@ public:
 	}
 	void BR(spu_opcode_t op)
 	{
+		if (op.rt && op.rt != 127u)
+		{
+			// Valid but makes no sense
+			DisAsm("br??", DisAsmBranchTarget(op.i16));
+			return;
+		}
+
 		DisAsm("br", DisAsmBranchTarget(op.i16));
 	}
 	void FSMBI(spu_opcode_t op)
--- a/rpcs3/Emu/Cell/SPUOpcodes.h
+++ b/rpcs3/Emu/Cell/SPUOpcodes.h
@ -26,17 +26,17 @@ union spu_opcode_t
 	bf_t<u32, 7, 18> i18; // 7..24
 };

-inline u32 spu_branch_target(u32 pc, u32 imm = 0)
+constexpr u32 spu_branch_target(u32 pc, u32 imm = 0)
 {
 	return (pc + (imm << 2)) & 0x3fffc;
 }

-inline u32 spu_ls_target(u32 pc, u32 imm = 0)
+constexpr u32 spu_ls_target(u32 pc, u32 imm = 0)
 {
 	return (pc + (imm << 2)) & 0x3fff0;
 }

-inline u32 spu_decode(u32 inst)
+constexpr u32 spu_decode(u32 inst)
 {
 	return inst >> 21;
 }
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@ -516,6 +516,70 @@ spu_cache::~spu_cache()
 {
 }

+// Scan a segment of SPU program data for function entry points and queue what was found
+// in spu_cache::precompile_funcs.
+// vaddr:         address passed to the analyser as the segment base; must be 4-byte aligned
+// ls_data_vaddr: host pointer to the segment bytes
+// size:          segment length in bytes; rounded down to a multiple of 4
+// Does nothing if the range does not fit in SPU_LS_SIZE, if LLVM precompilation is disabled,
+// if the cache is not currently collecting functions, or if no function was discovered.
+extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size)
+{
+	if (vaddr % 4)
+	{
+		return;
+	}
+
+	size &= -4;
+
+	// Deliberately not written as "vaddr + size > SPU_LS_SIZE": that sum is computed in u32
+	// and wraps around for a large vaddr, which would let an out-of-range segment through
+	if (!size || vaddr > SPU_LS_SIZE || size > SPU_LS_SIZE - vaddr)
+	{
+		return;
+	}
+
+	if (!g_cfg.core.llvm_precompilation)
+	{
+		return;
+	}
+
+	g_fxo->need<spu_cache>();
+
+	if (!g_fxo->get<spu_cache>().collect_funcs_to_precompile)
+	{
+		return;
+	}
+
+	// Analyse first: the instruction copy below is only needed when there is something to queue
+	auto funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast<const u8*>(ls_data_vaddr), size }, vaddr != 0, umax);
+
+	if (funcs.empty())
+	{
+		// Nothing to add
+		return;
+	}
+
+	// Keep a private copy of the instructions, the source buffer is not owned by the cache
+	std::basic_string<u32> data(size / 4, 0);
+	std::memcpy(data.data(), ls_data_vaddr, size);
+
+	spu_cache::precompile_data_t obj{vaddr, std::move(data)};
+	obj.funcs = std::move(funcs);
+
+	for (u32 addr : obj.funcs)
+	{
+		spu_log.notice("Found SPU function at: 0x%05x", addr);
+	}
+
+	spu_log.notice("Found %u SPU functions", obj.funcs.size());
+
+	g_fxo->get<spu_cache>().precompile_funcs.push(std::move(obj));
+}
+
+// 16-bit checksum used to validate SPU cache entries.
+// Starts from all-ones and folds in one byte per step; the shift pattern (12, 5, 0) looks like
+// the usual table-free CCITT (0x1021) update - NOTE(review): confirm before relying on the name.
+// An empty input returns the initial value.
+static u16 calculate_crc16(const uchar* data, usz length)
+{
+	u16 crc = umax;
+
+	for (const uchar* const end = data + length; data != end; ++data)
+	{
+		// Mix the incoming byte with the high byte of the running value
+		u8 mix = static_cast<u8>((crc >> 8) ^ *data);
+		mix ^= (mix >> 4);
+
+		crc = static_cast<u16>((crc << 8) ^ (mix << 12) ^ (mix << 5) ^ mix);
+	}
+
+	return crc;
+}
+
 std::deque<spu_program> spu_cache::get()
 {
 	std::deque<spu_program> result;
@ -530,18 +594,30 @@ std::deque<spu_program> spu_cache::get()
 	// TODO: signal truncated or otherwise broken file
 	while (true)
 	{
-		be_t<u32> size;
-		be_t<u32> addr;
-		std::vector<u32> func;
+		struct block_info_t
+		{
+			be_t<u16> crc;
+			be_t<u16> size;
+			be_t<u32> addr;
+		} block_info{};

-		if (!m_file.read(size) || !m_file.read(addr))
+		if (!m_file.read(block_info))
 		{
 			break;
 		}

-		func.resize(size);
+		const u32 crc = block_info.crc;
+		const u32 size = block_info.size;
+		const u32 addr = block_info.addr;

-		if (m_file.read(func.data(), func.size() * 4) != func.size() * 4)
+		if (utils::add_saturate<u32>(addr, size * 4) > SPU_LS_SIZE)
+		{
+			break;
+		}
+
+		std::vector<u32> func;
+
+		if (!m_file.read(func, size))
 		{
 			break;
 		}
@ -552,6 +628,13 @@ std::deque<spu_program> spu_cache::get()
 			continue;
 		}

+		// CRC check is optional to be compatible with old format
+		if (crc && std::max<u32>(calculate_crc16(reinterpret_cast<const uchar*>(func.data()), size * 4), 1) != crc)
+		{
+			// Invalid, but continue anyway
+			continue;
+		}
+
 		spu_program res;
 		res.entry_point = addr;
 		res.lower_bound = addr;
@ -572,6 +655,9 @@ void spu_cache::add(const spu_program& func)
 	be_t<u32> size = ::size32(func.data);
 	be_t<u32> addr = func.entry_point;

+	// Add CRC (forced non-zero)
+	size |= std::max<u32>(calculate_crc16(reinterpret_cast<const uchar*>(func.data.data()), size * 4), 1) << 16;
+
 	const fs::iovec_clone gather[3]
 	{
 		{&size, sizeof(size)},
@ -583,7 +669,7 @@ void spu_cache::add(const spu_program& func)
 	m_file.write_gather(gather, 3);
 }

-void spu_cache::initialize()
+void spu_cache::initialize(bool build_existing_cache)
 {
 	spu_runtime::g_interpreter = spu_runtime::g_gateway;

@ -618,6 +704,35 @@ void spu_cache::initialize()
 	atomic_t<usz> fnext{};
 	atomic_t<u8> fail_flag{0};

+	auto data_list = g_fxo->get<spu_cache>().precompile_funcs.pop_all();
+	g_fxo->get<spu_cache>().collect_funcs_to_precompile = false;
+
+	u32 total_precompile = 0;
+
+	for (auto& sec : data_list)
+	{
+		total_precompile += sec.funcs.size();
+	}
+
+	const bool spu_precompilation_enabled = func_list.empty() && g_cfg.core.spu_cache && g_cfg.core.llvm_precompilation;
+
+	if (spu_precompilation_enabled)
+	{
+		// What compiles in this case goes straight to disk
+		g_fxo->get<spu_cache>() = std::move(cache);
+	}
+	else if (!build_existing_cache)
+	{
+		return;
+	}
+	else
+	{
+		total_precompile = 0;
+		data_list = {};
+	}
+
+	atomic_t<usz> data_indexer = 0;
+
 	if (g_cfg.core.spu_decoder == spu_decoder_type::dynamic || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
 	{
 		if (auto compiler = spu_recompiler_base::make_llvm_recompiler(11))
@ -647,10 +762,24 @@ void spu_cache::initialize()
 	if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
 	{
 		// Initialize progress dialog (wait for previous progress done)
-		thread_ctrl::wait_on<atomic_wait::op_ne>(g_progr_ptotal, 0);
+		while (u32 v = g_progr_ptotal)
+		{
+			if (Emu.IsStopped() || !build_existing_cache)
+			{
+				// Workaround: disable progress dialog updates in the case of sole SPU precompilation
+				break;
+			}

-		g_progr_ptotal += ::size32(func_list);
-		progr.emplace("Building SPU cache...");
+			thread_ctrl::wait_on(g_progr_ptotal, v);
+		}
+
+		const u32 add_count = ::size32(func_list) + total_precompile;
+
+		if (add_count)
+		{
+			g_progr_ptotal += build_existing_cache ? add_count : 0;
+			progr.emplace("Building SPU cache...");
+		}

 		worker_count = rpcs3::utils::get_max_threads();
 	}
@ -684,7 +813,7 @@ void spu_cache::initialize()
 		std::vector<be_t<u32>> ls(0x10000);

 		// Build functions
-		for (usz func_i = fnext++; func_i < func_list.size(); func_i = fnext++, g_progr_pdone++)
+		for (usz func_i = fnext++; func_i < func_list.size(); func_i = fnext++, g_progr_pdone += build_existing_cache ? 1 : 0)
 		{
 			const spu_program& func = std::as_const(func_list)[func_i];

@ -736,6 +865,7 @@ void spu_cache::initialize()
 			{
 				// Likely, out of JIT memory. Signal to prevent further building.
 				fail_flag |= 1;
+				continue;
 			}

 			// Clear fake LS
@ -744,15 +874,171 @@ void spu_cache::initialize()
 			result++;
 		}

+		u32 last_sec_idx = umax;
+
+		for (usz func_i = data_indexer++;; func_i = data_indexer++, g_progr_pdone += build_existing_cache ? 1 : 0)
+		{
+			u32 passed_count = 0;
+			u32 func_addr = 0;
+			u32 next_func = 0;
+			u32 sec_addr = umax;
+			u32 sec_idx = 0;
+			std::basic_string_view<u32> inst_data;
+
+			// Try to get the data this index points to
+			// Map the flat work index onto (section, function-within-section)
+			for (auto& sec : data_list)
+			{
+				if (func_i < passed_count + sec.funcs.size())
+				{
+					const u32 func_idx = func_i - passed_count;
+					sec_addr = sec.vaddr;
+					func_addr = ::at32(sec.funcs, func_idx);
+
+					// Upper bound for this function: the start of the following one, or the end of LS for the last entry.
+					// Relies on sec.funcs being sorted in ascending order.
+					// (The previous test "size() >= func_idx + 1" always held here, so the bound was always SPU_LS_SIZE.)
+					next_func = func_idx + 1 >= sec.funcs.size() ? SPU_LS_SIZE : sec.funcs[func_idx + 1];
+					inst_data = sec.inst_data;
+					break;
+				}
+
+				passed_count += sec.funcs.size();
+				sec_idx++;
+			}
+
+			if (sec_addr == umax)
+			{
+				// End of compilation for thread
+				break;
+			}
+
+			if (Emu.IsStopped() || fail_flag)
+			{
+				continue;
+			}
+
+			if (last_sec_idx != sec_idx)
+			{
+				if (last_sec_idx != umax)
+				{
+					// Clear fake LS of previous section
+					auto& sec = data_list[last_sec_idx];
+					std::memset(ls.data() + sec.vaddr / 4, 0, sec.inst_data.size() * 4);
+				}
+
+				// Initialize LS with the entire section data
+				for (u32 i = 0, pos = sec_addr; i < inst_data.size(); i++, pos += 4)
+				{
+					ls[pos / 4] =  std::bit_cast<be_t<u32>>(inst_data[i]);
+				}
+
+				last_sec_idx = sec_idx;
+			}
+
+			u32 block_addr = func_addr;
+
+			// Call analyser
+			spu_program func2 = compiler->analyse(ls.data(), block_addr);
+
+			std::map<u32, std::basic_string<u32>> targets;
+
+			while (!func2.data.empty())
+			{
+				const u32 last_inst = std::bit_cast<be_t<u32>>(func2.data.back());
+				const u32 prog_size = func2.data.size();
+
+				if (!compiler->compile(std::move(func2)))
+				{
+					// Likely, out of JIT memory. Signal to prevent further building.
+					fail_flag |= 1;
+					break;
+				}
+
+				result++;
+
+				const u32 start_new = block_addr + prog_size * 4;
+
+				if (start_new >= next_func || (start_new == next_func - 4 && ls[start_new / 4] == 0x200000u))
+				{
+					// Completed
+					break;
+				}
+
+				targets.insert(compiler->get_targets().begin(), compiler->get_targets().end());
+
+				if (auto type = g_spu_itype.decode(last_inst);
+					type == spu_itype::BRSL || type == spu_itype::BRASL || type == spu_itype::BISL || type == spu_itype::SYNC)
+				{
+					if (ls[start_new / 4] && g_spu_itype.decode(ls[start_new / 4]) != spu_itype::UNK)
+					{
+						spu_log.notice("Precompiling fallthrough to 0x%05x", start_new);
+						func2 = compiler->analyse(ls.data(), start_new);
+						block_addr = start_new;
+						continue;
+					}
+				}
+
+				if (targets.empty())
+				{
+					break;
+				}
+
+				const auto upper = targets.upper_bound(func_addr);
+
+				if (upper == targets.begin())
+				{
+					break;
+				}
+
+				u32 new_entry = umax;
+
+				// Find the lowest target in the space in-between
+				for (auto it = std::prev(upper); it != targets.end() && it->first < start_new && new_entry > start_new; it++)
+				{
+					for (u32 target : it->second)
+					{
+						if (target >= start_new && target < next_func)
+						{
+							if (target < new_entry)
+							{
+								new_entry = target;
+
+								if (new_entry == start_new)
+								{
+									// Cannot go lower
+									break;
+								}
+							}
+						}
+					}
+				}
+
+				if (new_entry == umax)
+				{
+					break;
+				}
+
+				if (!spu_thread::is_exec_code(new_entry, { reinterpret_cast<const u8*>(ls.data()), SPU_LS_SIZE }))
+				{
+					break;
+				}
+
+				spu_log.notice("Precompiling filler space at 0x%05x (next=0x%05x)", new_entry, next_func);
+				func2 = compiler->analyse(ls.data(), new_entry);
+				block_addr = new_entry;
+			}
+		}
+
 		return result;
 	});

+	u32 built_total = 0;
+
 	// Join (implicitly) and print individual results
 	for (u32 i = 0; i < workers.size(); i++)
 	{
 		spu_log.notice("SPU Runtime: Worker %u built %u programs.", i + 1, workers[i]);
+		built_total += workers[i];
 	}

+	spu_log.notice("SPU Runtime: Workers built %u programs.", built_total);
+
 	if (Emu.IsStopped())
 	{
 		spu_log.error("SPU Runtime: Cache building aborted.");
@ -873,7 +1159,7 @@ void spu_cache::initialize()
 	}

 	// Initialize global cache instance
-	if (g_cfg.core.spu_cache)
+	if (g_cfg.core.spu_cache && cache)
 	{
 		g_fxo->get<spu_cache>() = std::move(cache);
 	}
@ -1896,6 +2182,149 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/
 	}
 }

+// Heuristically locate function entry points in a chunk of SPU code.
+// base_addr:     address that corresponds to ls[0]
+// ls:            bytes to analyse
+// is_known_addr: when false, BRASL (absolute call) matching is disabled
+// The last parameter (entry) is currently unused.
+// Returns the discovered entry addresses sorted in ascending order, without duplicates.
+std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
+{
+	std::vector<u32> calls;
+	std::vector<u32> branches;
+
+	calls.reserve(100);
+
+	// Discover functions
+	// Use the most simple method: search for instructions that calls them
+	// And then filter invalid cases
+	// TODO: Does not detect jumptables or fixed-addr indirect calls
+	const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
+
+	// End of the scanned address range (shared by the scan below and the tail-call search)
+	const u32 scan_end = std::min<u32>(base_addr + ls.size(), 0x3FFF0);
+
+	for (u32 i = utils::align<u32>(base_addr, 0x10); i < scan_end; i += 0x10)
+	{
+		const usz offset = i - base_addr;
+		const usz avail = ls.size() - offset;
+		const u8* src = ls.data() + offset;
+
+		// The data may end inside this 16-byte group (its size is not required to be a multiple of 16).
+		// Load the remainder through a zero-filled buffer instead of reading past the end of ls;
+		// an all-zero word matches none of the patterns below.
+		u8 tail[16]{};
+
+		if (avail < sizeof(tail))
+		{
+			std::memcpy(tail, src, avail);
+			src = tail;
+		}
+
+		// Search for BRSL LR and BRASL LR or BR
+		// TODO: BISL
+		const v128 inst = read_from_ptr<be_t<v128>>(src, 0);
+		const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
+		const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
+		const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
+		const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
+		const v128 result = eq_brsl | eq_brasl;
+
+		if (!gv_testz(result))
+		{
+			for (u32 j = 0; j < 4; j++)
+			{
+				if (result.u32r[j])
+				{
+					calls.push_back(i + j * 4);
+				}
+			}
+		}
+
+		if (!gv_testz(eq_br))
+		{
+			for (u32 j = 0; j < 4; j++)
+			{
+				if (eq_br.u32r[j])
+				{
+					branches.push_back(i + j * 4);
+				}
+			}
+		}
+	}
+
+	calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
+	{
+		// Keep a call site only if is_exec_code accepts both the call itself and the instruction following it
+		return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr);
+	}), calls.end());
+
+	branches.erase(std::remove_if(branches.begin(), branches.end(), [&](u32 caller)
+	{
+		// Keep a branch site only if is_exec_code accepts it
+		return !is_exec_code(caller, ls, base_addr);
+	}), branches.end());
+
+	std::vector<u32> addrs;
+
+	// Every surviving call contributes its target, unless it is degenerate or already recorded
+	for (u32 addr : calls)
+	{
+		const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};
+
+		const u32 func = op_branch_targets(addr, op)[0];
+
+		if (func == umax || addr + 4 == func || func == addr || std::count(addrs.begin(), addrs.end(), func))
+		{
+			continue;
+		}
+
+		addrs.push_back(func);
+	}
+
+	// A plain branch only counts when the preceding instructions make it look like a tail call
+	for (u32 addr : branches)
+	{
+		const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};
+
+		const u32 func = op_branch_targets(addr, op)[0];
+
+		if (func == umax || addr + 4 == func || func == addr || !addr)
+		{
+			continue;
+		}
+
+		// Search for AI R1, +x or OR R3/4, Rx, 0
+		// Reasoning: AI R1, +x means stack pointer restoration, branch after that is likely a tail call
+		// R3 and R4 are common function arguments because they are the first two
+		// At most 10 instructions are inspected, walking backwards from the branch
+		for (u32 back = addr - 4, it = 10; it && back >= base_addr && back < scan_end; it--, back -= 4)
+		{
+			const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, back - base_addr)};
+			const auto type = g_spu_itype.decode(test_op.opcode);
+
+			if (type & spu_itype::branch)
+			{
+				break;
+			}
+
+			bool is_tail = false;
+
+			if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u)
+			{
+				if (test_op.si10 <= 0)
+				{
+					break;
+				}
+
+				is_tail = true;
+			}
+			else if (!(type & spu_itype::zregmod))
+			{
+				const u32 op_rt = type & spu_itype::_quadrop ? +test_op.rt4 : +test_op.rt;
+
+				if (op_rt >= 80u && (type != spu_itype::LQD || test_op.ra != 1u))
+				{
+					// Modifying non-volatile registers, not a call (and not context restoration)
+					break;
+				}
+
+				//is_tail = op_rt == 3u || op_rt == 4u;
+			}
+
+			if (!is_tail)
+			{
+				continue;
+			}
+
+			if (std::count(addrs.begin(), addrs.end(), func))
+			{
+				break;
+			}
+
+			addrs.push_back(func);
+			break;
+		}
+	}
+
+	std::sort(addrs.begin(), addrs.end());
+
+	return addrs;
+}
+
 spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
 {
 	// Result: addr + raw instruction data
@ -2105,6 +2534,12 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)

 				spu_log.warning("[0x%x] At 0x%x: indirect branch to 0x%x%s", entry_point, pos, target, op.d ? " (D)" : op.e ? " (E)" : "");

+				if (type == spu_itype::BI && target == pos + 4 && op.d)
+				{
+					// Disable interrupts idiom
+					break;
+				}
+
 				m_targets[pos].push_back(target);

 				if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
@ -5313,7 +5748,7 @@ public:
 						if (src > 0x40000)
 						{
 							// Use the xfloat hint to create 256-bit (4x double) PHI
-							llvm::Type* type = g_cfg.core.spu_accurate_xfloat && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
+							llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);

 							const auto _phi = m_ir->CreatePHI(type, ::size32(bb.preds), fmt::format("phi0x%05x_r%u", baddr, i));
 							m_block->phi[i] = _phi;
@ -6513,6 +6948,24 @@ public:
 		}
 		case SPU_RdDec:
 		{
+			if (utils::get_tsc_freq() && !(g_cfg.core.spu_loop_detection) && (g_cfg.core.clocks_scale == 100))
+			{
+				const auto timestamp = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
+				const auto dec_value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_dec_value));
+				const auto tsc = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_rdtsc));
+				const auto tscx = m_ir->CreateMul(m_ir->CreateUDiv(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000));
+				const auto tscm = m_ir->CreateUDiv(m_ir->CreateMul(m_ir->CreateURem(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000)), m_ir->getInt64(utils::get_tsc_freq()));
+				const auto tsctb = m_ir->CreateAdd(tscx, tscm);
+
+				const auto frz = m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(&spu_thread::is_dec_frozen));
+				const auto frzev = m_ir->CreateICmpEQ(frz, m_ir->getInt8(0));
+
+				const auto delta = m_ir->CreateTrunc(m_ir->CreateSub(tsctb, timestamp), get_type<u32>());
+				const auto deltax = m_ir->CreateSelect(frzev, delta, m_ir->getInt32(0));
+				res.value = m_ir->CreateSub(dec_value, deltax);
+				break;
+			}
+
 			res.value = call("spu_read_decrementer", &exec_read_dec, m_thread);
 			break;
 		}
@ -7180,7 +7633,20 @@ public:
 		case SPU_WrDec:
 		{
 			call("spu_get_events", &exec_get_events, m_thread, m_ir->getInt32(SPU_EVENT_TM));
-			m_ir->CreateStore(call("get_timebased_time", &get_timebased_time), spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
+
+			if (utils::get_tsc_freq() && !(g_cfg.core.spu_loop_detection) && (g_cfg.core.clocks_scale == 100))
+			{
+				const auto tsc = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_rdtsc));
+				const auto tscx = m_ir->CreateMul(m_ir->CreateUDiv(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000));
+				const auto tscm = m_ir->CreateUDiv(m_ir->CreateMul(m_ir->CreateURem(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000)), m_ir->getInt64(utils::get_tsc_freq()));
+				const auto tsctb = m_ir->CreateAdd(tscx, tscm);
+				m_ir->CreateStore(tsctb, spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
+			}
+			else
+			{
+				m_ir->CreateStore(call("get_timebased_time", &get_timebased_time), spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
+			}
+
 			m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_dec_value));
 			m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(&spu_thread::is_dec_frozen));
 			return;
@ -7788,19 +8254,27 @@ public:
 	void ROTQMBY(spu_opcode_t op)
 	{
 		const auto a = get_vr<u8[16]>(op.ra);
-		const auto b = get_vr<u8[16]>(op.rb);
+		const auto b = get_vr<u32[4]>(op.rb);
+
+		auto minusb = eval(-b);
+		if (auto [ok, x] = match_expr(b, -match<u32[4]>()); ok)
+		{
+			minusb = eval(x);
+		}
+
+		const auto minusbx = bitcast<u8[16]>(minusb);

 		// Data with swapped endian from a load instruction
 		if (auto [ok, as] = match_expr(a, byteswap(match<u8[16]>())); ok)
 		{
 			const auto sc = build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-			const auto sh = sc - (-splat_scalar(b) & 0x1f);
+			const auto sh = sc - (splat_scalar(minusbx) & 0x1f);
 			set_vr(op.rt, pshufb(as, sh));
 			return;
 		}

 		const auto sc = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127);
-		const auto sh = sc + (-splat_scalar(b) & 0x1f);
+		const auto sh = sc + (splat_scalar(minusbx) & 0x1f);
 		set_vr(op.rt, pshufb(a, sh));
 	}

@ -8627,6 +9101,20 @@ public:
 			{
 				if (data == v128::from8p(data._u8[0]))
 				{
+					if (m_use_avx512_icl)
+					{
+						if (perm_only)
+						{
+							set_vr(op.rt4, vperm2b256to128(as, b, c));
+							return;
+						}
+
+						const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
+						const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
+						const auto ab = vperm2b256to128(as, b, c);
+						set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
+						return;
+					}
 					// See above
 					const auto x = pshufb(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
 					const auto ax = pshufb(as, c);
@ -8661,6 +9149,42 @@ public:

 		if (m_use_avx512_icl && (op.ra != op.rb || m_interp_magn))
 		{
+			if (auto [ok, data] = get_const_vector(b.value, m_pos); ok)
+			{
+				if (data == v128::from8p(data._u8[0]))
+				{
+					if (perm_only)
+					{
+						set_vr(op.rt4, vperm2b256to128(a, b, eval(c ^ 0xf)));
+						return;
+					}
+
+					const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
+					const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
+					const auto ab = vperm2b256to128(a, b, eval(c ^ 0xf));
+					set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
+					return;
+				}
+			}
+
+			if (auto [ok, data] = get_const_vector(a.value, m_pos); ok)
+			{
+				if (data == v128::from8p(data._u8[0]))
+				{
+					if (perm_only)
+					{
+						set_vr(op.rt4, vperm2b256to128(b, a, eval(c ^ 0x1f)));
+						return;
+					}
+
+					const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
+					const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
+					const auto ab = vperm2b256to128(b, a, eval(c ^ 0x1f));
+					set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
+					return;
+				}
+			}
+
 			if (perm_only)
 			{
 				set_vr(op.rt4, vperm2b(a, b, eval(c ^ 0xf)));
@ -8865,7 +9389,7 @@ public:
 	void FREST(spu_opcode_t op)
 	{
 		// TODO
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			const auto a = get_vr<f32[4]>(op.ra);
 			const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
@ -8874,7 +9398,7 @@ public:
 			return;
 		}

-		if (g_cfg.core.spu_approx_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 		{
 			register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
 			{
@ -8907,13 +9431,13 @@ public:
 	void FRSQEST(spu_opcode_t op)
 	{
 		// TODO
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, fsplat<f64[4]>(1.0) / fsqrt(fabs(get_vr<f64[4]>(op.ra))));
 			return;
 		}

-		if (g_cfg.core.spu_approx_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 		{
 			register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
 			{
@ -8945,7 +9469,7 @@ public:

 	void FCGT(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) > get_vr<f64[4]>(op.rb))));
 			return;
@ -8992,7 +9516,7 @@ public:
 				return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
 			}

-			if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
 			{
 				const auto ai = eval(bitcast<s32[4]>(a));
 				const auto bi = eval(bitcast<s32[4]>(b));
@ -9023,7 +9547,7 @@ public:

 	void FCMGT(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) > fabs(get_vr<f64[4]>(op.rb)))));
 			return;
@ -9069,7 +9593,7 @@ public:
 				return eval(sext<s32[4]>(mai > mbi));
 			}

-			if (g_cfg.core.spu_approx_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 			{
 				return eval(sext<s32[4]>(fcmp_uno(ma > mb) & (mai > mbi)));
 			}
@ -9090,7 +9614,7 @@ public:

 	void FA(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, get_vr<f64[4]>(op.ra) + get_vr<f64[4]>(op.rb));
 			return;
@ -9115,7 +9639,7 @@ public:

 	void FS(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, get_vr<f64[4]>(op.ra) - get_vr<f64[4]>(op.rb));
 			return;
@ -9126,7 +9650,7 @@ public:
 			const auto a = value<f32[4]>(ci->getOperand(0));
 			const auto b = value<f32[4]>(ci->getOperand(1));

-			if (g_cfg.core.spu_approx_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 			{
 				const auto bc = clamp_smax(b); // for #4478
 				return eval(a - bc);
@ -9148,7 +9672,7 @@ public:

 	void FM(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, get_vr<f64[4]>(op.ra) * get_vr<f64[4]>(op.rb));
 			return;
@ -9159,7 +9683,7 @@ public:
 			const auto a = value<f32[4]>(ci->getOperand(0));
 			const auto b = value<f32[4]>(ci->getOperand(1));

-			if (g_cfg.core.spu_approx_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 			{
 				if (a.value == b.value)
 				{
@ -9195,7 +9719,7 @@ public:

 	void FESD(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			const auto r = zshuffle(get_vr<f64[4]>(op.ra), 1, 3);
 			const auto d = bitcast<s64[2]>(r);
@ -9225,7 +9749,7 @@ public:

 	void FRDS(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			const auto r = get_vr<f64[2]>(op.ra);
 			const auto d = bitcast<s64[2]>(r);
@ -9256,7 +9780,7 @@ public:

 	void FCEQ(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) == get_vr<f64[4]>(op.rb))));
 			return;
@ -9309,7 +9833,7 @@ public:
 				return eval(sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
 			}

-			if (g_cfg.core.spu_approx_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 			{
 				return eval(sext<s32[4]>(fcmp_ord(a == b)) | sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
 			}
@ -9330,7 +9854,7 @@ public:

 	void FCMEQ(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) == fabs(get_vr<f64[4]>(op.rb)))));
 			return;
@ -9386,7 +9910,7 @@ public:
 				return eval(sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
 			}

-			if (g_cfg.core.spu_approx_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 			{
 				return eval(sext<s32[4]>(fcmp_ord(fa == fb)) | sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
 			}
@ -9479,7 +10003,7 @@ public:
 	void FNMS(spu_opcode_t op)
 	{
 		// See FMA.
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
 			set_vr(op.rt4, fmuladd(-a, b, c));
@ -9492,7 +10016,7 @@ public:
 			const auto b = value<f32[4]>(ci->getOperand(1));
 			const auto c = value<f32[4]>(ci->getOperand(2));

-			if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
 			{
 				return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
 			}
@ -9514,7 +10038,7 @@ public:
 	void FMA(spu_opcode_t op)
 	{
 		// Hardware FMA produces the same result as multiple + add on the limited double range (xfloat).
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
 			set_vr(op.rt4, fmuladd(a, b, c));
@ -9527,7 +10051,7 @@ public:
 			const auto b = value<f32[4]>(ci->getOperand(1));
 			const auto c = value<f32[4]>(ci->getOperand(2));

-			if (g_cfg.core.spu_approx_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 			{
 				const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
 				const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
@ -9588,7 +10112,7 @@ public:
 	void FMS(spu_opcode_t op)
 	{
 		// See FMA.
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
 			set_vr(op.rt4, fmuladd(a, b, -c));
@ -9601,7 +10125,7 @@ public:
 			const auto b = value<f32[4]>(ci->getOperand(1));
 			const auto c = value<f32[4]>(ci->getOperand(2));

-			if (g_cfg.core.spu_approx_xfloat)
+			if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 			{
 				return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
 			}
@ -9635,7 +10159,7 @@ public:
 	void FI(spu_opcode_t op)
 	{
 		// TODO
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			set_vr(op.rt, get_vr<f64[4]>(op.rb));
 			// const auto [a, b] = get_vrs<f64[4]>(op.ra, op.rb);
@ -9663,7 +10187,7 @@ public:
 			return bitcast<f32[4]>((b & 0xff800000u) | (bitcast<u32[4]>(fpcast<f32[4]>(bnew)) & ~0xff800000u)); // Inject old sign and exponent
 		});

-		if (g_cfg.core.spu_approx_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
 		{
 			register_intrinsic("spu_re", [&](llvm::CallInst* ci)
 			{
@ -9722,7 +10246,7 @@ public:

 	void CFLTS(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
 			value_t<f64[4]> s;
@ -9796,7 +10320,7 @@ public:

 	void CFLTU(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
 			value_t<f64[4]> s;
@ -9863,6 +10387,15 @@ public:
 				a = eval(a * s);

 			value_t<s32[4]> r;
+
+			if (m_use_avx512)
+			{
+				const auto sc = eval(bitcast<f32[4]>(max(bitcast<s32[4]>(a),splat<s32[4]>(0x0))));
+				r.value = m_ir->CreateFPToUI(sc.value, get_type<s32[4]>());
+				set_vr(op.rt, r);
+				return;
+			}
+
 			r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
 			set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
 		}
@ -9870,7 +10403,7 @@ public:

 	void CSFLT(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
 			value_t<f64[4]> r;
@ -9910,7 +10443,7 @@ public:

 	void CUFLT(spu_opcode_t op)
 	{
-		if (g_cfg.core.spu_accurate_xfloat)
+		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
 		{
 			value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
 			value_t<f64[4]> r;
@ -10495,6 +11028,13 @@ public:
 		// Create jump table if necessary (TODO)
 		const auto tfound = m_targets.find(m_pos);

+		if (op.d && tfound != m_targets.end() && tfound->second.size() == 1 && tfound->second[0] == spu_branch_target(m_pos, 1))
+		{
+			// Interrupts-disable pattern
+			m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
+			return;
+		}
+
 		if (!op.d && !op.e && tfound != m_targets.end() && tfound->second.size() > 1)
 		{
 			// Shift aligned address for switch
@ -10994,7 +11534,7 @@ struct spu_llvm_worker
 				return;
 			}

-			thread_ctrl::wait_on(registered, nullptr);
+			thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
 			slice = registered.pop_all();
 		}())
 		{
@ -11161,7 +11701,7 @@ struct spu_llvm
 			{
 				// Interrupt profiler thread and put it to sleep
 				static_cast<void>(prof_mutex.reset());
-				thread_ctrl::wait_on(registered, nullptr);
+				thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
 				continue;
 			}

--- a/rpcs3/Emu/Cell/SPURecompiler.h
+++ b/rpcs3/Emu/Cell/SPURecompiler.h
@ -34,7 +34,18 @@ public:

 	void add(const struct spu_program& func);

-	static void initialize();
+	static void initialize(bool build_existing_cache = true);
+
+	struct precompile_data_t // Element type of the precompile_funcs queue declared in this class
+	{
+		u32 vaddr; // NOTE(review): presumably the address the instruction data was taken from - confirm against the producer
+		std::basic_string<u32> inst_data; // NOTE(review): presumably raw 32-bit instruction words; std::basic_string<u32> needs a char_traits<u32>, which the C++ standard does not provide - confirm toolchain support
+		std::vector<u32> funcs; // NOTE(review): presumably function entry addresses to precompile - confirm against the consumer
+	};
+
+	bool collect_funcs_to_precompile = true;
+
+	lf_queue<precompile_data_t> precompile_funcs;
 };

 struct spu_program
@ -331,6 +342,11 @@ public:
 		return *m_spurt;
 	}

+	const auto& get_targets() const // Read-only accessor: exposes m_targets by const reference, no copy is made
+	{
+		return m_targets;
+	}
+
 	// Create recompiler instance (ASMJIT)
 	static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler();

--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@ -37,6 +37,15 @@
 #include "util/sysinfo.hpp"
 #include "util/serialization.hpp"

+#if defined(ARCH_X64)
+#ifdef _MSC_VER
+#include <intrin.h>
+#include <immintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+
 using spu_rdata_t = decltype(spu_thread::rdata);

 template <>
@ -320,6 +329,40 @@ extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
 #endif
 }

+#if defined(_MSC_VER)
+#define mwaitx_func
+#define waitpkg_func
+#else
+#define mwaitx_func __attribute__((__target__("mwaitx")))
+#define waitpkg_func __attribute__((__target__("waitpkg")))
+#endif
+
+#if defined(ARCH_X64)
+// Waits for a number of TSC clock cycles in power optimized state
+// Cstate is represented in bits [7:4]+1 cstate. So C0 requires bits [7:4] to be set to 0xf, C1 requires bits [7:4] to be set to 0.
+// T must provide a static needs_wait(args...) predicate; cline is the address handed to _mm_monitorx; args are forwarded unchanged to T::needs_wait.
+// NOTE(review): identifiers containing a double underscore are reserved for the implementation in C++ - consider renaming together with all callers.
+template <typename T, typename... Args>
+mwaitx_func static void __mwaitx(u32 cycles, u32 cstate, const void* cline, const Args&... args)
+{
+	constexpr u32 timer_enable = 0x2; // Passed as the first (extensions) argument of _mm_mwaitx; NOTE(review): presumably bit 1 enables the timeout given by cycles - confirm against the AMD manual
+
+	// monitorx will wake if the cache line is written to, use it for reservations which fits it almost perfectly
+	_mm_monitorx(const_cast<void*>(cline), 0, 0); // const_cast only because the intrinsic takes a non-const pointer
+
+	// Use static function to force inline
+	if (T::needs_wait(args...)) // Condition is evaluated only after the monitor has been armed; the wait is skipped when it returns false
+	{
+		_mm_mwaitx(timer_enable, cstate, cycles); // Argument order is (extensions, hints, clock)
+	}
+}
+
+// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01 state (higher power)
+// cycles is a relative TSC cycle count; it is converted to a deadline below before being handed to _tpause.
+waitpkg_func static void __tpause(u32 cycles, u32 cstate)
+{
+	const u64 tsc = utils::get_tsc() + cycles; // Deadline = current TSC value plus the requested cycle count
+	_tpause(cstate, tsc); // cstate is passed as the control argument, the deadline as the counter argument
+}
+#endif
+
 void do_cell_atomic_128_store(u32 addr, const void* to_write);

 extern thread_local u64 g_tls_fault_spu;
@ -445,6 +488,12 @@ std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
 	{
 		const int index = (type == spu_itype::BR || type == spu_itype::BRA || type == spu_itype::BRSL || type == spu_itype::BRASL ? 0 : 1);
 		res[index] = (spu_branch_target(type == spu_itype::BRASL || type == spu_itype::BRA ? 0 : pc, op.i16));
+
+		if (res[0] == res[1])
+		{
+			res[1] = umax;
+		}
+
 		break;
 	}
 	case spu_itype::IRET:
@ -1147,7 +1196,7 @@ void spu_thread::dump_regs(std::string& ret, std::any& /*custom_data*/) const
 			}
 		}

-		if (i3 >= 0x80 && is_exec_code(i3, ls))
+		if (i3 >= 0x80 && is_exec_code(i3, { ls, SPU_LS_SIZE }))
 		{
 			dis_asm.disasm(i3);
 			fmt::append(ret, " -> %s", dis_asm.last_opcode);
@ -1233,6 +1282,7 @@ std::vector<std::pair<u32, u32>> spu_thread::dump_callstack_list() const
 	bool first = true;

 	const v128 gpr0 = gpr[0];
+	const u32 _pc = pc;

 	// Declare first 128-bytes as invalid for stack (common values such as 0 do not make sense here)
 	for (u32 sp = gpr[1]._u32[3]; (sp & 0xF) == 0u && sp >= 0x80u && sp <= 0x3FFE0u; first = false)
@ -1250,14 +1300,16 @@ std::vector<std::pair<u32, u32>> spu_thread::dump_callstack_list() const
 				return true;
 			}

-			return !addr || !is_exec_code(addr, ls);
+			return !addr || !is_exec_code(addr, { ls, SPU_LS_SIZE });
 		};

 		if (first && lr._u32[3] != gpr0._u32[3] && !is_invalid(gpr0))
 		{
 			// Detect functions with no stack or before LR has been stored
-			std::vector<bool> passed(SPU_LS_SIZE / 4);
-			std::vector<u32> start_points{pc};
+			std::vector<bool> passed(_pc / 4);
+
+			// Start with PC
+			std::basic_string<u32> start_points{_pc};

 			bool is_ok = false;
 			bool all_failed = false;
@ -1266,7 +1318,11 @@ std::vector<std::pair<u32, u32>> spu_thread::dump_callstack_list() const
 			{
 				for (u32 i = start_points[start]; i < SPU_LS_SIZE;)
 				{
-					if (passed[i / 4])
+					if (i / 4 >= passed.size())
+					{
+						passed.resize(i / 4 + 1);
+					}
+					else if (passed[i / 4])
 					{
 						// Already passed
 						break;
@ -1279,7 +1335,7 @@ std::vector<std::pair<u32, u32>> spu_thread::dump_callstack_list() const

 					if (start == 0 && type == spu_itype::STQD && op.ra == 1u && op.rt == 0u)
 					{
-						// Saving LR to stack: this is indeed a new function
+						// Saving LR to stack: this is indeed a new function (ok because LR has not been saved yet)
 						is_ok = true;
 						break;
 					}
@ -1317,12 +1373,23 @@ std::vector<std::pair<u32, u32>> spu_thread::dump_callstack_list() const
 					for (usz res_i = 0; res_i < results.size(); res_i++)
 					{
 						const u32 route_pc = results[res_i];
-						if (route_pc < SPU_LS_SIZE && !passed[route_pc / 4])
+
+						if (route_pc >= SPU_LS_SIZE)
+						{
+							continue;
+						}
+
+						if (route_pc / 4 >= passed.size())
+						{
+							passed.resize(route_pc / 4 + 1);
+						}
+
+						if (!passed[route_pc / 4])
 						{
 							if (proceeded)
 							{
 								// Remember next route start point
-								start_points.emplace_back(route_pc);
+								start_points.push_back(route_pc);
 							}
 							else
 							{
@ -1821,7 +1888,7 @@ spu_thread::~spu_thread()
 	utils::memory_release(ls - SPU_LS_SIZE * 2, SPU_LS_SIZE * 5);

 	perf_log.notice("Perf stats for transactions: success %u, failure %u", stx, ftx);
-	perf_log.notice("Perf stats for PUTLLC reload: successs %u, failure %u", last_succ, last_fail);
+	perf_log.notice("Perf stats for PUTLLC reload: success %u, failure %u", last_succ, last_fail);
 }

 u8* spu_thread::map_ls(utils::shm& shm, void* ptr)
@ -2418,7 +2485,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 						}
 					}

-					if (++i < 10)
+					if (true || ++i < 10)
 					{
 						busy_wait(500);
 					}
@ -2426,7 +2493,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 					{
 						// Wait
 						_cpu->state += cpu_flag::wait + cpu_flag::temp;
-						bits->wait(old, wmask);
+						// bits->wait(old, wmask);
 						_cpu->check_state();
 					}
 				}())
@ -2542,7 +2609,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 					v &= ~wmask;
 				});

-				bits->notify_all(wmask);
+				// bits->notify_all(wmask);

 				if (size == size0)
 				{
@ -3588,7 +3655,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 	{
 		if (raddr)
 		{
-			vm::reservation_notifier(addr).notify_all(-128);
+			vm::reservation_notifier(addr).notify_all();
 			raddr = 0;
 		}

@ -3775,7 +3842,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
 	}

 	do_cell_atomic_128_store(addr, _ptr<spu_rdata_t>(args.lsa & 0x3ff80));
-	vm::reservation_notifier(addr).notify_all(-128);
+	vm::reservation_notifier(addr).notify_all();
 }

 bool spu_thread::do_mfc(bool can_escape, bool must_finish)
@ -3952,29 +4019,97 @@ bool spu_thread::check_mfc_interrupts(u32 next_pc)
 	return false;
 }

-bool spu_thread::is_exec_code(u32 addr, const u8* ls_ptr)
+bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr)
 {
-	if (addr & ~0x3FFFC)
-	{
-		return false;
-	}
-
 	for (u32 i = 0; i < 30; i++)
 	{
-		const u32 addr0 = addr + (i * 4);
-		const u32 op = read_from_ptr<be_t<u32>>(ls_ptr + addr0);
-		const auto type = s_spu_itype.decode(op);
+		if (addr & ~0x3FFFC)
+		{
+			return false;
+		}

-		if (type == spu_itype::UNK || !op)
+		if (addr < base_addr || addr >= base_addr + ls_ptr.size())
+		{
+			return false;
+		}
+
+		const u32 addr0 = spu_branch_target(addr);
+		const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls_ptr, addr0 - base_addr)};
+		const auto type = s_spu_itype.decode(op.opcode);
+
+		if (type == spu_itype::UNK || !op.opcode)
+		{
+			return false;
+		}
+
+		if (type == spu_itype::STOP && op.rb)
 		{
 			return false;
 		}

 		if (type & spu_itype::branch)
 		{
-			// TODO
-			break;
+			if (type == spu_itype::BR && op.rt && op.rt != 127u)
+			{
+				return false;
+			}
+
+			auto results = op_branch_targets(addr, op);
+
+			if (results[0] == umax)
+			{
+				switch (type)
+				{
+				case spu_itype::BIZ:
+				case spu_itype::BINZ:
+				case spu_itype::BIHZ:
+				case spu_itype::BIHNZ:
+				{
+					results[0] = addr + 4;
+					break;
+				}
+				default:
+				{
+					break;
+				}
+				}
+
+				if (results[0] == umax)
+				{
+					break;
+				}
+			}
+
+			for (usz res_i = 1; res_i < results.size(); res_i++)
+			{
+				const u32 route_pc = results[res_i];
+
+				if (route_pc >= SPU_LS_SIZE)
+				{
+					continue;
+				}
+
+				if (route_pc < base_addr || route_pc >= base_addr + ls_ptr.size())
+				{
+					return false;
+				}
+
+				// Test the validity of a single instruction of the optional target
+				// This function can't be too slow and is unlikely to improve results by a great deal
+				const u32 op0 = read_from_ptr<be_t<u32>>(ls_ptr, route_pc - base_addr);
+				const spu_itype::type type0 = s_spu_itype.decode(op0);
+
+				if (type0 == spu_itype::UNK || !op0)
+				{
+					return false;
+				}
+			}
+
+			addr = spu_branch_target(results[0]);
+			continue;
 		}
+
+		addr += 4;
 	}

 	return true;
@ -4113,7 +4248,32 @@ bool spu_thread::process_mfc_cmd()

 							if (getllar_busy_waiting_switch == 1)
 							{
-								busy_wait(300);
+#if defined(ARCH_X64)
+								if (utils::has_um_wait())
+								{
+									if (utils::has_waitpkg())
+									{
+										__tpause(std::min<u32>(getllar_spin_count, 10) * 500, 0x1);
+									}
+									else
+									{
+										struct check_wait_t
+										{
+											static FORCE_INLINE bool needs_wait(u64 rtime, const atomic_t<u64>& mem_rtime) noexcept
+											{
+												return rtime == mem_rtime;
+											}
+										};
+
+										// Provide the first X64 cache line of the reservation to be tracked
+										__mwaitx<check_wait_t>(std::min<u32>(getllar_spin_count, 17) * 500, 0xf0, std::addressof(data), +rtime, vm::reservation_acquire(addr));
+									}
+								}
+								else
+#endif
+								{
+									busy_wait(300);
+								}
 							}

 							return true;
@ -4908,7 +5068,11 @@ s64 spu_thread::get_ch_value(u32 ch)
 			}
 		}

+#ifdef __linux__
+		const bool reservation_busy_waiting = false;
+#else
 		const bool reservation_busy_waiting = ((utils::get_tsc() >> 8) % 100 + ((raddr == spurs_addr) ? 50 : 0)) < g_cfg.core.spu_reservation_busy_waiting_percentage;
+#endif

 		for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true))
 		{
@ -4930,8 +5094,11 @@ s64 spu_thread::get_ch_value(u32 ch)
 			if (raddr && (mask1 & ~SPU_EVENT_TM) == SPU_EVENT_LR)
 			{
 				// Don't busy-wait with TSX - memory is sensitive
-				if (!reservation_busy_waiting)
+				if (g_use_rtm || !reservation_busy_waiting)
 				{
+#ifdef __linux__
+					vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{50'000});
+#else
 					if (raddr - spurs_addr <= 0x80 && !g_cfg.core.spu_accurate_reservations && mask1 == SPU_EVENT_LR)
 					{
 						atomic_wait_engine::set_one_time_use_wait_callback(+[](u64) -> bool
@ -4944,7 +5111,7 @@ s64 spu_thread::get_ch_value(u32 ch)

 						// Wait without timeout, in this situation we have notifications for all writes making it possible
 						// Abort notifications are handled specially for performance reasons
-						vm::reservation_notifier(raddr).wait(rtime, -128);
+						vm::reservation_notifier(raddr).wait(rtime);
 						continue;
 					}

@ -4976,7 +5143,8 @@ s64 spu_thread::get_ch_value(u32 ch)
 						return true;
 					});

-					vm::reservation_notifier(raddr).wait(rtime, -128, atomic_wait_timeout{80'000});
+					vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{80'000});
+#endif
 				}
 				else
 				{
@ -5464,7 +5632,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu)
 	{
 		group->run_state = SPU_THREAD_GROUP_STATUS_SUSPENDED;
 		spu.state += cpu_flag::signal;
-		spu.state.notify_one(cpu_flag::signal);
+		spu.state.notify_one();
 		return;
 	}

@ -5482,7 +5650,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu)
 				thread->state -= cpu_flag::suspend;
 			}

-			thread->state.notify_one(cpu_flag::suspend + cpu_flag::signal);
+			thread->state.notify_one();
 		}
 	}
 }
@ -6022,12 +6190,12 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span<spu_memory_segment_d
 	{
 		for (pc0 = pc_hint; pc0; pc0 -= 4)
 		{
-			const u32 op = read_from_ptr<be_t<u32>>(all_data.data(), pc0 - 4);
+			const u32 op = read_from_ptr<be_t<u32>>(all_data, pc0 - 4);

 			// Try to find function entry (if they are placed sequentially search for BI $LR of previous function)
 			if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK)
 			{
-				if (is_exec_code(pc0, all_data.data()))
+				if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE }))
 					break;
 			}
 		}
@ -6037,7 +6205,7 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span<spu_memory_segment_d
 		for (pc0 = 0; pc0 < SPU_LS_SIZE; pc0 += 4)
 		{
 			// Try to find a function entry (very basic)
-			if (is_exec_code(pc0, all_data.data()))
+			if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE }))
 				break;
 		}
 	}
@ -6244,7 +6412,7 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)

 	while (true)
 	{
-		thread_ctrl::wait_on(data, bit_wait);
+		thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32{bit_wait >> 32});
 		old = data;

 		if (!(old & bit_wait))
@ -6325,7 +6493,7 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
 			return false;
 		}

-		thread_ctrl::wait_on(data, state);
+		thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32));
 		state = data;
 	}
 }
@ -6369,7 +6537,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)

 	while (true)
 	{
-		thread_ctrl::wait_on(values, old);
+		thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old))));
 		old = values;

 		if (!old.waiting)
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@ -235,7 +235,7 @@ public:

 				// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread pushed to jostling_value)
 				ensure(this->data.bit_test_reset(off_wait));
-				data.notify_one();
+				utils::bless<atomic_t<u32>>(&data)[1].notify_one();
 			}

 			// Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs
@ -294,7 +294,7 @@ public:

 		if ((old & mask) == mask)
 		{
-			data.notify_one();
+			utils::bless<atomic_t<u32>>(&data)[1].notify_one();
 		}

 		return static_cast<u32>(old);
@ -386,7 +386,7 @@ struct spu_channel_4_t

 				// Turn off waiting bit manually (must succeed because waiting bit can only be reset by the thread pushing to jostling_value)
 				ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
-				values.notify_one();
+				utils::bless<atomic_t<u32>>(&values)[0].notify_one();
 			}

 			return;
@ -825,7 +825,8 @@ public:
 	void set_events(u32 bits);
 	void set_interrupt_status(bool enable);
 	bool check_mfc_interrupts(u32 next_pc);
-	static bool is_exec_code(u32 addr, const u8* ls_ptr); // Only a hint, do not rely on it other than debugging purposes
+	static bool is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr = 0); // Only a hint, do not rely on it other than debugging purposes
+	static std::vector<u32> discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/);
 	u32 get_ch_count(u32 ch);
 	s64 get_ch_value(u32 ch);
 	bool set_ch_value(u32 ch, u32 value);
--- a/rpcs3/Emu/Cell/lv2/lv2.cpp
+++ b/rpcs3/Emu/Cell/lv2/lv2.cpp
@ -2,6 +2,7 @@
 #include "Emu/System.h"
 #include "Emu/system_config.h"
 #include "Emu/Memory/vm_ptr.h"
+#include "Emu/Memory/vm_reservation.h"
 #include "Emu/Memory/vm_locking.h"

 #include "Emu/Cell/PPUFunction.h"
@ -53,6 +54,17 @@
 #include <optional>
 #include <deque>
 #include "util/tsc.hpp"
+#include "util/sysinfo.hpp"
+
+#if defined(ARCH_X64)
+#ifdef _MSC_VER
+#include <intrin.h>
+#include <immintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+

 extern std::string ppu_get_syscall_name(u64 code);

@ -1257,6 +1269,31 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout)
 		prepare_for_sleep(cpu);
 	}

+	if (cpu.id_type() == 1)
+	{
+		if (u32 addr = static_cast<ppu_thread&>(cpu).res_notify)
+		{
+			static_cast<ppu_thread&>(cpu).res_notify = 0;
+
+			const usz notify_later_idx = std::basic_string_view<const void*>{g_to_notify, std::size(g_to_notify)}.find_first_of(std::add_pointer_t<const void>{});
+
+			if (notify_later_idx != umax)
+			{
+				g_to_notify[notify_later_idx] = &vm::reservation_notifier(addr);
+
+				if (notify_later_idx < std::size(g_to_notify) - 1)
+				{
+					// Null-terminate the list if it ends before last slot
+					g_to_notify[notify_later_idx + 1] = nullptr;
+				}
+			}
+			else
+			{
+				vm::reservation_notifier(addr).notify_all();
+			}
+		}
+	}
+
 	bool result = false;
 	const u64 current_time = get_guest_system_time();
 	{
@ -1283,6 +1320,31 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout)

 bool lv2_obj::awake(cpu_thread* thread, s32 prio)
 {
+	if (ppu_thread* ppu = cpu_thread::get_current<ppu_thread>())
+	{
+		if (u32 addr = ppu->res_notify)
+		{
+			ppu->res_notify = 0;
+
+			const usz notify_later_idx = std::basic_string_view<const void*>{g_to_notify, std::size(g_to_notify)}.find_first_of(std::add_pointer_t<const void>{});
+
+			if (notify_later_idx != umax)
+			{
+				g_to_notify[notify_later_idx] = &vm::reservation_notifier(addr);
+
+				if (notify_later_idx < std::size(g_to_notify) - 1)
+				{
+					// Null-terminate the list if it ends before last slot
+					g_to_notify[notify_later_idx + 1] = nullptr;
+				}
+			}
+			else
+			{
+				vm::reservation_notifier(addr).notify_all();
+			}
+		}
+	}
+
 	bool result = false;
 	{
 		std::lock_guard lock(g_mutex);
@ -1631,7 +1693,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)

 			if (is_paused(target->state - cpu_flag::suspend))
 			{
-				target->state.notify_one(cpu_flag::suspend);
+				target->state.notify_one();
 			}
 		}
 	}
@ -1662,7 +1724,7 @@ void lv2_obj::cleanup()

 void lv2_obj::schedule_all(u64 current_time)
 {
-	usz notify_later_idx = 0;
+	usz notify_later_idx = std::basic_string_view<const void*>{g_to_notify, std::size(g_to_notify)}.find_first_of(std::add_pointer_t<const void>{});

 	if (!g_pending && g_scheduler_ready)
 	{
@ -1681,10 +1743,10 @@ void lv2_obj::schedule_all(u64 current_time)
 					continue;
 				}

-				if (notify_later_idx == std::size(g_to_notify))
+				if (notify_later_idx >= std::size(g_to_notify))
 				{
 					// Out of notification slots, notify locally (resizable container is not worth it)
-					target->state.notify_one(cpu_flag::signal + cpu_flag::suspend);
+					target->state.notify_one();
 				}
 				else
 				{
@ -1715,10 +1777,10 @@ void lv2_obj::schedule_all(u64 current_time)
 				ensure(!target->state.test_and_set(cpu_flag::notify));

 				// Otherwise notify it to wake itself
-				if (notify_later_idx == std::size(g_to_notify))
+				if (notify_later_idx >= std::size(g_to_notify))
 				{
 					// Out of notification slots, notify locally (resizable container is not worth it)
-					target->state.notify_one(cpu_flag::notify);
+					target->state.notify_one();
 				}
 				else
 				{
@ -1880,6 +1942,35 @@ void lv2_obj::set_yield_frequency(u64 freq, u64 max_allowed_tsc)
 	g_lv2_preempts_taken.release(0);
 }

+#if defined(_MSC_VER)
+#define mwaitx_func
+#define waitpkg_func
+#else
+#define mwaitx_func __attribute__((__target__("mwaitx")))
+#define waitpkg_func __attribute__((__target__("waitpkg")))
+#endif
+
+#if defined(ARCH_X64)
+// Waits for a number of TSC clock cycles in power optimized state
+// Cstate is represented in bits [7:4]+1 cstate. So C0 requires bits [7:4] to be set to 0xf, C1 requires bits [7:4] to be set to 0.
+mwaitx_func static void __mwaitx(u32 cycles, u32 cstate)
+{
+	constexpr u32 timer_enable = 0x2; // Passed as the first (extensions) argument of _mm_mwaitx; NOTE(review): presumably bit 1 enables the timeout given by cycles - confirm against the AMD manual
+
+	// monitorx will wake if the cache line is written to. We don't want this, so place the monitor value on its own cache line.
+	alignas(64) u64 monitor_var{}; // Local, 64-byte aligned dummy that serves only as the monitored address
+	_mm_monitorx(&monitor_var, 0, 0);
+	_mm_mwaitx(timer_enable, cstate, cycles); // Argument order is (extensions, hints, clock)
+}
+
+// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01 state (higher power)
+// cycles is a relative TSC cycle count; it is converted to a deadline below before being handed to _tpause.
+waitpkg_func static void __tpause(u32 cycles, u32 cstate)
+{
+	const u64 tsc = utils::get_tsc() + cycles; // Deadline = current TSC value plus the requested cycle count
+	_tpause(cstate, tsc); // cstate is passed as the control argument, the deadline as the counter argument
+}
+#endif
+
 bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep)
 {
 	static_assert(u64{umax} / max_timeout >= 100, "max timeout is not valid for scaling");
@ -1948,7 +2039,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 		u64 remaining = usec - passed;
 #ifdef __linux__
 		// NOTE: Assumption that timer initialization has succeeded
-		u64 host_min_quantum = is_usleep && remaining <= 1000 ? 10 : 50;
+		constexpr u64 host_min_quantum = 10;
 #else
 		// Host scheduler quantum for windows (worst case)
 		// NOTE: On ps3 this function has very high accuracy
@ -1965,14 +2056,29 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
 			if (remaining > host_min_quantum)
 			{
 #ifdef __linux__
-				// Do not wait for the last quantum to avoid loss of accuracy
-				wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum));
+				// With timerslack set low, Linux is precise for all values above
+				wait_for(remaining);
 #else
 				// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
 				wait_for(remaining - (remaining % host_min_quantum));
 #endif
 			}
 			// TODO: Determine best value for yield delay
+#if defined(ARCH_X64)
+			else if (utils::has_appropriate_um_wait())
+			{
+				u32 us_in_tsc_clocks = remaining * (utils::get_tsc_freq() / 1000000);
+
+				if (utils::has_waitpkg())
+				{
+					__tpause(us_in_tsc_clocks, 0x1);
+				}
+				else
+				{
+					__mwaitx(us_in_tsc_clocks, 0xf0);
+				}
+			}
+#endif
 			else
 			{
 				// Try yielding. May cause long wake latency but helps weaker CPUs a lot by alleviating resource pressure
--- a/rpcs3/Emu/Cell/lv2/sys_fs.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_fs.cpp
@ -2773,6 +2773,62 @@ error_code sys_fs_chmod(ppu_thread&, vm::cptr<char> path, s32 mode)
 {
 	sys_fs.todo("sys_fs_chmod(path=%s, mode=%#o)", path, mode);

+	const auto [path_error, vpath] = translate_to_sv(path); // mode is only logged above; nothing visible in this body applies it to the file
+
+	if (path_error)
+	{
+		return {path_error, vpath};
+	}
+
+	const std::string local_path = vfs::get(vpath); // An empty result is reported as CELL_ENOTMOUNTED below
+
+	const auto mp = lv2_fs_object::get_mp(vpath);
+
+	if (local_path.empty())
+	{
+		return {CELL_ENOTMOUNTED, path};
+	}
+
+	if (mp->flags & lv2_mp_flag::read_only) // NOTE(review): mp is dereferenced without a null check - confirm get_mp never returns null
+	{
+		return {CELL_EROFS, path};
+	}
+
+	std::unique_lock lock(mp->mutex); // Held until the function returns
+
+	fs::stat_t info{};
+
+	if (!fs::get_stat(local_path, info))
+	{
+		switch (auto error = fs::g_tls_error)
+		{
+		case fs::error::noent:
+		{
+			// Try to locate split files
+
+			for (u32 i = 66601; i <= 66699; i++) // NOTE(review): this loop only overwrites info and its outcome is never read afterwards
+			{
+				if (!fs::get_stat(fmt::format("%s.%u", local_path, i), info) && !info.is_directory) // NOTE(review): breaks when get_stat FAILS, which looks inverted relative to the .66600 check below - confirm intent
+				{
+					break;
+				}
+			}
+
+			if (fs::get_stat(local_path + ".66600", info) && !info.is_directory)
+			{
+				break; // Leaves the switch; execution then reaches the final return CELL_OK
+			}
+
+			return {CELL_ENOENT, path};
+		}
+		default:
+		{
+			sys_fs.error("sys_fs_chmod(): unknown error %s", error);
+			return {CELL_EIO, path};
+		}
+		}
+	}
+
 	return CELL_OK;
 }

--- a/rpcs3/Emu/Cell/lv2/sys_hid.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_hid.cpp
@ -12,7 +12,7 @@

 LOG_CHANNEL(sys_hid);

-error_code sys_hid_manager_open(u64 device_type, u64 port_no, vm::ptr<u32> handle)
+error_code sys_hid_manager_open(ppu_thread& ppu, u64 device_type, u64 port_no, vm::ptr<u32> handle)
 {
 	sys_hid.todo("sys_hid_manager_open(device_type=0x%llx, port_no=0x%llx, handle=*0x%llx)", device_type, port_no, handle);

@ -34,7 +34,7 @@ error_code sys_hid_manager_open(u64 device_type, u64 port_no, vm::ptr<u32> handl

 	if (device_type == 1)
 	{
-		cellPadInit(7);
+		cellPadInit(ppu, 7);
 		cellPadSetPortSetting(::narrow<u32>(port_no) /* 0 */, CELL_PAD_SETTING_LDD | CELL_PAD_SETTING_PRESS_ON | CELL_PAD_SETTING_SENSOR_ON);
 	}

--- a/rpcs3/Emu/Cell/lv2/sys_hid.h
+++ b/rpcs3/Emu/Cell/lv2/sys_hid.h
@ -34,7 +34,7 @@ struct sys_hid_manager_514_pkg_d

 // SysCalls

-error_code sys_hid_manager_open(u64 device_type, u64 port_no, vm::ptr<u32> handle);
+error_code sys_hid_manager_open(ppu_thread& ppu, u64 device_type, u64 port_no, vm::ptr<u32> handle);
 error_code sys_hid_manager_ioctl(u32 hid_handle, u32 pkg_id, vm::ptr<void> buf, u64 buf_size);
 error_code sys_hid_manager_add_hot_key_observer(u32 event_queue, vm::ptr<u32> unk);
 error_code sys_hid_manager_check_focus();
--- a/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp
@ -183,7 +183,7 @@ error_code _sys_interrupt_thread_establish(ppu_thread& ppu, vm::ptr<u32> ih, u32
 		});

 		it->state -= cpu_flag::stop;
-		it->state.notify_one(cpu_flag::stop);
+		it->state.notify_one();

 		return result;
 	});
--- a/rpcs3/Emu/Cell/lv2/sys_memory.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_memory.cpp
@ -85,6 +85,20 @@ struct sys_memory_address_table
 	}
 };

+// Forwards to vm::reserve_map, picking the location, area size and flags from the requested alignment
+std::shared_ptr<vm::block_t> reserve_map(u32 alloc_size, u32 align)
+{
+	// An alignment of exactly 0x10000 selects the 64k-page location; every other value selects the 1m-page one
+	if (align == 0x10000)
+	{
+		// The 64k request always uses a fixed 0x20000000 size; alloc_size is not consulted
+		return vm::reserve_map(vm::user64k, 0, 0x20000000, vm::page_size_64k | vm::bf0_0x1);
+	}
+
+	// The 1m request is sized from alloc_size via utils::align(..., 0x10000000)
+	return vm::reserve_map(vm::user1m, 0, utils::align(alloc_size, 0x10000000), vm::page_size_1m | vm::bf0_0x1);
+}
+
 // Todo: fix order of error checks

 error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32> alloc_addr)
@ -123,7 +129,7 @@ error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32
 		return CELL_ENOMEM;
 	}

-	if (const auto area = vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0, utils::align(size, 0x10000000), 0x401))
+	if (const auto area = reserve_map(size, align))
 	{
 		if (const u32 addr = area->alloc(size, nullptr, align))
 		{
@ -131,7 +137,7 @@ error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32

 			if (alloc_addr)
 			{
-				sys_memory.notice("sys_mmapper_search_and_map(): Allocated 0x%x address (size=0x%x)", addr, size);
+				sys_memory.notice("sys_memory_allocate(): Allocated 0x%x address (size=0x%x)", addr, size);

 				vm::lock_sudo(addr, size);
 				cpu.check_state();
@ -197,7 +203,7 @@ error_code sys_memory_allocate_from_container(cpu_thread& cpu, u32 size, u32 cid
 		return ct.ret;
 	}

-	if (const auto area = vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0, utils::align(size, 0x10000000), 0x401))
+	if (const auto area = reserve_map(size, align))
 	{
 		if (const u32 addr = area->alloc(size))
 		{
--- a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp
@ -862,7 +862,7 @@ error_code mmapper_thread_recover_page_fault(cpu_thread* cpu)

 	if (cpu->state & cpu_flag::signal)
 	{
-		cpu->state.notify_one(cpu_flag::signal);
+		cpu->state.notify_one();
 	}

 	return CELL_OK;
--- a/Show More
+++ b/Show More