Compare commits

..

No commits in common. "master" and "0.8.3" have entirely different histories.

979 changed files with 56190 additions and 231695 deletions

18
.gitattributes vendored
View File

@ -1,18 +0,0 @@
# Vendored Dependencies
src/frontend/glad/** linguist-vendored
src/frontend/qt_sdl/gif-h/** linguist-vendored
src/frontend/qt_sdl/toml/** linguist-vendored
src/net/libslirp/** linguist-vendored
src/net/pcap/** linguist-vendored
src/sha1/** linguist-vendored
src/teakra/** linguist-vendored
src/tiny-AES-c/** linguist-vendored
src/xxhash/** linguist-vendored
# A handful of custom files embedded in the vendored dependencies
## Ad-hoc CMakeLists.txt for melonDS
src/net/libslirp/src/CMakeLists.txt -linguist-vendored
## glib stub
src/net/libslirp/src/glib/** -linguist-vendored

3
.github/FUNDING.yml vendored
View File

@ -1,2 +1 @@
patreon: Arisotura
custom: ["https://paypal.me/Arisotura", "http://melonds.kuribo64.net/donate.php"]
patreon: staplebutter

View File

@ -1,94 +0,0 @@
name: macOS
on:
push:
branches:
- master
- ci/*
pull_request:
branches:
- master
env:
VCPKG_COMMIT: 2ad004460f5db4d3b66f62f5799ff66c265c4b5d
MELONDS_GIT_BRANCH: ${{ github.ref }}
MELONDS_GIT_HASH: ${{ github.sha }}
MELONDS_BUILD_PROVIDER: GitHub Actions
MELONDS_VERSION_SUFFIX: " RC"
jobs:
build-macos:
strategy:
matrix:
arch: [x86_64, arm64]
name: ${{ matrix.arch }}
runs-on: macos-14
steps:
- name: Check out sources
uses: actions/checkout@v3
- name: Install dependencies for package building
run: |
brew install autoconf automake autoconf-archive libtool python-setuptools
- name: Set up CMake
uses: lukka/get-cmake@latest
- name: Set up vcpkg
uses: lukka/run-vcpkg@v11
with:
vcpkgGitCommitId: ${{ env.VCPKG_COMMIT }}
- name: Build
uses: lukka/run-cmake@v10
with:
configurePreset: release-mac-${{ matrix.arch }}
buildPreset: release-mac-${{ matrix.arch }}
configurePresetAdditionalArgs: "['-DMELONDS_EMBED_BUILD_INFO=ON']"
- name: Compress app bundle
shell: bash
run: |
cd build/release-mac-${{ matrix.arch }}
zip -r -y ../../macOS-${{ matrix.arch }}.zip melonDS.app
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: macOS-${{ matrix.arch }}
path: macOS-${{ matrix.arch }}.zip
retention-days: 1
universal-binary:
name: Universal binary
needs: [build-macos]
runs-on: macos-13
continue-on-error: true
steps:
- name: Download x86_64
uses: actions/download-artifact@v4
with:
name: macOS-x86_64
path: x86_64
- name: Download arm64
uses: actions/download-artifact@v4
with:
name: macOS-arm64
path: arm64
- name: Combine app bundles
shell: bash
run: |
unzip x86_64/*.zip -d x86_64
unzip arm64/*.zip -d arm64
lipo {x86_64,arm64}/melonDS.app/Contents/MacOS/melonDS -create -output melonDS
cp -a arm64/melonDS.app melonDS.app
cp melonDS melonDS.app/Contents/MacOS/melonDS
codesign -s - --deep melonDS.app
zip -r -y macOS-universal.zip melonDS.app
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: macOS-universal
path: macOS-universal.zip
# - name: Clean up architecture-specific artifacts
# uses: geekyeggo/delete-artifact@v4
# with:
# failOnError: false
# name: |
# macOS-x86_64
# macOS-arm64

View File

@ -1,63 +0,0 @@
name: Ubuntu
on:
push:
branches:
- master
- ci/*
pull_request:
branches:
- master
env:
MELONDS_GIT_BRANCH: ${{ github.ref }}
MELONDS_GIT_HASH: ${{ github.sha }}
MELONDS_BUILD_PROVIDER: GitHub Actions
MELONDS_VERSION_SUFFIX: " RC"
jobs:
build:
continue-on-error: true
strategy:
matrix:
arch:
- runner: ubuntu-22.04
name: x86_64
- runner: ubuntu-22.04-arm
name: aarch64
name: ${{ matrix.arch.name }}
runs-on: ${{ matrix.arch.runner }}
steps:
- uses: actions/checkout@v4
name: Check out sources
- name: Install dependencies
run: |
sudo apt update
sudo apt install --allow-downgrades cmake ninja-build extra-cmake-modules libpcap0.8-dev libsdl2-dev libenet-dev \
qt6-{base,base-private,multimedia}-dev libqt6svg6-dev libarchive-dev libzstd-dev libfuse2
- name: Configure
run: cmake -B build -G Ninja -DCMAKE_INSTALL_PREFIX=/usr -DMELONDS_EMBED_BUILD_INFO=ON
- name: Build
run: |
cmake --build build
DESTDIR=AppDir cmake --install build
- uses: actions/upload-artifact@v4
with:
name: melonDS-ubuntu-${{ matrix.arch.name }}
path: AppDir/usr/bin/melonDS
- name: Fetch AppImage tools
run: |
wget https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-${{ matrix.arch.name }}.AppImage
wget https://github.com/linuxdeploy/linuxdeploy-plugin-qt/releases/download/continuous/linuxdeploy-plugin-qt-${{ matrix.arch.name }}.AppImage
chmod a+x linuxdeploy-*.AppImage
- name: Build the AppImage
env:
QMAKE: /usr/lib/qt6/bin/qmake
run: |
./linuxdeploy-${{ matrix.arch.name }}.AppImage --appdir AppDir --plugin qt --output appimage
- uses: actions/upload-artifact@v4
with:
name: melonDS-appimage-${{ matrix.arch.name }}
path: melonDS*.AppImage

View File

@ -1,45 +0,0 @@
name: Windows
on:
push:
branches:
- master
- ci/*
pull_request:
branches:
- master
env:
VCPKG_COMMIT: 2ad004460f5db4d3b66f62f5799ff66c265c4b5d
MELONDS_GIT_BRANCH: ${{ github.ref }}
MELONDS_GIT_HASH: ${{ github.sha }}
MELONDS_BUILD_PROVIDER: GitHub Actions
MELONDS_VERSION_SUFFIX: " RC"
jobs:
build:
runs-on: windows-latest
defaults:
run:
shell: msys2 {0}
steps:
- name: Check out sources
uses: actions/checkout@v3
- name: Set up MSYS2
uses: msys2/setup-msys2@v2
with:
msystem: ucrt64
update: true
pacboy: gcc:p cmake:p ninja:p make:p
- name: Set up vcpkg
uses: lukka/run-vcpkg@v11
with:
vcpkgGitCommitId: ${{ env.VCPKG_COMMIT }}
- name: Configure
run: cmake --preset=release-mingw-x86_64 -DMELONDS_EMBED_BUILD_INFO=ON
- name: Build
run: cmake --build --preset=release-mingw-x86_64
- uses: actions/upload-artifact@v4
with:
name: melonDS-windows-x86_64
path: .\build\release-mingw-x86_64\melonDS.exe

15
.gitignore vendored
View File

@ -1,4 +1,3 @@
build*/
bin
obj
*.depend
@ -6,18 +5,6 @@ obj
*.o
melon_grc.c
melon_grc.h
melon.rc
cmake-build*
cmake-build
cmake-build-debug
compile_commands.json
.idea
.cache
*.exe
.DS_Store
.vs
.vscode
CMakeFiles
CMakeCache.txt

View File

@ -1,81 +0,0 @@
# Building melonDS
* [Linux](#linux)
* [Windows](#windows)
* [macOS](#macos)
## Linux
1. Install dependencies:
* Ubuntu:
* All versions: `sudo apt install cmake extra-cmake-modules libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev libarchive-dev libenet-dev libzstd-dev`
* 24.04: `sudo apt install qt6-{base,base-private,multimedia,svg}-dev`
* 22.04: `sudo apt install qtbase6-dev qtbase6-private-dev qtmultimedia6-dev libqt6svg6-dev`
* Older versions: `sudo apt install qtbase5-dev qtbase5-private-dev qtmultimedia5-dev libqt5svg5-dev`
Also add `-DUSE_QT6=OFF` to the first CMake command below.
* Fedora: `sudo dnf install gcc-c++ cmake extra-cmake-modules SDL2-devel libarchive-devel enet-devel libzstd-devel qt6-{qtbase,qtbase-private,qtmultimedia,qtsvg}-devel wayland-devel`
* Arch Linux: `sudo pacman -S base-devel cmake extra-cmake-modules git libpcap sdl2 qt6-{base,multimedia,svg} libarchive enet zstd`
2. Download the melonDS repository and prepare:
```bash
git clone https://github.com/melonDS-emu/melonDS
cd melonDS
```
3. Compile:
```bash
cmake -B build
cmake --build build -j$(nproc --all)
```
## Windows
1. Install [MSYS2](https://www.msys2.org/)
2. Open the MSYS2 terminal from the Start menu:
* For x64 systems (most common), use **MSYS2 UCRT64**
* For ARM64 systems, use **MSYS2 CLANGARM64**
3. Update the packages using `pacman -Syu` and reopen the same terminal if it asks you to
4. Install git and clone the repository
```bash
pacman -S git
git clone https://github.com/melonDS-emu/melonDS
cd melonDS
```
5. Install dependencies:
Replace `<prefix>` below with `mingw-w64-ucrt-x86_64` on x64 systems, or `mingw-w64-clang-aarch64` on ARM64 systems.
```bash
pacman -S <prefix>-{toolchain,cmake,SDL2,libarchive,enet,zstd}
```
6. Install Qt and configure the build directory
* Dynamic builds (with DLLs)
1. Install Qt: `pacman -S <prefix>-{qt6-base,qt6-svg,qt6-multimedia,qt6-svg,qt6-tools}`
2. Set up the build directory with `cmake -B build`
* Static builds (without DLLs, standalone executable)
1. Install Qt: `pacman -S <prefix>-qt5-static`
(Note: As of writing, the `qt6-static` package does not work.)
2. Set up the build directory with `cmake -B build -DBUILD_STATIC=ON -DUSE_QT6=OFF -DCMAKE_PREFIX_PATH=$MSYSTEM_PREFIX/qt5-static`
7. Compile: `cmake --build build`
If everything went well, melonDS should now be in the `build` folder. For dynamic builds, you may need to run melonDS from the MSYS2 terminal in order for it to find the required DLLs.
## macOS
1. Install the [Homebrew Package Manager](https://brew.sh)
2. Install dependencies: `brew install git pkg-config cmake sdl2 qt@6 libarchive enet zstd`
3. Download the melonDS repository and prepare:
```zsh
git clone https://github.com/melonDS-emu/melonDS
cd melonDS
```
4. Compile:
```zsh
cmake -B build -DCMAKE_PREFIX_PATH="$(brew --prefix qt@6);$(brew --prefix libarchive)"
cmake --build build -j$(sysctl -n hw.logicalcpu)
```
If everything went well, melonDS.app should now be in the `build` directory.
### Self-contained app bundle
If you want an app bundle that can be distributed to other computers without needing to install dependencies through Homebrew, you can additionally run `
../tools/mac-libs.rb .` after the build is completed, or add `-DMACOS_BUNDLE_LIBS=ON` to the first CMake command.
## Nix (macOS/Linux)
melonDS provides a Nix flake with support for both macOS and Linux. The [Nix package manager](https://nixos.org) needs to be installed to use it.
* To run melonDS, just type `nix run github:melonDS-emu/melonDS`.
* To get a shell for development, clone the melonDS repository and type `nix develop` in its directory.

View File

@ -1,107 +1,45 @@
cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.13)
cmake_policy(VERSION 3.15)
cmake_policy(VERSION 3.13)
if (POLICY CMP0076)
cmake_policy(SET CMP0076 NEW)
endif()
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_USER_MAKE_RULES_OVERRIDE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/DefaultBuildFlags.cmake")
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.15" CACHE STRING "Minimum OS X deployment version")
option(USE_VCPKG "Use vcpkg for dependency packages" OFF)
if (USE_VCPKG)
include(ConfigureVcpkg)
endif()
project(melonDS
VERSION 1.0
DESCRIPTION "DS emulator, sorta"
HOMEPAGE_URL "https://melonds.kuribo64.net"
LANGUAGES C CXX)
include(CheckSymbolExists)
include(CheckLibraryExists)
include(CMakeDependentOption)
include(CheckIPOSupported)
include(SetupCCache)
include(Sanitizers)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
project(melonDS)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
function(detect_architecture symbol arch)
if (NOT DEFINED ARCHITECTURE)
set(CMAKE_REQUIRED_QUIET 1)
check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch})
unset(CMAKE_REQUIRED_QUIET)
# The output variable needs to be unique across invocations otherwise
# CMake's crazy scope rules will keep it defined
if (ARCHITECTURE_${arch})
set(ARCHITECTURE "${arch}" PARENT_SCOPE)
set(ARCHITECTURE_${arch} 1 PARENT_SCOPE)
add_definitions(-DARCHITECTURE_${arch}=1)
endif()
endif()
endfunction()
detect_architecture("__x86_64__" x86_64)
detect_architecture("__i386__" x86)
detect_architecture("__arm__" ARM)
detect_architecture("__aarch64__" ARM64)
cmake_dependent_option(ENABLE_JIT "Enable JIT recompiler" ON
"ARCHITECTURE STREQUAL x86_64 OR ARCHITECTURE STREQUAL ARM64" OFF)
cmake_dependent_option(ENABLE_JIT_PROFILING "Enable JIT profiling with VTune" OFF "ENABLE_JIT" OFF)
option(ENABLE_OGLRENDERER "Enable OpenGL renderer" ON)
check_ipo_supported(RESULT IPO_SUPPORTED)
cmake_dependent_option(ENABLE_LTO_RELEASE "Enable link-time optimizations for release builds" ON "IPO_SUPPORTED" OFF)
cmake_dependent_option(ENABLE_LTO "Enable link-time optimizations" OFF "IPO_SUPPORTED" OFF)
if (ENABLE_LTO_RELEASE)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE)
if (CMAKE_BUILD_TYPE STREQUAL Release)
option(ENABLE_LTO "Enable link-time optimization" ON)
else()
option(ENABLE_LTO "Enable link-time optimization" OFF)
endif()
if (ENABLE_LTO)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
if(ENABLE_LTO)
add_compile_options(-O3 -flto)
set(CMAKE_AR "gcc-ar")
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> qcs <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_C_ARCHIVE_FINISH true)
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> qcs <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_CXX_ARCHIVE_FINISH true)
endif()
if (NOT APPLE)
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -s")
endif()
add_compile_options(-fno-pic)
add_link_options(-no-pie)
if (WIN32)
option(BUILD_STATIC "Statically link dependencies" OFF)
endif()
if (BUILD_STATIC AND WIN32)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
endif()
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
option(ENABLE_GDBSTUB "Enable GDB stub" ON)
if (ENABLE_GDBSTUB)
add_definitions(-DGDBSTUB_ENABLED)
endif()
option(BUILD_QT_SDL "Build Qt/SDL frontend" ON)
option(BUILD_LIBUI "Build libui frontend" ON)
add_subdirectory(src)
if (BUILD_QT_SDL)
add_subdirectory(src/frontend/qt_sdl)
if (BUILD_LIBUI)
add_subdirectory(src/libui_sdl)
endif()
configure_file(
${CMAKE_SOURCE_DIR}/romlist.bin
${CMAKE_BINARY_DIR}/romlist.bin COPYONLY)

View File

@ -1,105 +0,0 @@
{
"version": 6,
"configurePresets": [
{
"name": "release",
"displayName": "Release",
"description": "Default release build configuration.",
"generator": "Ninja",
"binaryDir": "${sourceDir}/build/release"
},
{
"inherits": "release",
"name": "release-vcpkg",
"displayName": "Release (vcpkg)",
"description": "Release build with packages from vcpkg.",
"cacheVariables": {
"USE_VCPKG": {
"type": "BOOL",
"value": "ON"
}
}
},
{
"name": "release-mingw-x86_64",
"inherits": "release-vcpkg",
"displayName": "Windows MinGW release (x86_64)",
"binaryDir": "${sourceDir}/build/release-mingw-x86_64",
"generator": "Ninja",
"cacheVariables": {
"BUILD_STATIC": {
"type": "BOOL",
"value": "ON"
}
}
},
{
"name": "release-mac-x86_64",
"inherits": "release-vcpkg",
"displayName": "macOS release (x86_64)",
"binaryDir": "${sourceDir}/build/release-mac-x86_64",
"cacheVariables": { "CMAKE_OSX_ARCHITECTURES": "x86_64" }
},
{
"name": "release-mac-arm64",
"inherits": "release-vcpkg",
"displayName": "macOS release (arm64)",
"binaryDir": "${sourceDir}/build/release-mac-arm64",
"cacheVariables": { "CMAKE_OSX_ARCHITECTURES": "arm64" }
}
],
"buildPresets": [
{
"name": "release",
"configurePreset": "release"
},
{
"name": "release-vcpkg",
"configurePreset": "release-vcpkg"
},
{
"name": "release-mingw-x86_64",
"configurePreset": "release-mingw-x86_64"
},
{
"name": "release-mac-x86_64",
"configurePreset": "release-mac-x86_64"
},
{
"name": "release-mac-arm64",
"configurePreset": "release-mac-arm64"
}
],
"workflowPresets": [
{
"name": "release",
"displayName": "Release",
"steps": [
{ "type": "configure", "name": "release" },
{ "type": "build", "name": "release" }
]
},
{
"name": "release-vcpkg",
"displayName": "Release (vcpkg)",
"steps": [
{ "type": "configure", "name": "release-vcpkg" },
{ "type": "build", "name": "release-vcpkg" }
]
},
{
"name": "release-mac-x86_64",
"steps": [
{ "type": "configure", "name": "release-mac-x86_64" },
{ "type": "build", "name": "release-mac-x86_64" }
]
},
{
"name": "release-mac-arm64",
"steps": [
{ "type": "configure", "name": "release-mac-arm64" },
{ "type": "build", "name": "release-mac-arm64" }
]
}
]
}

View File

@ -1,150 +0,0 @@
# Contributor guide for melonDS
Please follow a style as documented here. Note that this guide was not always enforced, so some parts of the code violate it.
Files should use 4 spaces for indentation.
```c++
// for single line comments prefer C++ style
/*
for multiline comments
both C style comments
*/
// as well as
// C++ style comments are possible
// when including headers from the C standard library use their C names (so don't use cstdio/cstring, ...)
#include <stdio.h>
// namespaces in PascalCase
namespace Component
{ // for all constructs curly braces go onto the following line
// the content of namespaces should not be indented
int GlobalVariable; // in PascalCase
// function names should use PascalCase, parameters camelCase:
void Test(int someParam)
{
int variable = someParam * 2; // local variables in camelCase
// you can slightly vary the spacing around operators:
int variable2 = someParam*2 + 1;
// but avoid something like this: someParam* 2+ 3
for (int i = 0; i < variable; i++) // always a space between if/for/while and the braces
{
// not using curly braces is allowed
// if the body of the if/for/while is simple:
if ((i % 2) == 0)
printf("%d\n", i); // no space between the function name and the braces
}
}
// defines should always be in CAPITALISED_SNAKE_CASE
#ifdef SOME_CONFIGURATION
// the content of #ifdef sections should not be indented
// the only exception being otherwise hard to read nested sections of #ifdef/#if/#defines
// as e.g. in ARMJIT_Memory.cpp
// prefer #ifdef/#ifndef, only use if defined(...) for complex expressions like this:
#if defined(THIS) || defined(THAT)
// ...
#endif
class MyClass // PascalCase
{
public: // access specfications are not indented
void Test(int param) // for methods the same rules apply as for functions
{
}
private:
int MemberVariable; // PascalCase, no prefix
};
#endif
#endif
enum
{
// enums should always have a common prefix in camelCase
// separated by an underscore with the item name
// which has to be in PascalCase
enumPrefix_FirstElement,
enumPrefix_SecondElement,
enumPrefix_ThirdElement,
enumPrefix_FourthElement,
};
}
```
Some additional notes:
* Keep the definition and initialisation of local variables in one place and keep the scope of local variables as small as possible.
**That means avoid code like this**:
```cpp
void ColorConvert(u32* dst, u16* vram)
{
u16 color;
u8 r, g, b;
int i;
for (i = 0; i < 256; i++)
{
color = vram[i];
r = (color & 0x001F) << 1;
g = (color & 0x03E0) >> 4;
b = (color & 0x7C00) >> 9;
dst[i] = r | (g << 8) | (b << 16);
}
}
```
**Do this instead:**
```cpp
void ColorConvert(u32* dst, u16* vram)
{
for (int i = 0; i < 256; i++)
{
u16 color = vram[i];
u8 r = (color & 0x001F) << 1;
u8 g = (color & 0x03E0) >> 4;
u8 b = (color & 0x7C00) >> 9;
dst[i] = r | (g << 8) | (b << 16);
}
}
```
* For integer types preferably use the explictly typed ones. We have short aliases for them defined in types.h (for unsigned types: `u8`, `u16`, `u32`, `u16`. For signed `s8`, `s16`, `s32`, `s64`). In some situations like loop variables, using `int` is possible as well.
* Don't overdo object oriented programming. Always try to use a simpler construct first, only use a polymorphic class if a namespace with functions in it doesn't cut it.
* In doubt put a namespace around your part of the code.
* C style strings (and the associated functions from the C standard library) are used in most places. We are thinking about changing this, as C strings are a bit of a hassle to deal with, but for the time being this is what we use.
* Only the C standard IO is used (so use `printf`, `fopen`, … Do not use `std::cout`/`std::ostream`, …).
* Complex C++ containers can be used (`std::vector`, `std::list`, `std::map`, …). `std::array` is usually not used, unless necessary so that the container can be used with other C++ constructs (e.g. `<algorithm>`). Only use them if a C array doesn't cut it.
* File names should be in PascalCase
* If a header file is called MyHeader.h it should be guarded with an ifdef like this:
```cpp
#ifndef MYHEADER_H
#define MYHEADER_H
// ...
#endif
```
* And at last, if you have any questions, visit us on IRC (see the readme)!

View File

@ -1,15 +1,10 @@
<p align="center"><img src="https://raw.githubusercontent.com/melonDS-emu/melonDS/master/res/icon/melon_128x128.png"></p>
<p align="center"><img src="https://raw.githubusercontent.com/StapleButter/melonDS/master/icon/melon_128x128.png"></p>
<h2 align="center"><b>melonDS</b></h2>
<p align="center">
<a href="http://melonds.kuribo64.net/" alt="melonDS website"><img src="https://img.shields.io/badge/website-melonds.kuribo64.net-%2331352e.svg"></a>
<a href="http://melonds.kuribo64.net/downloads.php" alt="Release: 0.9.5"><img src="https://img.shields.io/badge/release-0.9.5-%235c913b.svg"></a>
<a href="http://melonds.kuribo64.net/downloads.php" alt="Release: 0.8.3"><img src="https://img.shields.io/badge/release-0.8.3-%235c913b.svg"></a>
<a href="https://www.gnu.org/licenses/gpl-3.0" alt="License: GPLv3"><img src="https://img.shields.io/badge/License-GPL%20v3-%23ff554d.svg"></a>
<a href="https://kiwiirc.com/client/irc.badnik.net/?nick=IRC-Source_?#melonds" alt="IRC channel: #melonds"><img src="https://img.shields.io/badge/IRC%20chat-%23melonds-%23dd2e44.svg"></a>
<a href="https://discord.gg/pAMAtExcqV" alt="Discord"><img src="https://img.shields.io/badge/Discord-Kuribo64-7289da?logo=discord&logoColor=white"></a>
<br>
<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-windows.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-windows.yml/badge.svg" /></a>
<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-ubuntu.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-ubuntu.yml/badge.svg" /></a>
<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos.yml/badge.svg" /></a>
</p>
DS emulator, sorta
@ -18,7 +13,12 @@ The goal is to do things right and fast, akin to blargSNES (but hopefully better
## How to use
Firmware boot (not direct boot) requires a BIOS/firmware dump from an original DS or DS Lite.
melonDS requires BIOS/firmware copies from a DS. Files required:
* bios7.bin, 16KB: ARM7 BIOS
* bios9.bin, 4KB: ARM9 BIOS
* firmware.bin, 128/256/512KB: firmware
Firmware boot requires a firmware dump from an original DS or DS Lite.
DS firmwares dumped from a DSi or 3DS aren't bootable and only contain configuration data, thus they are only suitable when booting games directly.
### Possible firmware sizes
@ -32,20 +32,60 @@ DS BIOS dumps from a DSi or 3DS can be used with no compatibility issues. DSi BI
As for the rest, the interface should be pretty straightforward. If you have a question, don't hesitate to ask, though!
## How to build
See [BUILD.md](./BUILD.md) for build instructions.
### Linux:
* Install dependencies:
```sh
sudo apt-get install gtk+-3.0 libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev
```
* Compile:
```sh
mkdir -p build
cd build
cmake ..
make -j$(nproc --all)
```
### Windows:
* use CodeBlocks
#### MSYS2 and CMake
1. Install [MSYS2](https://www.msys2.org/)
2. Open the **MSYS2 MinGW 64-bit** terminal
3. Update the packages using `pacman -Syu` and reopen the terminal if it asks you to
4. Install dependencies: `pacman -S git make mingw-w64-x86_64-{cmake,mesa,SDL2,toolchain}`
5. Run the following commands
```bash
git clone https://github.com/Arisotura/melonDS.git
cd melonDS
mkdir build
cd build
cmake .. -G "MSYS Makefiles"
make -j$(nproc --all)
../msys-dist.sh
```
If everything went well, melonDS and the libraries it needs should now be in the `dist` folder.
## TODO LIST
* better DSi emulation
* better OpenGL rendering
* netplay
* DSi emulation
* the impossible quest of pixel-perfect 3D graphics
* improve libui and the emulator UI
* support for rendering screens to separate windows
* emulating some fancy addons
* other non-core shit (debugger, graphics viewers, etc)
* other non-core shit (debugger, graphics viewers, cheat crapo, etc)
### TODO LIST FOR LATER (low priority)
### TODO LIST FOR LATER
* better wifi
* maybe emulate flashcarts or other fancy hardware
* big-endian compatibility (Wii, etc)
* LCD refresh time (used by some games for blending effects)
* any feature you can eventually ask for that isn't outright stupid
@ -54,17 +94,12 @@ See [BUILD.md](./BUILD.md) for build instructions.
* Martin for GBAtek, a good piece of documentation
* Cydrak for the extra 3D GPU research
* limittox for the icon
* All of you comrades who have been testing melonDS, reporting issues, suggesting shit, etc
## Licenses
## License
[![GNU GPLv3 Image](https://www.gnu.org/graphics/gplv3-127x51.png)](http://www.gnu.org/licenses/gpl-3.0.en.html)
melonDS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
### External
* Images used in the Input Config Dialog - see `src/frontend/qt_sdl/InputConfig/resources/LICENSE.md`

View File

@ -1,115 +0,0 @@
include(FetchContent)
set(_DEFAULT_VCPKG_ROOT "${CMAKE_SOURCE_DIR}/vcpkg")
set(VCPKG_ROOT "${_DEFAULT_VCPKG_ROOT}" CACHE STRING "The path to the vcpkg repository")
if (VCPKG_ROOT STREQUAL "${_DEFAULT_VCPKG_ROOT}")
if (APPLE) # this doesn't work on non-macOS
file(LOCK "${_DEFAULT_VCPKG_ROOT}" DIRECTORY GUARD FILE)
endif()
FetchContent_Declare(vcpkg
GIT_REPOSITORY "https://github.com/Microsoft/vcpkg.git"
GIT_TAG 2ad004460f5db4d3b66f62f5799ff66c265c4b5d
EXCLUDE_FROM_ALL
SOURCE_DIR "${CMAKE_SOURCE_DIR}/vcpkg")
FetchContent_MakeAvailable(vcpkg)
endif()
set(VCPKG_OVERLAY_TRIPLETS "${CMAKE_SOURCE_DIR}/cmake/overlay-triplets")
option(USE_RECOMMENDED_TRIPLETS "Use the recommended triplets that are used for official builds" ON)
# Duplicated here because it needs to be set before project()
option(USE_QT6 "Use Qt 6 instead of Qt 5" ON)
# Since the Linux build pulls in glib anyway, we can just use upstream libslirp
if (UNIX AND NOT APPLE)
option(USE_SYSTEM_LIBSLIRP "Use system libslirp instead of the bundled version" ON)
endif()
if (NOT USE_QT6)
list(APPEND VCPKG_MANIFEST_FEATURES qt5)
set(VCPKG_MANIFEST_NO_DEFAULT_FEATURES ON)
endif()
if (CMAKE_OSX_ARCHITECTURES MATCHES ";")
message(FATAL_ERROR "macOS universal builds are not supported. Build them individually and combine afterwards instead.")
endif()
if (USE_RECOMMENDED_TRIPLETS)
execute_process(
COMMAND uname -m
OUTPUT_VARIABLE _HOST_PROCESSOR
OUTPUT_STRIP_TRAILING_WHITESPACE)
set(_CAN_TARGET_AS_HOST OFF)
if (APPLE)
if (NOT CMAKE_OSX_ARCHITECTURES)
if (_HOST_PROCESSOR STREQUAL arm64)
set(CMAKE_OSX_ARCHITECTURES arm64)
else()
set(CMAKE_OSX_ARCHITECTURES x86_64)
endif()
endif()
if (CMAKE_OSX_ARCHITECTURES STREQUAL arm64)
set(_WANTED_TRIPLET arm64-osx-11-release)
set(CMAKE_OSX_DEPLOYMENT_TARGET 11.0)
else()
set(_WANTED_TRIPLET x64-osx-1015-release)
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
endif()
elseif(WIN32)
# TODO Windows arm64 if possible
set(_CAN_TARGET_AS_HOST ON)
set(_WANTED_TRIPLET x64-mingw-static-release)
elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL Linux)
# Can't really detect cross compiling here.
set(_CAN_TARGET_AS_HOST ON)
if (_HOST_PROCESSOR STREQUAL x86_64)
set(_WANTED_TRIPLET x64-linux-release)
elseif(_HOST_PROCESSOR STREQUAL "aarch64")
set(_WANTED_TRIPLET arm64-linux-release)
endif()
endif()
# Don't override it if the user set something else
if (NOT VCPKG_TARGET_TRIPLET)
set(VCPKG_TARGET_TRIPLET "${_WANTED_TRIPLET}")
else()
set(_WANTED_TRIPLET "${VCPKG_TARGET_TRIPLET}")
endif()
if (APPLE)
if (_HOST_PROCESSOR MATCHES arm64)
if (_WANTED_TRIPLET MATCHES "^arm64-osx-")
set(_CAN_TARGET_AS_HOST ON)
elseif (_WANTED_TRIPLET STREQUAL "x64-osx-1015-release")
# Use the default triplet for when building for arm64
# because we're probably making a universal build
set(VCPKG_HOST_TRIPLET arm64-osx-11-release)
endif()
else()
if (_WANTED_TRIPLET MATCHES "^x64-osx-")
set(_CAN_TARGET_AS_HOST ON)
elseif (_WANTED_TRIPLET STREQUAL "arm64-osx-11-release")
set(VCPKG_HOST_TRIPLET x64-osx-1015-release)
endif()
endif()
endif()
# If host and target triplet differ, vcpkg seems to always assume that the host can't run the target's binaries.
# In cases like cross compiling from ARM -> Intel macOS, or target being an older version of the host OS, we *can* do that so the packages built targeting the host are redundant.
if (_CAN_TARGET_AS_HOST AND NOT VCPKG_HOST_TRIPLET)
option(VCPKG_TARGET_AS_HOST "Use the target as host triplet to speed up builds" ON)
else()
option(VCPKG_TARGET_AS_HOST "Use the target as host triplet to speed up builds" OFF)
endif()
if (VCPKG_TARGET_AS_HOST)
set(VCPKG_HOST_TRIPLET "${VCPKG_TARGET_TRIPLET}" CACHE STRING "Host triplet to use for vcpkg")
endif()
endif()
set(CMAKE_TOOLCHAIN_FILE "${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake")

View File

@ -1,9 +0,0 @@
if (CMAKE_C_COMPILER_ID STREQUAL GNU)
set(CMAKE_C_FLAGS_DEBUG_INIT "-g -Og")
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
set(CMAKE_CXX_FLAGS_DEBUG_INIT "-g -Og")
endif()
string(REPLACE "-O2" "-O3" CMAKE_C_FLAGS_RELEASE_INIT "${CMAKE_C_FLAGS_RELEASE_INIT}")
string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT}")

View File

@ -1,297 +0,0 @@
#.rst:
# ECMFindModuleHelpers
# --------------------
#
# Helper macros for find modules: ecm_find_package_version_check(),
# ecm_find_package_parse_components() and
# ecm_find_package_handle_library_components().
#
# ::
#
# ecm_find_package_version_check(<name>)
#
# Prints warnings if the CMake version or the project's required CMake version
# is older than that required by extra-cmake-modules.
#
# ::
#
# ecm_find_package_parse_components(<name>
# RESULT_VAR <variable>
# KNOWN_COMPONENTS <component1> [<component2> [...]]
# [SKIP_DEPENDENCY_HANDLING])
#
# This macro will populate <variable> with a list of components found in
# <name>_FIND_COMPONENTS, after checking that all those components are in the
# list of KNOWN_COMPONENTS; if there are any unknown components, it will print
# an error or warning (depending on the value of <name>_FIND_REQUIRED) and call
# return().
#
# The order of components in <variable> is guaranteed to match the order they
# are listed in the KNOWN_COMPONENTS argument.
#
# If SKIP_DEPENDENCY_HANDLING is not set, for each component the variable
# <name>_<component>_component_deps will be checked for dependent components.
# If <component> is listed in <name>_FIND_COMPONENTS, then all its (transitive)
# dependencies will also be added to <variable>.
#
# ::
#
# ecm_find_package_handle_library_components(<name>
# COMPONENTS <component> [<component> [...]]
# [SKIP_DEPENDENCY_HANDLING])
# [SKIP_PKG_CONFIG])
#
# Creates an imported library target for each component. The operation of this
# macro depends on the presence of a number of CMake variables.
#
# The <name>_<component>_lib variable should contain the name of this library,
# and <name>_<component>_header variable should contain the name of a header
# file associated with it (whatever relative path is normally passed to
# '#include'). <name>_<component>_header_subdir variable can be used to specify
# which subdirectory of the include path the headers will be found in.
# ecm_find_package_components() will then search for the library
# and include directory (creating appropriate cache variables) and create an
# imported library target named <name>::<component>.
#
# Additional variables can be used to provide additional information:
#
# If SKIP_PKG_CONFIG, the <name>_<component>_pkg_config variable is set, and
# pkg-config is found, the pkg-config module given by
# <name>_<component>_pkg_config will be searched for and used to help locate the
# library and header file. It will also be used to set
# <name>_<component>_VERSION.
#
# Note that if version information is found via pkg-config,
# <name>_<component>_FIND_VERSION can be set to require a particular version
# for each component.
#
# If SKIP_DEPENDENCY_HANDLING is not set, the INTERFACE_LINK_LIBRARIES property
# of the imported target for <component> will be set to contain the imported
# targets for the components listed in <name>_<component>_component_deps.
# <component>_FOUND will also be set to false if any of the compoments in
# <name>_<component>_component_deps are not found. This requires the components
# in <name>_<component>_component_deps to be listed before <component> in the
# COMPONENTS argument.
#
# The following variables will be set:
#
# ``<name>_TARGETS``
# the imported targets
# ``<name>_LIBRARIES``
# the found libraries
# ``<name>_INCLUDE_DIRS``
# the combined required include directories for the components
# ``<name>_DEFINITIONS``
# the "other" CFLAGS provided by pkg-config, if any
# ``<name>_VERSION``
# the value of ``<name>_<component>_VERSION`` for the first component that
# has this variable set (note that components are searched for in the order
# they are passed to the macro), although if it is already set, it will not
# be altered
#
# Note that these variables are never cleared, so if
# ecm_find_package_handle_library_components() is called multiple times with
# different components (typically because of multiple find_package() calls) then
# ``<name>_TARGETS``, for example, will contain all the targets found in any
# call (although no duplicates).
#
# Since pre-1.0.0.
#=============================================================================
# Copyright 2014 Alex Merry <alex.merry@kde.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeParseArguments)
macro(ecm_find_package_version_check module_name)
if(CMAKE_VERSION VERSION_LESS 2.8.12)
message(FATAL_ERROR "CMake 2.8.12 is required by Find${module_name}.cmake")
endif()
if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.12)
message(AUTHOR_WARNING "Your project should require at least CMake 2.8.12 to use Find${module_name}.cmake")
endif()
endmacro()
macro(ecm_find_package_parse_components module_name)
set(ecm_fppc_options SKIP_DEPENDENCY_HANDLING)
set(ecm_fppc_oneValueArgs RESULT_VAR)
set(ecm_fppc_multiValueArgs KNOWN_COMPONENTS DEFAULT_COMPONENTS)
cmake_parse_arguments(ECM_FPPC "${ecm_fppc_options}" "${ecm_fppc_oneValueArgs}" "${ecm_fppc_multiValueArgs}" ${ARGN})
if(ECM_FPPC_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unexpected arguments to ecm_find_package_parse_components: ${ECM_FPPC_UNPARSED_ARGUMENTS}")
endif()
if(NOT ECM_FPPC_RESULT_VAR)
message(FATAL_ERROR "Missing RESULT_VAR argument to ecm_find_package_parse_components")
endif()
if(NOT ECM_FPPC_KNOWN_COMPONENTS)
message(FATAL_ERROR "Missing KNOWN_COMPONENTS argument to ecm_find_package_parse_components")
endif()
if(NOT ECM_FPPC_DEFAULT_COMPONENTS)
set(ECM_FPPC_DEFAULT_COMPONENTS ${ECM_FPPC_KNOWN_COMPONENTS})
endif()
if(${module_name}_FIND_COMPONENTS)
set(ecm_fppc_requestedComps ${${module_name}_FIND_COMPONENTS})
if(NOT ECM_FPPC_SKIP_DEPENDENCY_HANDLING)
# Make sure deps are included
foreach(ecm_fppc_comp ${ecm_fppc_requestedComps})
foreach(ecm_fppc_dep_comp ${${module_name}_${ecm_fppc_comp}_component_deps})
list(FIND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}" ecm_fppc_index)
if("${ecm_fppc_index}" STREQUAL "-1")
if(NOT ${module_name}_FIND_QUIETLY)
message(STATUS "${module_name}: ${ecm_fppc_comp} requires ${${module_name}_${ecm_fppc_comp}_component_deps}")
endif()
list(APPEND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}")
endif()
endforeach()
endforeach()
else()
message(STATUS "Skipping dependency handling for ${module_name}")
endif()
list(REMOVE_DUPLICATES ecm_fppc_requestedComps)
# This makes sure components are listed in the same order as
# KNOWN_COMPONENTS (potentially important for inter-dependencies)
set(${ECM_FPPC_RESULT_VAR})
foreach(ecm_fppc_comp ${ECM_FPPC_KNOWN_COMPONENTS})
list(FIND ecm_fppc_requestedComps "${ecm_fppc_comp}" ecm_fppc_index)
if(NOT "${ecm_fppc_index}" STREQUAL "-1")
list(APPEND ${ECM_FPPC_RESULT_VAR} "${ecm_fppc_comp}")
list(REMOVE_AT ecm_fppc_requestedComps ${ecm_fppc_index})
endif()
endforeach()
# if there are any left, they are unknown components
if(ecm_fppc_requestedComps)
set(ecm_fppc_msgType STATUS)
if(${module_name}_FIND_REQUIRED)
set(ecm_fppc_msgType FATAL_ERROR)
endif()
if(NOT ${module_name}_FIND_QUIETLY)
message(${ecm_fppc_msgType} "${module_name}: requested unknown components ${ecm_fppc_requestedComps}")
endif()
return()
endif()
else()
set(${ECM_FPPC_RESULT_VAR} ${ECM_FPPC_DEFAULT_COMPONENTS})
endif()
endmacro()
macro(ecm_find_package_handle_library_components module_name)
set(ecm_fpwc_options SKIP_PKG_CONFIG SKIP_DEPENDENCY_HANDLING)
set(ecm_fpwc_oneValueArgs)
set(ecm_fpwc_multiValueArgs COMPONENTS)
cmake_parse_arguments(ECM_FPWC "${ecm_fpwc_options}" "${ecm_fpwc_oneValueArgs}" "${ecm_fpwc_multiValueArgs}" ${ARGN})
if(ECM_FPWC_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unexpected arguments to ecm_find_package_handle_components: ${ECM_FPWC_UNPARSED_ARGUMENTS}")
endif()
if(NOT ECM_FPWC_COMPONENTS)
message(FATAL_ERROR "Missing COMPONENTS argument to ecm_find_package_handle_components")
endif()
include(FindPackageHandleStandardArgs)
find_package(PkgConfig)
foreach(ecm_fpwc_comp ${ECM_FPWC_COMPONENTS})
set(ecm_fpwc_dep_vars)
set(ecm_fpwc_dep_targets)
if(NOT SKIP_DEPENDENCY_HANDLING)
foreach(ecm_fpwc_dep ${${module_name}_${ecm_fpwc_comp}_component_deps})
list(APPEND ecm_fpwc_dep_vars "${module_name}_${ecm_fpwc_dep}_FOUND")
list(APPEND ecm_fpwc_dep_targets "${module_name}::${ecm_fpwc_dep}")
endforeach()
endif()
if(NOT ECM_FPWC_SKIP_PKG_CONFIG AND ${module_name}_${ecm_fpwc_comp}_pkg_config)
pkg_check_modules(PKG_${module_name}_${ecm_fpwc_comp} QUIET
${${module_name}_${ecm_fpwc_comp}_pkg_config})
endif()
find_path(${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
NAMES ${${module_name}_${ecm_fpwc_comp}_header}
HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_INCLUDE_DIRS}
PATH_SUFFIXES ${${module_name}_${ecm_fpwc_comp}_header_subdir}
)
find_library(${module_name}_${ecm_fpwc_comp}_LIBRARY
NAMES ${${module_name}_${ecm_fpwc_comp}_lib}
HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_LIBRARY_DIRS}
)
set(${module_name}_${ecm_fpwc_comp}_VERSION "${PKG_${module_name}_${ecm_fpwc_comp}_VERSION}")
if(NOT ${module_name}_VERSION)
set(${module_name}_VERSION ${${module_name}_${ecm_fpwc_comp}_VERSION})
endif()
find_package_handle_standard_args(${module_name}_${ecm_fpwc_comp}
FOUND_VAR
${module_name}_${ecm_fpwc_comp}_FOUND
REQUIRED_VARS
${module_name}_${ecm_fpwc_comp}_LIBRARY
${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
${ecm_fpwc_dep_vars}
VERSION_VAR
${module_name}_${ecm_fpwc_comp}_VERSION
)
mark_as_advanced(
${module_name}_${ecm_fpwc_comp}_LIBRARY
${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
)
if(${module_name}_${ecm_fpwc_comp}_FOUND)
list(APPEND ${module_name}_LIBRARIES
"${${module_name}_${ecm_fpwc_comp}_LIBRARY}")
list(APPEND ${module_name}_INCLUDE_DIRS
"${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}")
set(${module_name}_DEFINITIONS
${${module_name}_DEFINITIONS}
${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS})
if(NOT TARGET ${module_name}::${ecm_fpwc_comp})
add_library(${module_name}::${ecm_fpwc_comp} UNKNOWN IMPORTED)
set_target_properties(${module_name}::${ecm_fpwc_comp} PROPERTIES
IMPORTED_LOCATION "${${module_name}_${ecm_fpwc_comp}_LIBRARY}"
INTERFACE_COMPILE_OPTIONS "${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS}"
INTERFACE_INCLUDE_DIRECTORIES "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}"
INTERFACE_LINK_LIBRARIES "${ecm_fpwc_dep_targets}"
)
endif()
list(APPEND ${module_name}_TARGETS
"${module_name}::${ecm_fpwc_comp}")
endif()
endforeach()
if(${module_name}_LIBRARIES)
list(REMOVE_DUPLICATES ${module_name}_LIBRARIES)
endif()
if(${module_name}_INCLUDE_DIRS)
list(REMOVE_DUPLICATES ${module_name}_INCLUDE_DIRS)
endif()
if(${module_name}_DEFINITIONS)
list(REMOVE_DUPLICATES ${module_name}_DEFINITIONS)
endif()
if(${module_name}_TARGETS)
list(REMOVE_DUPLICATES ${module_name}_TARGETS)
endif()
endmacro()

View File

@ -1 +0,0 @@
include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpers.cmake)

View File

@ -1,172 +0,0 @@
#.rst:
# FindEGL
# -------
#
# Try to find EGL.
#
# This will define the following variables:
#
# ``EGL_FOUND``
# True if (the requested version of) EGL is available
# ``EGL_VERSION``
# The version of EGL; note that this is the API version defined in the
# headers, rather than the version of the implementation (eg: Mesa)
# ``EGL_LIBRARIES``
# This can be passed to target_link_libraries() instead of the ``EGL::EGL``
# target
# ``EGL_INCLUDE_DIRS``
# This should be passed to target_include_directories() if the target is not
# used for linking
# ``EGL_DEFINITIONS``
# This should be passed to target_compile_options() if the target is not
# used for linking
#
# If ``EGL_FOUND`` is TRUE, it will also define the following imported target:
#
# ``EGL::EGL``
# The EGL library
#
# In general we recommend using the imported target, as it is easier to use.
# Bear in mind, however, that if the target is in the link interface of an
# exported library, it must be made available by the package config file.
#
# Since pre-1.0.0.
#=============================================================================
# Copyright 2014 Alex Merry <alex.merry@kde.org>
# Copyright 2014 Martin Gräßlin <mgraesslin@kde.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpersStub.cmake)
include(CheckCXXSourceCompiles)
include(CMakePushCheckState)
ecm_find_package_version_check(EGL)
# Use pkg-config to get the directories and then use these values
# in the FIND_PATH() and FIND_LIBRARY() calls
find_package(PkgConfig)
pkg_check_modules(PKG_EGL QUIET egl)
set(EGL_DEFINITIONS ${PKG_EGL_CFLAGS_OTHER})
find_path(EGL_INCLUDE_DIR
NAMES
EGL/egl.h
HINTS
${PKG_EGL_INCLUDE_DIRS}
)
find_library(EGL_LIBRARY
NAMES
EGL
HINTS
${PKG_EGL_LIBRARY_DIRS}
)
# NB: We do *not* use the version information from pkg-config, as that
# is the implementation version (eg: the Mesa version)
if(EGL_INCLUDE_DIR)
# egl.h has defines of the form EGL_VERSION_x_y for each supported
# version; so the header for EGL 1.1 will define EGL_VERSION_1_0 and
# EGL_VERSION_1_1. Finding the highest supported version involves
# finding all these defines and selecting the highest numbered.
file(READ "${EGL_INCLUDE_DIR}/EGL/egl.h" _EGL_header_contents)
string(REGEX MATCHALL
"[ \t]EGL_VERSION_[0-9_]+"
_EGL_version_lines
"${_EGL_header_contents}"
)
unset(_EGL_header_contents)
foreach(_EGL_version_line ${_EGL_version_lines})
string(REGEX REPLACE
"[ \t]EGL_VERSION_([0-9_]+)"
"\\1"
_version_candidate
"${_EGL_version_line}"
)
string(REPLACE "_" "." _version_candidate "${_version_candidate}")
if(NOT DEFINED EGL_VERSION OR EGL_VERSION VERSION_LESS _version_candidate)
set(EGL_VERSION "${_version_candidate}")
endif()
endforeach()
unset(_EGL_version_lines)
endif()
cmake_push_check_state(RESET)
list(APPEND CMAKE_REQUIRED_LIBRARIES "${EGL_LIBRARY}")
list(APPEND CMAKE_REQUIRED_INCLUDES "${EGL_INCLUDE_DIR}")
check_cxx_source_compiles("
#include <EGL/egl.h>
int main(int argc, char *argv[]) {
EGLint x = 0; EGLDisplay dpy = 0; EGLContext ctx = 0;
eglDestroyContext(dpy, ctx);
}" HAVE_EGL)
cmake_pop_check_state()
set(required_vars EGL_INCLUDE_DIR HAVE_EGL)
if(NOT EMSCRIPTEN)
list(APPEND required_vars EGL_LIBRARY)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(EGL
FOUND_VAR
EGL_FOUND
REQUIRED_VARS
${required_vars}
VERSION_VAR
EGL_VERSION
)
if(EGL_FOUND AND NOT TARGET EGL::EGL)
if (EMSCRIPTEN)
add_library(EGL::EGL INTERFACE IMPORTED)
# Nothing further to be done, system include paths have headers and linkage is implicit.
else()
add_library(EGL::EGL UNKNOWN IMPORTED)
set_target_properties(EGL::EGL PROPERTIES
IMPORTED_LOCATION "${EGL_LIBRARY}"
INTERFACE_COMPILE_OPTIONS "${EGL_DEFINITIONS}"
INTERFACE_INCLUDE_DIRECTORIES "${EGL_INCLUDE_DIR}"
)
endif()
endif()
mark_as_advanced(EGL_LIBRARY EGL_INCLUDE_DIR HAVE_EGL)
# compatibility variables
set(EGL_LIBRARIES ${EGL_LIBRARY})
set(EGL_INCLUDE_DIRS ${EGL_INCLUDE_DIR})
set(EGL_VERSION_STRING ${EGL_VERSION})
include(FeatureSummary)
set_package_properties(EGL PROPERTIES
URL "https://www.khronos.org/egl/"
DESCRIPTION "A platform-agnostic mechanism for creating rendering surfaces for use with other graphics libraries, such as OpenGL|ES and OpenVG."
)

View File

@ -1,48 +0,0 @@
# - Try to find enet
# Once done this will define
#
# ENET_FOUND - system has enet
# ENET_INCLUDE_DIRS - the enet include directory
# ENET_LIBRARIES - the libraries needed to use enet
#
# $ENETDIR is an environment variable used for finding enet.
#
# Borrowed from The Mana World
# http://themanaworld.org/
#
# Several changes and additions by Fabian 'x3n' Landau
# Lots of simplifications by Adrian Friedli
# > www.orxonox.net <
FIND_PATH(ENET_INCLUDE_DIRS enet/enet.h
PATHS
$ENV{ENETDIR}
/usr/local
/usr
PATH_SUFFIXES include
)
FIND_LIBRARY(ENET_LIBRARY
NAMES enet
PATHS
$ENV{ENETDIR}
/usr/local
/usr
PATH_SUFFIXES lib
)
# handle the QUIETLY and REQUIRED arguments and set ENET_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(ENet DEFAULT_MSG ENET_LIBRARY ENET_INCLUDE_DIRS)
IF (ENET_FOUND)
IF(WIN32)
SET(WINDOWS_ENET_DEPENDENCIES "ws2_32;winmm")
SET(ENET_LIBRARIES ${ENET_LIBRARY} ${WINDOWS_ENET_DEPENDENCIES})
ELSE(WIN32)
SET(ENET_LIBRARIES ${ENET_LIBRARY})
ENDIF(WIN32)
ENDIF (ENET_FOUND)
MARK_AS_ADVANCED(ENET_LIBRARY ENET_LIBRARIES ENET_INCLUDE_DIRS)

View File

@ -1,10 +0,0 @@
find_path(VTUNE_PATH "")
set(VTUNE_INCLUDE_DIR "${VTUNE_PATH}/include")
if (WIN32)
set(VTUNE_LIBRARY "${VTUNE_PATH}/lib64/jitprofiling.lib")
else()
set(VTUNE_LIBRARY "${VTUNE_PATH}/lib64/jitprofiling.a")
endif()

View File

@ -1,35 +0,0 @@
# The entire codebase quite reasonably does things like #include <SDL2/SDL.h> or <epoxy/gl.h>
# CMake apparently doesn't think you should be doing this, so just includes $PREFIX/include/packagename for a given
# package as include directories when using `target_link_libraries` with an imported target, this hacky function fixes
# that up so includes can keep working as they always did but we can still use fancy imported targets.
# This is stupid.
function(fix_interface_includes)
foreach (target ${ARGN})
set(NEW_DIRS)
get_target_property(DIRS "${target}" INTERFACE_INCLUDE_DIRECTORIES)
if (NOT DIRS)
continue()
endif()
foreach (DIR ${DIRS})
get_filename_component(PARENT_DIR "${DIR}" DIRECTORY)
if (PARENT_DIR MATCHES "include$")
list(APPEND NEW_DIRS "${PARENT_DIR}")
endif()
# HACK
# The libarchive pkg-config file in MSYS2 seems to include a UNIX-style path for its
# include directory and CMake doesn't like that.
if (WIN32 AND MINGW AND target STREQUAL PkgConfig::LibArchive)
list(FILTER DIRS EXCLUDE REGEX "^/[^.]+64/.*")
endif()
endforeach()
list(APPEND DIRS ${NEW_DIRS})
set_target_properties("${target}" PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${DIRS}")
endforeach()
endfunction()

View File

@ -1,8 +0,0 @@
set(SANITIZE "" CACHE STRING "Sanitizers to enable.")
string(REGEX MATCHALL "[^,]+" ENABLED_SANITIZERS "${SANITIZE}")
foreach(SANITIZER ${ENABLED_SANITIZERS})
add_compile_options("-fsanitize=${SANITIZER}")
add_link_options("-fsanitize=${SANITIZER}")
endforeach()

View File

@ -1,19 +0,0 @@
include(FindPackageMessage)
find_program(CCACHE "ccache")
cmake_dependent_option(USE_CCACHE "Use CCache to speed up repeated builds." ON CCACHE OFF)
if (NOT CCACHE OR NOT USE_CCACHE)
return()
endif()
# Fedora, and probably also Red Hat-based distros in general, use CCache by default if it's installed on the system.
# We'll try to detect this here, and exit if that's the case.
# Trying to launch ccache with ccache as we'd otherwise do seems to cause build issues.
if (CMAKE_C_COMPILER MATCHES "ccache" OR CMAKE_CXX_COMPILER MATCHES "ccache")
return()
endif()
find_package_message(CCache "Using CCache to speed up compilation" "${USE_CCACHE}")
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE}")

View File

@ -1,12 +0,0 @@
# Toolchain file for building with Homebrew's LLVM on macOS
# This is useful on 10.14 where std::filesystem is not supported.
set(CMAKE_C_COMPILER /usr/local/opt/llvm/bin/clang)
set(CMAKE_CXX_COMPILER /usr/local/opt/llvm/bin/clang++)
add_link_options(-L/usr/local/opt/llvm/lib)
# LLVM in Homebrew is built with latest Xcode which has a newer linker than
# what is bundled in the default install of Xcode Command Line Tools, so we
# override it to prevent it passing flags not supported by the system's ld.
add_link_options(-mlinker-version=450)

View File

@ -1,12 +0,0 @@
set(VCPKG_TARGET_ARCHITECTURE arm64)
set(VCPKG_CRT_LINKAGE dynamic)
set(VCPKG_LIBRARY_LINKAGE static)
set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
set(VCPKG_CMAKE_SYSTEM_VERSION 11.0)
set(VCPKG_OSX_ARCHITECTURES arm64)
set(VCPKG_BUILD_TYPE release)
set(VCPKG_OSX_DEPLOYMENT_TARGET 11.0)
set(VCPKG_C_FLAGS -mmacosx-version-min=11.0)
set(VCPKG_CXX_FLAGS -mmacosx-version-min=11.0)

View File

@ -1,7 +0,0 @@
set(VCPKG_TARGET_ARCHITECTURE x64)
set(VCPKG_CRT_LINKAGE dynamic)
set(VCPKG_LIBRARY_LINKAGE static)
set(VCPKG_ENV_PASSTHROUGH PATH)
set(VCPKG_BUILD_TYPE release)
set(VCPKG_CMAKE_SYSTEM_NAME MinGW)

View File

@ -1,12 +0,0 @@
set(VCPKG_TARGET_ARCHITECTURE x64)
set(VCPKG_CRT_LINKAGE dynamic)
set(VCPKG_LIBRARY_LINKAGE static)
set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
set(VCPKG_CMAKE_SYSTEM_VERSION 10.15)
set(VCPKG_OSX_ARCHITECTURES x86_64)
set(VCPKG_BUILD_TYPE release)
set(VCPKG_OSX_DEPLOYMENT_TARGET 10.15)
set(VCPKG_C_FLAGS -mmacosx-version-min=10.15)
set(VCPKG_CXX_FLAGS -mmacosx-version-min=10.15)

View File

@ -1,61 +0,0 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1739020877,
"narHash": "sha256-mIvECo/NNdJJ/bXjNqIh8yeoSjVLAuDuTUzAo7dzs8Y=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "a79cfe0ebd24952b580b1cf08cd906354996d547",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

109
flake.nix
View File

@ -1,109 +0,0 @@
{
description = "Nintendo DS emulator";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, flake-utils }: flake-utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs { inherit system; };
inherit (pkgs.lib) cmakeBool optionals makeLibraryPath;
inherit (pkgs.stdenv) isLinux isDarwin;
revision = with self; if sourceInfo?dirtyRev
then sourceInfo.dirtyRev
else sourceInfo.rev;
shortRevision = with self; if sourceInfo?dirtyShortRev
then sourceInfo.dirtyShortRev
else sourceInfo.shortRev;
melonDS = pkgs.stdenv.mkDerivation {
pname = "melonDS";
version = "1.0-${shortRevision}";
src = ./.;
nativeBuildInputs = with pkgs; [
cmake
ninja
pkg-config
qt6.wrapQtAppsHook
];
buildInputs = (with pkgs; [
qt6.qtbase
qt6.qtmultimedia
SDL2
zstd
libarchive
libGL
libslirp
enet
]) ++ optionals (!isDarwin) (with pkgs; [
kdePackages.extra-cmake-modules
qt6.qtwayland
wayland
]);
cmakeFlags = [
(cmakeBool "USE_QT6" true)
(cmakeBool "USE_SYSTEM_LIBSLIRP" true)
(cmakeBool "MELONDS_EMBED_BUILD_INFO" true)
];
env.MELONDS_GIT_HASH = revision;
env.MELONDS_GIT_BRANCH = "(unknown)";
env.MELONDS_BUILD_PROVIDER = "Nix";
qtWrapperArgs = optionals isLinux [
"--prefix LD_LIBRARY_PATH : ${makeLibraryPath [ pkgs.libpcap pkgs.wayland ]}"
] ++ optionals isDarwin [
"--prefix DYLD_LIBRARY_PATH : ${makeLibraryPath [ pkgs.libpcap ]}"
];
passthru = {
exePath = if isDarwin then
"/Applications/melonDS.app/Contents/MacOS/melonDS"
else "/bin/melonDS";
};
};
in {
packages.default = melonDS;
apps.default = flake-utils.lib.mkApp {
drv = self.packages.${system}.default;
};
devShells = {
default = pkgs.mkShell {
inputsFrom = [ self.packages.${system}.default ];
packages = with pkgs; [
qt6.qttools
];
};
# Shell for building static melonDS release builds with vcpkg
# Use mkShellNoCC to ensure Nix's gcc/clang and stdlib isn't used
vcpkg = pkgs.mkShellNoCC {
packages = with pkgs; [
autoconf
autoconf-archive
automake
cmake
cups.dev # Needed by qtbase despite not enabling print support
git
iconv.dev
libtool
ninja
pkg-config
python3
];
# Undo the SDK setup done by nixpkgs so we can use AppleClang
shellHook = ''
unset DEVELOPER_DIR SDKROOT MACOSX_DEPLOYMENT_TARGET
'';
};
};
}
);
}

View File

@ -0,0 +1,8 @@
[Desktop Entry]
Name=melonDS
Comment=Nintendo DS emulator
Exec=melonDS
Type=Application
Categories=Game;
Terminal=false
Icon=net.kuribo64.melonds

View File

@ -0,0 +1,31 @@
---
app-id: net.kuribo64.melonds
runtime: org.freedesktop.Platform
runtime-version: '18.08'
sdk: org.freedesktop.Sdk
command: melonDS
finish-args:
- "--share=ipc"
- "--socket=x11"
- "--socket=pulseaudio"
- "--share=network"
- "--device=all"
- "--filesystem=home"
modules:
- name: libpcap
sources:
- type: archive
url: http://www.tcpdump.org/release/libpcap-1.9.0.tar.gz
sha256: 2edb88808e5913fdaa8e9c1fcaf272e19b2485338742b5074b9fe44d68f37019
- name: melonds
buildsystem: cmake-ninja
sources:
- type: git
url: https://github.com/StapleButter/melonDS.git
commit: d4d4965b2fffc69958685a25a9d9fc0c78b54567
- type: file
path: net.kuribo64.melonds.desktop
post-install:
- "desktop-file-install --dir=/app/share/applications net.kuribo64.melonds.desktop"
- "install -D icon/melon_256x256.png /app/share/icons/hicolor/256x256/apps/net.kuribo64.melonds.png"

View File

@ -1,17 +0,0 @@
TC_PREFIX = /home/exophase/pandora-dev
PREFIX = $(TC_PREFIX)/arm-2011.03
AS = $(PREFIX)/bin/arm-none-linux-gnueabi-gcc
OBJCOPY = $(PREFIX)/bin/arm-none-linux-gnueabi-objcopy
BIN_ARM7 = drastic_bios_arm7
BIN_ARM9 = drastic_bios_arm9
all:
$(AS) bios_common.S -DBIOS_ARM7 -march=armv4 -c -Wa,-asl=$(BIN_ARM7).list -o $(BIN_ARM7).o
$(AS) bios_common.S -DBIOS_ARM9 -march=armv5 -c -Wa,-asl=$(BIN_ARM9).list -o $(BIN_ARM9).o
$(OBJCOPY) -O binary $(BIN_ARM7).o $(BIN_ARM7).bin
$(OBJCOPY) -O binary $(BIN_ARM9).o $(BIN_ARM9).bin
clean:
rm -f $(BIN_ARM7).bin $(BIN_ARM7).o $(BIN_ARM9).bin $(BIN_ARM9).o

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

View File

@ -1,36 +0,0 @@
Custom NDS ARM7/ARM9 BIOS replacement
Copyright (c) 2013, Gilead Kutnick
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1) Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2) Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-- Info --
This archive contains source code and assembly for a custom BIOS replacement
for the Nintendo DS system. This code is in no way affiliated with Nintendo
and is not derived from Nintendo's BIOS implementation but has been implemented
using publically available documentation.
It can be assembled using the included Makefile along with a proper ARM gcc
toolchain. Change the first four lines to point to the proper toolchain of your
choice.

BIN
icon/melon_128x128.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

BIN
icon/melon_16x16.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 730 B

BIN
icon/melon_256x256.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

BIN
icon/melon_32x32.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
icon/melon_48x48.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

BIN
icon/melon_64x64.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

View File

Before

Width:  |  Height:  |  Size: 121 KiB

After

Width:  |  Height:  |  Size: 121 KiB

View File

@ -2,12 +2,12 @@
#define VFT_APP 0x00000001L
//this will set your .exe icon
100 ICON MOVEABLE PURE LOADONCALL DISCARDABLE "${CMAKE_SOURCE_DIR}/res/melon.ico"
100 ICON MOVEABLE PURE LOADONCALL DISCARDABLE "melon.ico"
//include version information in .exe, modify these values to match your needs
1 VERSIONINFO
FILEVERSION ${MELON_RC_VERSION}
PRODUCTVERSION ${MELON_RC_VERSION}
FILEVERSION 0,8,3,0
PRODUCTVERSION 0,8,3,0
FILETYPE VFT_APP
{
BLOCK "StringFileInfo"
@ -15,14 +15,14 @@ FILETYPE VFT_APP
BLOCK "040904E4"
{
VALUE "CompanyName", "Melon Factory of Kuribo64"
VALUE "FileVersion", "${melonDS_VERSION}"
VALUE "FileDescription", "melonDS emulator"
VALUE "FileVersion", "0.8.3"
VALUE "FileDescription", "DS emulator, sorta. also 1st quality melon."
VALUE "InternalName", "SDnolem"
VALUE "LegalCopyright", "2016-2023 melonDS team"
VALUE "LegalCopyright", "2016-2019 Arisotura & co."
VALUE "LegalTrademarks", ""
VALUE "OriginalFilename", "melonDS.exe"
VALUE "OriginalFilename", "zafkflzdasd.exe"
VALUE "ProductName", "melonDS"
VALUE "ProductVersion", "${melonDS_VERSION}"
VALUE "ProductVersion", "0.8.3"
}
}
BLOCK "VarFileInfo"

262
melonDS.cbp Normal file
View File

@ -0,0 +1,262 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<CodeBlocks_project_file>
<FileVersion major="1" minor="6" />
<Project>
<Option title="melonDS" />
<Option pch_mode="2" />
<Option compiler="gcc" />
<Build>
<Target title="Debug Windows">
<Option platforms="Windows;" />
<Option output="bin/Debug/melonDS" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Debug/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-m64" />
<Add option="-gdwarf-2" />
<Add option="-D_FILE_OFFSET_BITS=64" />
</Compiler>
<Linker>
<Add option="-m64" />
</Linker>
</Target>
<Target title="Release Windows">
<Option platforms="Windows;" />
<Option output="bin/Release/melonDS" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Release/" />
<Option type="0" />
<Option compiler="gcc" />
<Compiler>
<Add option="-O3" />
<Add option="-m64" />
<Add option="-flto" />
<Add option="-D_FILE_OFFSET_BITS=64" />
</Compiler>
<Linker>
<Add option="-s" />
<Add option="-m64" />
</Linker>
</Target>
<Target title="DebugFast Windows">
<Option platforms="Windows;" />
<Option output="bin/DebugFast/melonDS" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/DebugFast/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-O3" />
<Add option="-m64" />
<Add option="-D_FILE_OFFSET_BITS=64" />
</Compiler>
<Linker>
<Add option="-m64" />
</Linker>
</Target>
</Build>
<Compiler>
<Add option="-Wall" />
<Add option="-fexceptions" />
<Add option="-pipe" />
<Add directory="src" />
</Compiler>
<Linker>
<Add library="SDL2" />
<Add library="shell32" />
<Add library="comctl32" />
<Add library="comdlg32" />
<Add library="advapi32" />
<Add library="wsock32" />
<Add library="oleacc" />
<Add library="ole32" />
<Add library="usp10" />
<Add library="gdi32" />
<Add library="d2d1" />
<Add library="dwrite" />
<Add library="uxtheme" />
<Add library="iphlpapi" />
<Add library="user32" />
<Add library="ws2_32" />
<Add library="opengl32" />
</Linker>
<Unit filename="melon.rc">
<Option compilerVar="WINDRES" />
</Unit>
<Unit filename="src/ARM.cpp" />
<Unit filename="src/ARM.h" />
<Unit filename="src/ARMInterpreter.cpp" />
<Unit filename="src/ARMInterpreter.h" />
<Unit filename="src/ARMInterpreter_ALU.cpp" />
<Unit filename="src/ARMInterpreter_ALU.h" />
<Unit filename="src/ARMInterpreter_Branch.cpp" />
<Unit filename="src/ARMInterpreter_Branch.h" />
<Unit filename="src/ARMInterpreter_LoadStore.cpp" />
<Unit filename="src/ARMInterpreter_LoadStore.h" />
<Unit filename="src/ARM_InstrTable.h" />
<Unit filename="src/CP15.cpp" />
<Unit filename="src/CRC32.cpp" />
<Unit filename="src/CRC32.h" />
<Unit filename="src/Config.cpp" />
<Unit filename="src/Config.h" />
<Unit filename="src/DMA.cpp" />
<Unit filename="src/DMA.h" />
<Unit filename="src/FIFO.h" />
<Unit filename="src/GPU.cpp" />
<Unit filename="src/GPU.h" />
<Unit filename="src/GPU2D.cpp" />
<Unit filename="src/GPU2D.h" />
<Unit filename="src/GPU3D.cpp" />
<Unit filename="src/GPU3D.h" />
<Unit filename="src/GPU3D_OpenGL.cpp" />
<Unit filename="src/GPU3D_OpenGL_shaders.h" />
<Unit filename="src/GPU3D_Soft.cpp" />
<Unit filename="src/NDS.cpp" />
<Unit filename="src/NDS.h" />
<Unit filename="src/NDSCart.cpp" />
<Unit filename="src/NDSCart.h" />
<Unit filename="src/OpenGLSupport.cpp" />
<Unit filename="src/OpenGLSupport.h" />
<Unit filename="src/Platform.h" />
<Unit filename="src/RTC.cpp" />
<Unit filename="src/RTC.h" />
<Unit filename="src/SPI.cpp" />
<Unit filename="src/SPI.h" />
<Unit filename="src/SPU.cpp" />
<Unit filename="src/SPU.h" />
<Unit filename="src/Savestate.cpp" />
<Unit filename="src/Savestate.h" />
<Unit filename="src/Wifi.cpp" />
<Unit filename="src/Wifi.h" />
<Unit filename="src/WifiAP.cpp" />
<Unit filename="src/WifiAP.h" />
<Unit filename="src/libui_sdl/DlgAudioSettings.cpp" />
<Unit filename="src/libui_sdl/DlgAudioSettings.h" />
<Unit filename="src/libui_sdl/DlgEmuSettings.cpp" />
<Unit filename="src/libui_sdl/DlgEmuSettings.h" />
<Unit filename="src/libui_sdl/DlgInputConfig.cpp" />
<Unit filename="src/libui_sdl/DlgInputConfig.h" />
<Unit filename="src/libui_sdl/DlgVideoSettings.cpp" />
<Unit filename="src/libui_sdl/DlgVideoSettings.h" />
<Unit filename="src/libui_sdl/DlgWifiSettings.cpp" />
<Unit filename="src/libui_sdl/DlgWifiSettings.h" />
<Unit filename="src/libui_sdl/LAN_PCap.cpp" />
<Unit filename="src/libui_sdl/LAN_PCap.h" />
<Unit filename="src/libui_sdl/LAN_Socket.cpp" />
<Unit filename="src/libui_sdl/LAN_Socket.h" />
<Unit filename="src/libui_sdl/OSD.cpp" />
<Unit filename="src/libui_sdl/OSD.h" />
<Unit filename="src/libui_sdl/Platform.cpp" />
<Unit filename="src/libui_sdl/PlatformConfig.cpp" />
<Unit filename="src/libui_sdl/PlatformConfig.h" />
<Unit filename="src/libui_sdl/font.h" />
<Unit filename="src/libui_sdl/libui/common/areaevents.c">
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/libui_sdl/libui/common/control.c">
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/libui_sdl/libui/common/controlsigs.h" />
<Unit filename="src/libui_sdl/libui/common/debug.c">
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/libui_sdl/libui/common/matrix.c">
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/libui_sdl/libui/common/shouldquit.c">
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/libui_sdl/libui/common/uipriv.h" />
<Unit filename="src/libui_sdl/libui/common/userbugs.c">
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/libui_sdl/libui/ui.h" />
<Unit filename="src/libui_sdl/libui/ui_windows.h" />
<Unit filename="src/libui_sdl/libui/windows/_uipriv_migrate.hpp" />
<Unit filename="src/libui_sdl/libui/windows/alloc.cpp" />
<Unit filename="src/libui_sdl/libui/windows/area.cpp" />
<Unit filename="src/libui_sdl/libui/windows/area.hpp" />
<Unit filename="src/libui_sdl/libui/windows/areadraw.cpp" />
<Unit filename="src/libui_sdl/libui/windows/areaevents.cpp" />
<Unit filename="src/libui_sdl/libui/windows/areascroll.cpp" />
<Unit filename="src/libui_sdl/libui/windows/areautil.cpp" />
<Unit filename="src/libui_sdl/libui/windows/box.cpp" />
<Unit filename="src/libui_sdl/libui/windows/button.cpp" />
<Unit filename="src/libui_sdl/libui/windows/checkbox.cpp" />
<Unit filename="src/libui_sdl/libui/windows/colorbutton.cpp" />
<Unit filename="src/libui_sdl/libui/windows/colordialog.cpp" />
<Unit filename="src/libui_sdl/libui/windows/combobox.cpp" />
<Unit filename="src/libui_sdl/libui/windows/compilerver.hpp" />
<Unit filename="src/libui_sdl/libui/windows/container.cpp" />
<Unit filename="src/libui_sdl/libui/windows/control.cpp" />
<Unit filename="src/libui_sdl/libui/windows/d2dscratch.cpp" />
<Unit filename="src/libui_sdl/libui/windows/datetimepicker.cpp" />
<Unit filename="src/libui_sdl/libui/windows/debug.cpp" />
<Unit filename="src/libui_sdl/libui/windows/draw.cpp" />
<Unit filename="src/libui_sdl/libui/windows/draw.hpp" />
<Unit filename="src/libui_sdl/libui/windows/drawmatrix.cpp" />
<Unit filename="src/libui_sdl/libui/windows/drawpath.cpp" />
<Unit filename="src/libui_sdl/libui/windows/drawtext.cpp" />
<Unit filename="src/libui_sdl/libui/windows/dwrite.cpp" />
<Unit filename="src/libui_sdl/libui/windows/editablecombo.cpp" />
<Unit filename="src/libui_sdl/libui/windows/entry.cpp" />
<Unit filename="src/libui_sdl/libui/windows/events.cpp" />
<Unit filename="src/libui_sdl/libui/windows/fontbutton.cpp" />
<Unit filename="src/libui_sdl/libui/windows/fontdialog.cpp" />
<Unit filename="src/libui_sdl/libui/windows/form.cpp" />
<Unit filename="src/libui_sdl/libui/windows/gl.cpp" />
<Unit filename="src/libui_sdl/libui/windows/graphemes.cpp" />
<Unit filename="src/libui_sdl/libui/windows/grid.cpp" />
<Unit filename="src/libui_sdl/libui/windows/group.cpp" />
<Unit filename="src/libui_sdl/libui/windows/init.cpp" />
<Unit filename="src/libui_sdl/libui/windows/label.cpp" />
<Unit filename="src/libui_sdl/libui/windows/main.cpp" />
<Unit filename="src/libui_sdl/libui/windows/menu.cpp" />
<Unit filename="src/libui_sdl/libui/windows/multilineentry.cpp" />
<Unit filename="src/libui_sdl/libui/windows/parent.cpp" />
<Unit filename="src/libui_sdl/libui/windows/progressbar.cpp" />
<Unit filename="src/libui_sdl/libui/windows/radiobuttons.cpp" />
<Unit filename="src/libui_sdl/libui/windows/resources.hpp" />
<Unit filename="src/libui_sdl/libui/windows/resources.rc">
<Option compilerVar="WINDRES" />
</Unit>
<Unit filename="src/libui_sdl/libui/windows/separator.cpp" />
<Unit filename="src/libui_sdl/libui/windows/sizing.cpp" />
<Unit filename="src/libui_sdl/libui/windows/slider.cpp" />
<Unit filename="src/libui_sdl/libui/windows/spinbox.cpp" />
<Unit filename="src/libui_sdl/libui/windows/stddialogs.cpp" />
<Unit filename="src/libui_sdl/libui/windows/tab.cpp" />
<Unit filename="src/libui_sdl/libui/windows/tabpage.cpp" />
<Unit filename="src/libui_sdl/libui/windows/text.cpp" />
<Unit filename="src/libui_sdl/libui/windows/uipriv_windows.hpp" />
<Unit filename="src/libui_sdl/libui/windows/utf16.cpp" />
<Unit filename="src/libui_sdl/libui/windows/utilwin.cpp" />
<Unit filename="src/libui_sdl/libui/windows/winapi.hpp" />
<Unit filename="src/libui_sdl/libui/windows/window.cpp" />
<Unit filename="src/libui_sdl/libui/windows/winpublic.cpp" />
<Unit filename="src/libui_sdl/libui/windows/winutil.cpp" />
<Unit filename="src/libui_sdl/main.cpp" />
<Unit filename="src/libui_sdl/main_shaders.h" />
<Unit filename="src/pcap/bluetooth.h" />
<Unit filename="src/pcap/bpf.h" />
<Unit filename="src/pcap/can_socketcan.h" />
<Unit filename="src/pcap/compiler-tests.h" />
<Unit filename="src/pcap/dlt.h" />
<Unit filename="src/pcap/funcattrs.h" />
<Unit filename="src/pcap/ipnet.h" />
<Unit filename="src/pcap/namedb.h" />
<Unit filename="src/pcap/nflog.h" />
<Unit filename="src/pcap/pcap-inttypes.h" />
<Unit filename="src/pcap/pcap.h" />
<Unit filename="src/pcap/sll.h" />
<Unit filename="src/pcap/usb.h" />
<Unit filename="src/pcap/vlan.h" />
<Unit filename="src/types.h" />
<Unit filename="src/version.h" />
<Unit filename="xp.manifest" />
<Extensions>
<code_completion />
<envvars />
<debugger />
</Extensions>
</Project>
</CodeBlocks_project_file>

11
melon_grc.xml Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<gresources>
<gresource prefix="/org/kuriboland/melonDS">
<file preprocess="to-pixdata">icon/melon_16x16.png</file>
<file preprocess="to-pixdata">icon/melon_32x32.png</file>
<file preprocess="to-pixdata">icon/melon_48x48.png</file>
<file preprocess="to-pixdata">icon/melon_64x64.png</file>
<file preprocess="to-pixdata">icon/melon_128x128.png</file>
<file preprocess="to-pixdata">icon/melon_256x256.png</file>
</gresource>
</gresources>

View File

@ -11,5 +11,4 @@ for lib in $(ldd melonDS.exe | grep mingw | sed "s/.*=> //" | sed "s/(.*)//"); d
cp "${lib}" dist
done
cp melonDS.exe dist
windeployqt dist
cp melonDS.exe romlist.bin dist

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 983 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 132 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

View File

@ -1,101 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleExecutable</key>
<string>melonDS</string>
<key>CFBundleIconFile</key>
<string>melon.icns</string>
<key>CFBundleIdentifier</key>
<string>net.kuribo64.melonDS</string>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleVersion</key>
<string>${melonDS_VERSION}</string>
<key>CFBundleShortVersionString</key>
<string>${melonDS_VERSION}</string>
<key>LSApplicationCategoryType</key>
<string>public.app-category.games</string>
<key>NSHumanReadableCopyright</key>
<string>Licensed under GPLv3</string>
<key>NSPrincipalClass</key>
<string>NSApplication</string>
<key>NSHighResolutionCapable</key>
<true/>
<key>NSMicrophoneUsageDescription</key>
<string>We need microphone access so you can use the emulated DS microphone</string>
<key>NSCameraUsageDescription</key>
<string>Camera access is needed for emulation of the DSi's cameras</string>
<key>CFBundleDocumentTypes</key>
<array>
<dict>
<key>CFBundleTypeName</key>
<string>Nintendo DS ROM</string>
<key>CFBundleTypeExtensions</key>
<array>
<string>nds</string>
<string>srl</string>
<string>dsi</string>
<string>ids</string>
<string>nds.zst</string>
<string>srl.zst</string>
<string>dsi.zst</string>
<string>ids.zst</string>
</array>
<key>CFBundleTypeRole</key>
<string>Viewer</string>
</dict>
<dict>
<key>CFBundleTypeName</key>
<string>Game Boy Advance ROM</string>
<key>CFBundleTypeExtensions</key>
<array>
<string>gba</string>
<string>agb</string>
<string>gba.zst</string>
<string>agb.zst</string>
</array>
<key>CFBundleTypeRole</key>
<string>Viewer</string>
<key>LSHandlerRank</key>
<string>Alternate</string>
</dict>
<dict>
<key>CFBundleTypeName</key>
<string>Archive containing ROM</string>
<key>CFBundleTypeExtensions</key>
<array>
<key>zip</key>
<key>7z</key>
<key>rar</key>
<key>tar</key>
<key>tar.gz</key>
<key>tgz</key>
<key>tar.xz</key>
<key>txz</key>
<key>tar.bz2</key>
<key>tbz2</key>
<key>tar.lz4</key>
<key>tlz4</key>
<key>tar.zst</key>
<key>tzst</key>
<key>tar.Z</key>
<key>taz</key>
<key>tar.lz</key>
<key>tar.lzma</key>
<key>tlz</key>
<key>tar.lrz</key>
<key>tlrz</key>
<key>tar.lzo</key>
<key>tzo</key>
</array>
<key>CFBundleTypeRole</key>
<string>Viewer</string>
<key>LSHandlerRank</key>
<string>Alternate</string>
</dict>
</array>
</dict>
</plist>

View File

@ -1,7 +0,0 @@
<!DOCTYPE RCC>
<RCC version="1.0">
<qresource>
<file alias="melon-icon">icon/melon_256x256.png</file>
<file alias="melon-logo">melon384.png</file>
</qresource>
</RCC>

View File

@ -1,80 +0,0 @@
<svg width="1024" height="1024" xmlns="http://www.w3.org/2000/svg">
<path fill="#5c913b" fill-rule="evenodd" d="M357.62 154.38c56.821-56.821 85.232-85.232 118.463-94.604a132.32 132.32 0 0 1 71.834 0c33.231 9.372 61.642 37.783 118.463 94.604l203.24 203.24c56.821 56.821 85.232 85.232 94.604 118.463a132.32 132.32 0 0 1 0 71.834c-9.372 33.231-37.783 61.642-94.604 118.463L666.38 869.62c-56.821 56.821-85.232 85.232-118.463 94.604a132.319 132.319 0 0 1-71.834 0c-33.231-9.372-61.642-37.783-118.463-94.604L154.38 666.38c-56.821-56.821-85.232-85.232-94.604-118.463a132.318 132.318 0 0 1 0-71.834c9.372-33.231 37.783-61.642 94.604-118.463Z"/>
<filter id="a" x="0" y="0" width="1024" height="1024" filterUnits="userSpaceOnUse" primitiveUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="18.378"/>
<feOffset dy="7.351" result="offsetblur"/>
<feFlood flood-color="#000" flood-opacity=".4"/>
<feComposite in2="offsetblur" operator="in"/>
<feMerge>
<feMergeNode/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<path fill="#ffe8b6" fill-rule="evenodd" filter="url(#a)" d="M383.23 209.478c47.35-47.35 71.026-71.026 98.72-78.836a110.266 110.266 0 0 1 59.86 0c27.694 7.81 51.369 31.486 98.72 78.836L814.66 383.61c47.351 47.351 71.027 71.026 78.837 98.72a110.265 110.265 0 0 1 0 59.86c-7.81 27.694-31.486 51.37-78.837 98.72L640.53 815.042c-47.351 47.35-71.027 71.026-98.72 78.836a110.265 110.265 0 0 1-59.86 0c-27.694-7.81-51.37-31.485-98.72-78.836L209.098 640.91c-47.35-47.35-71.026-71.026-78.836-98.72a110.266 110.266 0 0 1 0-59.86c7.81-27.694 31.485-51.369 78.836-98.72Z"/>
<filter id="b" x="0" y="0" width="1024" height="1024" filterUnits="userSpaceOnUse" primitiveUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="18.378"/>
<feOffset dy="7.351" result="offsetblur"/>
<feFlood flood-color="#000" flood-opacity=".4"/>
<feComposite in2="offsetblur" operator="in"/>
<feMerge>
<feMergeNode/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<path fill="#dd2e44" fill-rule="evenodd" filter="url(#b)" d="M404.822 239.527c39.46-39.46 59.189-59.189 82.266-65.697a91.889 91.889 0 0 1 49.885 0c23.077 6.508 42.807 26.238 82.266 65.697l165.295 165.295c39.459 39.46 59.188 59.189 65.697 82.266a91.888 91.888 0 0 1 0 49.884c-6.509 23.078-26.238 42.808-65.697 82.266L619.239 784.534c-39.46 39.459-59.189 59.188-82.266 65.697a91.888 91.888 0 0 1-49.885 0c-23.077-6.509-42.807-26.238-82.266-65.697L239.527 619.239c-39.46-39.46-59.189-59.189-65.697-82.267a91.888 91.888 0 0 1 0-49.884c6.508-23.077 26.238-42.807 65.697-82.266Z"/>
<filter id="c" x="0" y="0" width="1024" height="1024" filterUnits="userSpaceOnUse" primitiveUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="3.676"/>
<feOffset dx="5.198" dy="5.198" result="offsetblur"/>
<feFlood flood-color="#000" flood-opacity=".104"/>
<feComposite in2="offsetblur" operator="in"/>
<feMerge>
<feMergeNode/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<path fill="#ff554d" fill-rule="evenodd" filter="url(#c)" d="M405.568 606.767s-14.487 11.68-31.187 15.594c-9.553 2.238-24.422 1.568-31.188-5.198l-103.96-103.96c-6.766-6.766-7.436-21.635-5.198-31.187 3.914-16.7 15.594-31.188 15.594-31.188l197.523-197.523s14.488-11.68 31.188-15.594c9.553-2.239 24.422-1.568 31.188 5.198l103.96 103.96c6.765 6.765 7.436 21.634 5.197 31.187-3.913 16.7-15.594 31.188-15.594 31.188Z"/>
<filter id="d" x="0" y="0" width="1024" height="1024" filterUnits="userSpaceOnUse" primitiveUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="3.676"/>
<feOffset dx="5.198" dy="5.198" result="offsetblur"/>
<feFlood flood-color="#000" flood-opacity=".104"/>
<feComposite in2="offsetblur" operator="in"/>
<feMerge>
<feMergeNode/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<path fill="#ff554d" fill-rule="evenodd" filter="url(#d)" d="M574.579 779.453s-14.488 11.68-31.188 15.594c-9.553 2.238-24.422 1.568-31.188-5.198l-103.96-103.96c-6.765-6.766-7.436-21.635-5.197-31.187 3.914-16.7 15.594-31.188 15.594-31.188L616.163 425.99s14.488-11.68 31.187-15.594c9.553-2.239 24.422-1.568 31.188 5.198l103.96 103.96c6.766 6.765 7.436 21.634 5.198 31.187-3.914 16.7-15.594 31.188-15.594 31.188Z"/>
<filter id="e" x="0" y="0" width="1024" height="1024" filterUnits="userSpaceOnUse" primitiveUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="3.676"/>
<feOffset dx="5.198" dy="5.198" result="offsetblur"/>
<feFlood flood-color="#000" flood-opacity=".35"/>
<feComposite in2="offsetblur" operator="in"/>
<feMerge>
<feMergeNode/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<path fill="#31352e" fill-rule="evenodd" filter="url(#e)" d="M414.022 532.978s-8.315 6.704-17.9 8.95c-5.483 1.285-14.016.9-17.9-2.983l-59.667-59.667c-3.883-3.884-4.268-12.417-2.983-17.9 2.246-9.585 8.95-17.9 8.95-17.9L437.89 330.11s8.315-6.704 17.9-8.95c5.482-1.285 14.016-.9 17.9 2.984l59.666 59.666c3.884 3.884 4.269 12.418 2.984 17.9-2.246 9.585-8.95 17.9-8.95 17.9Z"/>
<filter id="f" x="0" y="0" width="1024" height="1024" filterUnits="userSpaceOnUse" primitiveUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="3.676"/>
<feOffset dx="5.198" dy="5.198" result="offsetblur"/>
<feFlood flood-color="#000" flood-opacity=".35"/>
<feComposite in2="offsetblur" operator="in"/>
<feMerge>
<feMergeNode/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<path fill="#31352e" fill-rule="evenodd" filter="url(#f)" d="M677.203 544.885c7.177 7.176 18.813 7.176 25.99 0l19.954-19.955c7.177-7.176 7.177-18.813 0-25.99l-19.954-19.954c-7.177-7.176-18.813-7.176-25.99 0l-19.954 19.955c-7.177 7.176-7.177 18.813 0 25.99Z"/>
<filter id="g" x="0" y="0" width="1024" height="1024" filterUnits="userSpaceOnUse" primitiveUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="3.676"/>
<feOffset dx="5.198" dy="5.198" result="offsetblur"/>
<feFlood flood-color="#000" flood-opacity=".35"/>
<feComposite in2="offsetblur" operator="in"/>
<feMerge>
<feMergeNode/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
<path fill="#31352e" fill-rule="evenodd" filter="url(#g)" d="M489.752 728.66c7.177 7.177 18.813 7.177 25.99 0l19.954-19.954c7.177-7.177 7.177-18.813 0-25.99l-19.954-19.954c-7.177-7.176-18.813-7.176-25.99 0l-19.954 19.955c-7.177 7.176-7.177 18.812 0 25.99Z"/>
</svg>

Before

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

View File

@ -1,11 +0,0 @@
[Desktop Entry]
Name=melonDS
GenericName=Nintendo DS Emulator
Comment=A fast and accurate Nintendo DS emulator.
Exec=melonDS %f
Type=Application
Categories=Game;Emulator;
Terminal=false
Icon=net.kuribo64.melonDS
MimeType=application/x-nintendo-ds-rom;
Keywords=emulator;Nintendo;DS;NDS;Nintendo DS;

BIN
romlist.bin Normal file

Binary file not shown.

View File

@ -1,197 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include <stdio.h>
#include <string.h>
#include "ARCodeFile.h"
#include "Platform.h"
namespace melonDS
{
using namespace Platform;
// TODO: import codes from other sources (usrcheat.dat, ...)
// TODO: more user-friendly error reporting
ARCodeFile::ARCodeFile(const std::string& filename)
{
Filename = filename;
if (!Load())
Error = true;
}
std::vector<ARCode> ARCodeFile::GetCodes() const noexcept
{
if (Error)
return {};
std::vector<ARCode> codes;
for (const ARCodeCat& cat : Categories)
{
for (const ARCode& code : cat.Codes)
{
codes.push_back(code);
}
}
return codes;
}
bool ARCodeFile::Load()
{
FileHandle* f = OpenFile(Filename, FileMode::ReadText);
if (!f) return true;
Categories.clear();
bool isincat = false;
ARCodeCat curcat;
bool isincode = false;
ARCode curcode;
char linebuf[1024];
while (!IsEndOfFile(f))
{
if (!FileReadLine(linebuf, 1024, f))
break;
linebuf[1023] = '\0';
char* start = linebuf;
while (start[0]==' ' || start[0]=='\t')
start++;
if (start[0]=='#' || start[0]=='\r' || start[0]=='\n' || start[0]=='\0')
continue;
if (!strncasecmp(start, "CAT", 3))
{
char catname[128];
int ret = sscanf(start, "CAT %127[^\r\n]", catname);
catname[127] = '\0';
if (ret < 1)
{
Log(LogLevel::Error, "AR: malformed CAT line: %s\n", start);
CloseFile(f);
return false;
}
if (isincode) curcat.Codes.push_back(curcode);
isincode = false;
if (isincat) Categories.push_back(curcat);
isincat = true;
curcat.Name = catname;
curcat.Codes.clear();
}
else if (!strncasecmp(start, "CODE", 4))
{
int enable;
char codename[128];
int ret = sscanf(start, "CODE %d %127[^\r\n]", &enable, codename);
codename[127] = '\0';
if (ret < 2)
{
Log(LogLevel::Error, "AR: malformed CODE line: %s\n", start);
CloseFile(f);
return false;
}
if (!isincat)
{
Log(LogLevel::Error, "AR: encountered CODE line with no category started\n");
CloseFile(f);
return false;
}
if (isincode) curcat.Codes.push_back(curcode);
isincode = true;
curcode.Name = codename;
curcode.Enabled = enable!=0;
curcode.Code.clear();
}
else
{
u32 c0, c1;
int ret = sscanf(start, "%08X %08X", &c0, &c1);
if (ret < 2)
{
Log(LogLevel::Error, "AR: malformed data line: %s\n", start);
CloseFile(f);
return false;
}
if (!isincode)
{
Log(LogLevel::Error, "AR: encountered data line with no code started\n");
CloseFile(f);
return false;
}
curcode.Code.push_back(c0);
curcode.Code.push_back(c1);
}
}
if (isincode) curcat.Codes.push_back(curcode);
if (isincat) Categories.push_back(curcat);
CloseFile(f);
return true;
}
bool ARCodeFile::Save()
{
FileHandle* f = Platform::OpenFile(Filename, FileMode::WriteText);
if (!f) return false;
for (ARCodeCatList::iterator it = Categories.begin(); it != Categories.end(); it++)
{
ARCodeCat& cat = *it;
if (it != Categories.begin()) FileWriteFormatted(f, "\n");
FileWriteFormatted(f, "CAT %s\n\n", cat.Name.c_str());
for (ARCodeList::iterator jt = cat.Codes.begin(); jt != cat.Codes.end(); jt++)
{
ARCode& code = *jt;
FileWriteFormatted(f, "CODE %d %s\n", code.Enabled, code.Name.c_str());
for (size_t i = 0; i < code.Code.size(); i+=2)
{
FileWriteFormatted(f, "%08X %08X\n", code.Code[i], code.Code[i + 1]);
}
FileWriteFormatted(f, "\n");
}
}
CloseFile(f);
return true;
}
}

View File

@ -1,67 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARCODEFILE_H
#define ARCODEFILE_H
#include <string>
#include <list>
#include <vector>
#include "types.h"
namespace melonDS
{
struct ARCode
{
std::string Name;
bool Enabled;
std::vector<u32> Code;
};
typedef std::list<ARCode> ARCodeList;
struct ARCodeCat
{
std::string Name;
ARCodeList Codes;
};
typedef std::list<ARCodeCat> ARCodeCatList;
class ARCodeFile
{
public:
ARCodeFile(const std::string& filename);
~ARCodeFile() noexcept = default;
[[nodiscard]] std::vector<ARCode> GetCodes() const noexcept;
bool Error = false;
bool Load();
bool Save();
ARCodeCatList Categories {};
private:
std::string Filename;
};
}
#endif // ARCODEFILE_H

View File

@ -1,398 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include <stdio.h>
#include <string.h>
#include "NDS.h"
#include "DSi.h"
#include "AREngine.h"
#include "Platform.h"
namespace melonDS
{
using Platform::Log;
using Platform::LogLevel;
AREngine::AREngine(melonDS::NDS& nds) : NDS(nds)
{
}
#define case16(x) \
case ((x)+0x00): case ((x)+0x01): case ((x)+0x02): case ((x)+0x03): \
case ((x)+0x04): case ((x)+0x05): case ((x)+0x06): case ((x)+0x07): \
case ((x)+0x08): case ((x)+0x09): case ((x)+0x0A): case ((x)+0x0B): \
case ((x)+0x0C): case ((x)+0x0D): case ((x)+0x0E): case ((x)+0x0F)
void AREngine::RunCheat(const ARCode& arcode)
{
const u32* code = &arcode.Code[0];
u32 offset = 0;
u32 datareg = 0;
u32 cond = 1;
u32 condstack = 0;
const u32* loopstart = code;
u32 loopcount = 0;
u32 loopcond = 1;
u32 loopcondstack = 0;
// TODO: does anything reset this??
u32 c5count = 0;
for (;;)
{
if (code >= &arcode.Code[arcode.Code.size()])
break;
u32 a = *code++;
u32 b = *code++;
u8 op = a >> 24;
if ((op < 0xD0 && op != 0xC5) || op > 0xD2)
{
if (!cond)
{
if ((op & 0xF0) == 0xE0)
{
for (u32 i = 0; i < b; i += 8)
code += 2;
}
continue;
}
}
switch (op)
{
case16(0x00): // 32-bit write
NDS.ARM7Write32((a & 0x0FFFFFFF) + offset, b);
break;
case16(0x10): // 16-bit write
NDS.ARM7Write16((a & 0x0FFFFFFF) + offset, b & 0xFFFF);
break;
case16(0x20): // 8-bit write
NDS.ARM7Write8((a & 0x0FFFFFFF) + offset, b & 0xFF);
break;
case16(0x30): // IF b > u32[a]
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u32 chk = NDS.ARM7Read32(addr);
cond = (b > chk) ? 1:0;
}
break;
case16(0x40): // IF b < u32[a]
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u32 chk = NDS.ARM7Read32(addr);
cond = (b < chk) ? 1:0;
}
break;
case16(0x50): // IF b == u32[a]
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u32 chk = NDS.ARM7Read32(addr);
cond = (b == chk) ? 1:0;
}
break;
case16(0x60): // IF b != u32[a]
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u32 chk = NDS.ARM7Read32(addr);
cond = (b != chk) ? 1:0;
}
break;
case16(0x70): // IF b.l > ((~b.h) & u16[a])
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u16 val = NDS.ARM7Read16(addr);
u16 chk = ~(b >> 16);
chk &= val;
cond = ((b & 0xFFFF) > chk) ? 1:0;
}
break;
case16(0x80): // IF b.l < ((~b.h) & u16[a])
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u16 val = NDS.ARM7Read16(addr);
u16 chk = ~(b >> 16);
chk &= val;
cond = ((b & 0xFFFF) < chk) ? 1:0;
}
break;
case16(0x90): // IF b.l == ((~b.h) & u16[a])
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u16 val = NDS.ARM7Read16(addr);
u16 chk = ~(b >> 16);
chk &= val;
cond = ((b & 0xFFFF) == chk) ? 1:0;
}
break;
case16(0xA0): // IF b.l != ((~b.h) & u16[a])
{
condstack <<= 1;
condstack |= cond;
u32 addr = a & 0x0FFFFFFF;
if (!addr) addr = offset;
u16 val = NDS.ARM7Read16(addr);
u16 chk = ~(b >> 16);
chk &= val;
cond = ((b & 0xFFFF) != chk) ? 1:0;
}
break;
case16(0xB0): // offset = u32[a + offset]
offset = NDS.ARM7Read32((a & 0x0FFFFFFF) + offset);
break;
case 0xC0: // FOR 0..b
loopstart = code; // points to the first opcode after the FOR
loopcount = b;
loopcond = cond; // checkme
loopcondstack = condstack; // (GBAtek is not very clear there)
break;
case 0xC4: // offset = pointer to C4000000 opcode
// theoretically used for safe storage, by accessing [offset+4]
// in practice could be used for a self-modifying AR code
// could be implemented with some hackery, but, does anything even
// use it??
Log(LogLevel::Error, "AR: !! THE FUCKING C4000000 OPCODE. TELL ARISOTURA.\n");
return;
case 0xC5: // count++ / IF (count & b.l) == b.h
{
// with weird condition checking, apparently
// oh well
c5count++;
if (!cond) break;
condstack <<= 1;
condstack |= cond;
u16 mask = b & 0xFFFF;
u16 chk = b >> 16;
cond = ((c5count & mask) == chk) ? 1:0;
}
break;
case 0xC6: // u32[b] = offset
NDS.ARM7Write32(b, offset);
break;
case 0xD0: // ENDIF
cond = condstack & 0x1;
condstack >>= 1;
break;
case 0xD1: // NEXT
if (loopcount > 0)
{
loopcount--;
code = loopstart;
}
else
{
cond = loopcond;
condstack = loopcondstack;
}
break;
case 0xD2: // NEXT+FLUSH
if (loopcount > 0)
{
loopcount--;
code = loopstart;
}
else
{
offset = 0;
datareg = 0;
condstack = 0;
cond = 1;
}
break;
case 0xD3: // offset = b
offset = b;
break;
case 0xD4: // datareg += b
datareg += b;
break;
case 0xD5: // datareg = b
datareg = b;
break;
case 0xD6: // u32[b+offset] = datareg / offset += 4
NDS.ARM7Write32(b + offset, datareg);
offset += 4;
break;
case 0xD7: // u16[b+offset] = datareg / offset += 2
NDS.ARM7Write16(b + offset, datareg & 0xFFFF);
offset += 2;
break;
case 0xD8: // u8[b+offset] = datareg / offset += 1
NDS.ARM7Write8(b + offset, datareg & 0xFF);
offset += 1;
break;
case 0xD9: // datareg = u32[b+offset]
datareg = NDS.ARM7Read32(b + offset);
break;
case 0xDA: // datareg = u16[b+offset]
datareg = NDS.ARM7Read16(b + offset);
break;
case 0xDB: // datareg = u8[b+offset]
datareg = NDS.ARM7Read8(b + offset);
break;
case 0xDC: // offset += b
offset += b;
break;
case16(0xE0): // copy b param bytes to address a+offset
{
// TODO: check for bad alignment of dstaddr
u32 dstaddr = (a & 0x0FFFFFFF) + offset;
u32 bytesleft = b;
while (bytesleft >= 8)
{
NDS.ARM7Write32(dstaddr, *code++); dstaddr += 4;
NDS.ARM7Write32(dstaddr, *code++); dstaddr += 4;
bytesleft -= 8;
}
if (bytesleft > 0)
{
u8* leftover = (u8*)code;
code += 2;
if (bytesleft >= 4)
{
NDS.ARM7Write32(dstaddr, *(u32*)leftover); dstaddr += 4;
leftover += 4;
bytesleft -= 4;
}
while (bytesleft > 0)
{
NDS.ARM7Write8(dstaddr, *leftover++); dstaddr++;
bytesleft--;
}
}
}
break;
case16(0xF0): // copy b bytes from address offset to address a
{
// TODO: check for bad alignment of srcaddr/dstaddr
u32 srcaddr = offset;
u32 dstaddr = (a & 0x0FFFFFFF);
u32 bytesleft = b;
while (bytesleft >= 4)
{
NDS.ARM7Write32(dstaddr, NDS.ARM7Read32(srcaddr));
srcaddr += 4;
dstaddr += 4;
bytesleft -= 4;
}
while (bytesleft > 0)
{
NDS.ARM7Write8(dstaddr, NDS.ARM7Read8(srcaddr));
srcaddr++;
dstaddr++;
bytesleft--;
}
}
break;
default:
Log(LogLevel::Warn, "!! bad AR opcode %08X %08X\n", a, b);
return;
}
}
}
void AREngine::RunCheats()
{
if (Cheats.empty()) return;
for (const ARCode& code : Cheats)
{
if (code.Enabled)
RunCheat(code);
}
}
}

View File

@ -1,43 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARENGINE_H
#define ARENGINE_H
#include <vector>
#include "ARCodeFile.h"
namespace melonDS
{
class NDS;
class AREngine
{
public:
AREngine(melonDS::NDS& nds);
std::vector<ARCode> Cheats {};
private:
friend class ARM;
void RunCheats();
void RunCheat(const ARCode& arcode);
melonDS::NDS& NDS;
};
}
#endif // ARENGINE_H

File diff suppressed because it is too large Load Diff

354
src/ARM.h
View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -20,22 +20,11 @@
#define ARM_H
#include <algorithm>
#include <optional>
#include "types.h"
#include "MemRegion.h"
#include "MemConstants.h"
#include "NDS.h"
#ifdef GDBSTUB_ENABLED
#include "debug/GdbStub.h"
#endif
namespace melonDS
{
inline u32 ROR(u32 x, u32 n)
{
return (x >> (n&0x1F)) | (x << ((32-n)&0x1F));
}
#define ROR(x, n) (((x) >> (n)) | ((x) << (32-(n))))
enum
{
@ -43,39 +32,16 @@ enum
RWFlags_ForceUser = (1<<21),
};
enum class CPUExecuteMode : u32
{
Interpreter,
InterpreterGDB,
#ifdef JIT_ENABLED
JIT
#endif
};
struct GDBArgs;
class ARMJIT;
class GPU;
class ARMJIT_Memory;
class NDS;
class Savestate;
class ARM
#ifdef GDBSTUB_ENABLED
: public Gdb::StubCallbacks
#endif
{
public:
ARM(u32 num, bool jit, std::optional<GDBArgs> gdb, NDS& nds);
virtual ~ARM(); // destroy shit
void SetGdbArgs(std::optional<GDBArgs> gdb);
ARM(u32 num);
~ARM(); // destroy shit
virtual void Reset();
virtual void DoSavestate(Savestate* file);
virtual void FillPipeline() = 0;
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
void RestoreCPSR();
@ -85,9 +51,9 @@ public:
Halted = halt;
}
void NocashPrint(u32 addr) noexcept;
virtual void Execute() = 0;
bool CheckCondition(u32 code) const
bool CheckCondition(u32 code)
{
if (code == 0xE) return true;
if (ConditionTable[code] & (1 << (CPSR>>28))) return true;
@ -116,19 +82,7 @@ public:
if (v) CPSR |= 0x10000000;
}
inline bool ModeIs(u32 mode) const
{
u32 cm = CPSR & 0x1f;
mode &= 0x1f;
if (mode == cm) return true;
if (mode == 0x17) return cm >= 0x14 && cm <= 0x17; // abt
if (mode == 0x1b) return cm >= 0x18 && cm <= 0x1b; // und
return false;
}
void UpdateMode(u32 oldmode, u32 newmode, bool phony = false);
void UpdateMode(u32 oldmode, u32 newmode);
void TriggerIRQ();
@ -149,21 +103,13 @@ public:
virtual void AddCycles_CDI() = 0;
virtual void AddCycles_CD() = 0;
void CheckGdbIncoming();
u32 Num;
s32 Cycles;
union
{
struct
{
u8 Halted;
u8 IRQ; // nonzero to trigger IRQ
u8 IdleLoop;
};
u32 StopExecution;
};
u32 Halted;
u32 IRQ; // nonzero to trigger IRQ
u32 CodeRegion;
s32 CodeCycles;
@ -183,101 +129,56 @@ public:
u32 ExceptionBase;
MemRegion CodeMem;
NDS::MemRegion CodeMem;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize;
u64* FastBlockLookup;
#endif
static const u32 ConditionTable[16];
#ifdef GDBSTUB_ENABLED
Gdb::GdbStub GdbStub;
#endif
melonDS::NDS& NDS;
protected:
virtual u8 BusRead8(u32 addr) = 0;
virtual u16 BusRead16(u32 addr) = 0;
virtual u32 BusRead32(u32 addr) = 0;
virtual void BusWrite8(u32 addr, u8 val) = 0;
virtual void BusWrite16(u32 addr, u16 val) = 0;
virtual void BusWrite32(u32 addr, u32 val) = 0;
#ifdef GDBSTUB_ENABLED
bool IsSingleStep;
bool BreakReq;
bool BreakOnStartup;
u16 Port;
public:
int GetCPU() const override { return Num ? 7 : 9; }
u32 ReadReg(Gdb::Register reg) override;
void WriteReg(Gdb::Register reg, u32 v) override;
u32 ReadMem(u32 addr, int size) override;
void WriteMem(u32 addr, int size, u32 v) override;
void ResetGdb() override;
int RemoteCmd(const u8* cmd, size_t len) override;
protected:
#endif
void GdbCheckA();
void GdbCheckB();
void GdbCheckC();
static u32 ConditionTable[16];
};
class ARMv5 : public ARM
{
public:
ARMv5(melonDS::NDS& nds, std::optional<GDBArgs> gdb, bool jit);
~ARMv5();
ARMv5();
void Reset() override;
void Reset();
void DoSavestate(Savestate* file) override;
void DoSavestate(Savestate* file);
void UpdateRegionTimings(u32 addrstart, u32 addrend);
void FillPipeline() override;
void JumpTo(u32 addr, bool restorecpsr = false) override;
void JumpTo(u32 addr, bool restorecpsr = false);
void PrefetchAbort();
void DataAbort();
template <CPUExecuteMode mode>
void Execute();
// all code accesses are forced nonseq 32bit
u32 CodeRead32(u32 addr, bool branch);
void DataRead8(u32 addr, u32* val) override;
void DataRead16(u32 addr, u32* val) override;
void DataRead32(u32 addr, u32* val) override;
void DataRead32S(u32 addr, u32* val) override;
void DataWrite8(u32 addr, u8 val) override;
void DataWrite16(u32 addr, u16 val) override;
void DataWrite32(u32 addr, u32 val) override;
void DataWrite32S(u32 addr, u32 val) override;
void DataRead8(u32 addr, u32* val);
void DataRead16(u32 addr, u32* val);
void DataRead32(u32 addr, u32* val);
void DataRead32S(u32 addr, u32* val);
void DataWrite8(u32 addr, u8 val);
void DataWrite16(u32 addr, u16 val);
void DataWrite32(u32 addr, u32 val);
void DataWrite32S(u32 addr, u32 val);
void AddCycles_C() override
void AddCycles_C()
{
// code only. always nonseq 32-bit for ARM9.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC;
}
void AddCycles_CI(s32 numI) override
void AddCycles_CI(s32 numI)
{
// code+internal
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC + numI;
}
void AddCycles_CDI() override
void AddCycles_CDI()
{
// LDR/LDM cycles. ARM9 seems to skip the internal cycle there.
// TODO: ITCM data fetches shouldn't be parallelized, they say
@ -290,7 +191,7 @@ public:
// Cycles += numC + numD;
}
void AddCycles_CD() override
void AddCycles_CD()
{
// TODO: ITCM data fetches shouldn't be parallelized, they say
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
@ -302,7 +203,7 @@ public:
// Cycles += numC + numD;
}
void GetCodeMemRegion(u32 addr, MemRegion* region);
void GetCodeMemRegion(u32 addr, NDS::MemRegion* region);
void CP15Reset();
void CP15DoSavestate(Savestate* file);
@ -310,8 +211,7 @@ public:
void UpdateDTCMSetting();
void UpdateITCMSetting();
void UpdatePURegion(u32 n);
void UpdatePURegions(bool update_all);
void UpdatePURegions();
u32 RandomLineIndex();
@ -320,23 +220,18 @@ public:
void ICacheInvalidateAll();
void CP15Write(u32 id, u32 val);
u32 CP15Read(u32 id) const;
u32 CP15Read(u32 id);
u32 CP15Control;
u32 RNGSeed;
u32 TraceProcessID;
u32 DTCMSetting, ITCMSetting;
// for aarch64 JIT they need to go up here
// to be addressable by a 12-bit immediate
u8 ITCM[0x8000];
u32 ITCMSize;
u32 DTCMBase, DTCMMask;
s32 RegionCodeCycles;
u8 ITCM[ITCMPhysicalSize];
u8* DTCM;
u8 DTCM[0x4000];
u32 DTCMBase, DTCMSize;
u8 ICache[0x2000];
u32 ICacheTags[64*4];
@ -356,71 +251,162 @@ public:
u8 PU_UserMap[0x100000];
// games operate under system mode, generally
//#define PU_Map PU_PrivMap
u8* PU_Map;
#define PU_Map PU_PrivMap
// code/16N/32N/32S
u8 MemTimings[0x100000][4];
s32 RegionCodeCycles;
u8* CurICacheLine;
bool (*GetMemRegion)(u32 addr, bool write, MemRegion* region);
#ifdef GDBSTUB_ENABLED
u32 ReadMem(u32 addr, int size) override;
void WriteMem(u32 addr, int size, u32 v) override;
#endif
protected:
u8 BusRead8(u32 addr) override;
u16 BusRead16(u32 addr) override;
u32 BusRead32(u32 addr) override;
void BusWrite8(u32 addr, u8 val) override;
void BusWrite16(u32 addr, u16 val) override;
void BusWrite32(u32 addr, u32 val) override;
};
class ARMv4 : public ARM
{
public:
ARMv4(melonDS::NDS& nds, std::optional<GDBArgs> gdb, bool jit);
ARMv4();
void FillPipeline() override;
void JumpTo(u32 addr, bool restorecpsr = false);
void JumpTo(u32 addr, bool restorecpsr = false) override;
template <CPUExecuteMode mode>
void Execute();
u16 CodeRead16(u32 addr)
{
return BusRead16(addr);
return NDS::ARM7Read16(addr);
}
u32 CodeRead32(u32 addr)
{
return BusRead32(addr);
return NDS::ARM7Read32(addr);
}
void DataRead8(u32 addr, u32* val) override;
void DataRead16(u32 addr, u32* val) override;
void DataRead32(u32 addr, u32* val) override;
void DataRead32S(u32 addr, u32* val) override;
void DataWrite8(u32 addr, u8 val) override;
void DataWrite16(u32 addr, u16 val) override;
void DataWrite32(u32 addr, u32 val) override;
void DataWrite32S(u32 addr, u32 val) override;
void AddCycles_C() override;
void AddCycles_CI(s32 num) override;
void AddCycles_CDI() override;
void AddCycles_CD() override;
protected:
u8 BusRead8(u32 addr) override;
u16 BusRead16(u32 addr) override;
u32 BusRead32(u32 addr) override;
void BusWrite8(u32 addr, u8 val) override;
void BusWrite16(u32 addr, u16 val) override;
void BusWrite32(u32 addr, u32 val) override;
void DataRead8(u32 addr, u32* val)
{
*val = NDS::ARM7Read8(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
}
void DataRead16(u32 addr, u32* val)
{
addr &= ~1;
*val = NDS::ARM7Read16(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
}
void DataRead32(u32 addr, u32* val)
{
addr &= ~3;
*val = NDS::ARM7Read32(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][2];
}
void DataRead32S(u32 addr, u32* val)
{
addr &= ~3;
*val = NDS::ARM7Read32(addr);
DataCycles += NDS::ARM7MemTimings[DataRegion][3];
}
void DataWrite8(u32 addr, u8 val)
{
NDS::ARM7Write8(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
}
void DataWrite16(u32 addr, u16 val)
{
addr &= ~1;
NDS::ARM7Write16(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
}
void DataWrite32(u32 addr, u32 val)
{
addr &= ~3;
NDS::ARM7Write32(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][2];
}
void DataWrite32S(u32 addr, u32 val)
{
addr &= ~3;
NDS::ARM7Write32(addr, val);
DataCycles += NDS::ARM7MemTimings[DataRegion][3];
}
void AddCycles_C()
{
// code only. this code fetch is sequential.
Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3];
}
void AddCycles_CI(s32 num)
{
// code+internal. results in a nonseq code fetch.
Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num;
}
void AddCycles_CDI()
{
// LDR/LDM cycles.
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
s32 numD = DataCycles;
if (DataRegion == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
Cycles += numC + numD;
else
{
numC++;
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
Cycles += numC + numD + 1;
}
}
void AddCycles_CD()
{
// TODO: max gain should be 5c when writing to mainRAM
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
s32 numD = DataCycles;
if (DataRegion == 0x02)
{
if (CodeRegion == 0x02)
Cycles += numC + numD;
else
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else if (CodeRegion == 0x02)
{
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
Cycles += numC + numD;
}
}
};
namespace ARMInterpreter
@ -430,5 +416,5 @@ void A_UNK(ARM* cpu);
void T_UNK(ARM* cpu);
}
}
#endif // ARM_H

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -22,24 +22,15 @@
#include "ARMInterpreter_ALU.h"
#include "ARMInterpreter_Branch.h"
#include "ARMInterpreter_LoadStore.h"
#include "Platform.h"
#ifdef GDBSTUB_ENABLED
#include "debug/GdbStub.h"
#endif
namespace melonDS::ARMInterpreter
namespace ARMInterpreter
{
using Platform::Log;
using Platform::LogLevel;
void A_UNK(ARM* cpu)
{
Log(LogLevel::Warn, "undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-8);
#ifdef GDBSTUB_ENABLED
cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-8);
#endif
printf("undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-8);
//for (int i = 0; i < 16; i++) printf("R%d: %08X\n", i, cpu->R[i]);
//NDS::Halt();
u32 oldcpsr = cpu->CPSR;
@ -54,10 +45,7 @@ void A_UNK(ARM* cpu)
void T_UNK(ARM* cpu)
{
Log(LogLevel::Warn, "undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-4);
#ifdef GDBSTUB_ENABLED
cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-4);
#endif
printf("undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-4);
//NDS::Halt();
u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF;
@ -81,17 +69,9 @@ void A_MSR_IMM(ARM* cpu)
case 0x11: psr = &cpu->R_FIQ[7]; break;
case 0x12: psr = &cpu->R_IRQ[2]; break;
case 0x13: psr = &cpu->R_SVC[2]; break;
case 0x14:
case 0x15:
case 0x16:
case 0x17: psr = &cpu->R_ABT[2]; break;
case 0x18:
case 0x19:
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
cpu->AddCycles_C();
return;
default: printf("bad CPU mode %08X\n", cpu->CPSR); return;
}
}
else
@ -112,9 +92,6 @@ void A_MSR_IMM(ARM* cpu)
u32 val = ROR((cpu->CurInstr & 0xFF), ((cpu->CurInstr >> 7) & 0x1E));
// bit4 is forced to 1
val |= 0x00000010;
*psr &= ~mask;
*psr |= (val & mask);
@ -134,17 +111,9 @@ void A_MSR_REG(ARM* cpu)
case 0x11: psr = &cpu->R_FIQ[7]; break;
case 0x12: psr = &cpu->R_IRQ[2]; break;
case 0x13: psr = &cpu->R_SVC[2]; break;
case 0x14:
case 0x15:
case 0x16:
case 0x17: psr = &cpu->R_ABT[2]; break;
case 0x18:
case 0x19:
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
cpu->AddCycles_C();
return;
default: printf("bad CPU mode %08X\n", cpu->CPSR); return;
}
}
else
@ -165,9 +134,6 @@ void A_MSR_REG(ARM* cpu)
u32 val = cpu->R[cpu->CurInstr & 0xF];
// bit4 is forced to 1
val |= 0x00000010;
*psr &= ~mask;
*psr |= (val & mask);
@ -187,15 +153,9 @@ void A_MRS(ARM* cpu)
case 0x11: psr = cpu->R_FIQ[7]; break;
case 0x12: psr = cpu->R_IRQ[2]; break;
case 0x13: psr = cpu->R_SVC[2]; break;
case 0x14:
case 0x15:
case 0x16:
case 0x17: psr = cpu->R_ABT[2]; break;
case 0x18:
case 0x19:
case 0x1A:
case 0x1B: psr = cpu->R_UND[2]; break;
default: psr = cpu->CPSR; break;
default: printf("bad CPU mode %08X\n", cpu->CPSR); return;
}
}
else
@ -208,9 +168,6 @@ void A_MRS(ARM* cpu)
void A_MCR(ARM* cpu)
{
if ((cpu->CPSR & 0x1F) == 0x10)
return A_UNK(cpu);
u32 cp = (cpu->CurInstr >> 8) & 0xF;
//u32 op = (cpu->CurInstr >> 21) & 0x7;
u32 cn = (cpu->CurInstr >> 16) & 0xF;
@ -223,11 +180,11 @@ void A_MCR(ARM* cpu)
}
else if (cpu->Num==1 && cp==14)
{
Log(LogLevel::Debug, "MCR p14,%d,%d,%d on ARM7\n", cn, cm, cpinfo);
printf("MCR p14,%d,%d,%d on ARM7\n", cn, cm, cpinfo);
}
else
{
Log(LogLevel::Warn, "bad MCR opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
printf("bad MCR opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
return A_UNK(cpu); // TODO: check what kind of exception it really is
}
@ -236,9 +193,6 @@ void A_MCR(ARM* cpu)
void A_MRC(ARM* cpu)
{
if ((cpu->CPSR & 0x1F) == 0x10)
return A_UNK(cpu);
u32 cp = (cpu->CurInstr >> 8) & 0xF;
//u32 op = (cpu->CurInstr >> 21) & 0x7;
u32 cn = (cpu->CurInstr >> 16) & 0xF;
@ -251,11 +205,11 @@ void A_MRC(ARM* cpu)
}
else if (cpu->Num==1 && cp==14)
{
Log(LogLevel::Debug, "MRC p14,%d,%d,%d on ARM7\n", cn, cm, cpinfo);
printf("MRC p14,%d,%d,%d on ARM7\n", cn, cm, cpinfo);
}
else
{
Log(LogLevel::Warn, "bad MRC opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
printf("bad MRC opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
return A_UNK(cpu); // TODO: check what kind of exception it really is
}

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -22,26 +22,14 @@
#include "types.h"
#include "ARM.h"
namespace melonDS
{
namespace ARMInterpreter
{
extern void (*ARMInstrTable[4096])(ARM* cpu);
extern void (*THUMBInstrTable[1024])(ARM* cpu);
void A_MSR_IMM(ARM* cpu);
void A_MSR_REG(ARM* cpu);
void A_MRS(ARM* cpu);
void A_MCR(ARM* cpu);
void A_MRC(ARM* cpu);
void A_SVC(ARM* cpu);
void T_SVC(ARM* cpu);
void A_BLX_IMM(ARM* cpu); // I'm a special one look at me
}
}
#endif // ARMINTERPRETER_H

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -18,46 +18,18 @@
#include <stdio.h>
#include "ARM.h"
#include "NDS.h"
namespace melonDS::ARMInterpreter
#define CARRY_ADD(a, b) ((0xFFFFFFFF-a) < b)
#define CARRY_SUB(a, b) (a >= b)
#define OVERFLOW_ADD(a, b, res) ((!(((a) ^ (b)) & 0x80000000)) && (((a) ^ (res)) & 0x80000000))
#define OVERFLOW_SUB(a, b, res) ((((a) ^ (b)) & 0x80000000) && (((a) ^ (res)) & 0x80000000))
namespace ARMInterpreter
{
inline bool CarryAdd(u32 a, u32 b)
{
return (0xFFFFFFFF-a) < b;
}
inline bool CarrySub(u32 a, u32 b)
{
return a >= b;
}
inline bool OverflowAdd(u32 a, u32 b)
{
u32 res = a + b;
return (!((a ^ b) & 0x80000000)) && ((a ^ res) & 0x80000000);
}
inline bool OverflowSub(u32 a, u32 b)
{
u32 res = a - b;
return ((a ^ b) & 0x80000000) && ((a ^ res) & 0x80000000);
}
inline bool OverflowAdc(u32 a, u32 b, u32 carry)
{
s64 fullResult = (s64)(s32)a + (s32)b + carry;
u32 res = a + b + carry;
return (s32)res != fullResult;
}
inline bool OverflowSbc(u32 a, u32 b, u32 carry)
{
s64 fullResult = (s64)(s32)a - (s32)b - carry;
u32 res = a - b - carry;
return (s32)res != fullResult;
}
#define LSL_IMM(x, s) \
x <<= s;
@ -154,11 +126,6 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry)
#define A_CALC_OP2_IMM \
u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E);
#define A_CALC_OP2_IMM_S \
u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \
if ((cpu->CurInstr>>7)&0x1E) \
cpu->SetC(b & 0x80000000);
#define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \
u32 b = cpu->R[cpu->CurInstr&0xF]; \
u32 s = (cpu->CurInstr>>7)&0x1F; \
@ -219,7 +186,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \
} \
void A_##x##_IMM_S(ARM* cpu) \
{ \
A_CALC_OP2_IMM##s \
A_CALC_OP2_IMM \
A_##x##_S(0) \
} \
void A_##x##_REG_LSL_IMM_S(ARM* cpu) \
@ -267,7 +234,7 @@ void A_##x##_REG_ROR_REG_S(ARM* cpu) \
\
void A_##x##_IMM(ARM* cpu) \
{ \
A_CALC_OP2_IMM##s \
A_CALC_OP2_IMM \
A_##x(0) \
} \
void A_##x##_REG_LSL_IMM(ARM* cpu) \
@ -318,7 +285,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -349,7 +316,7 @@ A_IMPLEMENT_ALU_OP(AND,_S)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -380,7 +347,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -392,8 +359,8 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
u32 res = a - b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(a, b), \
OverflowSub(a, b)); \
CARRY_SUB(a, b), \
OVERFLOW_SUB(a, b, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -413,7 +380,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -425,8 +392,8 @@ A_IMPLEMENT_ALU_OP(SUB,)
u32 res = b - a; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(b, a), \
OverflowSub(b, a)); \
CARRY_SUB(b, a), \
OVERFLOW_SUB(b, a, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -446,7 +413,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -458,8 +425,8 @@ A_IMPLEMENT_ALU_OP(RSB,)
u32 res = a + b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarryAdd(a, b), \
OverflowAdd(a, b)); \
CARRY_ADD(a, b), \
OVERFLOW_ADD(a, b, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -479,7 +446,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -493,8 +460,8 @@ A_IMPLEMENT_ALU_OP(ADD,)
u32 res = res_tmp + carry; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarryAdd(a, b) | CarryAdd(res_tmp, carry), \
OverflowAdc(a, b, carry)); \
CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry), \
OVERFLOW_ADD(a, b, res_tmp) | OVERFLOW_ADD(res_tmp, carry, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -514,7 +481,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -528,8 +495,8 @@ A_IMPLEMENT_ALU_OP(ADC,)
u32 res = res_tmp - carry; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(a, b) & CarrySub(res_tmp, carry), \
OverflowSbc(a, b, carry)); \
CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry), \
OVERFLOW_SUB(a, b, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -549,7 +516,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -563,8 +530,8 @@ A_IMPLEMENT_ALU_OP(SBC,)
u32 res = res_tmp - carry; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(b, a) & CarrySub(res_tmp, carry), \
OverflowSbc(b, a, carry)); \
CARRY_SUB(b, a) & CARRY_SUB(res_tmp, carry), \
OVERFLOW_SUB(b, a, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -603,8 +570,8 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
u32 res = a - b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(a, b), \
OverflowSub(a, b)); \
CARRY_SUB(a, b), \
OVERFLOW_SUB(a, b, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(CMP,)
@ -615,8 +582,8 @@ A_IMPLEMENT_ALU_TEST(CMP,)
u32 res = a + b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarryAdd(a, b), \
OverflowAdd(a, b)); \
CARRY_ADD(a, b), \
OVERFLOW_ADD(a, b, res)); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(CMN,)
@ -628,7 +595,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -657,7 +624,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(b & ~1); \
cpu->JumpTo(b); \
} \
else \
{ \
@ -689,11 +656,8 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
(cpu->NextInstr[0] & 0xFF000000) == 0xEA000000 && // branch
(cpu->NextInstr[1] & 0xFFFF) == 0x6464)
{
// GBATek says the two bytes after the 2nd ID are _reserved_ for flags
// but since they serve no purpose ATTOW, we can skip them
u32 addr = cpu->R[15] + 4; // Skip 2nd ID and flags
// TODO: Pass flags to NocashPrint
cpu->NDS.NocashPrint(cpu->Num, addr);
u32 addr = cpu->R[15] + 2;
NDS::NocashPrint(cpu->Num, addr);
}
}
@ -704,7 +668,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(res & ~1); \
cpu->JumpTo(res); \
} \
else \
{ \
@ -734,7 +698,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S)
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
cpu->JumpTo(b & ~1); \
cpu->JumpTo(b); \
} \
else \
{ \
@ -961,7 +925,7 @@ void A_SMLAxy(ARM* cpu)
u32 res = res_mul + rn;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (OverflowAdd(res_mul, rn))
if (OVERFLOW_ADD(res_mul, rn, res))
cpu->CPSR |= 0x08000000;
cpu->AddCycles_C(); // TODO: interlock??
@ -982,7 +946,7 @@ void A_SMLAWy(ARM* cpu)
u32 res = res_mul + rn;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (OverflowAdd(res_mul, rn))
if (OVERFLOW_ADD(res_mul, rn, res))
cpu->CPSR |= 0x08000000;
cpu->AddCycles_C(); // TODO: interlock??
@ -1079,7 +1043,7 @@ void A_QADD(ARM* cpu)
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 res = rm + rn;
if (OverflowAdd(rm, rn))
if (OVERFLOW_ADD(rm, rn, res))
{
res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
cpu->CPSR |= 0x08000000;
@ -1097,7 +1061,7 @@ void A_QSUB(ARM* cpu)
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 res = rm - rn;
if (OverflowSub(rm, rn))
if (OVERFLOW_SUB(rm, rn, res))
{
res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
cpu->CPSR |= 0x08000000;
@ -1114,7 +1078,7 @@ void A_QDADD(ARM* cpu)
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
if (OverflowAdd(rn, rn))
if (rn & 0x40000000)
{
rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF;
cpu->CPSR |= 0x08000000; // CHECKME
@ -1123,7 +1087,7 @@ void A_QDADD(ARM* cpu)
rn <<= 1;
u32 res = rm + rn;
if (OverflowAdd(rm, rn))
if (OVERFLOW_ADD(rm, rn, res))
{
res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
cpu->CPSR |= 0x08000000;
@ -1140,7 +1104,7 @@ void A_QDSUB(ARM* cpu)
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
if (OverflowAdd(rn, rn))
if (rn & 0x40000000)
{
rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF;
cpu->CPSR |= 0x08000000; // CHECKME
@ -1149,7 +1113,7 @@ void A_QDSUB(ARM* cpu)
rn <<= 1;
u32 res = rm - rn;
if (OverflowSub(rm, rn))
if (OVERFLOW_SUB(rm, rn, res))
{
res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
cpu->CPSR |= 0x08000000;
@ -1206,8 +1170,8 @@ void T_ADD_REG_(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
CARRY_ADD(a, b),
OVERFLOW_ADD(a, b, res));
cpu->AddCycles_C();
}
@ -1219,8 +1183,8 @@ void T_SUB_REG_(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(a, b),
OverflowSub(a, b));
CARRY_SUB(a, b),
OVERFLOW_SUB(a, b, res));
cpu->AddCycles_C();
}
@ -1232,8 +1196,8 @@ void T_ADD_IMM_(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
CARRY_ADD(a, b),
OVERFLOW_ADD(a, b, res));
cpu->AddCycles_C();
}
@ -1245,8 +1209,8 @@ void T_SUB_IMM_(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(a, b),
OverflowSub(a, b));
CARRY_SUB(a, b),
OVERFLOW_SUB(a, b, res));
cpu->AddCycles_C();
}
@ -1266,8 +1230,8 @@ void T_CMP_IMM(ARM* cpu)
u32 res = a - b;
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(a, b),
OverflowSub(a, b));
CARRY_SUB(a, b),
OVERFLOW_SUB(a, b, res));
cpu->AddCycles_C();
}
@ -1279,8 +1243,8 @@ void T_ADD_IMM(ARM* cpu)
cpu->R[(cpu->CurInstr >> 8) & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
CARRY_ADD(a, b),
OVERFLOW_ADD(a, b, res));
cpu->AddCycles_C();
}
@ -1292,8 +1256,8 @@ void T_SUB_IMM(ARM* cpu)
cpu->R[(cpu->CurInstr >> 8) & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(a, b),
OverflowSub(a, b));
CARRY_SUB(a, b),
OVERFLOW_SUB(a, b, res));
cpu->AddCycles_C();
}
@ -1363,8 +1327,8 @@ void T_ADC_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarryAdd(a, b) | CarryAdd(res_tmp, carry),
OverflowAdc(a, b, carry));
CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry),
OVERFLOW_ADD(a, b, res_tmp) | OVERFLOW_ADD(res_tmp, carry, res));
cpu->AddCycles_C();
}
@ -1378,8 +1342,8 @@ void T_SBC_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(a, b) & CarrySub(res_tmp, carry),
OverflowSbc(a, b, carry));
CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry),
OVERFLOW_SUB(a, b, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res));
cpu->AddCycles_C();
}
@ -1411,8 +1375,8 @@ void T_NEG_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(0, b),
OverflowSub(0, b));
CARRY_SUB(0, b),
OVERFLOW_SUB(0, b, res));
cpu->AddCycles_C();
}
@ -1423,8 +1387,8 @@ void T_CMP_REG(ARM* cpu)
u32 res = a - b;
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(a, b),
OverflowSub(a, b));
CARRY_SUB(a, b),
OVERFLOW_SUB(a, b, res));
cpu->AddCycles_C();
}
@ -1435,8 +1399,8 @@ void T_CMN_REG(ARM* cpu)
u32 res = a + b;
cpu->SetNZCV(res & 0x80000000,
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
CARRY_ADD(a, b),
OVERFLOW_ADD(a, b, res));
cpu->AddCycles_C();
}
@ -1532,8 +1496,8 @@ void T_CMP_HIREG(ARM* cpu)
cpu->SetNZCV(res & 0x80000000,
!res,
CarrySub(a, b),
OverflowSub(a, b));
CARRY_SUB(a, b),
OVERFLOW_SUB(a, b, res));
cpu->AddCycles_C();
}
@ -1558,11 +1522,8 @@ void T_MOV_HIREG(ARM* cpu)
(cpu->NextInstr[0] & 0xF800) == 0xE000 && // branch
(cpu->NextInstr[1] & 0xFFFF) == 0x6464)
{
// GBATek says the two bytes after the 2nd ID are _reserved_ for flags
// but since they serve no purpose ATTOW, we can skip them
u32 addr = cpu->R[15] + 4; // Skip 2nd ID and flags
// TODO: Pass flags to NocashPrint
cpu->NDS.NocashPrint(cpu->Num, addr);
u32 addr = cpu->R[15] + 2;
NDS::NocashPrint(cpu->Num, addr);
}
}

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -19,8 +19,6 @@
#ifndef ARMINTERPRETER_ALU_H
#define ARMINTERPRETER_ALU_H
namespace melonDS
{
namespace ARMInterpreter
{
@ -136,5 +134,4 @@ void T_ADD_SP(ARM* cpu);
}
}
#endif

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -16,13 +16,12 @@
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include <stdio.h>
#include "ARM.h"
#include "Platform.h"
namespace melonDS::ARMInterpreter
namespace ARMInterpreter
{
using Platform::Log;
using Platform::LogLevel;
void A_B(ARM* cpu)
@ -80,7 +79,7 @@ void T_BLX_REG(ARM* cpu)
{
if (cpu->Num==1)
{
Log(LogLevel::Warn, "!! THUMB BLX_REG ON ARM7\n");
printf("!! THUMB BLX_REG ON ARM7\n");
return;
}

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -19,8 +19,6 @@
#ifndef ARMINTERPRETER_BRANCH_H
#define ARMINTERPRETER_BRANCH_H
namespace melonDS
{
namespace ARMInterpreter
{
@ -38,5 +36,4 @@ void T_BL_LONG_2(ARM* cpu);
}
}
#endif

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -20,7 +20,7 @@
#include "ARM.h"
namespace melonDS::ARMInterpreter
namespace ARMInterpreter
{
@ -62,20 +62,14 @@ namespace melonDS::ARMInterpreter
#define A_STR \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 0xF) \
storeval += 4; \
cpu->DataWrite32(offset, storeval); \
cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
cpu->AddCycles_CD();
// TODO: user mode (bit21)
#define A_STR_POST \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 0xF) \
storeval += 4; \
cpu->DataWrite32(addr, storeval); \
cpu->DataWrite32(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
cpu->AddCycles_CD();
@ -416,7 +410,7 @@ void A_LDM(ARM* cpu)
}
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true);
cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10);
for (int i = 0; i < 15; i++)
{
@ -430,9 +424,9 @@ void A_LDM(ARM* cpu)
}
}
u32 pc = 0;
if (cpu->CurInstr & (1<<15))
{
u32 pc;
if (preinc) base += 4;
if (first) cpu->DataRead32 (base, &pc);
else cpu->DataRead32S(base, &pc);
@ -440,8 +434,13 @@ void A_LDM(ARM* cpu)
if (cpu->Num == 1)
pc &= ~0x1;
cpu->JumpTo(pc, cpu->CurInstr & (1<<22));
}
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR);
if (cpu->CurInstr & (1<<21))
{
// post writeback
@ -461,12 +460,6 @@ void A_LDM(ARM* cpu)
cpu->R[baseid] = wbbase;
}
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
if (cpu->CurInstr & (1<<15))
cpu->JumpTo(pc, cpu->CurInstr & (1<<22));
cpu->AddCycles_CDI();
}
@ -501,7 +494,7 @@ void A_STM(ARM* cpu)
else if (mode != 0x10 && mode != 0x1F)
isbanked = (baseid >= 13 && baseid < 15);
cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true);
cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10);
}
for (u32 i = 0; i < 16; i++)
@ -527,7 +520,7 @@ void A_STM(ARM* cpu)
}
if (cpu->CurInstr & (1<<22))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR);
if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)))
cpu->R[baseid] = base;

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.
@ -19,7 +19,7 @@
#ifndef ARMINTERPRETER_LOADSTORE_H
#define ARMINTERPRETER_LOADSTORE_H
namespace melonDS::ARMInterpreter
namespace ARMInterpreter
{
#define A_PROTO_WB_LDRSTR(x) \

File diff suppressed because it is too large Load Diff

View File

@ -1,206 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_H
#define ARMJIT_H
#include <algorithm>
#include <optional>
#include <memory>
#include "types.h"
#include "MemConstants.h"
#include "Args.h"
#include "ARMJIT_Memory.h"
#ifdef JIT_ENABLED
#include "JitBlock.h"
#if defined(__APPLE__) && defined(__aarch64__)
#include <pthread.h>
#endif
#include "ARMJIT_Compiler.h"
namespace melonDS
{
class ARM;
class JitBlock;
class ARMJIT
{
public:
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept;
~ARMJIT() noexcept;
void InvalidateByAddr(u32) noexcept;
void CheckAndInvalidateWVRAM(int) noexcept;
void CheckAndInvalidateITCM() noexcept;
void Reset() noexcept;
void JitEnableWrite() noexcept;
void JitEnableExecute() noexcept;
void CompileBlock(ARM* cpu) noexcept;
void ResetBlockCache() noexcept;
template <u32 num, int region>
void CheckAndInvalidate(u32 addr) noexcept
{
u32 localAddr = Memory.LocaliseAddress(region, num, addr);
if (CodeMemRegions[region][(localAddr & 0x7FFFFFF) / 512].Code & (1 << ((localAddr & 0x1FF) / 16)))
InvalidateByAddr(localAddr);
}
JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr) noexcept;
bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size) noexcept;
u32 LocaliseCodeAddress(u32 num, u32 addr) const noexcept;
ARMJIT_Memory Memory;
private:
int MaxBlockSize {};
bool LiteralOptimizations = false;
bool BranchOptimizations = false;
bool FastMemory = false;
public:
melonDS::NDS& NDS;
TinyVector<u32> InvalidLiterals {};
friend class ARMJIT_Memory;
void blockSanityCheck(u32 num, u32 blockAddr, JitBlockEntry entry) noexcept;
void RetireJitBlock(JitBlock* block) noexcept;
int GetMaxBlockSize() const noexcept { return MaxBlockSize; }
bool LiteralOptimizationsEnabled() const noexcept { return LiteralOptimizations; }
bool BranchOptimizationsEnabled() const noexcept { return BranchOptimizations; }
bool FastMemoryEnabled() const noexcept { return FastMemory; }
void SetJITArgs(JITArgs args) noexcept;
void SetMaxBlockSize(int size) noexcept;
void SetLiteralOptimizations(bool enabled) noexcept;
void SetBranchOptimizations(bool enabled) noexcept;
void SetFastMemory(bool enabled) noexcept;
Compiler JITCompiler;
std::unordered_map<u32, JitBlock*> JitBlocks9 {};
std::unordered_map<u32, JitBlock*> JitBlocks7 {};
std::unordered_map<u32, JitBlock*> RestoreCandidates {};
AddressRange CodeIndexITCM[ITCMPhysicalSize / 512] {};
AddressRange CodeIndexMainRAM[MainRAMMaxSize / 512] {};
AddressRange CodeIndexSWRAM[SharedWRAMSize / 512] {};
AddressRange CodeIndexVRAM[0x100000 / 512] {};
AddressRange CodeIndexARM9BIOS[ARM9BIOSSize / 512] {};
AddressRange CodeIndexARM7BIOS[ARM7BIOSSize / 512] {};
AddressRange CodeIndexARM7WRAM[ARM7WRAMSize / 512] {};
AddressRange CodeIndexARM7WVRAM[0x40000 / 512] {};
AddressRange CodeIndexBIOS9DSi[0x10000 / 512] {};
AddressRange CodeIndexBIOS7DSi[0x10000 / 512] {};
AddressRange CodeIndexNWRAM_A[NWRAMSize / 512] {};
AddressRange CodeIndexNWRAM_B[NWRAMSize / 512] {};
AddressRange CodeIndexNWRAM_C[NWRAMSize / 512] {};
u64 FastBlockLookupITCM[ITCMPhysicalSize / 2] {};
u64 FastBlockLookupMainRAM[MainRAMMaxSize / 2] {};
u64 FastBlockLookupSWRAM[SharedWRAMSize / 2] {};
u64 FastBlockLookupVRAM[0x100000 / 2] {};
u64 FastBlockLookupARM9BIOS[ARM9BIOSSize / 2] {};
u64 FastBlockLookupARM7BIOS[ARM7BIOSSize / 2] {};
u64 FastBlockLookupARM7WRAM[ARM7WRAMSize / 2] {};
u64 FastBlockLookupARM7WVRAM[0x40000 / 2] {};
u64 FastBlockLookupBIOS9DSi[0x10000 / 2] {};
u64 FastBlockLookupBIOS7DSi[0x10000 / 2] {};
u64 FastBlockLookupNWRAM_A[NWRAMSize / 2] {};
u64 FastBlockLookupNWRAM_B[NWRAMSize / 2] {};
u64 FastBlockLookupNWRAM_C[NWRAMSize / 2] {};
AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count] =
{
NULL,
CodeIndexITCM,
NULL,
CodeIndexARM9BIOS,
CodeIndexMainRAM,
CodeIndexSWRAM,
NULL,
CodeIndexVRAM,
CodeIndexARM7BIOS,
CodeIndexARM7WRAM,
NULL,
NULL,
CodeIndexARM7WVRAM,
CodeIndexBIOS9DSi,
CodeIndexBIOS7DSi,
CodeIndexNWRAM_A,
CodeIndexNWRAM_B,
CodeIndexNWRAM_C
};
u64* const FastBlockLookupRegions[ARMJIT_Memory::memregions_Count] =
{
NULL,
FastBlockLookupITCM,
NULL,
FastBlockLookupARM9BIOS,
FastBlockLookupMainRAM,
FastBlockLookupSWRAM,
NULL,
FastBlockLookupVRAM,
FastBlockLookupARM7BIOS,
FastBlockLookupARM7WRAM,
NULL,
NULL,
FastBlockLookupARM7WVRAM,
FastBlockLookupBIOS9DSi,
FastBlockLookupBIOS7DSi,
FastBlockLookupNWRAM_A,
FastBlockLookupNWRAM_B,
FastBlockLookupNWRAM_C
};
};
}
// Defined in assembly
extern "C" void ARM_Dispatch(melonDS::ARM* cpu, melonDS::JitBlockEntry entry);
#else
namespace melonDS
{
class ARM;
// This version is a stub; the methods all do nothing,
// but there's still a Memory member.
class ARMJIT
{
public:
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs>) noexcept : Memory(nds) {}
~ARMJIT() noexcept {}
void InvalidateByAddr(u32) noexcept {}
void CheckAndInvalidateWVRAM(int) noexcept {}
void CheckAndInvalidateITCM() noexcept {}
void Reset() noexcept {}
void JitEnableWrite() noexcept {}
void JitEnableExecute() noexcept {}
void CompileBlock(ARM*) noexcept {}
void ResetBlockCache() noexcept {}
template <u32, int>
void CheckAndInvalidate(u32 addr) noexcept {}
ARMJIT_Memory Memory;
};
}
#endif // JIT_ENABLED
#endif // ARMJIT_H

View File

@ -1,973 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
using namespace Arm64Gen;
namespace melonDS
{
void Compiler::Comp_RegShiftReg(int op, bool S, Op2& op2, ARM64Reg rs)
{
if (!(CurInstr.SetFlags & 0x2))
S = false;
CPSRDirty |= S;
UBFX(W1, rs, 0, 8);
if (!S)
{
if (op == 3)
RORV(W0, op2.Reg.Rm, W1);
else
{
CMP(W1, 32);
if (op == 2)
{
MOVI2R(W2, 31);
CSEL(W1, W2, W1, CC_GE);
ASRV(W0, op2.Reg.Rm, W1);
}
else
{
if (op == 0)
LSLV(W0, op2.Reg.Rm, W1);
else if (op == 1)
LSRV(W0, op2.Reg.Rm, W1);
CSEL(W0, WZR, W0, CC_GE);
}
}
}
else
{
MOV(W0, op2.Reg.Rm);
FixupBranch zero = CBZ(W1);
SUB(W1, W1, 1);
if (op == 3)
{
RORV(W0, op2.Reg.Rm, W1);
BFI(RCPSR, W0, 29, 1);
}
else
{
CMP(W1, 31);
if (op == 2)
{
MOVI2R(W2, 31);
CSEL(W1, W2, W1, CC_GT);
ASRV(W0, op2.Reg.Rm, W1);
BFI(RCPSR, W0, 29, 1);
}
else
{
if (op == 0)
{
LSLV(W0, op2.Reg.Rm, W1);
UBFX(W1, W0, 31, 1);
}
else if (op == 1)
LSRV(W0, op2.Reg.Rm, W1);
CSEL(W1, WZR, op ? W0 : W1, CC_GT);
BFI(RCPSR, W1, 29, 1);
CSEL(W0, WZR, W0, CC_GE);
}
}
MOV(W0, W0, ArithOption(W0, (ShiftType)op, 1));
SetJumpTarget(zero);
}
op2 = Op2(W0, ST_LSL, 0);
}
void Compiler::Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, ARM64Reg tmp)
{
if (!(CurInstr.SetFlags & 0x2))
S = false;
CPSRDirty |= S;
switch (op)
{
case 0: // LSL
if (S && amount)
{
UBFX(tmp, op2.Reg.Rm, 32 - amount, 1);
BFI(RCPSR, tmp, 29, 1);
}
op2 = Op2(op2.Reg.Rm, ST_LSL, amount);
return;
case 1: // LSR
if (S)
{
UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
BFI(RCPSR, tmp, 29, 1);
}
if (amount == 0)
{
op2 = Op2(0);
return;
}
op2 = Op2(op2.Reg.Rm, ST_LSR, amount);
return;
case 2: // ASR
if (S)
{
UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
BFI(RCPSR, tmp, 29, 1);
}
op2 = Op2(op2.Reg.Rm, ST_ASR, amount ? amount : 31);
return;
case 3: // ROR
if (amount == 0)
{
UBFX(tmp, RCPSR, 29, 1);
LSL(tmp, tmp, 31);
if (S)
BFI(RCPSR, op2.Reg.Rm, 29, 1);
ORR(tmp, tmp, op2.Reg.Rm, ArithOption(tmp, ST_LSR, 1));
op2 = Op2(tmp, ST_LSL, 0);
}
else
{
if (S)
{
UBFX(tmp, op2.Reg.Rm, amount - 1, 1);
BFI(RCPSR, tmp, 29, 1);
}
op2 = Op2(op2.Reg.Rm, ST_ROR, amount);
}
return;
}
}
void Compiler::Comp_RetriveFlags(bool retriveCV)
{
if (CurInstr.SetFlags)
CPSRDirty = true;
if (CurInstr.SetFlags & 0x4)
{
CSET(W0, CC_EQ);
BFI(RCPSR, W0, 30, 1);
}
if (CurInstr.SetFlags & 0x8)
{
CSET(W0, CC_MI);
BFI(RCPSR, W0, 31, 1);
}
if (retriveCV)
{
if (CurInstr.SetFlags & 0x2)
{
CSET(W0, CC_CS);
BFI(RCPSR, W0, 29, 1);
}
if (CurInstr.SetFlags & 0x1)
{
CSET(W0, CC_VS);
BFI(RCPSR, W0, 28, 1);
}
}
}
void Compiler::Comp_Logical(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
{
if (S && !CurInstr.SetFlags)
S = false;
switch (op)
{
case 0x0: // AND
if (S)
{
if (op2.IsImm)
ANDSI2R(rd, rn, op2.Imm, W0);
else
ANDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
ANDI2R(rd, rn, op2.Imm, W0);
else
AND(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x1: // EOR
if (op2.IsImm)
EORI2R(rd, rn, op2.Imm, W0);
else
EOR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
if (S && FlagsNZNeeded())
TST(rd, rd);
break;
case 0xC: // ORR
if (op2.IsImm)
ORRI2R(rd, rn, op2.Imm, W0);
else
ORR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
if (S && FlagsNZNeeded())
TST(rd, rd);
break;
case 0xE: // BIC
if (S)
{
if (op2.IsImm)
ANDSI2R(rd, rn, ~op2.Imm, W0);
else
BICS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
ANDI2R(rd, rn, ~op2.Imm, W0);
else
BIC(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
}
if (S)
Comp_RetriveFlags(false);
}
void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
{
if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
{
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W0, ST_LSL, 0);
}
if (S && !CurInstr.SetFlags)
S = false;
bool CVInGPR = false;
switch (op)
{
case 0x2: // SUB
if (S)
{
if (op2.IsImm)
SUBSI2R(rd, rn, op2.Imm, W0);
else
SUBS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
{
MOVI2R(W2, op2.Imm);
SUBI2R(rd, rn, op2.Imm, W0);
}
else
SUB(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x3: // RSB
if (op2.IsZero())
{
op2 = Op2(WZR);
}
else if (op2.IsImm)
{
MOVI2R(W1, op2.Imm);
op2 = Op2(W1);
}
else if (op2.Reg.ShiftAmount != 0)
{
MOV(W1, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W1);
}
if (S)
SUBS(rd, op2.Reg.Rm, rn);
else
SUB(rd, op2.Reg.Rm, rn);
break;
case 0x4: // ADD
if (S)
{
if (op2.IsImm)
ADDSI2R(rd, rn, op2.Imm, W0);
else
ADDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
ADDI2R(rd, rn, op2.Imm, W0);
else
ADD(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x5: // ADC
UBFX(W2, RCPSR, 29, 1);
if (S)
{
if (op2.IsImm)
{
CVInGPR = true;
ADDS(W1, rn, W2);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
if (op2.IsImm)
ADDSI2R(rd, W1, op2.Imm, W0);
else
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
if (op2.Reg.ShiftAmount > 0)
{
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W0, ST_LSL, 0);
}
CMP(W2, 1);
ADCS(rd, rn, op2.Reg.Rm);
}
}
else
{
ADD(W1, rn, W2);
if (op2.IsImm)
ADDI2R(rd, W1, op2.Imm, W0);
else
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x6: // SBC
UBFX(W2, RCPSR, 29, 1);
if (S && !op2.IsImm)
{
if (op2.Reg.ShiftAmount > 0)
{
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W0, ST_LSL, 0);
}
CMP(W2, 1);
SBCS(rd, rn, op2.Reg.Rm);
}
else
{
// W1 = -op2 - 1
if (op2.IsImm)
MOVI2R(W1, ~op2.Imm);
else
ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
if (S)
{
CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
ADDS(rd, rn, W1);
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, W2, W1);
ADD(rd, rn, W1);
}
}
break;
case 0x7: // RSC
UBFX(W2, RCPSR, 29, 1);
// W1 = -rn - 1
MVN(W1, rn);
if (S)
{
CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
if (op2.IsImm)
ADDSI2R(rd, W1, op2.Imm);
else
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, W2, W1);
if (op2.IsImm)
ADDI2R(rd, W1, op2.Imm);
else
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
}
break;
}
if (S)
{
if (CVInGPR)
{
BFI(RCPSR, W2, 29, 1);
BFI(RCPSR, W3, 28, 1);
}
Comp_RetriveFlags(!CVInGPR);
}
}
void Compiler::Comp_Compare(int op, ARM64Reg rn, Op2 op2)
{
if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
{
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W0, ST_LSL, 0);
}
switch (op)
{
case 0x8: // TST
if (op2.IsImm)
TSTI2R(rn, op2.Imm, W0);
else
ANDS(WZR, rn, op2.Reg.Rm, op2.ToArithOption());
break;
case 0x9: // TEQ
if (op2.IsImm)
EORI2R(W0, rn, op2.Imm, W0);
else
EOR(W0, rn, op2.Reg.Rm, op2.ToArithOption());
TST(W0, W0);
break;
case 0xA: // CMP
if (op2.IsImm)
CMPI2R(rn, op2.Imm, W0);
else
CMP(rn, op2.Reg.Rm, op2.ToArithOption());
break;
case 0xB: // CMN
if (op2.IsImm)
ADDSI2R(WZR, rn, op2.Imm, W0);
else
CMN(rn, op2.Reg.Rm, op2.ToArithOption());
break;
}
Comp_RetriveFlags(op >= 0xA);
}
// also counts cycles!
void Compiler::A_Comp_GetOp2(bool S, Op2& op2)
{
if (CurInstr.Instr & (1 << 25))
{
Comp_AddCycles_C();
u32 shift = (CurInstr.Instr >> 7) & 0x1E;
u32 imm = melonDS::ROR(CurInstr.Instr & 0xFF, shift);
if (S && shift && (CurInstr.SetFlags & 0x2))
{
CPSRDirty = true;
if (imm & 0x80000000)
ORRI2R(RCPSR, RCPSR, 1 << 29);
else
ANDI2R(RCPSR, RCPSR, ~(1 << 29));
}
op2 = Op2(imm);
}
else
{
int op = (CurInstr.Instr >> 5) & 0x3;
op2.Reg.Rm = MapReg(CurInstr.A_Reg(0));
if (CurInstr.Instr & (1 << 4))
{
Comp_AddCycles_CI(1);
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
if (CurInstr.A_Reg(0) == 15)
{
ADD(W0, op2.Reg.Rm, 4);
op2.Reg.Rm = W0;
}
Comp_RegShiftReg(op, S, op2, rs);
}
else
{
Comp_AddCycles_C();
int amount = (CurInstr.Instr >> 7) & 0x1F;
Comp_RegShiftImm(op, amount, S, op2);
}
}
}
void Compiler::A_Comp_ALUCmpOp()
{
u32 op = (CurInstr.Instr >> 21) & 0xF;
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
Op2 op2;
A_Comp_GetOp2(op <= 0x9, op2);
Comp_Compare(op, rn, op2);
}
void Compiler::A_Comp_ALUMovOp()
{
bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurInstr.Instr >> 21) & 0xF;
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
Op2 op2;
A_Comp_GetOp2(S, op2);
if (op == 0xF) // MVN
{
if (op2.IsImm)
{
if (CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm);
MOVI2R(rd, ~op2.Imm);
}
else
ORN(rd, WZR, op2.Reg.Rm, op2.ToArithOption());
}
else // MOV
{
if (op2.IsImm)
{
if (CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm);
MOVI2R(rd, op2.Imm);
}
else
{
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
}
}
if (S)
{
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
if (CurInstr.Info.Branches())
Comp_JumpTo(rd, true, S);
}
void Compiler::A_Comp_ALUTriOp()
{
bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool logical = (1 << op) & 0xF303;
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
Op2 op2;
A_Comp_GetOp2(S && logical, op2);
if (op2.IsImm && op2.Imm == 0)
op2 = Op2(WZR, ST_LSL, 0);
if (logical)
Comp_Logical(op, S, rd, rn, op2);
else
Comp_Arithmetic(op, S, rd, rn, op2);
if (CurInstr.Info.Branches())
Comp_JumpTo(rd, true, S);
}
void Compiler::A_Comp_Clz()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
CLZ(rd, rm);
assert(Num == 0);
}
void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg rs, ARM64Reg rn)
{
if (Num == 0)
{
Comp_AddCycles_CI(S ? 3 : 1);
}
else
{
CLS(W0, rs);
Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3));
}
if (mla)
MADD(rd, rm, rs, rn);
else
MUL(rd, rm, rs);
if (S && FlagsNZNeeded())
{
TST(rd, rd);
Comp_RetriveFlags(false);
}
}
void Compiler::A_Comp_Mul_Long()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
bool S = CurInstr.Instr & (1 << 20);
bool add = CurInstr.Instr & (1 << 21);
bool sign = CurInstr.Instr & (1 << 22);
if (Num == 0)
{
Comp_AddCycles_CI(S ? 3 : 1);
}
else
{
if (sign)
CLS(W0, rs);
else
CLZ(W0, rs);
Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
}
if (add)
{
MOV(W0, rn);
BFI(X0, EncodeRegTo64(rd), 32, 32);
if (sign)
SMADDL(EncodeRegTo64(rn), rm, rs, X0);
else
UMADDL(EncodeRegTo64(rn), rm, rs, X0);
if (S && FlagsNZNeeded())
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
}
else
{
if (sign)
SMULL(EncodeRegTo64(rn), rm, rs);
else
UMULL(EncodeRegTo64(rn), rm, rs);
if (S && FlagsNZNeeded())
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
}
if (S)
Comp_RetriveFlags(false);
}
void Compiler::A_Comp_Mul_Short()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool x = CurInstr.Instr & (1 << 5);
bool y = CurInstr.Instr & (1 << 6);
SBFX(W1, rs, y ? 16 : 0, 16);
if (op == 0b1000)
{
// SMLAxy
SBFX(W0, rm, x ? 16 : 0, 16);
MUL(W0, W0, W1);
ORRI2R(W1, RCPSR, 0x08000000);
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
ADDS(rd, W0, rn);
CSEL(RCPSR, W1, RCPSR, CC_VS);
CPSRDirty = true;
Comp_AddCycles_C();
}
else if (op == 0b1011)
{
// SMULxy
SBFX(W0, rm, x ? 16 : 0, 16);
MUL(rd, W0, W1);
Comp_AddCycles_C();
}
else if (op == 0b1010)
{
// SMLALxy
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
MOV(W2, rn);
BFI(X2, rd, 32, 32);
SBFX(W0, rm, x ? 16 : 0, 16);
SMADDL(EncodeRegTo64(rn), W0, W1, X2);
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
Comp_AddCycles_CI(1);
}
else if (op == 0b1001)
{
// SMLAWy/SMULWy
SMULL(X0, rm, W1);
ASR(x ? EncodeRegTo64(rd) : X0, X0, 16);
if (!x)
{
ORRI2R(W1, RCPSR, 0x08000000);
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
ADDS(rd, W0, rn);
CSEL(RCPSR, W1, RCPSR, CC_VS);
CPSRDirty = true;
}
Comp_AddCycles_C();
}
}
void Compiler::A_Comp_Mul()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
bool S = CurInstr.Instr & (1 << 20);
bool mla = CurInstr.Instr & (1 << 21);
ARM64Reg rn = INVALID_REG;
if (mla)
rn = MapReg(CurInstr.A_Reg(12));
Comp_Mul_Mla(S, mla, rd, rm, rs, rn);
}
void Compiler::T_Comp_ShiftImm()
{
Comp_AddCycles_C();
u32 op = (CurInstr.Instr >> 11) & 0x3;
int amount = (CurInstr.Instr >> 6) & 0x1F;
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
Op2 op2;
op2.Reg.Rm = MapReg(CurInstr.T_Reg(3));
Comp_RegShiftImm(op, amount, true, op2);
if (op2.IsImm)
MOVI2R(rd, op2.Imm);
else
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
void Compiler::T_Comp_AddSub_()
{
Comp_AddCycles_C();
Op2 op2;
if (CurInstr.Instr & (1 << 10))
op2 = Op2((CurInstr.Instr >> 6) & 0x7);
else
op2 = Op2(MapReg(CurInstr.T_Reg(6)));
Comp_Arithmetic(
CurInstr.Instr & (1 << 9) ? 0x2 : 0x4,
true,
MapReg(CurInstr.T_Reg(0)),
MapReg(CurInstr.T_Reg(3)),
op2);
}
void Compiler::T_Comp_ALUImm8()
{
Comp_AddCycles_C();
u32 imm = CurInstr.Instr & 0xFF;
int op = (CurInstr.Instr >> 11) & 0x3;
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
switch (op)
{
case 0:
MOVI2R(rd, imm);
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
break;
case 1:
Comp_Compare(0xA, rd, Op2(imm));
break;
case 2:
case 3:
Comp_Arithmetic(op == 2 ? 0x4 : 0x2, true, rd, rd, Op2(imm));
break;
}
}
void Compiler::T_Comp_ALU()
{
int op = (CurInstr.Instr >> 6) & 0xF;
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
ARM64Reg rs = MapReg(CurInstr.T_Reg(3));
if ((op >= 0x2 && op <= 0x4) || op == 0x7)
Comp_AddCycles_CI(1);
else
Comp_AddCycles_C();
switch (op)
{
case 0x0:
Comp_Logical(0x0, true, rd, rd, Op2(rs));
break;
case 0x1:
Comp_Logical(0x1, true, rd, rd, Op2(rs));
break;
case 0x2:
case 0x3:
case 0x4:
case 0x7:
{
Op2 op2;
op2.Reg.Rm = rd;
Comp_RegShiftReg(op == 0x7 ? 3 : (op - 0x2), true, op2, rs);
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
break;
case 0x5:
Comp_Arithmetic(0x5, true, rd, rd, Op2(rs));
break;
case 0x6:
Comp_Arithmetic(0x6, true, rd, rd, Op2(rs));
break;
case 0x8:
Comp_Compare(0x8, rd, Op2(rs));
break;
case 0x9:
Comp_Arithmetic(0x3, true, rd, rs, Op2(0));
break;
case 0xA:
Comp_Compare(0xA, rd, Op2(rs));
break;
case 0xB:
Comp_Compare(0xB, rd, Op2(rs));
break;
case 0xC:
Comp_Logical(0xC, true, rd, rd, Op2(rs));
break;
case 0xD:
Comp_Mul_Mla(true, false, rd, rd, rs, INVALID_REG);
break;
case 0xE:
Comp_Logical(0xE, true, rd, rd, Op2(rs));
break;
case 0xF:
MVN(rd, rs);
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
break;
}
}
void Compiler::T_Comp_ALU_HiReg()
{
u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
u32 op = (CurInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C();
switch (op)
{
case 0:
Comp_Arithmetic(0x4, false, rdMapped, rdMapped, Op2(rs));
break;
case 1:
Comp_Compare(0xA, rdMapped, rs);
return;
case 2:
MOV(rdMapped, rs);
break;
}
if (rd == 15)
{
Comp_JumpTo(rdMapped, false, false);
}
}
void Compiler::T_Comp_AddSP()
{
Comp_AddCycles_C();
ARM64Reg sp = MapReg(13);
u32 offset = (CurInstr.Instr & 0x7F) << 2;
if (CurInstr.Instr & (1 << 7))
SUB(sp, sp, offset);
else
ADD(sp, sp, offset);
}
void Compiler::T_Comp_RelAddr()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
u32 offset = (CurInstr.Instr & 0xFF) << 2;
if (CurInstr.Instr & (1 << 11))
{
ARM64Reg sp = MapReg(13);
ADD(rd, sp, offset);
}
else
MOVI2R(rd, (R15 & ~2) + offset);
}
}

View File

@ -1,440 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
#include "../NDS.h"
using namespace Arm64Gen;
// hack
const int kCodeCacheTiming = 3;
namespace melonDS
{
template <typename T>
void JumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
{
cpu->JumpTo(addr, changeCPSR);
}
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
// we can simplify constant branches by a lot
// it's not completely safe to assume stuff like, which instructions to preload
// we'll see how it works out
IrregularCycles = true;
u32 newPC;
u32 cycles = 0;
bool setupRegion = false;
if (addr & 0x1 && !Thumb)
{
CPSRDirty = true;
ORRI2R(RCPSR, RCPSR, 0x20);
}
else if (!(addr & 0x1) && Thumb)
{
CPSRDirty = true;
ANDI2R(RCPSR, RCPSR, ~0x20);
}
if (Num == 0)
{
ARMv5* cpu9 = (ARMv5*)CurCPU;
u32 oldregion = R15 >> 24;
u32 newregion = addr >> 24;
u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
cpu9->RegionCodeCycles = regionCodeCycles;
MOVI2R(W0, regionCodeCycles);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles));
setupRegion = newregion != oldregion;
if (setupRegion)
cpu9->SetupCodeMem(addr);
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
else
{
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}
cpu9->RegionCodeCycles = compileTimeCodeCycles;
if (setupRegion)
cpu9->SetupCodeMem(R15);
}
else
{
ARMv4* cpu7 = (ARMv4*)CurCPU;
u32 codeRegion = addr >> 24;
u32 codeCycles = addr >> 15; // cheato
cpu7->CodeRegion = codeRegion;
cpu7->CodeCycles = codeCycles;
MOVI2R(W0, codeRegion);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion));
MOVI2R(W0, codeCycles);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// this is necessary because ARM7 bios protection
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
cycles += NDS.ARM7MemTimings[codeCycles][0] + NDS.ARM7MemTimings[codeCycles][1];
CurCPU->R[15] = compileTimePC;
}
else
{
addr &= ~0x3;
newPC = addr+4;
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
cycles += NDS.ARM7MemTimings[codeCycles][2] + NDS.ARM7MemTimings[codeCycles][3];
CurCPU->R[15] = compileTimePC;
}
cpu7->CodeRegion = R15 >> 24;
cpu7->CodeCycles = addr >> 15;
}
if (Exit)
{
MOVI2R(W0, newPC);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
}
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
}
void* Compiler::Gen_JumpTo9(int kind)
{
AlignCode16();
void* res = GetRXPtr();
LSR(W1, W0, 12);
ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
LDRB(W1, RCPU, W1);
LDR(INDEX_UNSIGNED, W2, RCPU, offsetof(ARMv5, ITCMSize));
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
CMP(W1, 0xFF);
MOVI2R(W3, kCodeCacheTiming);
CSEL(W1, W3, W1, CC_EQ);
CMP(W0, W2);
CSINC(W1, W1, WZR, CC_HS);
FixupBranch switchToThumb;
if (kind == 0)
switchToThumb = TBNZ(W0, 0);
if (kind == 0 || kind == 1)
{
// ARM
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
ANDI2R(W0, W0, ~3);
ADD(W0, W0, 4);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
ADD(W1, W1, W1);
ADD(RCycles, RCycles, W1);
RET();
}
if (kind == 0 || kind == 2)
{
// Thumb
if (kind == 0)
{
SetJumpTarget(switchToThumb);
ORRI2R(RCPSR, RCPSR, 0x20);
}
ANDI2R(W0, W0, ~1);
ADD(W0, W0, 2);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
ADD(W2, W1, W1);
TSTI2R(W0, 0x2);
CSEL(W1, W1, W2, CC_EQ);
ADD(RCycles, RCycles, W1);
RET();
}
return res;
}
void* Compiler::Gen_JumpTo7(int kind)
{
void* res = GetRXPtr();
LSR(W1, W0, 24);
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion));
LSR(W1, W0, 15);
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles));
MOVP2R(X2, NDS.ARM7MemTimings);
LDR(W3, X2, ArithOption(W1, true));
FixupBranch switchToThumb;
if (kind == 0)
switchToThumb = TBNZ(W0, 0);
if (kind == 0 || kind == 1)
{
UBFX(W2, W3, 0, 8);
UBFX(W3, W3, 8, 8);
ADD(W2, W3, W2);
ADD(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~3);
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
ADD(W3, W0, 4);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
RET();
}
if (kind == 0 || kind == 2)
{
if (kind == 0)
{
SetJumpTarget(switchToThumb);
ORRI2R(RCPSR, RCPSR, 0x20);
}
UBFX(W2, W3, 16, 8);
UBFX(W3, W3, 24, 8);
ADD(W2, W3, W2);
ADD(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~1);
ADD(W3, W0, 2);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
RET();
}
return res;
}
void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR)
{
IrregularCycles = true;
if (!restoreCPSR)
{
if (switchThumb)
CPSRDirty = true;
MOV(W0, addr);
BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 0 : (Thumb + 1)]);
}
else
{
bool cpsrDirty = CPSRDirty;
SaveCPSR();
SaveCycles();
PushRegs(restoreCPSR, true);
if (switchThumb)
MOV(W1, addr);
else
{
if (Thumb)
ORRI2R(W1, addr, 1);
else
ANDI2R(W1, addr, ~1);
}
MOV(X0, RCPU);
MOVI2R(W2, restoreCPSR);
if (Num == 0)
QuickCallFunction(X3, JumpToTrampoline<ARMv5>);
else
QuickCallFunction(X3, JumpToTrampoline<ARMv4>);
PopRegs(restoreCPSR, true);
LoadCycles();
LoadCPSR();
if (CurInstr.Cond() < 0xE)
CPSRDirty = cpsrDirty;
}
}
void Compiler::A_Comp_BranchImm()
{
int op = (CurInstr.Instr >> 24) & 1;
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
u32 target = R15 + offset;
bool link = op;
if (CurInstr.Cond() == 0xF) // BLX_imm
{
target += (op << 1) + 1;
link = true;
}
if (link)
MOVI2R(MapReg(14), R15 - 4);
Comp_JumpTo(target);
}
void Compiler::A_Comp_BranchXchangeReg()
{
ARM64Reg rn = MapReg(CurInstr.A_Reg(0));
MOV(W0, rn);
if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
MOVI2R(MapReg(14), R15 - 4);
Comp_JumpTo(W0, true);
}
void Compiler::T_Comp_BCOND()
{
u32 cond = (CurInstr.Instr >> 8) & 0xF;
FixupBranch skipExecute = CheckCondition(cond);
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
Comp_BranchSpecialBehaviour(true);
FixupBranch skipFailed = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipFailed);
}
void Compiler::T_Comp_B()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
Comp_JumpTo(R15 + offset + 1);
}
void Compiler::T_Comp_BranchXchangeReg()
{
bool link = CurInstr.Instr & (1 << 7);
if (link)
{
if (Num == 1)
{
Log(LogLevel::Warn, "BLX unsupported on ARM7!!!\n");
return;
}
MOV(W0, MapReg(CurInstr.A_Reg(3)));
MOVI2R(MapReg(14), R15 - 1);
Comp_JumpTo(W0, true);
}
else
{
ARM64Reg rn = MapReg(CurInstr.A_Reg(3));
Comp_JumpTo(rn, true);
}
}
void Compiler::T_Comp_BL_LONG_1()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
MOVI2R(MapReg(14), R15 + offset);
Comp_AddCycles_C();
}
void Compiler::T_Comp_BL_LONG_2()
{
ARM64Reg lr = MapReg(14);
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
ADD(W0, lr, offset);
MOVI2R(lr, (R15 - 2) | 1);
Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12)));
}
void Compiler::T_Comp_BL_Merged()
{
Comp_AddCycles_C();
R15 += 2;
u32 upperPart = CurInstr.Instr >> 16;
u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
target += (upperPart & 0x7FF) << 1;
if (Num == 1 || upperPart & (1 << 12))
target |= 1;
MOVI2R(MapReg(14), (R15 - 2) | 1);
Comp_JumpTo(target);
}
}

View File

@ -1,968 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
#include "../ARMJIT_Internal.h"
#include "../ARMInterpreter.h"
#include "../ARMJIT.h"
#include "../NDS.h"
#include "../ARMJIT_Global.h"
#include <stdlib.h>
using namespace Arm64Gen;
extern "C" void ARM_Ret();
namespace melonDS
{
/*
Recompiling classic ARM to ARMv8 code is at the same time
easier and trickier than compiling to a less related architecture
like x64. At one hand you can translate a lot of instructions directly.
But at the same time, there are a ton of exceptions, like for
example ADD and SUB can't have a RORed second operand on ARMv8.
While writing a JIT when an instruction is recompiled into multiple ones
not to write back until you've read all the other operands!
*/
template <>
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
{
W19, W20, W21, W22, W23, W24, W25,
W8, W9, W10, W11, W12, W13, W14, W15
};
template <>
const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15;
const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});
void Compiler::MovePC()
{
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
}
void Compiler::A_Comp_MRS()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
if (CurInstr.Instr & (1 << 22))
{
ANDI2R(W5, RCPSR, 0x1F);
MOVI2R(W3, 0);
MOVI2R(W1, 15 - 8);
BL(ReadBanked);
MOV(rd, W3);
}
else
MOV(rd, RCPSR);
}
void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
{
arm->UpdateMode(oldmode, newmode);
}
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
ARM64Reg val;
if (CurInstr.Instr & (1 << 25))
{
val = W0;
MOVI2R(val, melonDS::ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)));
}
else
{
val = MapReg(CurInstr.A_Reg(0));
}
u32 mask = 0;
if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
if (CurInstr.Instr & (1 << 22))
{
ANDI2R(W5, RCPSR, 0x1F);
MOVI2R(W3, 0);
MOVI2R(W1, 15 - 8);
BL(ReadBanked);
MOVI2R(W1, mask);
MOVI2R(W2, mask & 0xFFFFFF00);
ANDI2R(W5, RCPSR, 0x1F);
CMP(W5, 0x10);
CSEL(W1, W2, W1, CC_EQ);
BIC(W3, W3, W1);
AND(W0, val, W1);
ORR(W3, W3, W0);
MOVI2R(W1, 15 - 8);
BL(WriteBanked);
}
else
{
mask &= 0xFFFFFFDF;
CPSRDirty = true;
if ((mask & 0xFF) == 0)
{
ANDI2R(RCPSR, RCPSR, ~mask);
ANDI2R(W0, val, mask);
ORR(RCPSR, RCPSR, W0);
}
else
{
MOVI2R(W2, mask);
MOVI2R(W3, mask & 0xFFFFFF00);
ANDI2R(W1, RCPSR, 0x1F);
// W1 = first argument
CMP(W1, 0x10);
CSEL(W2, W3, W2, CC_EQ);
BIC(RCPSR, RCPSR, W2);
AND(W0, val, W2);
ORR(RCPSR, RCPSR, W0);
MOV(W2, RCPSR);
MOV(X0, RCPU);
PushRegs(true, true);
QuickCallFunction(X3, UpdateModeTrampoline);
PopRegs(true, true);
}
}
}
void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload)
{
BitSet32 loadedRegs(RegCache.LoadedRegs);
if (saveHiRegs)
{
BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsLoaded)
{
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.UnloadRegister(reg);
else
SaveReg(reg, RegCache.Mapping[reg]);
// prevent saving the register twice
loadedRegs[reg] = false;
}
}
for (int reg : loadedRegs)
{
if (CallerSavedPushRegs[RegCache.Mapping[reg]]
&& (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs))))
{
if ((Thumb || CurInstr.Cond() == 0xE) && !((1 << reg) & (CurInstr.Info.DstRegs|CurInstr.Info.SrcRegs)) && allowUnload)
RegCache.UnloadRegister(reg);
else
SaveReg(reg, RegCache.Mapping[reg]);
}
}
}
void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged)
{
BitSet32 loadedRegs(RegCache.LoadedRegs);
for (int reg : loadedRegs)
{
if ((saveHiRegs && reg >= 8 && reg < 15)
|| (CallerSavedPushRegs[RegCache.Mapping[reg]]
&& (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs)))))
{
LoadReg(reg, RegCache.Mapping[reg]);
}
}
}
Compiler::Compiler(melonDS::NDS& nds) : Arm64Gen::ARM64XEmitter(), NDS(nds)
{
#ifdef __SWITCH__
JitRWBase = aligned_alloc(0x1000, JitMemSize);
JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
virtmemLock();
JitRWStart = virtmemFindAslr(JitMemSize, 0x1000);
MemoryInfo info = {0};
u32 pageInfo = {0};
int i = 0;
while (JitRXStart != NULL)
{
svcQueryMemory(&info, &pageInfo, (u64)JitRXStart);
if (info.type != MemType_Unmapped)
JitRXStart = (void*)((u8*)info.addr - JitMemSize - 0x1000);
else
break;
if (i++ > 8)
{
Log(LogLevel::Error, "couldn't find unmapped place for jit memory\n");
JitRXStart = NULL;
}
}
assert(JitRXStart != NULL);
bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
assert(succeded);
succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx));
assert(succeded);
succeded = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
assert(succeded);
virtmemUnlock();
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
JitMemMainSize = JitMemSize;
#else
ARMJIT_Global::Init();
CodeMemBase = ARMJIT_Global::AllocateCodeMem();
nds.JIT.JitEnableWrite();
SetCodeBase(reinterpret_cast<u8*>(CodeMemBase), reinterpret_cast<u8*>(CodeMemBase));
JitMemMainSize = ARMJIT_Global::CodeMemorySliceSize;
#endif
SetCodePtr(0);
for (int i = 0; i < 3; i++)
{
JumpToFuncs9[i] = Gen_JumpTo9(i);
JumpToFuncs7[i] = Gen_JumpTo7(i);
}
/*
W4 - whether the register was written to
W5 - mode
W1 - reg num
W3 - in/out value of reg
*/
{
ReadBanked = GetRXPtr();
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
RET();
SetJumpTarget(fiq);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
RET();
SetJumpTarget(irq);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
RET();
SetJumpTarget(svc);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
RET();
SetJumpTarget(abt);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
RET();
SetJumpTarget(und);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
RET();
}
{
WriteBanked = GetRXPtr();
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
MOVI2R(W4, 0);
RET();
SetJumpTarget(fiq);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
MOVI2R(W4, 1);
RET();
SetJumpTarget(irq);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
MOVI2R(W4, 1);
RET();
SetJumpTarget(svc);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
MOVI2R(W4, 1);
RET();
SetJumpTarget(abt);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
MOVI2R(W4, 1);
RET();
SetJumpTarget(und);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
MOVI2R(W4, 1);
RET();
}
for (int consoleType = 0; consoleType < 2; consoleType++)
{
for (int num = 0; num < 2; num++)
{
for (int size = 0; size < 3; size++)
{
for (int reg = 0; reg < 32; reg++)
{
if (!(reg == W4 || (reg >= W8 && reg <= W15) || (reg >= W19 && reg <= W25)))
continue;
ARM64Reg rdMapped = (ARM64Reg)reg;
PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr();
if (num == 0)
{
MOV(X1, RCPU);
MOV(W2, rdMapped);
}
else
{
MOV(W1, rdMapped);
}
ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break;
case 33: QuickCallFunction(X3, SlowWrite7<u32, 0>); break;
case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break;
case 17: QuickCallFunction(X3, SlowWrite7<u16, 0>); break;
case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break;
case 9: QuickCallFunction(X3, SlowWrite7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: QuickCallFunction(X3, SlowWrite9<u32, 1>); break;
case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break;
case 16: QuickCallFunction(X3, SlowWrite9<u16, 1>); break;
case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break;
case 8: QuickCallFunction(X3, SlowWrite9<u8, 1>); break;
case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break;
}
}
ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
RET();
for (int signextend = 0; signextend < 2; signextend++)
{
PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr();
if (num == 0)
MOV(X1, RCPU);
ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break;
case 33: QuickCallFunction(X3, SlowRead7<u32, 0>); break;
case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break;
case 17: QuickCallFunction(X3, SlowRead7<u16, 0>); break;
case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break;
case 9: QuickCallFunction(X3, SlowRead7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: QuickCallFunction(X3, SlowRead9<u32, 1>); break;
case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break;
case 16: QuickCallFunction(X3, SlowRead9<u16, 1>); break;
case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break;
case 8: QuickCallFunction(X3, SlowRead9<u8, 1>); break;
case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
}
}
ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
if (size == 32)
MOV(rdMapped, W0);
else if (signextend)
SBFX(rdMapped, W0, 0, 8 << size);
else
UBFX(rdMapped, W0, 0, 8 << size);
RET();
}
}
}
}
}
FlushIcache();
JitMemSecondarySize = 1024*1024*4;
JitMemMainSize -= GetCodeOffset();
JitMemMainSize -= JitMemSecondarySize;
SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
}
Compiler::~Compiler()
{
#ifdef __SWITCH__
if (JitRWStart != NULL)
{
bool succeded = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
assert(succeded);
succeded = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
assert(succeded);
free(JitRWBase);
}
#endif
ARMJIT_Global::FreeCodeMem(CodeMemBase);
ARMJIT_Global::DeInit();
}
void Compiler::LoadCycles()
{
LDR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles));
}
void Compiler::SaveCycles()
{
STR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles));
}
void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
{
if (reg == 15)
MOVI2R(nativeReg, R15);
else
LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R) + reg*4);
}
void Compiler::SaveReg(int reg, ARM64Reg nativeReg)
{
STR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R) + reg*4);
}
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
LDR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
}
void Compiler::SaveCPSR(bool markClean)
{
if (CPSRDirty)
{
STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
CPSRDirty = CPSRDirty && !markClean;
}
}
FixupBranch Compiler::CheckCondition(u32 cond)
{
if (cond >= 0x8)
{
LSR(W1, RCPSR, 28);
MOVI2R(W2, 1);
LSLV(W2, W2, W1);
ANDI2R(W2, W2, ARM::ConditionTable[cond], W3);
return CBZ(W2);
}
else
{
u8 bit = (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)));
if (cond & 1)
return TBNZ(RCPSR, bit);
else
return TBZ(RCPSR, bit);
}
}
#define F(x) &Compiler::A_Comp_##x
const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
{
// AND
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// EOR
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// SUB
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// RSB
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ADD
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ADC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// SBC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// RSC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ORR
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// MOV
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
// BIC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// MVN
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
// TST
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// TEQ
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// CMP
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// CMN
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// Mul
F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short),
// ARMv5 exclusives
F(Clz), NULL, NULL, NULL, NULL,
// STR
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// STRB
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// LDR
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// LDRB
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// STRH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRD
NULL, NULL, NULL, NULL,
// STRD
NULL, NULL, NULL, NULL,
// LDRH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRSB
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRSH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// Swap
NULL, NULL,
// LDM, STM
F(LDM_STM), F(LDM_STM),
// Branch
F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
// Special
NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL,
&Compiler::Nop
};
#undef F
#define F(x) &Compiler::T_Comp_##x
const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] =
{
// Shift imm
F(ShiftImm), F(ShiftImm), F(ShiftImm),
// Add/sub tri operand
F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_),
// 8 bit imm
F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8),
// ALU
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
// ALU hi reg
F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg),
// PC/SP relative ops
F(RelAddr), F(RelAddr), F(AddSP),
// LDR PC rel
F(LoadPCRel),
// LDR/STR reg offset
F(MemReg), F(MemReg), F(MemReg), F(MemReg),
// LDR/STR sign extended, half
F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf),
// LDR/STR imm offset
F(MemImm), F(MemImm), F(MemImm), F(MemImm),
// LDR/STR half imm offset
F(MemImmHalf), F(MemImmHalf),
// LDR/STR sp rel
F(MemSPRel), F(MemSPRel),
// PUSH/POP
F(PUSH_POP), F(PUSH_POP),
// LDMIA, STMIA
F(LDMIA_STMIA), F(LDMIA_STMIA),
// Branch
F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2),
// Unk, SVC
NULL, NULL,
F(BL_Merged)
};
bool Compiler::CanCompile(bool thumb, u16 kind)
{
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
void Compiler::Comp_BranchSpecialBehaviour(bool taken)
{
if (taken && CurInstr.BranchFlags & branch_IdleBranch)
{
MOVI2R(W0, 1);
STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
}
if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
|| (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
{
RegCache.PrepareExit();
if (ConstantCycles)
ADD(RCycles, RCycles, ConstantCycles);
QuickTailCall(X0, ARM_Ret);
}
}
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr)
{
if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
{
Log(LogLevel::Debug, "JIT near memory full, resetting...\n");
NDS.JIT.ResetBlockCache();
}
if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8)
{
Log(LogLevel::Debug, "JIT far memory full, resetting...\n");
NDS.JIT.ResetBlockCache();
}
JitBlockEntry res = (JitBlockEntry)GetRXPtr();
Thumb = thumb;
Num = cpu->Num;
CurCPU = cpu;
ConstantCycles = 0;
RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
CPSRDirty = false;
if (hasMemInstr)
MOVP2R(RMemBase, Num == 0 ? NDS.JIT.Memory.FastMem9Start : NDS.JIT.Memory.FastMem7Start);
for (int i = 0; i < instrsCount; i++)
{
CurInstr = instrs[i];
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
CodeRegion = R15 >> 24;
CompileFunc comp = Thumb
? T_Comp[CurInstr.Info.Kind]
: A_Comp[CurInstr.Info.Kind];
Exit = i == (instrsCount - 1) || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
//printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags);
bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
{
MOVI2R(W0, R15);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
if (comp == NULL)
{
MOVI2R(W0, CurInstr.Instr);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr));
}
if (Num == 0)
{
MOVI2R(W0, (s32)CurInstr.CodeCycles);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
}
}
if (comp == NULL)
{
SaveCycles();
SaveCPSR();
RegCache.Flush();
}
else
RegCache.Prepare(Thumb, i);
if (Thumb)
{
if (comp == NULL)
{
MOV(X0, RCPU);
QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]);
}
else
{
(this->*comp)();
}
}
else
{
u32 cond = CurInstr.Cond();
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
if (comp)
(this->*comp)();
else
{
MOV(X0, RCPU);
QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM);
}
}
else if (cond == 0xF)
{
Comp_AddCycles_C();
}
else
{
IrregularCycles = comp == NULL;
FixupBranch skipExecute;
if (cond < 0xE)
skipExecute = CheckCondition(cond);
if (comp == NULL)
{
MOV(X0, RCPU);
QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]);
}
else
{
(this->*comp)();
}
Comp_BranchSpecialBehaviour(true);
if (cond < 0xE)
{
if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
{
FixupBranch skipNop = B();
SetJumpTarget(skipExecute);
if (IrregularCycles)
Comp_AddCycles_C(true);
Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipNop);
}
else
{
SetJumpTarget(skipExecute);
}
}
}
}
if (comp == NULL)
{
LoadCycles();
LoadCPSR();
}
}
RegCache.Flush();
if (ConstantCycles)
ADD(RCycles, RCycles, ConstantCycles);
QuickTailCall(X0, ARM_Ret);
FlushIcache();
return res;
}
void Compiler::Reset()
{
LoadStorePatches.clear();
SetCodePtr(0);
OtherCodeRegion = JitMemMainSize;
const u32 brk_0 = 0xD4200000;
for (int i = 0; i < (JitMemMainSize + JitMemSecondarySize) / 4; i++)
*(((u32*)GetRWPtr()) + i) = brk_0;
}
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if (forceNonConstant)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 numI)
{
IrregularCycles = true;
s32 cycles = (Num ?
NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI;
if (Thumb || CurInstr.Cond() == 0xE)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
{
IrregularCycles = true;
s32 cycles = (Num ?
NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
ADD(RCycles, RCycles, cycles);
if (Thumb || CurInstr.Cond() >= 0xE)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CDI()
{
if (Num == 0)
Comp_AddCycles_CD();
else
{
IrregularCycles = true;
s32 cycles;
s32 numC = NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
else
{
numC++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles = numC + numD + 1;
}
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
}
void Compiler::Comp_AddCycles_CD()
{
u32 cycles = 0;
if (Num == 0)
{
s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
s32 numD = CurInstr.DataCycles;
//if (DataRegion != CodeRegion)
cycles = std::max(numC + numD - 6, std::max(numC, numD));
IrregularCycles = cycles != numC;
}
else
{
s32 numC = NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if ((CurInstr.DataRegion >> 24) == 0x02)
{
if (CodeRegion == 0x02)
cycles += numC + numD;
else
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else if (CodeRegion == 0x02)
{
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles += numC + numD;
}
IrregularCycles = true;
}
if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
ADD(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
}

View File

@ -1,294 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_A64_COMPILER_H
#define ARMJIT_A64_COMPILER_H
#if defined(JIT_ENABLED) && defined(__aarch64__)
#include "../ARM.h"
#include "../dolphin/Arm64Emitter.h"
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
#include <unordered_map>
namespace melonDS
{
class ARMJIT;
const Arm64Gen::ARM64Reg RMemBase = Arm64Gen::X26;
const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
struct Op2
{
Op2()
{}
Op2(Arm64Gen::ARM64Reg rm) : IsImm(false)
{
Reg.Rm = rm;
Reg.ShiftType = Arm64Gen::ST_LSL;
Reg.ShiftAmount = 0;
}
Op2(u32 imm) : IsImm(true), Imm(imm)
{}
Op2(Arm64Gen::ARM64Reg rm, Arm64Gen::ShiftType st, int amount) : IsImm(false)
{
Reg.Rm = rm;
Reg.ShiftType = st;
Reg.ShiftAmount = amount;
}
Arm64Gen::ArithOption ToArithOption()
{
assert(!IsImm);
return Arm64Gen::ArithOption(Reg.Rm, Reg.ShiftType, Reg.ShiftAmount);
}
bool IsSimpleReg()
{ return !IsImm && !Reg.ShiftAmount && Reg.ShiftType == Arm64Gen::ST_LSL; }
bool ImmFits12Bit()
{ return IsImm && ((Imm & 0xFFF) == Imm); }
bool IsZero()
{ return IsImm && !Imm; }
bool IsImm;
union
{
struct
{
Arm64Gen::ARM64Reg Rm;
Arm64Gen::ShiftType ShiftType;
int ShiftAmount;
} Reg;
u32 Imm;
};
};
struct LoadStorePatch
{
void* PatchFunc;
s32 PatchOffset;
u32 PatchSize;
};
class Compiler : public Arm64Gen::ARM64XEmitter
{
public:
typedef void (Compiler::*CompileFunc)();
explicit Compiler(melonDS::NDS& nds);
~Compiler() override;
void PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload = true);
void PopRegs(bool saveHiRegs, bool saveRegsToBeChanged);
Arm64Gen::ARM64Reg MapReg(int reg)
{
assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
return RegCache.Mapping[reg];
}
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr);
bool CanCompile(bool thumb, u16 kind);
bool FlagsNZNeeded() const
{
return CurInstr.SetFlags & 0xC;
}
void Reset();
void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 numI);
void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
void Comp_AddCycles_CD();
void Comp_AddCycles_CDI();
void MovePC();
void LoadReg(int reg, Arm64Gen::ARM64Reg nativeReg);
void SaveReg(int reg, Arm64Gen::ARM64Reg nativeReg);
void LoadCPSR();
void SaveCPSR(bool markClean = true);
void LoadCycles();
void SaveCycles();
void Nop() {}
void A_Comp_ALUTriOp();
void A_Comp_ALUMovOp();
void A_Comp_ALUCmpOp();
void A_Comp_Mul();
void A_Comp_Mul_Long();
void A_Comp_Mul_Short();
void A_Comp_Clz();
void A_Comp_MemWB();
void A_Comp_MemHD();
void A_Comp_LDM_STM();
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void A_Comp_MRS();
void A_Comp_MSR();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALUImm8();
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
void T_Comp_AddSP();
void T_Comp_RelAddr();
void T_Comp_MemReg();
void T_Comp_MemImm();
void T_Comp_MemRegHalf();
void T_Comp_MemImmHalf();
void T_Comp_LoadPCRel();
void T_Comp_MemSPRel();
void T_Comp_LDMIA_STMIA();
void T_Comp_PUSH_POP();
void T_Comp_BCOND();
void T_Comp_B();
void T_Comp_BranchXchangeReg();
void T_Comp_BL_LONG_1();
void T_Comp_BL_LONG_2();
void T_Comp_BL_Merged();
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn);
void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);
void Comp_Compare(int op, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_Logical(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_Arithmetic(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_RetriveFlags(bool retriveCV);
Arm64Gen::FixupBranch CheckCondition(u32 cond);
void Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR = false);
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
void A_Comp_GetOp2(bool S, Op2& op2);
void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
enum
{
memop_Writeback = 1 << 0,
memop_Post = 1 << 1,
memop_SignExtend = 1 << 2,
memop_Store = 1 << 3,
memop_SubtractOffset = 1 << 4
};
void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
// 0 = switch mode, 1 = stay arm, 2 = stay thumb
void* Gen_JumpTo9(int kind);
void* Gen_JumpTo7(int kind);
void Comp_BranchSpecialBehaviour(bool taken);
JitBlockEntry AddEntryOffset(u32 offset)
{
return (JitBlockEntry)(GetRXBase() + offset);
}
u32 SubEntryOffset(JitBlockEntry entry)
{
return (u8*)entry - GetRXBase();
}
bool IsJITFault(const u8* pc);
u8* RewriteMemAccess(u8* pc);
void SwapCodeRegion()
{
ptrdiff_t offset = GetCodeOffset();
SetCodePtrUnsafe(OtherCodeRegion);
OtherCodeRegion = offset;
}
melonDS::NDS& NDS;
ptrdiff_t OtherCodeRegion;
bool Exit;
FetchedInstr CurInstr;
bool Thumb;
u32 R15;
u32 Num;
ARM* CurCPU;
u32 ConstantCycles;
u32 CodeRegion;
BitSet32 SavedRegs;
u32 JitMemSecondarySize;
u32 JitMemMainSize;
std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches;
RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
bool CPSRDirty = false;
bool IrregularCycles = false;
#ifdef __SWITCH__
void* JitRWBase;
void* JitRWStart;
void* JitRXStart;
#endif
void* CodeMemBase;
void* ReadBanked, *WriteBanked;
void* JumpToFuncs9[3];
void* JumpToFuncs7[3];
// [Console Type][Num][Size][Sign Extend][Output register]
void* PatchedLoadFuncs[2][2][3][2][32];
void* PatchedStoreFuncs[2][2][3][32];
};
}
#endif
#endif

View File

@ -1,101 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "../ARMJIT_x64/ARMJIT_Offsets.h"
.text
#define RCPSR w27
#define RCycles w28
#define RCPU x29
.p2align 4,,15
#ifdef __APPLE__
.global _ARM_Dispatch
_ARM_Dispatch:
#else
.global ARM_Dispatch
ARM_Dispatch:
#endif
stp x19, x20, [sp, #-96]!
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
mov RCPU, x0
ldr RCycles, [RCPU, ARM_Cycles_offset]
ldr RCPSR, [RCPU, ARM_CPSR_offset]
br x1
.p2align 4,,15
#ifdef __APPLE__
.global _ARM_Ret
_ARM_Ret:
#else
.global ARM_Ret
ARM_Ret:
#endif
str RCycles, [RCPU, ARM_Cycles_offset]
str RCPSR, [RCPU, ARM_CPSR_offset]
ldp x29, x30, [sp, #80]
ldp x27, x28, [sp, #64]
ldp x25, x26, [sp, #48]
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
ldp x19, x20, [sp], #96
ret
.p2align 4,,15
.global ARM_RestoreContext
ARM_RestoreContext:
mov sp, x0
ldp x0, x1, [sp]
ldp x2, x3, [sp, #16]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #48]
ldp x8, x9, [sp, #64]
ldp x10, x11, [sp, #80]
ldp x12, x13, [sp, #96]
ldp x14, x15, [sp, #112]
ldp x16, x17, [sp, #128]
ldp x18, x19, [sp, #144]
ldp x20, x21, [sp, #160]
ldp x22, x23, [sp, #176]
ldp x24, x25, [sp, #192]
ldp x26, x27, [sp, #208]
ldp x28, x29, [sp, #224]
ldr x30, [sp, #240]
ldp x17, x18, [sp, #248]
mov sp, x17
br x18
#if !defined(__APPLE__) && !defined(__WIN32__)
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,861 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
#include "../ARMJIT.h"
#include "../ARMJIT_Memory.h"
#include "../NDS.h"
using namespace Arm64Gen;
namespace melonDS
{
bool Compiler::IsJITFault(const u8* pc)
{
return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
}
u8* Compiler::RewriteMemAccess(u8* pc)
{
ptrdiff_t pcOffset = pc - GetRXBase();
auto it = LoadStorePatches.find(pcOffset);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc);
for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP);
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset);
return pc + (ptrdiff_t)patch.PatchOffset;
}
Log(LogLevel::Error, "this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
abort();
}
bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
u32 localAddr = NDS.JIT.LocaliseCodeAddress(Num, addr);
int invalidLiteralIdx = NDS.JIT.InvalidLiterals.Find(localAddr);
if (invalidLiteralIdx != -1)
{
return false;
}
Comp_AddCycles_CDI();
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
val = melonDS::ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}
CurCPU->R[15] = tmpR15;
MOVI2R(MapReg(rd), val);
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
return true;
}
void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
{
u32 addressMask = ~0;
if (size == 32)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
if (NDS.JIT.LiteralOptimizationsEnabled() && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
return;
}
if (flags & memop_Store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rnMapped = MapReg(rn);
if (Thumb && rn == 15)
{
ANDI2R(W3, rnMapped, ~2);
rnMapped = W3;
}
if (flags & memop_Store && flags & (memop_Post|memop_Writeback) && rd == rn)
{
MOV(W4, rdMapped);
rdMapped = W4;
}
ARM64Reg finalAddr = W0;
if (flags & memop_Post)
{
finalAddr = rnMapped;
MOV(W0, rnMapped);
}
bool addrIsStatic = NDS.JIT.LiteralOptimizationsEnabled()
&& RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post));
u32 staticAddress;
if (addrIsStatic)
staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (!offset.IsImm)
Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
// offset might has become an immediate
if (offset.IsImm)
{
if (offset.Imm)
{
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Imm);
else
ADD(finalAddr, rnMapped, offset.Imm);
}
else if (finalAddr != rnMapped)
MOV(finalAddr, rnMapped);
}
else
{
if (offset.Reg.ShiftType == ST_ROR)
{
ROR(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
offset = Op2(W0);
}
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
else
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
if (!(flags & memop_Post) && (flags & memop_Writeback))
MOV(rnMapped, W0);
u32 expectedTarget = Num == 0
? NDS.JIT.Memory.ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
: NDS.JIT.Memory.ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
if (NDS.JIT.FastMemoryEnabled() && ((!Thumb && CurInstr.Cond() != 0xE) || NDS.JIT.Memory.IsFastmemCompatible(expectedTarget)))
{
ptrdiff_t memopStart = GetCodeOffset();
LoadStorePatch patch;
assert((rdMapped >= W8 && rdMapped <= W15) || (rdMapped >= W19 && rdMapped <= W25) || rdMapped == W4);
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS.ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped]
: PatchedLoadFuncs[NDS.ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped];
// take a chance at fastmem
if (size > 8)
ANDI2R(W1, W0, addressMask);
ptrdiff_t loadStorePosition = GetCodeOffset();
if (flags & memop_Store)
{
STRGeneric(size, rdMapped, size > 8 ? X1 : X0, RMemBase);
}
else
{
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, RMemBase);
if (size == 32 && !addrIsStatic)
{
UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0);
}
}
patch.PatchOffset = memopStart - loadStorePosition;
patch.PatchSize = GetCodeOffset() - memopStart;
LoadStorePatches[loadStorePosition] = patch;
}
else
{
void* func = NULL;
if (addrIsStatic)
func = NDS.JIT.Memory.GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
PushRegs(false, false);
if (func)
{
if (flags & memop_Store)
MOV(W1, rdMapped);
QuickCallFunction(X2, (void (*)())func);
PopRegs(false, false);
if (!(flags & memop_Store))
{
if (size == 32)
{
if (staticAddress & 0x3)
ROR(rdMapped, W0, (staticAddress & 0x3) << 3);
else
MOV(rdMapped, W0);
}
else
{
if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
else
{
if (Num == 0)
{
MOV(X1, RCPU);
if (flags & memop_Store)
{
MOV(W2, rdMapped);
switch (size | NDS.ConsoleType)
{
case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break;
case 33: QuickCallFunction(X3, SlowWrite9<u32, 1>); break;
case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break;
case 17: QuickCallFunction(X3, SlowWrite9<u16, 1>); break;
case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break;
case 9: QuickCallFunction(X3, SlowWrite9<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break;
case 33: QuickCallFunction(X3, SlowRead9<u32, 1>); break;
case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break;
case 17: QuickCallFunction(X3, SlowRead9<u16, 1>); break;
case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break;
case 9: QuickCallFunction(X3, SlowRead9<u8, 1>); break;
}
}
}
else
{
if (flags & memop_Store)
{
MOV(W1, rdMapped);
switch (size | NDS.ConsoleType)
{
case 32: QuickCallFunction(X3, SlowWrite7<u32, 0>); break;
case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break;
case 16: QuickCallFunction(X3, SlowWrite7<u16, 0>); break;
case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break;
case 8: QuickCallFunction(X3, SlowWrite7<u8, 0>); break;
case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: QuickCallFunction(X3, SlowRead7<u32, 0>); break;
case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break;
case 16: QuickCallFunction(X3, SlowRead7<u16, 0>); break;
case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break;
case 8: QuickCallFunction(X3, SlowRead7<u8, 0>); break;
case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
}
}
}
PopRegs(false, false);
if (!(flags & memop_Store))
{
if (size == 32)
MOV(rdMapped, W0);
else if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
if (CurInstr.Info.Branches())
{
if (size < 32)
Log(LogLevel::Debug, "LDR size < 32 branching?\n");
Comp_JumpTo(rdMapped, Num == 0, false);
}
}
void Compiler::A_Comp_MemWB()
{
Op2 offset;
if (CurInstr.Instr & (1 << 25))
offset = Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((CurInstr.Instr >> 5) & 0x3), (CurInstr.Instr >> 7) & 0x1F);
else
offset = Op2(CurInstr.Instr & 0xFFF);
bool load = CurInstr.Instr & (1 << 20);
bool byte = CurInstr.Instr & (1 << 22);
int flags = 0;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, byte ? 8 : 32, flags);
}
void Compiler::A_Comp_MemHD()
{
bool load = CurInstr.Instr & (1 << 20);
bool signExtend;
int op = (CurInstr.Instr >> 5) & 0x3;
int size;
if (load)
{
signExtend = op >= 2;
size = op == 2 ? 8 : 16;
}
else
{
size = 16;
signExtend = false;
}
Op2 offset;
if (CurInstr.Instr & (1 << 22))
offset = Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0));
else
offset = Op2(MapReg(CurInstr.A_Reg(0)));
int flags = 0;
if (signExtend)
flags |= memop_SignExtend;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
}
void Compiler::T_Comp_MemReg()
{
int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op & 0x2;
bool byte = op & 0x1;
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3),
Op2(MapReg(CurInstr.T_Reg(6))), byte ? 8 : 32, load ? 0 : memop_Store);
}
void Compiler::T_Comp_MemImm()
{
int op = (CurInstr.Instr >> 11) & 0x3;
bool load = op & 0x1;
bool byte = op & 0x2;
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
byte ? 8 : 32, load ? 0 : memop_Store);
}
void Compiler::T_Comp_MemRegHalf()
{
int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op != 0;
int size = op != 1 ? 16 : 8;
bool signExtend = op & 1;
int flags = 0;
if (signExtend)
flags |= memop_SignExtend;
if (!load)
flags |= memop_Store;
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(MapReg(CurInstr.T_Reg(6))),
size, flags);
}
void Compiler::T_Comp_MemImmHalf()
{
u32 offset = (CurInstr.Instr >> 5) & 0x3E;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
load ? 0 : memop_Store);
}
void Compiler::T_Comp_LoadPCRel()
{
u32 offset = ((CurInstr.Instr & 0xFF) << 2);
u32 addr = (R15 & ~0x2) + offset;
if (!NDS.JIT.LiteralOptimizationsEnabled() || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
}
void Compiler::T_Comp_MemSPRel()
{
u32 offset = (CurInstr.Instr & 0xFF) * 4;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store);
}
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn)
{
IrregularCycles = true;
int regsCount = regs.Count();
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
int firstReg = *regs.begin();
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << firstReg) && !(firstReg == rn && skipLoadingRn))
{
int flags = 0;
if (store)
flags |= memop_Store;
if (decrement)
flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(firstReg, rn, offset, 32, flags);
return decrement ? -4 : 4;
}
if (store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
int expectedTarget = Num == 0
? NDS.JIT.Memory.ClassifyAddress9(CurInstr.DataRegion)
: NDS.JIT.Memory.ClassifyAddress7(CurInstr.DataRegion);
bool compileFastPath = NDS.JIT.FastMemoryEnabled()
&& store && !usermode && (CurInstr.Cond() < 0xE || NDS.JIT.Memory.IsFastmemCompatible(expectedTarget));
{
s32 offset = decrement
? -regsCount * 4 + (preinc ? 0 : 4)
: (preinc ? 4 : 0);
if (offset)
ADDI2R(W0, MapReg(rn), offset);
else if (compileFastPath)
ANDI2R(W0, MapReg(rn), ~3);
else
MOV(W0, MapReg(rn));
}
u8* patchFunc;
if (compileFastPath)
{
ptrdiff_t fastPathStart = GetCodeOffset();
ptrdiff_t loadStoreOffsets[8];
ADD(X1, RMemBase, X0);
u32 offset = 0;
BitSet16::Iterator it = regs.begin();
u32 i = 0;
if (regsCount & 1)
{
int reg = *it;
it++;
ARM64Reg first = W3;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else if (store)
LoadReg(reg, first);
loadStoreOffsets[i++] = GetCodeOffset();
if (store)
{
STR(INDEX_UNSIGNED, first, X1, offset);
}
else if (!(reg == rn && skipLoadingRn))
{
LDR(INDEX_UNSIGNED, first, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)))
SaveReg(reg, first);
}
offset += 4;
}
while (it != regs.end())
{
int reg = *it;
it++;
int nextReg = *it;
it++;
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg))
{
if (!(reg == rn && skipLoadingRn))
first = MapReg(reg);
}
else if (store)
{
LoadReg(reg, first);
}
if (RegCache.LoadedRegs & (1 << nextReg))
{
if (!(nextReg == rn && skipLoadingRn))
second = MapReg(nextReg);
}
else if (store)
{
LoadReg(nextReg, second);
}
loadStoreOffsets[i++] = GetCodeOffset();
if (store)
{
STP(INDEX_SIGNED, first, second, X1, offset);
}
else
{
LDP(INDEX_SIGNED, first, second, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)))
SaveReg(reg, first);
if (!(RegCache.LoadedRegs & (1 << nextReg)))
SaveReg(nextReg, second);
}
offset += 8;
}
LoadStorePatch patch;
patch.PatchSize = GetCodeOffset() - fastPathStart;
SwapCodeRegion();
patchFunc = (u8*)GetRXPtr();
patch.PatchFunc = patchFunc;
u32 numLoadStores = i;
for (i = 0; i < numLoadStores; i++)
{
patch.PatchOffset = fastPathStart - loadStoreOffsets[i];
LoadStorePatches[loadStoreOffsets[i]] = patch;
}
ABI_PushRegisters({30});
}
int i = 0;
SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
if (store)
{
if (usermode && (regs & BitSet16(0x7f00)))
UBFX(W5, RCPSR, 0, 5);
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
BitSet16::Iterator nextReg = it;
nextReg++;
int reg = *it;
if (usermode && reg >= 8 && reg < 15)
{
if (RegCache.LoadedRegs & (1 << reg))
MOV(W3, MapReg(reg));
else
LoadReg(reg, W3);
MOVI2R(W1, reg - 8);
BL(ReadBanked);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
else if (!usermode && nextReg != regs.end())
{
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else
LoadReg(reg, W3);
if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
else
LoadReg(*nextReg, W4);
STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
i++;
it++;
}
else if (RegCache.LoadedRegs & (1 << reg))
{
STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
}
else
{
LoadReg(reg, W3);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
i++;
it++;
}
}
PushRegs(false, false, !compileFastPath);
ADD(X1, SP, 0);
MOVI2R(W2, regsCount);
if (Num == 0)
{
MOV(X3, RCPU);
switch ((u32)store * 2 | NDS.ConsoleType)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, 0>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, 1>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, 0>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, 1>); break;
}
}
else
{
switch ((u32)store * 2 | NDS.ConsoleType)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, 0>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, 1>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, 0>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, 1>); break;
}
}
PopRegs(false, false);
if (!store)
{
if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
UBFX(W5, RCPSR, 0, 5);
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
BitSet16::Iterator nextReg = it;
nextReg++;
int reg = *it;
if (usermode && !regs[15] && reg >= 8 && reg < 15)
{
LDR(INDEX_UNSIGNED, W3, SP, i * 8);
MOVI2R(W1, reg - 8);
BL(WriteBanked);
if (!(reg == rn && skipLoadingRn))
{
FixupBranch alreadyWritten = CBNZ(W4);
if (RegCache.LoadedRegs & (1 << reg))
MOV(MapReg(reg), W3);
else
SaveReg(reg, W3);
SetJumpTarget(alreadyWritten);
}
}
else if (!usermode && nextReg != regs.end())
{
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg) && !(reg == rn && skipLoadingRn))
first = MapReg(reg);
if (RegCache.LoadedRegs & (1 << *nextReg) && !(*nextReg == rn && skipLoadingRn))
second = MapReg(*nextReg);
LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
if (first == W3)
SaveReg(reg, W3);
if (second == W4)
SaveReg(*nextReg, W4);
it++;
i++;
}
else if (RegCache.LoadedRegs & (1 << reg))
{
if (!(reg == rn && skipLoadingRn))
{
ARM64Reg mapped = MapReg(reg);
LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
}
}
else
{
LDR(INDEX_UNSIGNED, W3, SP, i * 8);
SaveReg(reg, W3);
}
it++;
i++;
}
}
ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
if (compileFastPath)
{
ABI_PopRegisters({30});
RET();
FlushIcacheSection(patchFunc, (u8*)GetRXPtr());
SwapCodeRegion();
}
if (!store && regs[15])
{
ARM64Reg mapped = MapReg(15);
Comp_JumpTo(mapped, Num == 0, usermode);
}
return regsCount * 4 * (decrement ? -1 : 1);
}
void Compiler::A_Comp_LDM_STM()
{
BitSet16 regs(CurInstr.Instr & 0xFFFF);
bool load = CurInstr.Instr & (1 << 20);
bool pre = CurInstr.Instr & (1 << 24);
bool add = CurInstr.Instr & (1 << 23);
bool writeback = CurInstr.Instr & (1 << 21);
bool usermode = CurInstr.Instr & (1 << 22);
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
if (load && writeback && regs[CurInstr.A_Reg(16)])
writeback = Num == 0
&& (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1));
s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode, load && writeback);
if (writeback && offset)
{
if (offset > 0)
ADD(rn, rn, offset);
else
SUB(rn, rn, -offset);
}
}
void Compiler::T_Comp_PUSH_POP()
{
bool load = CurInstr.Instr & (1 << 11);
BitSet16 regs(CurInstr.Instr & 0xFF);
if (CurInstr.Instr & (1 << 8))
{
if (load)
regs[15] = true;
else
regs[14] = true;
}
ARM64Reg sp = MapReg(13);
s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false, false);
if (offset)
{
if (offset > 0)
ADD(sp, sp, offset);
else
SUB(sp, sp, -offset);
}
}
void Compiler::T_Comp_LDMIA_STMIA()
{
BitSet16 regs(CurInstr.Instr & 0xFF);
ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
bool load = CurInstr.Instr & (1 << 11);
u32 regsCount = regs.Count();
bool writeback = !load || !regs[CurInstr.T_Reg(8)];
s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false, load && writeback);
if (writeback && offset)
{
if (offset > 0)
ADD(rb, rb, offset);
else
SUB(rb, rb, -offset);
}
}
}

View File

@ -1,34 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_COMPILER_H
#define ARMJIT_COMPILER_H
#ifdef JIT_ENABLED
#if defined(__x86_64__)
#include "ARMJIT_x64/ARMJIT_Compiler.h"
#elif defined(__aarch64__)
#include "ARMJIT_A64/ARMJIT_Compiler.h"
#else
#error "The current target platform doesn't have a JIT backend"
#endif
#endif
#endif

View File

@ -1,126 +0,0 @@
#include "ARMJIT_Global.h"
#include "ARMJIT_Memory.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <stdio.h>
#include <stdint.h>
#include <mutex>
namespace melonDS
{
namespace ARMJIT_Global
{
std::mutex globalMutex;
#if defined(__APPLE__) && defined(__aarch64__)
#define APPLE_AARCH64
#endif
#ifndef APPLE_AARCH64
static constexpr size_t NumCodeMemSlices = 4;
static constexpr size_t CodeMemoryAlignedSize = NumCodeMemSlices * CodeMemorySliceSize;
// I haven't heard of pages larger than 16 KB
u8 CodeMemory[CodeMemoryAlignedSize + 16*1024];
u32 AvailableCodeMemSlices = (1 << NumCodeMemSlices) - 1;
u8* GetAlignedCodeMemoryStart()
{
return reinterpret_cast<u8*>((reinterpret_cast<intptr_t>(CodeMemory) + (16*1024-1)) & ~static_cast<intptr_t>(16*1024-1));
}
#endif
int RefCounter = 0;
void* AllocateCodeMem()
{
std::lock_guard guard(globalMutex);
#ifndef APPLE_AARCH64
if (AvailableCodeMemSlices)
{
int slice = __builtin_ctz(AvailableCodeMemSlices);
AvailableCodeMemSlices &= ~(1 << slice);
//printf("allocating slice %d\n", slice);
return &GetAlignedCodeMemoryStart()[slice * CodeMemorySliceSize];
}
#endif
// allocate
#ifdef _WIN32
return VirtualAlloc(nullptr, CodeMemorySliceSize, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#elif defined(APPLE_AARCH64)
return mmap(NULL, CodeMemorySliceSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT,-1, 0);
#else
//printf("mmaping...\n");
return mmap(nullptr, CodeMemorySliceSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
}
void FreeCodeMem(void* codeMem)
{
std::lock_guard guard(globalMutex);
#ifndef APPLE_AARCH64
for (int i = 0; i < NumCodeMemSlices; i++)
{
if (codeMem == &GetAlignedCodeMemoryStart()[CodeMemorySliceSize * i])
{
//printf("freeing slice\n");
AvailableCodeMemSlices |= 1 << i;
return;
}
}
#endif
#ifdef _WIN32
VirtualFree(codeMem, CodeMemorySliceSize, MEM_RELEASE|MEM_DECOMMIT);
#else
munmap(codeMem, CodeMemorySliceSize);
#endif
}
void Init()
{
std::lock_guard guard(globalMutex);
RefCounter++;
if (RefCounter == 1)
{
#ifdef _WIN32
DWORD dummy;
VirtualProtect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PAGE_EXECUTE_READWRITE, &dummy);
#elif defined(APPLE_AARCH64)
// Apple aarch64 always uses dynamic allocation
#else
mprotect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
ARMJIT_Memory::RegisterFaultHandler();
}
}
void DeInit()
{
std::lock_guard guard(globalMutex);
RefCounter--;
if (RefCounter == 0)
{
ARMJIT_Memory::UnregisterFaultHandler();
}
}
}
}

View File

@ -1,108 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_INTERNAL_H
#define ARMJIT_INTERNAL_H
#include "types.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "ARM_InstrInfo.h"
#include "JitBlock.h"
#include "TinyVector.h"
namespace melonDS
{
class ARM;
class ARMv5;
// here lands everything which doesn't fit into ARMJIT.h
// where it would be included by pretty much everything
enum
{
branch_IdleBranch = 1 << 0,
branch_FollowCondTaken = 1 << 1,
branch_FollowCondNotTaken = 1 << 2,
branch_StaticTarget = 1 << 3,
};
struct FetchedInstr
{
u32 A_Reg(int pos) const
{
return (Instr >> pos) & 0xF;
}
u32 T_Reg(int pos) const
{
return (Instr >> pos) & 0x7;
}
u32 Cond() const
{
return Instr >> 28;
}
u8 BranchFlags;
u8 SetFlags;
u32 Instr;
u32 Addr;
u8 DataCycles;
u16 CodeCycles;
u32 DataRegion;
ARMInstrInfo::Info Info;
};
// size should be 16 bytes because I'm to lazy to use mul and whatnot
struct __attribute__((packed)) AddressRange
{
TinyVector<JitBlock*> Blocks;
u32 Code;
};
typedef void (*InterpreterFunc)(ARM* cpu);
extern InterpreterFunc InterpretARM[];
extern InterpreterFunc InterpretTHUMB[];
inline bool PageContainsCode(const AddressRange* range, u32 pageSize)
{
for (int i = 0; i < pageSize / 512; i++)
{
if (range[i].Blocks.Length > 0)
return true;
}
return false;
}
template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val);
template <typename T, int ConsoleType> T SlowRead7(u32 addr);
template <typename T, int ConsoleType> void SlowWrite7(u32 addr, u32 val);
template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,244 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_MEMORY
#define ARMJIT_MEMORY
#include "types.h"
#include "MemConstants.h"
#ifdef JIT_ENABLED
# include <mutex>
# include "TinyVector.h"
# include "ARM.h"
# if defined(__SWITCH__)
# include <switch.h>
# elif defined(_WIN32)
#include <windows.h>
# else
# include <sys/mman.h>
# include <sys/stat.h>
# include <fcntl.h>
# include <unistd.h>
# include <signal.h>
# endif
#else
# include <array>
#endif
namespace melonDS
{
#ifdef JIT_ENABLED
namespace Platform { struct DynamicLibrary; }
class Compiler;
class ARMJIT;
#endif
static constexpr u32 LargePageSize = 0x4000;
static constexpr u32 RegularPageSize = 0x1000;
constexpr u32 RoundUp(u32 size) noexcept
{
return (size + LargePageSize - 1) & ~(LargePageSize - 1);
}
static constexpr u32 MemBlockMainRAMOffset = 0;
static constexpr u32 MemBlockSWRAMOffset = RoundUp(MainRAMMaxSize);
static constexpr u32 MemBlockARM7WRAMOffset = MemBlockSWRAMOffset + RoundUp(SharedWRAMSize);
static constexpr u32 MemBlockDTCMOffset = MemBlockARM7WRAMOffset + RoundUp(ARM7WRAMSize);
static constexpr u32 MemBlockNWRAM_AOffset = MemBlockDTCMOffset + RoundUp(DTCMPhysicalSize);
static constexpr u32 MemBlockNWRAM_BOffset = MemBlockNWRAM_AOffset + RoundUp(NWRAMSize);
static constexpr u32 MemBlockNWRAM_COffset = MemBlockNWRAM_BOffset + RoundUp(NWRAMSize);
static constexpr u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(NWRAMSize);
class ARMJIT_Memory
{
public:
enum
{
memregion_Other = 0,
memregion_ITCM,
memregion_DTCM,
memregion_BIOS9,
memregion_MainRAM,
memregion_SharedWRAM,
memregion_IO9,
memregion_VRAM,
memregion_BIOS7,
memregion_WRAM7,
memregion_IO7,
memregion_Wifi,
memregion_VWRAM,
// DSi
memregion_BIOS9DSi,
memregion_BIOS7DSi,
memregion_NewSharedWRAM_A,
memregion_NewSharedWRAM_B,
memregion_NewSharedWRAM_C,
memregions_Count
};
#ifdef JIT_ENABLED
public:
explicit ARMJIT_Memory(melonDS::NDS& nds);
~ARMJIT_Memory() noexcept;
ARMJIT_Memory(const ARMJIT_Memory&) = delete;
ARMJIT_Memory(ARMJIT_Memory&&) = delete;
ARMJIT_Memory& operator=(const ARMJIT_Memory&) = delete;
ARMJIT_Memory& operator=(ARMJIT_Memory&&) = delete;
void Reset() noexcept;
void RemapDTCM(u32 newBase, u32 newSize) noexcept;
void RemapSWRAM() noexcept;
void RemapNWRAM(int num) noexcept;
void SetCodeProtection(int region, u32 offset, bool protect) noexcept;
[[nodiscard]] u8* GetMainRAM() noexcept { return MemoryBase + MemBlockMainRAMOffset; }
[[nodiscard]] const u8* GetMainRAM() const noexcept { return MemoryBase + MemBlockMainRAMOffset; }
[[nodiscard]] u8* GetSharedWRAM() noexcept { return MemoryBase + MemBlockSWRAMOffset; }
[[nodiscard]] const u8* GetSharedWRAM() const noexcept { return MemoryBase + MemBlockSWRAMOffset; }
[[nodiscard]] u8* GetARM7WRAM() noexcept { return MemoryBase + MemBlockARM7WRAMOffset; }
[[nodiscard]] const u8* GetARM7WRAM() const noexcept { return MemoryBase + MemBlockARM7WRAMOffset; }
[[nodiscard]] u8* GetARM9DTCM() noexcept { return MemoryBase + MemBlockDTCMOffset; }
[[nodiscard]] const u8* GetARM9DTCM() const noexcept { return MemoryBase + MemBlockDTCMOffset; }
[[nodiscard]] u8* GetNWRAM_A() noexcept { return MemoryBase + MemBlockNWRAM_AOffset; }
[[nodiscard]] const u8* GetNWRAM_A() const noexcept { return MemoryBase + MemBlockNWRAM_AOffset; }
[[nodiscard]] u8* GetNWRAM_B() noexcept { return MemoryBase + MemBlockNWRAM_BOffset; }
[[nodiscard]] const u8* GetNWRAM_B() const noexcept { return MemoryBase + MemBlockNWRAM_BOffset; }
[[nodiscard]] u8* GetNWRAM_C() noexcept { return MemoryBase + MemBlockNWRAM_COffset; }
[[nodiscard]] const u8* GetNWRAM_C() const noexcept { return MemoryBase + MemBlockNWRAM_COffset; }
int ClassifyAddress9(u32 addr) const noexcept;
int ClassifyAddress7(u32 addr) const noexcept;
bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) const noexcept;
u32 LocaliseAddress(int region, u32 num, u32 addr) const noexcept;
bool IsFastmemCompatible(int region) const noexcept;
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) const noexcept;
bool MapAtAddress(u32 addr) noexcept;
static bool IsFastMemSupported();
static void RegisterFaultHandler();
static void UnregisterFaultHandler();
static u32 PageSize;
static u32 PageShift;
private:
friend class Compiler;
struct Mapping
{
u32 Addr;
u32 Size, LocalOffset;
u32 Num;
void Unmap(int region, NDS& nds) noexcept;
};
struct FaultDescription
{
u32 EmulatedFaultAddr;
u8* FaultPC;
};
static bool FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds);
bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexcept;
bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) noexcept;
void SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) noexcept;
melonDS::NDS& NDS;
void* FastMem9Start;
void* FastMem7Start;
u8* MemoryBase = nullptr;
#if defined(__SWITCH__)
VirtmemReservation* FastMem9Reservation, *FastMem7Reservation;
u8* MemoryBaseCodeMem;
#elif defined(_WIN32)
struct VirtmemPlaceholder
{
uintptr_t Start;
size_t Size;
};
std::vector<VirtmemPlaceholder> VirtmemPlaceholders;
static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo);
HANDLE MemoryFile = INVALID_HANDLE_VALUE;
#else
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext);
int MemoryFile = -1;
#endif
#ifdef ANDROID
Platform::DynamicLibrary* Libandroid = nullptr;
#endif
u8 MappingStatus9[1 << (32-12)] {};
u8 MappingStatus7[1 << (32-12)] {};
TinyVector<Mapping> Mappings[memregions_Count] {};
#else
public:
explicit ARMJIT_Memory(melonDS::NDS&) {};
~ARMJIT_Memory() = default;
ARMJIT_Memory(const ARMJIT_Memory&) = delete;
ARMJIT_Memory(ARMJIT_Memory&&) = delete;
ARMJIT_Memory& operator=(const ARMJIT_Memory&) = delete;
ARMJIT_Memory& operator=(ARMJIT_Memory&&) = delete;
void Reset() noexcept {}
void RemapDTCM(u32 newBase, u32 newSize) noexcept {}
void RemapSWRAM() noexcept {}
void RemapNWRAM(int num) noexcept {}
void SetCodeProtection(int region, u32 offset, bool protect) noexcept {}
[[nodiscard]] u8* GetMainRAM() noexcept { return MainRAM.data(); }
[[nodiscard]] const u8* GetMainRAM() const noexcept { return MainRAM.data(); }
[[nodiscard]] u8* GetSharedWRAM() noexcept { return SharedWRAM.data(); }
[[nodiscard]] const u8* GetSharedWRAM() const noexcept { return SharedWRAM.data(); }
[[nodiscard]] u8* GetARM7WRAM() noexcept { return ARM7WRAM.data(); }
[[nodiscard]] const u8* GetARM7WRAM() const noexcept { return ARM7WRAM.data(); }
[[nodiscard]] u8* GetARM9DTCM() noexcept { return DTCM.data(); }
[[nodiscard]] const u8* GetARM9DTCM() const noexcept { return DTCM.data(); }
[[nodiscard]] u8* GetNWRAM_A() noexcept { return NWRAM_A.data(); }
[[nodiscard]] const u8* GetNWRAM_A() const noexcept { return NWRAM_A.data(); }
[[nodiscard]] u8* GetNWRAM_B() noexcept { return NWRAM_B.data(); }
[[nodiscard]] const u8* GetNWRAM_B() const noexcept { return NWRAM_B.data(); }
[[nodiscard]] u8* GetNWRAM_C() noexcept { return NWRAM_C.data(); }
[[nodiscard]] const u8* GetNWRAM_C() const noexcept { return NWRAM_C.data(); }
private:
std::array<u8, MainRAMMaxSize> MainRAM {};
std::array<u8, ARM7WRAMSize> ARM7WRAM {};
std::array<u8, SharedWRAMSize> SharedWRAM {};
std::array<u8, DTCMPhysicalSize> DTCM {};
std::array<u8, NWRAMSize> NWRAM_A {};
std::array<u8, NWRAMSize> NWRAM_B {};
std::array<u8, NWRAMSize> NWRAM_C {};
#endif
};
}
#endif

View File

@ -1,223 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_REGCACHE_H
#define ARMJIT_REGCACHE_H
#include "ARMJIT_Internal.h"
#include "Platform.h"
// TODO: replace this in the future
#include "dolphin/BitSet.h"
#include <assert.h>
namespace melonDS
{
using Platform::Log;
using Platform::LogLevel;
using namespace Common;
// Imported inside the namespace so that other headers aren't polluted
template <typename T, typename Reg>
class RegisterCache
{
public:
RegisterCache()
{}
RegisterCache(T* compiler, FetchedInstr instrs[], int instrsCount, bool pcAllocatableAsSrc = false)
: Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount)
{
for (int i = 0; i < 16; i++)
Mapping[i] = (Reg)-1;
PCAllocatableAsSrc = ~(pcAllocatableAsSrc
? 0
: (1 << 15));
}
void UnloadRegister(int reg)
{
assert(Mapping[reg] != -1);
if (DirtyRegs & (1 << reg))
Compiler->SaveReg(reg, Mapping[reg]);
DirtyRegs &= ~(1 << reg);
LoadedRegs &= ~(1 << reg);
NativeRegsUsed &= ~(1 << (int)Mapping[reg]);
Mapping[reg] = (Reg)-1;
}
void LoadRegister(int reg, bool loadValue)
{
assert(Mapping[reg] == -1);
for (int i = 0; i < NativeRegsAvailable; i++)
{
Reg nativeReg = NativeRegAllocOrder[i];
if (!(NativeRegsUsed & (1 << nativeReg)))
{
Mapping[reg] = nativeReg;
NativeRegsUsed |= 1 << (int)nativeReg;
LoadedRegs |= 1 << reg;
if (loadValue)
Compiler->LoadReg(reg, nativeReg);
return;
}
}
Log(LogLevel::Error, "this is a JIT bug! LoadRegister failed\n");
abort();
}
void PutLiteral(int reg, u32 val)
{
LiteralsLoaded |= (1 << reg);
LiteralValues[reg] = val;
}
void UnloadLiteral(int reg)
{
LiteralsLoaded &= ~(1 << reg);
}
bool IsLiteral(int reg) const
{
return LiteralsLoaded & (1 << reg);
}
void PrepareExit()
{
BitSet16 dirtyRegs(DirtyRegs);
for (int reg : dirtyRegs)
Compiler->SaveReg(reg, Mapping[reg]);
}
void Flush()
{
BitSet16 loadedSet(LoadedRegs);
for (int reg : loadedSet)
UnloadRegister(reg);
LiteralsLoaded = 0;
}
void Prepare(bool thumb, int i)
{
FetchedInstr instr = Instrs[i];
if (LoadedRegs & (1 << 15))
UnloadRegister(15);
BitSet16 invalidedLiterals(LiteralsLoaded & instr.Info.DstRegs);
for (int reg : invalidedLiterals)
UnloadLiteral(reg);
u16 futureNeeded = 0;
int ranking[16];
for (int j = 0; j < 16; j++)
ranking[j] = 0;
for (int j = i; j < InstrsCount; j++)
{
BitSet16 regsNeeded((Instrs[j].Info.SrcRegs & ~(1 << 15)) | Instrs[j].Info.DstRegs);
futureNeeded |= regsNeeded.m_val;
regsNeeded &= BitSet16(~Instrs[j].Info.NotStrictlyNeeded);
for (int reg : regsNeeded)
ranking[reg]++;
}
// we'll unload all registers which are never used again
BitSet16 neverNeededAgain(LoadedRegs & ~futureNeeded);
for (int reg : neverNeededAgain)
UnloadRegister(reg);
u16 necessaryRegs = ((instr.Info.SrcRegs & PCAllocatableAsSrc) | instr.Info.DstRegs) & ~instr.Info.NotStrictlyNeeded;
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
if (needToBeLoaded != BitSet16(0))
{
int neededCount = needToBeLoaded.Count();
BitSet16 loadedSet(LoadedRegs);
while (loadedSet.Count() + neededCount > NativeRegsAvailable)
{
int leastReg = -1;
int rank = 1000;
for (int reg : loadedSet)
{
if (!((1 << reg) & necessaryRegs) && ranking[reg] < rank)
{
leastReg = reg;
rank = ranking[reg];
}
}
assert(leastReg != -1);
UnloadRegister(leastReg);
loadedSet.m_val = LoadedRegs;
}
// we don't need to load a value which is always going to be overwritten
BitSet16 needValueLoaded(needToBeLoaded);
if (thumb || instr.Cond() >= 0xE)
needValueLoaded = BitSet16(instr.Info.SrcRegs);
for (int reg : needToBeLoaded)
LoadRegister(reg, needValueLoaded[reg]);
}
{
BitSet16 loadedSet(LoadedRegs);
BitSet16 loadRegs(instr.Info.NotStrictlyNeeded & futureNeeded & ~LoadedRegs);
if (loadRegs && loadedSet.Count() < NativeRegsAvailable)
{
int left = NativeRegsAvailable - loadedSet.Count();
for (int reg : loadRegs)
{
if (left-- == 0)
break;
LoadRegister(reg, !(thumb || instr.Cond() >= 0xE) || (1 << reg) & instr.Info.SrcRegs);
}
}
}
DirtyRegs |= (LoadedRegs & instr.Info.DstRegs) & ~(1 << 15);
}
static const Reg NativeRegAllocOrder[];
static const int NativeRegsAvailable;
Reg Mapping[16];
u32 LiteralValues[16];
u16 LiteralsLoaded = 0;
u32 NativeRegsUsed = 0;
u16 LoadedRegs = 0;
u16 DirtyRegs = 0;
u16 PCAllocatableAsSrc = 0;
T* Compiler;
FetchedInstr* Instrs;
int InstrsCount;
};
}
#endif

View File

@ -1,802 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
#include "../ARM.h"
using namespace Gen;
namespace melonDS
{
// uses RSCRATCH3
void Compiler::Comp_ArithTriOp(void (Compiler::*op)(int, const OpArg&, const OpArg&),
OpArg rd, OpArg rn, OpArg op2, bool carryUsed, int opFlags)
{
if (opFlags & opSyncCarry)
{
BT(32, R(RCPSR), Imm8(29));
if (opFlags & opInvertCarry)
CMC();
}
if (rd == rn && !(opFlags & opInvertOp2))
(this->*op)(32, rd, op2);
else if (opFlags & opSymmetric && op2 == R(RSCRATCH))
{
if (opFlags & opInvertOp2)
NOT(32, op2);
(this->*op)(32, op2, rn);
MOV(32, rd, op2);
}
else
{
if (opFlags & opInvertOp2)
{
if (op2 != R(RSCRATCH))
{
MOV(32, R(RSCRATCH), op2);
op2 = R(RSCRATCH);
}
NOT(32, op2);
}
MOV(32, R(RSCRATCH3), rn);
(this->*op)(32, R(RSCRATCH3), op2);
MOV(32, rd, R(RSCRATCH3));
}
if (opFlags & opSetsFlags)
Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
}
void Compiler::Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags)
{
if (opFlags & opSyncCarry)
{
BT(32, R(RCPSR), Imm8(29));
if (opFlags & opInvertCarry)
CMC();
}
if (op2 != R(RSCRATCH))
{
MOV(32, R(RSCRATCH), op2);
op2 = R(RSCRATCH);
}
(this->*op)(32, op2, rn);
MOV(32, rd, op2);
if (opFlags & opSetsFlags)
Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
}
void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
{
switch (op)
{
case 0: // TST
if (rn.IsImm())
{
MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3);
}
TEST(32, rn, op2);
break;
case 1: // TEQ
MOV(32, R(RSCRATCH3), rn);
XOR(32, R(RSCRATCH3), op2);
break;
case 2: // CMP
if (rn.IsImm())
{
MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3);
}
CMP(32, rn, op2);
break;
case 3: // CMN
MOV(32, R(RSCRATCH3), rn);
ADD(32, R(RSCRATCH3), op2);
break;
}
Comp_RetriveFlags(op == 2, op >= 2, carryUsed);
}
// also calculates cycles
OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
{
S = S && (CurInstr.SetFlags & 0x2);
if (CurInstr.Instr & (1 << 25))
{
Comp_AddCycles_C();
u32 shift = (CurInstr.Instr >> 7) & 0x1E;
u32 imm = melonDS::ROR(CurInstr.Instr & 0xFF, shift);
carryUsed = false;
if (S && shift)
{
CPSRDirty = true;
carryUsed = true;
if (imm & 0x80000000)
MOV(32, R(RSCRATCH2), Imm32(1));
else
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
}
return Imm32(imm);
}
else
{
int op = (CurInstr.Instr >> 5) & 0x3;
if (CurInstr.Instr & (1 << 4))
{
Comp_AddCycles_CI(1);
OpArg rm = MapReg(CurInstr.A_Reg(0));
if (rm.IsImm() && CurInstr.A_Reg(0) == 15)
rm = Imm32(rm.Imm32() + 4);
return Comp_RegShiftReg(op, MapReg(CurInstr.A_Reg(8)), rm, S, carryUsed);
}
else
{
Comp_AddCycles_C();
return Comp_RegShiftImm(op, (CurInstr.Instr >> 7) & 0x1F,
MapReg(CurInstr.A_Reg(0)), S, carryUsed);
}
}
}
void Compiler::A_Comp_CmpOp()
{
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool carryUsed;
OpArg rn = MapReg(CurInstr.A_Reg(16));
OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed);
Comp_CmpOp(op - 0x8, rn, op2, carryUsed);
}
void Compiler::A_Comp_Arith()
{
bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool carryUsed;
OpArg rn = MapReg(CurInstr.A_Reg(16));
OpArg rd = MapReg(CurInstr.A_Reg(12));
OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed);
u32 sFlag = S ? opSetsFlags : 0;
switch (op)
{
case 0x0: // AND
Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, opSymmetric|sFlag);
break;
case 0x1: // EOR
Comp_ArithTriOp(&Compiler::XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
break;
case 0x2: // SUB
Comp_ArithTriOp(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
break;
case 0x3: // RSB
if (op2.IsZero())
{
if (rd != rn)
MOV(32, rd, rn);
NEG(32, rd);
if (S)
Comp_RetriveFlags(true, true, false);
}
else
Comp_ArithTriOpReverse(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
break;
case 0x4: // ADD
Comp_ArithTriOp(&Compiler::ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV);
break;
case 0x5: // ADC
Comp_ArithTriOp(&Compiler::ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry);
break;
case 0x6: // SBC
Comp_ArithTriOp(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opSyncCarry|opInvertCarry);
break;
case 0x7: // RSC
Comp_ArithTriOpReverse(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry);
break;
case 0xC: // ORR
Comp_ArithTriOp(&Compiler::OR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
break;
case 0xE: // BIC
Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2);
break;
default:
Log(LogLevel::Error, "this is a JIT bug! %04x\n", op);
abort();
}
if (CurInstr.A_Reg(12) == 15)
Comp_JumpTo(rd.GetSimpleReg(), S);
}
void Compiler::A_Comp_MovOp()
{
bool carryUsed;
bool S = CurInstr.Instr & (1 << 20);
OpArg op2 = A_Comp_GetALUOp2(S, carryUsed);
OpArg rd = MapReg(CurInstr.A_Reg(12));
if (rd != op2)
MOV(32, rd, op2);
if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
{
NOT(32, rd);
if (op2.IsImm() && CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm32());
}
else if (op2.IsImm() && CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm32());
if (S)
{
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
if (CurInstr.A_Reg(12) == 15)
Comp_JumpTo(rd.GetSimpleReg(), S);
}
void Compiler::A_Comp_CLZ()
{
OpArg rd = MapReg(CurInstr.A_Reg(12));
OpArg rm = MapReg(CurInstr.A_Reg(0));
MOV(32, R(RSCRATCH), Imm32(32));
TEST(32, rm, rm);
FixupBranch skipZero = J_CC(CC_Z);
BSR(32, RSCRATCH, rm);
XOR(32, R(RSCRATCH), Imm8(0x1F)); // 31 - RSCRATCH
SetJumpTarget(skipZero);
MOV(32, rd, R(RSCRATCH));
}
void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn)
{
if (Num == 0)
Comp_AddCycles_CI(S ? 3 : 1);
else
{
XOR(32, R(RSCRATCH), R(RSCRATCH));
MOV(32, R(RSCRATCH3), rs);
TEST(32, R(RSCRATCH3), R(RSCRATCH3));
FixupBranch zeroBSR = J_CC(CC_Z);
BSR(32, RSCRATCH2, R(RSCRATCH3));
NOT(32, R(RSCRATCH3));
BSR(32, RSCRATCH, R(RSCRATCH3));
CMP(32, R(RSCRATCH2), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L);
SHR(32, R(RSCRATCH), Imm8(3));
SetJumpTarget(zeroBSR); // fortunately that's even right
Comp_AddCycles_CI(RSCRATCH, add ? 2 : 1);
}
static_assert(EAX == RSCRATCH, "Someone changed RSCRATCH!");
MOV(32, R(RSCRATCH), rm);
if (add)
{
IMUL(32, RSCRATCH, rs);
LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg()));
if (S && FlagsNZRequired())
TEST(32, rd, rd);
}
else
{
IMUL(32, RSCRATCH, rs);
MOV(32, rd, R(RSCRATCH));
if (S && FlagsNZRequired())
TEST(32, R(RSCRATCH), R(RSCRATCH));
}
if (S)
Comp_RetriveFlags(false, false, false);
}
void Compiler::A_Comp_MUL_MLA()
{
bool S = CurInstr.Instr & (1 << 20);
bool add = CurInstr.Instr & (1 << 21);
OpArg rd = MapReg(CurInstr.A_Reg(16));
OpArg rm = MapReg(CurInstr.A_Reg(0));
OpArg rs = MapReg(CurInstr.A_Reg(8));
OpArg rn;
if (add)
rn = MapReg(CurInstr.A_Reg(12));
Comp_MulOp(S, add, rd, rm, rs, rn);
}
void Compiler::A_Comp_Mul_Long()
{
bool S = CurInstr.Instr & (1 << 20);
bool add = CurInstr.Instr & (1 << 21);
bool sign = CurInstr.Instr & (1 << 22);
OpArg rd = MapReg(CurInstr.A_Reg(16));
OpArg rm = MapReg(CurInstr.A_Reg(0));
OpArg rs = MapReg(CurInstr.A_Reg(8));
OpArg rn = MapReg(CurInstr.A_Reg(12));
if (Num == 0)
Comp_AddCycles_CI(S ? 3 : 1);
else
{
XOR(32, R(RSCRATCH), R(RSCRATCH));
MOV(32, R(RSCRATCH3), rs);
TEST(32, R(RSCRATCH3), R(RSCRATCH3));
FixupBranch zeroBSR = J_CC(CC_Z);
if (sign)
{
BSR(32, RSCRATCH2, R(RSCRATCH3));
NOT(32, R(RSCRATCH3));
BSR(32, RSCRATCH, R(RSCRATCH3));
CMP(32, R(RSCRATCH2), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L);
}
else
{
BSR(32, RSCRATCH, R(RSCRATCH3));
}
SHR(32, R(RSCRATCH), Imm8(3));
SetJumpTarget(zeroBSR); // fortunately that's even right
Comp_AddCycles_CI(RSCRATCH, 2);
}
if (sign)
{
MOVSX(64, 32, RSCRATCH2, rm);
MOVSX(64, 32, RSCRATCH3, rs);
}
else
{
MOV(32, R(RSCRATCH2), rm);
MOV(32, R(RSCRATCH3), rs);
}
if (add)
{
MOV(32, R(RSCRATCH), rd);
SHL(64, R(RSCRATCH), Imm8(32));
OR(64, R(RSCRATCH), rn);
IMUL(64, RSCRATCH2, R(RSCRATCH3));
ADD(64, R(RSCRATCH2), R(RSCRATCH));
}
else
{
IMUL(64, RSCRATCH2, R(RSCRATCH3));
if (S && FlagsNZRequired())
TEST(64, R(RSCRATCH2), R(RSCRATCH2));
}
if (S)
Comp_RetriveFlags(false, false, false);
MOV(32, rn, R(RSCRATCH2));
SHR(64, R(RSCRATCH2), Imm8(32));
MOV(32, rd, R(RSCRATCH2));
}
void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
{
if (CurInstr.SetFlags == 0)
return;
if (retriveCV && !(CurInstr.SetFlags & 0x3))
retriveCV = false;
bool carryOnly = !retriveCV && carryUsed;
if (carryOnly && !(CurInstr.SetFlags & 0x2))
{
carryUsed = false;
carryOnly = false;
}
CPSRDirty = true;
if (retriveCV)
{
SETcc(CC_O, R(RSCRATCH));
SETcc(sign ? CC_NC : CC_C, R(RSCRATCH3));
LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0));
}
if (FlagsNZRequired())
{
SETcc(CC_S, R(RSCRATCH));
SETcc(CC_Z, R(RSCRATCH3));
LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
int shiftAmount = 30;
if (retriveCV || carryUsed)
{
LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
shiftAmount = carryOnly ? 29 : 28;
}
SHL(32, R(RSCRATCH), Imm8(shiftAmount));
AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH));
}
else if (carryUsed || retriveCV)
{
SHL(32, R(RSCRATCH2), Imm8(carryOnly ? 29 : 28));
AND(32, R(RCPSR), Imm32(0xFFFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH2));
}
}
// always uses RSCRATCH, RSCRATCH2 only if S == true
OpArg Compiler::Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed)
{
carryUsed = S;
if (S)
{
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
TEST(32, R(RCPSR), Imm32(1 << 29));
SETcc(CC_NZ, R(RSCRATCH2));
}
MOV(32, R(RSCRATCH), rm);
static_assert(RSCRATCH3 == ECX, "Someone changed RSCRATCH3");
MOV(32, R(ECX), rs);
AND(32, R(ECX), Imm32(0xFF));
FixupBranch zero = J_CC(CC_Z);
if (op < 3)
{
void (Compiler::*shiftOp)(int, const OpArg&, const OpArg&) = NULL;
if (op == 0)
shiftOp = &Compiler::SHL;
else if (op == 1)
shiftOp = &Compiler::SHR;
else if (op == 2)
shiftOp = &Compiler::SAR;
CMP(32, R(ECX), Imm8(32));
FixupBranch lt32 = J_CC(CC_L);
FixupBranch done1;
if (op < 2)
{
FixupBranch eq32 = J_CC(CC_E);
XOR(32, R(RSCRATCH), R(RSCRATCH));
if (S)
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
done1 = J();
SetJumpTarget(eq32);
}
(this->*shiftOp)(32, R(RSCRATCH), Imm8(31));
(this->*shiftOp)(32, R(RSCRATCH), Imm8(1));
if (S)
SETcc(CC_C, R(RSCRATCH2));
FixupBranch done2 = J();
SetJumpTarget(lt32);
(this->*shiftOp)(32, R(RSCRATCH), R(ECX));
if (S)
SETcc(CC_C, R(RSCRATCH2));
if (op < 2)
SetJumpTarget(done1);
SetJumpTarget(done2);
}
else if (op == 3)
{
if (S)
BT(32, R(RSCRATCH), Imm8(31));
ROR(32, R(RSCRATCH), R(ECX));
if (S)
SETcc(CC_C, R(RSCRATCH2));
}
SetJumpTarget(zero);
return R(RSCRATCH);
}
// may uses RSCRATCH for op2 and RSCRATCH2 for the carryValue
OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& carryUsed)
{
carryUsed = true;
switch (op)
{
case 0: // LSL
if (amount > 0)
{
MOV(32, R(RSCRATCH), rm);
SHL(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
carryUsed = false;
return rm;
}
case 1: // LSR
if (amount > 0)
{
MOV(32, R(RSCRATCH), rm);
SHR(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
if (S)
{
MOV(32, R(RSCRATCH2), rm);
SHR(32, R(RSCRATCH2), Imm8(31));
}
return Imm32(0);
}
case 2: // ASR
MOV(32, R(RSCRATCH), rm);
SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
if (S)
{
if (amount == 0)
BT(32, rm, Imm8(31));
SETcc(CC_C, R(RSCRATCH2));
}
return R(RSCRATCH);
case 3: // ROR
MOV(32, R(RSCRATCH), rm);
if (amount > 0)
ROR(32, R(RSCRATCH), Imm8(amount));
else
{
BT(32, R(RCPSR), Imm8(29));
RCR(32, R(RSCRATCH), Imm8(1));
}
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
abort();
}
void Compiler::T_Comp_ShiftImm()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurInstr.T_Reg(3));
int op = (CurInstr.Instr >> 11) & 0x3;
int amount = (CurInstr.Instr >> 6) & 0x1F;
Comp_AddCycles_C();
bool carryUsed;
OpArg shifted = Comp_RegShiftImm(op, amount, rs, true, carryUsed);
if (shifted != rd)
MOV(32, rd, shifted);
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
void Compiler::T_Comp_AddSub_()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurInstr.T_Reg(3));
int op = (CurInstr.Instr >> 9) & 0x3;
OpArg rn = op >= 2 ? Imm32((CurInstr.Instr >> 6) & 0x7) : MapReg(CurInstr.T_Reg(6));
Comp_AddCycles_C();
// special case for thumb mov being alias to add rd, rn, #0
if (CurInstr.SetFlags == 0 && rn.IsImm() && rn.Imm32() == 0)
{
if (rd != rs)
MOV(32, rd, rs);
}
else if (op & 1)
Comp_ArithTriOp(&Compiler::SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV);
else
Comp_ArithTriOp(&Compiler::ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV);
}
void Compiler::T_Comp_ALU_Imm8()
{
OpArg rd = MapReg(CurInstr.T_Reg(8));
u32 op = (CurInstr.Instr >> 11) & 0x3;
OpArg imm = Imm32(CurInstr.Instr & 0xFF);
Comp_AddCycles_C();
switch (op)
{
case 0x0:
MOV(32, rd, imm);
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, false);
return;
case 0x1:
Comp_CmpOp(2, rd, imm, false);
return;
case 0x2:
Comp_ArithTriOp(&Compiler::ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV);
return;
case 0x3:
Comp_ArithTriOp(&Compiler::SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV);
return;
}
}
void Compiler::T_Comp_MUL()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurInstr.T_Reg(3));
Comp_MulOp(true, false, rd, rd, rs, Imm8(-1));
}
void Compiler::T_Comp_ALU()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurInstr.T_Reg(3));
u32 op = (CurInstr.Instr >> 6) & 0xF;
if ((op >= 0x2 && op < 0x4) || op == 0x7)
Comp_AddCycles_CI(1); // shift by reg
else
Comp_AddCycles_C();
switch (op)
{
case 0x0: // AND
Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric);
return;
case 0x1: // EOR
Comp_ArithTriOp(&Compiler::XOR, rd, rd, rs, false, opSetsFlags|opSymmetric);
return;
case 0x2:
case 0x3:
case 0x4:
case 0x7:
{
int shiftOp = op == 0x7 ? 3 : op - 0x2;
bool carryUsed;
OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed);
if (FlagsNZRequired())
TEST(32, shifted, shifted);
MOV(32, rd, shifted);
Comp_RetriveFlags(false, false, true);
}
return;
case 0x5: // ADC
Comp_ArithTriOp(&Compiler::ADC, rd, rd, rs, false, opSetsFlags|opSymmetric|opSyncCarry|opRetriveCV);
return;
case 0x6: // SBC
Comp_ArithTriOp(&Compiler::SBB, rd, rd, rs, false, opSetsFlags|opSyncCarry|opInvertCarry|opRetriveCV);
return;
case 0x8: // TST
Comp_CmpOp(0, rd, rs, false);
return;
case 0x9: // NEG
if (rd != rs)
MOV(32, rd, rs);
NEG(32, rd);
Comp_RetriveFlags(true, true, false);
return;
case 0xA: // CMP
Comp_CmpOp(2, rd, rs, false);
return;
case 0xB: // CMN
Comp_CmpOp(3, rd, rs, false);
return;
case 0xC: // ORR
Comp_ArithTriOp(&Compiler::OR, rd, rd, rs, false, opSetsFlags|opSymmetric);
return;
case 0xE: // BIC
Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric|opInvertOp2);
return;
case 0xF: // MVN
if (rd != rs)
MOV(32, rd, rs);
NOT(32, rd);
Comp_RetriveFlags(false, false, false);
return;
default:
break;
}
}
void Compiler::T_Comp_ALU_HiReg()
{
u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
OpArg rdMapped = MapReg(rd);
OpArg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
u32 op = (CurInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C();
switch (op)
{
case 0x0: // ADD
Comp_ArithTriOp(&Compiler::ADD, rdMapped, rdMapped, rs, false, opSymmetric);
break;
case 0x1: // CMP
Comp_CmpOp(2, rdMapped, rs, false);
return; // this is on purpose
case 0x2: // MOV
if (rdMapped != rs)
MOV(32, rdMapped, rs);
break;
}
if (rd == 15)
{
OR(32, rdMapped, Imm8(1));
Comp_JumpTo(rdMapped.GetSimpleReg());
}
}
void Compiler::T_Comp_AddSP()
{
Comp_AddCycles_C();
OpArg sp = MapReg(13);
OpArg offset = Imm32((CurInstr.Instr & 0x7F) << 2);
if (CurInstr.Instr & (1 << 7))
SUB(32, sp, offset);
else
ADD(32, sp, offset);
}
void Compiler::T_Comp_RelAddr()
{
Comp_AddCycles_C();
OpArg rd = MapReg(CurInstr.T_Reg(8));
u32 offset = (CurInstr.Instr & 0xFF) << 2;
if (CurInstr.Instr & (1 << 11))
{
OpArg sp = MapReg(13);
LEA(32, rd.GetSimpleReg(), MDisp(sp.GetSimpleReg(), offset));
}
else
MOV(32, rd, Imm32((R15 & ~2) + offset));
}
}

View File

@ -1,302 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
#include "../ARM.h"
#include "../NDS.h"
using namespace Gen;
namespace melonDS
{
template <typename T>
int squeezePointer(T* ptr)
{
int truncated = (int)((u64)ptr);
assert((T*)((u64)truncated) == ptr);
return truncated;
}
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
// we can simplify constant branches by a lot
IrregularCycles = true;
u32 newPC;
u32 cycles = 0;
if (addr & 0x1 && !Thumb)
{
CPSRDirty = true;
OR(32, R(RCPSR), Imm8(0x20));
}
else if (!(addr & 0x1) && Thumb)
{
CPSRDirty = true;
AND(32, R(RCPSR), Imm32(~0x20));
}
if (Num == 0)
{
ARMv5* cpu9 = (ARMv5*)CurCPU;
u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
cpu9->RegionCodeCycles = regionCodeCycles;
if (Exit)
MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
else
{
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}
cpu9->RegionCodeCycles = compileTimeCodeCycles;
}
else
{
ARMv4* cpu7 = (ARMv4*)CurCPU;
u32 codeRegion = addr >> 24;
u32 codeCycles = addr >> 15; // cheato
cpu7->CodeRegion = codeRegion;
cpu7->CodeCycles = codeCycles;
if (Exit)
{
MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
}
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// this is necessary because ARM7 bios protection
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
cycles += NDS.ARM7MemTimings[codeCycles][0] + NDS.ARM7MemTimings[codeCycles][1];
CurCPU->R[15] = compileTimePC;
}
else
{
addr &= ~0x3;
newPC = addr+4;
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
cycles += NDS.ARM7MemTimings[codeCycles][2] + NDS.ARM7MemTimings[codeCycles][3];
CurCPU->R[15] = compileTimePC;
}
cpu7->CodeRegion = R15 >> 24;
cpu7->CodeCycles = addr >> 15;
}
if (Exit)
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
}
void ARMv4JumpToTrampoline(ARMv4* arm, u32 addr, bool restorecpsr)
{
arm->JumpTo(addr, restorecpsr);
}
void ARMv5JumpToTrampoline(ARMv5* arm, u32 addr, bool restorecpsr)
{
arm->JumpTo(addr, restorecpsr);
}
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
IrregularCycles = true;
bool cpsrDirty = CPSRDirty;
SaveCPSR();
PushRegs(restoreCPSR, true);
MOV(64, R(ABI_PARAM1), R(RCPU));
MOV(32, R(ABI_PARAM2), R(addr));
if (!restoreCPSR)
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
else
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
if (Num == 0)
ABI_CallFunction(ARMv5JumpToTrampoline);
else
ABI_CallFunction(ARMv4JumpToTrampoline);
PopRegs(restoreCPSR, true);
LoadCPSR();
// in case this instruction is skipped
if (CurInstr.Cond() < 0xE)
CPSRDirty = cpsrDirty;
}
void Compiler::A_Comp_BranchImm()
{
int op = (CurInstr.Instr >> 24) & 1;
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
u32 target = R15 + offset;
bool link = op;
if (CurInstr.Cond() == 0xF) // BLX_imm
{
target += (op << 1) + 1;
link = true;
}
if (link)
MOV(32, MapReg(14), Imm32(R15 - 4));
Comp_JumpTo(target);
}
void Compiler::A_Comp_BranchXchangeReg()
{
OpArg rn = MapReg(CurInstr.A_Reg(0));
MOV(32, R(RSCRATCH), rn);
if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
MOV(32, MapReg(14), Imm32(R15 - 4));
Comp_JumpTo(RSCRATCH);
}
void Compiler::T_Comp_BCOND()
{
u32 cond = (CurInstr.Instr >> 8) & 0xF;
FixupBranch skipExecute = CheckCondition(cond);
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
Comp_SpecialBranchBehaviour(true);
FixupBranch skipFailed = J();
SetJumpTarget(skipExecute);
Comp_SpecialBranchBehaviour(false);
Comp_AddCycles_C(true);
SetJumpTarget(skipFailed);
}
void Compiler::T_Comp_B()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
Comp_JumpTo(R15 + offset + 1);
}
void Compiler::T_Comp_BranchXchangeReg()
{
bool link = CurInstr.Instr & (1 << 7);
if (link)
{
if (Num == 1)
{
Log(LogLevel::Warn, "BLX unsupported on ARM7!!!\n");
return;
}
MOV(32, R(RSCRATCH), MapReg(CurInstr.A_Reg(3)));
MOV(32, MapReg(14), Imm32(R15 - 1));
Comp_JumpTo(RSCRATCH);
}
else
{
OpArg rn = MapReg(CurInstr.A_Reg(3));
Comp_JumpTo(rn.GetSimpleReg());
}
}
void Compiler::T_Comp_BL_LONG_1()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
MOV(32, MapReg(14), Imm32(R15 + offset));
Comp_AddCycles_C();
}
void Compiler::T_Comp_BL_LONG_2()
{
OpArg lr = MapReg(14);
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset));
MOV(32, lr, Imm32((R15 - 2) | 1));
if (Num == 1 || CurInstr.Instr & (1 << 12))
OR(32, R(RSCRATCH), Imm8(1));
Comp_JumpTo(RSCRATCH);
}
void Compiler::T_Comp_BL_Merged()
{
Comp_AddCycles_C();
R15 += 2;
u32 upperPart = CurInstr.Instr >> 16;
u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
target += (upperPart & 0x7FF) << 1;
if (Num == 1 || upperPart & (1 << 12))
target |= 1;
MOV(32, MapReg(14), Imm32((R15 - 2) | 1));
Comp_JumpTo(target);
}
}

View File

@ -1,964 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
#include "../ARMJIT.h"
#include "../ARMInterpreter.h"
#include "../NDS.h"
#include "../ARMJIT_Global.h"
#include <assert.h>
#include <stdarg.h>
#include "../dolphin/CommonFuncs.h"
using namespace Gen;
using namespace Common;
extern "C" void ARM_Ret();
namespace melonDS
{
template <>
const X64Reg RegisterCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
{
#ifdef _WIN32
RBX, RSI, RDI, R12, R13, R14, // callee saved
R10, R11, // caller saved
#else
RBX, R12, R13, R14, // callee saved, this is sad
R9, R10, R11, // caller saved
#endif
};
template <>
const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
#ifdef _WIN32
8
#else
7
#endif
;
#ifdef _WIN32
const BitSet32 CallerSavedPushRegs({R10, R11});
#else
const BitSet32 CallerSavedPushRegs({R9, R10, R11});
#endif
void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload)
{
BitSet32 loadedRegs(RegCache.LoadedRegs);
if (saveHiRegs)
{
BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsLoaded)
{
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.UnloadRegister(reg);
else
SaveReg(reg, RegCache.Mapping[reg]);
// prevent saving the register twice
loadedRegs[reg] = false;
}
}
for (int reg : loadedRegs)
{
if (CallerSavedPushRegs[RegCache.Mapping[reg]]
&& (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs))))
{
if ((Thumb || CurInstr.Cond() == 0xE) && !((1 << reg) & (CurInstr.Info.DstRegs|CurInstr.Info.SrcRegs)) && allowUnload)
RegCache.UnloadRegister(reg);
else
SaveReg(reg, RegCache.Mapping[reg]);
}
}
}
void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged)
{
BitSet32 loadedRegs(RegCache.LoadedRegs);
for (int reg : loadedRegs)
{
if ((saveHiRegs && reg >= 8 && reg < 15)
|| (CallerSavedPushRegs[RegCache.Mapping[reg]]
&& (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs)))))
{
LoadReg(reg, RegCache.Mapping[reg]);
}
}
}
void Compiler::A_Comp_MRS()
{
Comp_AddCycles_C();
OpArg rd = MapReg(CurInstr.A_Reg(12));
if (CurInstr.Instr & (1 << 22))
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
XOR(32, R(RSCRATCH3), R(RSCRATCH3));
MOV(32, R(RSCRATCH2), Imm32(15 - 8));
CALL(ReadBanked);
MOV(32, rd, R(RSCRATCH3));
}
else
{
MOV(32, rd, R(RCPSR));
}
}
void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
{
arm->UpdateMode(oldmode, newmode);
}
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
OpArg val = CurInstr.Instr & (1 << 25)
? Imm32(melonDS::ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)))
: MapReg(CurInstr.A_Reg(0));
u32 mask = 0;
if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
if (CurInstr.Instr & (1 << 22))
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
XOR(32, R(RSCRATCH3), R(RSCRATCH3));
MOV(32, R(RSCRATCH2), Imm32(15 - 8));
CALL(ReadBanked);
MOV(32, R(RSCRATCH2), Imm32(mask));
MOV(32, R(RSCRATCH4), R(RSCRATCH2));
AND(32, R(RSCRATCH4), Imm32(0xFFFFFF00));
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
CMP(32, R(RSCRATCH), Imm8(0x10));
CMOVcc(32, RSCRATCH2, R(RSCRATCH4), CC_E);
MOV(32, R(RSCRATCH4), R(RSCRATCH2));
NOT(32, R(RSCRATCH4));
AND(32, R(RSCRATCH3), R(RSCRATCH4));
AND(32, R(RSCRATCH2), val);
OR(32, R(RSCRATCH3), R(RSCRATCH2));
MOV(32, R(RSCRATCH2), Imm32(15 - 8));
CALL(WriteBanked);
}
else
{
mask &= 0xFFFFFFDF;
CPSRDirty = true;
if ((mask & 0xFF) == 0)
{
AND(32, R(RCPSR), Imm32(~mask));
if (!val.IsImm())
{
MOV(32, R(RSCRATCH), val);
AND(32, R(RSCRATCH), Imm32(mask));
OR(32, R(RCPSR), R(RSCRATCH));
}
else
{
OR(32, R(RCPSR), Imm32(val.Imm32() & mask));
}
}
else
{
MOV(32, R(RSCRATCH2), Imm32(mask));
MOV(32, R(RSCRATCH3), R(RSCRATCH2));
AND(32, R(RSCRATCH3), Imm32(0xFFFFFF00));
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
CMP(32, R(RSCRATCH), Imm8(0x10));
CMOVcc(32, RSCRATCH2, R(RSCRATCH3), CC_E);
MOV(32, R(RSCRATCH3), R(RCPSR));
// I need you ANDN
MOV(32, R(RSCRATCH), R(RSCRATCH2));
NOT(32, R(RSCRATCH));
AND(32, R(RCPSR), R(RSCRATCH));
AND(32, R(RSCRATCH2), val);
OR(32, R(RCPSR), R(RSCRATCH2));
PushRegs(true, true);
MOV(32, R(ABI_PARAM3), R(RCPSR));
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(UpdateModeTrampoline);
PopRegs(true, true);
}
}
}
Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
{
ARMJIT_Global::Init();
CodeMemBase = static_cast<u8*>(ARMJIT_Global::AllocateCodeMem());
CodeMemSize = ARMJIT_Global::CodeMemorySliceSize;
ResetStart = CodeMemBase;
Reset();
{
// RSCRATCH mode
// RSCRATCH2 reg number
// RSCRATCH3 value in current mode
// ret - RSCRATCH3
ReadBanked = (void*)GetWritableCodePtr();
CMP(32, R(RSCRATCH), Imm8(0x11));
FixupBranch fiq = J_CC(CC_E);
SUB(32, R(RSCRATCH2), Imm8(13 - 8));
FixupBranch notEverything = J_CC(CC_L);
CMP(32, R(RSCRATCH), Imm8(0x12));
FixupBranch irq = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x13));
FixupBranch svc = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x17));
FixupBranch abt = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x1B));
FixupBranch und = J_CC(CC_E);
SetJumpTarget(notEverything);
RET();
SetJumpTarget(fiq);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)));
RET();
SetJumpTarget(irq);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)));
RET();
SetJumpTarget(svc);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)));
RET();
SetJumpTarget(abt);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)));
RET();
SetJumpTarget(und);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)));
RET();
#ifdef JIT_PROFILING_ENABLED
CreateMethod("ReadBanked", ReadBanked);
#endif
}
{
// RSCRATCH mode
// RSCRATCH2 reg n
// RSCRATCH3 value
// carry flag set if the register isn't banked
WriteBanked = (void*)GetWritableCodePtr();
CMP(32, R(RSCRATCH), Imm8(0x11));
FixupBranch fiq = J_CC(CC_E);
SUB(32, R(RSCRATCH2), Imm8(13 - 8));
FixupBranch notEverything = J_CC(CC_L);
CMP(32, R(RSCRATCH), Imm8(0x12));
FixupBranch irq = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x13));
FixupBranch svc = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x17));
FixupBranch abt = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x1B));
FixupBranch und = J_CC(CC_E);
SetJumpTarget(notEverything);
STC();
RET();
SetJumpTarget(fiq);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(irq);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(svc);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(abt);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(und);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)), R(RSCRATCH3));
CLC();
RET();
#ifdef JIT_PROFILING_ENABLED
CreateMethod("WriteBanked", WriteBanked);
#endif
}
for (int consoleType = 0; consoleType < 2; consoleType++)
{
for (int num = 0; num < 2; num++)
{
for (int size = 0; size < 3; size++)
{
for (int reg = 0; reg < 16; reg++)
{
if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2)
{
PatchedStoreFuncs[consoleType][num][size][reg] = NULL;
PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL;
PatchedLoadFuncs[consoleType][num][size][1][reg] = NULL;
continue;
}
X64Reg rdMapped = (X64Reg)reg;
PatchedStoreFuncs[consoleType][num][size][reg] = GetWritableCodePtr();
if (RSCRATCH3 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (num == 0)
{
MOV(64, R(ABI_PARAM2), R(RCPU));
if (rdMapped != ABI_PARAM3)
MOV(32, R(ABI_PARAM3), R(rdMapped));
}
else
{
MOV(32, R(ABI_PARAM2), R(rdMapped));
}
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
case 17: ABI_CallFunction(SlowWrite7<u16, 0>); break;
case 8: ABI_CallFunction(SlowWrite9<u8, 0>); break;
case 9: ABI_CallFunction(SlowWrite7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 1>); break;
case 17: ABI_CallFunction(SlowWrite7<u16, 1>); break;
case 8: ABI_CallFunction(SlowWrite9<u8, 1>); break;
case 9: ABI_CallFunction(SlowWrite7<u8, 1>); break;
}
}
ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
RET();
#ifdef JIT_PROFILING_ENABLED
CreateMethod("FastMemStorePatch%d_%d_%d", PatchedStoreFuncs[consoleType][num][size][reg], num, size, reg);
#endif
for (int signextend = 0; signextend < 2; signextend++)
{
PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetWritableCodePtr();
if (RSCRATCH3 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (num == 0)
MOV(64, R(ABI_PARAM2), R(RCPU));
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 0>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 0>); break;
case 16: ABI_CallFunction(SlowRead9<u16, 0>); break;
case 17: ABI_CallFunction(SlowRead7<u16, 0>); break;
case 8: ABI_CallFunction(SlowRead9<u8, 0>); break;
case 9: ABI_CallFunction(SlowRead7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 1>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 1>); break;
case 16: ABI_CallFunction(SlowRead9<u16, 1>); break;
case 17: ABI_CallFunction(SlowRead7<u16, 1>); break;
case 8: ABI_CallFunction(SlowRead9<u8, 1>); break;
case 9: ABI_CallFunction(SlowRead7<u8, 1>); break;
}
}
ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (signextend)
MOVSX(32, 8 << size, rdMapped, R(RSCRATCH));
else
MOVZX(32, 8 << size, rdMapped, R(RSCRATCH));
RET();
#ifdef JIT_PROFILING_ENABLED
CreateMethod("FastMemLoadPatch%d_%d_%d_%d", PatchedLoadFuncs[consoleType][num][size][signextend][reg], num, size, reg, signextend);
#endif
}
}
}
}
}
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
NearStart = ResetStart;
FarStart = ResetStart + 1024*1024*24;
NearSize = FarStart - ResetStart;
FarSize = (ResetStart + CodeMemSize) - FarStart;
}
Compiler::~Compiler()
{
ARMJIT_Global::FreeCodeMem(CodeMemBase);
ARMJIT_Global::DeInit();
}
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR)));
}
void Compiler::SaveCPSR(bool flagClean)
{
if (CPSRDirty)
{
MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR));
if (flagClean)
CPSRDirty = false;
}
}
void Compiler::LoadReg(int reg, X64Reg nativeReg)
{
if (reg != 15)
MOV(32, R(nativeReg), MDisp(RCPU, offsetof(ARM, R) + reg*4));
else
MOV(32, R(nativeReg), Imm32(R15));
}
void Compiler::SaveReg(int reg, X64Reg nativeReg)
{
MOV(32, MDisp(RCPU, offsetof(ARM, R) + reg*4), R(nativeReg));
}
// invalidates RSCRATCH and RSCRATCH3
Gen::FixupBranch Compiler::CheckCondition(u32 cond)
{
// hack, ldm/stm can get really big TODO: make this better
bool ldmStm = !Thumb &&
(CurInstr.Info.Kind == ARMInstrInfo::ak_LDM || CurInstr.Info.Kind == ARMInstrInfo::ak_STM);
if (cond >= 0x8)
{
static_assert(RSCRATCH3 == ECX, "RSCRATCH has to be equal to ECX!");
MOV(32, R(RSCRATCH3), R(RCPSR));
SHR(32, R(RSCRATCH3), Imm8(28));
MOV(32, R(RSCRATCH), Imm32(1));
SHL(32, R(RSCRATCH), R(RSCRATCH3));
TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
return J_CC(CC_Z, ldmStm);
}
else
{
// could have used a LUT, but then where would be the fun?
TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
return J_CC(cond & 1 ? CC_NZ : CC_Z, ldmStm);
}
}
#define F(x) &Compiler::x
const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
{
// AND
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// EOR
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// SUB
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// RSB
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// ADD
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// ADC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// SBC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// RSC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// ORR
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// MOV
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
// BIC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// MVN
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
// TST
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// TEQ
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// CMP
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// CMN
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// Mul
F(A_Comp_MUL_MLA), F(A_Comp_MUL_MLA), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), NULL, NULL, NULL, NULL, NULL,
// ARMv5 stuff
F(A_Comp_CLZ), NULL, NULL, NULL, NULL,
// STR
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// STRB
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// LDR
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// LDRB
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// STRH
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// LDRD, STRD never used by anything so they stay interpreted (by anything I mean the 5 games I checked)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// LDRH
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// LDRSB
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// LDRSH
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// swap
NULL, NULL,
// LDM/STM
F(A_Comp_LDM_STM), F(A_Comp_LDM_STM),
// Branch
F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchXchangeReg), F(A_Comp_BranchXchangeReg),
// system stuff
NULL, F(A_Comp_MSR), F(A_Comp_MSR), F(A_Comp_MRS), NULL, NULL, NULL,
F(Nop)
};
const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {
// Shift imm
F(T_Comp_ShiftImm), F(T_Comp_ShiftImm), F(T_Comp_ShiftImm),
// Three operand ADD/SUB
F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_),
// 8 bit imm
F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8),
// general ALU
F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
F(T_Comp_ALU), F(T_Comp_MUL), F(T_Comp_ALU), F(T_Comp_ALU),
// hi reg
F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg),
// pc/sp relative
F(T_Comp_RelAddr), F(T_Comp_RelAddr), F(T_Comp_AddSP),
// LDR pcrel
F(T_Comp_LoadPCRel),
// LDR/STR reg offset
F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg),
// LDR/STR sign extended, half
F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf),
// LDR/STR imm offset
F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm),
// LDR/STR half imm offset
F(T_Comp_MemImmHalf), F(T_Comp_MemImmHalf),
// LDR/STR sp rel
F(T_Comp_MemSPRel), F(T_Comp_MemSPRel),
// PUSH/POP
F(T_Comp_PUSH_POP), F(T_Comp_PUSH_POP),
// LDMIA, STMIA
F(T_Comp_LDMIA_STMIA), F(T_Comp_LDMIA_STMIA),
// Branch
F(T_Comp_BCOND), F(T_Comp_BranchXchangeReg), F(T_Comp_BranchXchangeReg), F(T_Comp_B), F(T_Comp_BL_LONG_1), F(T_Comp_BL_LONG_2),
// Unk, SVC
NULL, NULL,
F(T_Comp_BL_Merged)
};
#undef F
bool Compiler::CanCompile(bool thumb, u16 kind) const
{
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
void Compiler::Reset()
{
memset(ResetStart, 0xcc, CodeMemSize);
SetCodePtr(ResetStart);
NearCode = NearStart;
FarCode = FarStart;
LoadStorePatches.clear();
}
bool Compiler::IsJITFault(const u8* addr)
{
return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize;
}
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
{
if (taken && CurInstr.BranchFlags & branch_IdleBranch)
OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
|| (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
{
RegCache.PrepareExit();
if (ConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
ABI_TailCall(ARM_Ret);
}
}
#ifdef JIT_PROFILING_ENABLED
void Compiler::CreateMethod(const char* namefmt, void* start, ...)
{
if (iJIT_IsProfilingActive())
{
va_list args;
va_start(args, start);
char name[64];
vsprintf(name, namefmt, args);
va_end(args);
iJIT_Method_Load method = {0};
method.method_id = iJIT_GetNewMethodID();
method.method_name = name;
method.method_load_address = start;
method.method_size = GetWritableCodePtr() - (u8*)start;
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&method);
}
}
#endif
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemoryInstr)
{
if (NearSize - (GetCodePtr() - NearStart) < 1024 * 32) // guess...
{
Log(LogLevel::Debug, "near reset\n");
NDS.JIT.ResetBlockCache();
}
if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess...
{
Log(LogLevel::Debug, "far reset\n");
NDS.JIT.ResetBlockCache();
}
ConstantCycles = 0;
Thumb = thumb;
Num = cpu->Num;
CodeRegion = instrs[0].Addr >> 24;
CurCPU = cpu;
// CPSR might have been modified in a previous block
CPSRDirty = false;
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
for (int i = 0; i < instrsCount; i++)
{
CurInstr = instrs[i];
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
CodeRegion = R15 >> 24;
Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
CompileFunc comp = Thumb
? T_Comp[CurInstr.Info.Kind]
: A_Comp[CurInstr.Info.Kind];
bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
{
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
if (comp == NULL)
{
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
SaveCPSR();
}
}
if (comp != NULL)
RegCache.Prepare(Thumb, i);
else
RegCache.Flush();
if (Thumb)
{
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(InterpretTHUMB[CurInstr.Info.Kind]);
}
else
{
(this->*comp)();
}
}
else
{
u32 cond = CurInstr.Cond();
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
if (comp)
(this->*comp)();
else
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
}
}
else if (cond == 0xF)
{
Comp_AddCycles_C();
}
else
{
IrregularCycles = comp == NULL;
FixupBranch skipExecute;
if (cond < 0xE)
skipExecute = CheckCondition(cond);
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(InterpretARM[CurInstr.Info.Kind]);
}
else
{
(this->*comp)();
}
Comp_SpecialBranchBehaviour(true);
if (CurInstr.Cond() < 0xE)
{
if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
{
FixupBranch skipFailed = J();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
Comp_SpecialBranchBehaviour(false);
SetJumpTarget(skipFailed);
}
else
{
SetJumpTarget(skipExecute);
}
}
}
}
if (comp == NULL)
LoadCPSR();
}
RegCache.Flush();
if (ConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
ABI_TailCall(ARM_Ret);
#ifdef JIT_PROFILING_ENABLED
CreateMethod("JIT_Block_%d_%d_%08X", (void*)res, Num, Thumb, instrs[0].Addr);
#endif
/*FILE* codeout = fopen("codeout", "a");
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
fwrite((u8*)res, GetWritableCodePtr() - (u8*)res, 1, codeout);
fclose(codeout);*/
return res;
}
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
void Compiler::Comp_AddCycles_CI(u32 i)
{
s32 cycles = (Num ?
NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
{
s32 cycles = Num ?
NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if (!Thumb && CurInstr.Cond() < 0xE)
{
LEA(32, RSCRATCH, MDisp(i, add + cycles));
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
}
else
{
ConstantCycles += cycles;
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
}
}
void Compiler::Comp_AddCycles_CDI()
{
if (Num == 0)
Comp_AddCycles_CD();
else
{
IrregularCycles = true;
s32 cycles;
s32 numC = NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
else
{
numC++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles = numC + numD + 1;
}
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
}
void Compiler::Comp_AddCycles_CD()
{
u32 cycles = 0;
if (Num == 0)
{
s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
s32 numD = CurInstr.DataCycles;
//if (DataRegion != CodeRegion)
cycles = std::max(numC + numD - 6, std::max(numC, numD));
IrregularCycles = cycles != numC;
}
else
{
s32 numC = NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if ((CurInstr.DataRegion >> 4) == 0x02)
{
if (CodeRegion == 0x02)
cycles += numC + numD;
else
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else if (CodeRegion == 0x02)
{
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles += numC + numD;
}
IrregularCycles = true;
}
if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
}

View File

@ -1,289 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_X64_COMPILER_H
#define ARMJIT_X64_COMPILER_H
#if defined(JIT_ENABLED) && defined(__x86_64__)
#include "../dolphin/x64Emitter.h"
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
#ifdef JIT_PROFILING_ENABLED
#include <jitprofiling.h>
#endif
#include <unordered_map>
namespace melonDS
{
class ARMJIT;
class ARMJIT_Memory;
class NDS;
const Gen::X64Reg RCPU = Gen::RBP;
const Gen::X64Reg RCPSR = Gen::R15;
const Gen::X64Reg RSCRATCH = Gen::EAX;
const Gen::X64Reg RSCRATCH2 = Gen::EDX;
const Gen::X64Reg RSCRATCH3 = Gen::ECX;
const Gen::X64Reg RSCRATCH4 = Gen::R8;
struct LoadStorePatch
{
void* PatchFunc;
s16 Offset;
u16 Size;
};
struct Op2
{
Op2()
{}
Op2(u32 imm)
: IsImm(true), Imm(imm)
{}
Op2(int reg, int op, int amount)
: IsImm(false)
{
Reg.Reg = reg;
Reg.Op = op;
Reg.Amount = amount;
}
bool IsImm;
union
{
struct
{
int Reg, Op, Amount;
} Reg;
u32 Imm;
};
};
class Compiler : public Gen::XEmitter
{
public:
explicit Compiler(melonDS::NDS& nds);
~Compiler();
void Reset();
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemoryInstr);
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
bool CanCompile(bool thumb, u16 kind) const;
typedef void (Compiler::*CompileFunc)();
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 i);
void Comp_AddCycles_CI(Gen::X64Reg i, int add);
void Comp_AddCycles_CDI();
void Comp_AddCycles_CD();
enum
{
opSetsFlags = 1 << 0,
opSymmetric = 1 << 1,
opRetriveCV = 1 << 2,
opInvertCarry = 1 << 3,
opSyncCarry = 1 << 4,
opInvertOp2 = 1 << 5,
};
void Nop() {}
void A_Comp_Arith();
void A_Comp_MovOp();
void A_Comp_CmpOp();
void A_Comp_MUL_MLA();
void A_Comp_Mul_Long();
void A_Comp_CLZ();
void A_Comp_MemWB();
void A_Comp_MemHalf();
void A_Comp_LDM_STM();
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void A_Comp_MRS();
void A_Comp_MSR();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
void T_Comp_MUL();
void T_Comp_RelAddr();
void T_Comp_AddSP();
void T_Comp_MemReg();
void T_Comp_MemImm();
void T_Comp_MemRegHalf();
void T_Comp_MemImmHalf();
void T_Comp_LoadPCRel();
void T_Comp_MemSPRel();
void T_Comp_PUSH_POP();
void T_Comp_LDMIA_STMIA();
void T_Comp_BCOND();
void T_Comp_B();
void T_Comp_BranchXchangeReg();
void T_Comp_BL_LONG_1();
void T_Comp_BL_LONG_2();
void T_Comp_BL_Merged();
enum
{
memop_Writeback = 1 << 0,
memop_Post = 1 << 1,
memop_SignExtend = 1 << 2,
memop_Store = 1 << 3,
memop_SubtractOffset = 1 << 4
};
void Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags);
s32 Comp_MemAccessBlock(int rn, Common::BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
void Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed);
void Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn);
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
void Comp_SpecialBranchBehaviour(bool taken);
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed);
void LoadCPSR();
void SaveCPSR(bool flagClean = true);
bool FlagsNZRequired()
{ return CurInstr.SetFlags & 0xC; }
Gen::FixupBranch CheckCondition(u32 cond);
void PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload = true);
void PopRegs(bool saveHiRegs, bool saveRegsToBeChanged);
Gen::OpArg MapReg(int reg)
{
if (reg == 15 && !(RegCache.LoadedRegs & (1 << 15)))
return Gen::Imm32(R15);
assert(RegCache.Mapping[reg] != Gen::INVALID_REG);
return Gen::R(RegCache.Mapping[reg]);
}
JitBlockEntry AddEntryOffset(u32 offset)
{
return (JitBlockEntry)(ResetStart + offset);
}
u32 SubEntryOffset(JitBlockEntry entry)
{
return (u8*)entry - ResetStart;
}
void SwitchToNearCode()
{
FarCode = GetWritableCodePtr();
SetCodePtr(NearCode);
}
void SwitchToFarCode()
{
NearCode = GetWritableCodePtr();
SetCodePtr(FarCode);
}
bool IsJITFault(const u8* addr);
u8* RewriteMemAccess(u8* pc);
#ifdef JIT_PROFILING_ENABLED
void CreateMethod(const char* namefmt, void* start, ...);
#endif
melonDS::NDS& NDS;
u8* FarCode {};
u8* NearCode {};
u32 FarSize {};
u32 NearSize {};
u8* NearStart {};
u8* FarStart {};
void* PatchedStoreFuncs[2][2][3][16] {};
void* PatchedLoadFuncs[2][2][3][2][16] {};
std::unordered_map<u8*, LoadStorePatch> LoadStorePatches {};
u8* CodeMemBase;
u8* ResetStart {};
u32 CodeMemSize {};
bool Exit {};
bool IrregularCycles {};
void* ReadBanked {};
void* WriteBanked {};
bool CPSRDirty = false;
FetchedInstr CurInstr {};
RegisterCache<Compiler, Gen::X64Reg> RegCache {};
bool Thumb {};
u32 Num {};
u32 R15 {};
u32 CodeRegion {};
u32 ConstantCycles {};
ARM* CurCPU {};
};
}
#endif
#endif

View File

@ -1,33 +0,0 @@
/*
Copyright 2016-2024 melonDS team, RSDuck
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "../ARM.h"
using namespace melonDS;
int main(int argc, char* argv[])
{
FILE* f = fopen("ARMJIT_Offsets.h", "w");
#define writeOffset(field) \
fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field))
writeOffset(CPSR);
writeOffset(Cycles);
writeOffset(StopExecution);
fclose(f);
return 0;
}

View File

@ -1,111 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
.intel_syntax noprefix
#include "ARMJIT_Offsets.h"
.text
#define RCPU rbp
#define RCPSR r15d
#ifdef WIN64
#define ARG1_REG ecx
#define ARG2_REG edx
#define ARG3_REG r8d
#define ARG4_REG r9d
#define ARG1_REG64 rcx
#define ARG2_REG64 rdx
#define ARG3_REG64 r8
#define ARG4_REG64 r9
#else
#define ARG1_REG edi
#define ARG2_REG esi
#define ARG3_REG edx
#define ARG4_REG ecx
#define ARG1_REG64 rdi
#define ARG2_REG64 rsi
#define ARG3_REG64 rdx
#define ARG4_REG64 rcx
#endif
.p2align 4,,15
#ifdef __APPLE__
.global _ARM_Dispatch
_ARM_Dispatch:
#else
.global ARM_Dispatch
ARM_Dispatch:
#endif
#ifdef WIN64
push rdi
push rsi
#endif
push rbx
push r12
push r13
push r14
push r15
push rbp
#ifdef WIN64
sub rsp, 0x28
#else
sub rsp, 0x8
#endif
mov RCPU, ARG1_REG64
mov RCPSR, [RCPU + ARM_CPSR_offset]
jmp ARG2_REG64
.p2align 4,,15
#ifdef __APPLE__
.global _ARM_Ret
_ARM_Ret:
#else
.global ARM_Ret
ARM_Ret:
#endif
mov [RCPU + ARM_CPSR_offset], RCPSR
#ifdef WIN64
add rsp, 0x28
#else
add rsp, 0x8
#endif
pop rbp
pop r15
pop r14
pop r13
pop r12
pop rbx
#ifdef WIN64
pop rsi
pop rdi
#endif
ret
#if !defined(__APPLE__) && !defined(WIN64)
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,858 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARMJIT_Compiler.h"
#include "../ARMJIT.h"
#include "../NDS.h"
using namespace Gen;
namespace melonDS
{
template <typename T>
int squeezePointer(T* ptr)
{
int truncated = (int)((u64)ptr);
assert((T*)((u64)truncated) == ptr);
return truncated;
}
u8* Compiler::RewriteMemAccess(u8* pc)
{
auto it = LoadStorePatches.find(pc);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
//printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size);
XEmitter emitter(pc + (ptrdiff_t)patch.Offset);
emitter.CALL(patch.PatchFunc);
ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5;
assert(remainingSize >= 0);
if (remainingSize > 0)
emitter.NOP(remainingSize);
return pc + (ptrdiff_t)patch.Offset;
}
Log(LogLevel::Error, "this is a JIT bug %sx\n", pc);
abort();
}
/*
According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number)
of all memory load and store instructions always access addresses in the same region as
during the their first execution.
I tried multiple optimisations, which would benefit from this behaviour
(having fast paths for the first region, ), though none of them yielded a measureable
improvement.
*/
bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
u32 localAddr = NDS.JIT.LocaliseCodeAddress(Num, addr);
int invalidLiteralIdx = NDS.JIT.InvalidLiterals.Find(localAddr);
if (invalidLiteralIdx != -1)
{
return false;
}
Comp_AddCycles_CDI();
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
val = melonDS::ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}
CurCPU->R[15] = tmpR15;
MOV(32, MapReg(rd), Imm32(val));
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
return true;
}
void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags)
{
u32 addressMask = ~0;
if (size == 32)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
if (NDS.JIT.LiteralOptimizationsEnabled() && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
return;
}
if (flags & memop_Store)
{
Comp_AddCycles_CD();
}
else
{
Comp_AddCycles_CDI();
}
bool addrIsStatic = NDS.JIT.LiteralOptimizationsEnabled()
&& RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
u32 staticAddress;
if (addrIsStatic)
staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
OpArg rdMapped = MapReg(rd);
OpArg rnMapped = MapReg(rn);
if (Thumb && rn == 15)
rnMapped = Imm32(R15 & ~0x2);
if (flags & memop_Store && flags & (memop_Post|memop_Writeback) && rd == rn)
{
MOV(32, R(RSCRATCH4), rdMapped);
rdMapped = R(RSCRATCH4);
}
X64Reg finalAddr = RSCRATCH3;
if (flags & memop_Post)
{
MOV(32, R(RSCRATCH3), rnMapped);
finalAddr = rnMapped.GetSimpleReg();
}
if (op2.IsImm)
{
MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
}
else
{
OpArg rm = MapReg(op2.Reg.Reg);
if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
&& op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
{
LEA(32, finalAddr,
MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
}
else
{
bool throwAway;
OpArg offset =
Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
if (flags & memop_SubtractOffset)
{
if (R(finalAddr) != rnMapped)
MOV(32, R(finalAddr), rnMapped);
if (!offset.IsZero())
SUB(32, R(finalAddr), offset);
}
else
MOV_sum(32, finalAddr, rnMapped, offset);
}
}
if ((flags & memop_Writeback) && !(flags & memop_Post))
MOV(32, rnMapped, R(finalAddr));
u32 expectedTarget = Num == 0
? NDS.JIT.Memory.ClassifyAddress9(CurInstr.DataRegion)
: NDS.JIT.Memory.ClassifyAddress7(CurInstr.DataRegion);
if (NDS.JIT.FastMemoryEnabled() && ((!Thumb && CurInstr.Cond() != 0xE) || NDS.JIT.Memory.IsFastmemCompatible(expectedTarget)))
{
if (rdMapped.IsImm())
{
MOV(32, R(RSCRATCH4), rdMapped);
rdMapped = R(RSCRATCH4);
}
u8* memopStart = GetWritableCodePtr();
LoadStorePatch patch;
assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16);
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS.ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
: PatchedLoadFuncs[NDS.ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];
assert(patch.PatchFunc != NULL);
MOV(64, R(RSCRATCH), ImmPtr(Num == 0 ? NDS.JIT.Memory.FastMem9Start : NDS.JIT.Memory.FastMem7Start));
X64Reg maskedAddr = RSCRATCH3;
if (size > 8)
{
maskedAddr = RSCRATCH2;
MOV(32, R(RSCRATCH2), R(RSCRATCH3));
AND(32, R(RSCRATCH2), Imm8(addressMask));
}
u8* memopLoadStoreLocation = GetWritableCodePtr();
if (flags & memop_Store)
{
MOV(size, MRegSum(RSCRATCH, maskedAddr), rdMapped);
}
else
{
if (flags & memop_SignExtend)
MOVSX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
else
MOVZX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
if (size == 32)
{
if (addrIsStatic)
{
if (staticAddress & 0x3)
ROR(32, rdMapped, Imm8((staticAddress & 0x3) * 8));
}
else
{
AND(32, R(RSCRATCH3), Imm8(0x3));
SHL(32, R(RSCRATCH3), Imm8(3));
ROR(32, rdMapped, R(RSCRATCH3));
}
}
}
patch.Offset = memopStart - memopLoadStoreLocation;
patch.Size = GetWritableCodePtr() - memopStart;
assert(patch.Size >= 5);
LoadStorePatches[memopLoadStoreLocation] = patch;
}
else
{
PushRegs(false, false);
void* func = NULL;
if (addrIsStatic)
func = NDS.JIT.Memory.GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
if (func)
{
AND(32, R(RSCRATCH3), Imm8(addressMask));
if (ABI_PARAM1 != RSCRATCH3)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (flags & memop_Store)
MOV(32, R(ABI_PARAM2), rdMapped);
ABI_CallFunction((void (*)())func);
PopRegs(false, false);
if (!(flags & memop_Store))
{
if (size == 32)
{
MOV(32, rdMapped, R(RSCRATCH));
if (staticAddress & 0x3)
ROR(32, rdMapped, Imm8((staticAddress & 0x3) * 8));
}
else
{
if (flags & memop_SignExtend)
MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
else
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
}
}
}
else
{
if (Num == 0)
{
// on Windows param 3 is R8 which is also scratch 4 which can be used for rd
if (flags & memop_Store)
MOV(32, R(ABI_PARAM3), rdMapped);
MOV(64, R(ABI_PARAM2), R(RCPU));
if (ABI_PARAM1 != RSCRATCH3)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (flags & memop_Store)
{
switch (size | NDS.ConsoleType)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
case 8: ABI_CallFunction(&SlowWrite9<u8, 0>); break;
case 33: ABI_CallFunction(&SlowWrite9<u32, 1>); break;
case 17: ABI_CallFunction(&SlowWrite9<u16, 1>); break;
case 9: ABI_CallFunction(&SlowWrite9<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: ABI_CallFunction(&SlowRead9<u32, 0>); break;
case 16: ABI_CallFunction(&SlowRead9<u16, 0>); break;
case 8: ABI_CallFunction(&SlowRead9<u8, 0>); break;
case 33: ABI_CallFunction(&SlowRead9<u32, 1>); break;
case 17: ABI_CallFunction(&SlowRead9<u16, 1>); break;
case 9: ABI_CallFunction(&SlowRead9<u8, 1>); break;
}
}
}
else
{
if (ABI_PARAM1 != RSCRATCH3)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (flags & memop_Store)
{
MOV(32, R(ABI_PARAM2), rdMapped);
switch (size | NDS.ConsoleType)
{
case 32: ABI_CallFunction(&SlowWrite7<u32, 0>); break;
case 16: ABI_CallFunction(&SlowWrite7<u16, 0>); break;
case 8: ABI_CallFunction(&SlowWrite7<u8, 0>); break;
case 33: ABI_CallFunction(&SlowWrite7<u32, 1>); break;
case 17: ABI_CallFunction(&SlowWrite7<u16, 1>); break;
case 9: ABI_CallFunction(&SlowWrite7<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: ABI_CallFunction(&SlowRead7<u32, 0>); break;
case 16: ABI_CallFunction(&SlowRead7<u16, 0>); break;
case 8: ABI_CallFunction(&SlowRead7<u8, 0>); break;
case 33: ABI_CallFunction(&SlowRead7<u32, 1>); break;
case 17: ABI_CallFunction(&SlowRead7<u16, 1>); break;
case 9: ABI_CallFunction(&SlowRead7<u8, 1>); break;
}
}
}
PopRegs(false, false);
if (!(flags & memop_Store))
{
if (flags & memop_SignExtend)
MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
else
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
}
}
}
if (!(flags & memop_Store) && rd == 15)
{
if (size < 32)
Log(LogLevel::Debug, "!!! LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr);
{
if (Num == 1)
{
if (Thumb)
OR(32, rdMapped, Imm8(0x1));
else
AND(32, rdMapped, Imm8(0xFE));
}
Comp_JumpTo(rdMapped.GetSimpleReg());
}
}
}
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn)
{
int regsCount = regs.Count();
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
int firstReg = *regs.begin();
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << firstReg) && !(firstReg == rn && skipLoadingRn))
{
int flags = 0;
if (store)
flags |= memop_Store;
if (decrement && preinc)
flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(firstReg, rn, offset, 32, flags);
return decrement ? -4 : 4;
}
s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
int expectedTarget = Num == 0
? NDS.JIT.Memory.ClassifyAddress9(CurInstr.DataRegion)
: NDS.JIT.Memory.ClassifyAddress7(CurInstr.DataRegion);
if (!store)
Comp_AddCycles_CDI();
else
Comp_AddCycles_CD();
bool compileFastPath = NDS.JIT.FastMemoryEnabled()
&& !usermode && (CurInstr.Cond() < 0xE || NDS.JIT.Memory.IsFastmemCompatible(expectedTarget));
// we need to make sure that the stack stays aligned to 16 bytes
#ifdef _WIN32
// include shadow
u32 stackAlloc = (((regsCount + 4 + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
#else
u32 stackAlloc = (((regsCount + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
#endif
u32 allocOffset = stackAlloc - regsCount * 8;
if (decrement)
MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4 + (preinc ? 0 : 4)));
else
MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(preinc ? 4 : 0));
if (compileFastPath)
{
AND(32, R(RSCRATCH4), Imm8(~3));
u8* fastPathStart = GetWritableCodePtr();
u8* loadStoreAddr[16];
MOV(64, R(RSCRATCH2), ImmPtr(Num == 0 ? NDS.JIT.Memory.FastMem9Start : NDS.JIT.Memory.FastMem7Start));
ADD(64, R(RSCRATCH2), R(RSCRATCH4));
u32 offset = 0;
int i = 0;
for (int reg : regs)
{
loadStoreAddr[i] = GetWritableCodePtr();
OpArg mem = MDisp(RSCRATCH2, offset);
if (store)
{
if (RegCache.LoadedRegs & (1 << reg))
{
MOV(32, mem, MapReg(reg));
}
else
{
LoadReg(reg, RSCRATCH);
loadStoreAddr[i] = GetWritableCodePtr();
MOV(32, mem, R(RSCRATCH));
}
}
else
{
if (RegCache.LoadedRegs & (1 << reg))
{
if (!(reg == rn && skipLoadingRn))
MOV(32, MapReg(reg), mem);
else
MOV(32, R(RSCRATCH), mem); // just touch the memory
}
else
{
MOV(32, R(RSCRATCH), mem);
SaveReg(reg, RSCRATCH);
}
}
offset += 4;
i++;
}
LoadStorePatch patch;
patch.Size = GetWritableCodePtr() - fastPathStart;
SwitchToFarCode();
patch.PatchFunc = GetWritableCodePtr();
for (i = 0; i < regsCount; i++)
{
patch.Offset = fastPathStart - loadStoreAddr[i];
LoadStorePatches[loadStoreAddr[i]] = patch;
}
}
if (!store)
{
PushRegs(false, false, !compileFastPath);
MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
if (allocOffset == 0)
MOV(64, R(ABI_PARAM2), R(RSP));
else
LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
if (Num == 0)
MOV(64, R(ABI_PARAM4), R(RCPU));
switch (Num * 2 | NDS.ConsoleType)
{
case 0: ABI_CallFunction(&SlowBlockTransfer9<false, 0>); break;
case 1: ABI_CallFunction(&SlowBlockTransfer9<false, 1>); break;
case 2: ABI_CallFunction(&SlowBlockTransfer7<false, 0>); break;
case 3: ABI_CallFunction(&SlowBlockTransfer7<false, 1>); break;
}
PopRegs(false, false);
if (allocOffset)
ADD(64, R(RSP), Imm8(allocOffset));
bool firstUserMode = true;
for (int reg : regs)
{
if (usermode && !regs[15] && reg >= 8 && reg < 15)
{
if (firstUserMode)
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
firstUserMode = false;
}
MOV(32, R(RSCRATCH2), Imm32(reg - 8));
POP(RSCRATCH3);
CALL(WriteBanked);
if (!(reg == rn && skipLoadingRn))
{
FixupBranch sucessfulWritten = J_CC(CC_NC);
if (RegCache.LoadedRegs & (1 << reg))
MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
else
SaveReg(reg, RSCRATCH3);
SetJumpTarget(sucessfulWritten);
}
}
else if (!(RegCache.LoadedRegs & (1 << reg)))
{
assert(reg != 15);
POP(RSCRATCH);
SaveReg(reg, RSCRATCH);
}
else if (reg == rn && skipLoadingRn)
{
ADD(64, R(RSP), Imm8(8));
}
else
{
POP(MapReg(reg).GetSimpleReg());
}
}
}
else
{
bool firstUserMode = true;
for (int reg = 15; reg >= 0; reg--)
{
if (regs[reg])
{
if (usermode && reg >= 8 && reg < 15)
{
if (firstUserMode)
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
firstUserMode = false;
}
if (RegCache.Mapping[reg] == INVALID_REG)
LoadReg(reg, RSCRATCH3);
else
MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg]));
MOV(32, R(RSCRATCH2), Imm32(reg - 8));
CALL(ReadBanked);
PUSH(RSCRATCH3);
}
else if (!(RegCache.LoadedRegs & (1 << reg)))
{
LoadReg(reg, RSCRATCH);
PUSH(RSCRATCH);
}
else
{
PUSH(MapReg(reg).GetSimpleReg());
}
}
}
if (allocOffset)
SUB(64, R(RSP), Imm8(allocOffset));
PushRegs(false, false, !compileFastPath);
MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
if (allocOffset)
LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
else
MOV(64, R(ABI_PARAM2), R(RSP));
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
if (Num == 0)
MOV(64, R(ABI_PARAM4), R(RCPU));
switch (Num * 2 | NDS.ConsoleType)
{
case 0: ABI_CallFunction(&SlowBlockTransfer9<true, 0>); break;
case 1: ABI_CallFunction(&SlowBlockTransfer9<true, 1>); break;
case 2: ABI_CallFunction(&SlowBlockTransfer7<true, 0>); break;
case 3: ABI_CallFunction(&SlowBlockTransfer7<true, 1>); break;
}
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
PopRegs(false, false);
}
if (compileFastPath)
{
RET();
SwitchToNearCode();
}
if (!store && regs[15])
{
if (Num == 1)
{
if (Thumb)
OR(32, MapReg(15), Imm8(1));
else
AND(32, MapReg(15), Imm8(0xFE));
}
Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
}
return offset;
}
void Compiler::A_Comp_MemWB()
{
bool load = CurInstr.Instr & (1 << 20);
bool byte = CurInstr.Instr & (1 << 22);
int size = byte ? 8 : 32;
int flags = 0;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
Op2 offset;
if (!(CurInstr.Instr & (1 << 25)))
{
offset = Op2(CurInstr.Instr & 0xFFF);
}
else
{
int op = (CurInstr.Instr >> 5) & 0x3;
int amount = (CurInstr.Instr >> 7) & 0x1F;
int rm = CurInstr.A_Reg(0);
offset = Op2(rm, op, amount);
}
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
}
void Compiler::A_Comp_MemHalf()
{
Op2 offset = CurInstr.Instr & (1 << 22)
? Op2(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
: Op2(CurInstr.A_Reg(0), 0, 0);
int op = (CurInstr.Instr >> 5) & 0x3;
bool load = CurInstr.Instr & (1 << 20);
bool signExtend = false;
int size;
if (!load)
{
size = op == 1 ? 16 : 32;
load = op == 2;
}
else if (load)
{
size = op == 2 ? 8 : 16;
signExtend = op > 1;
}
if (size == 32 && Num == 1)
return; // NOP
int flags = 0;
if (signExtend)
flags |= memop_SignExtend;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
}
void Compiler::T_Comp_MemReg()
{
int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op & 0x2;
bool byte = op & 0x1;
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0),
byte ? 8 : 32, load ? 0 : memop_Store);
}
void Compiler::A_Comp_LDM_STM()
{
BitSet16 regs(CurInstr.Instr & 0xFFFF);
bool load = CurInstr.Instr & (1 << 20);
bool pre = CurInstr.Instr & (1 << 24);
bool add = CurInstr.Instr & (1 << 23);
bool writeback = CurInstr.Instr & (1 << 21);
bool usermode = CurInstr.Instr & (1 << 22);
OpArg rn = MapReg(CurInstr.A_Reg(16));
if (load && writeback && regs[CurInstr.A_Reg(16)])
writeback = Num == 0
&& (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1));
s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode, load && writeback);
if (writeback && offset)
ADD(32, rn, Imm32(offset));
}
void Compiler::T_Comp_MemImm()
{
int op = (CurInstr.Instr >> 11) & 0x3;
bool load = op & 0x1;
bool byte = op & 0x2;
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
byte ? 8 : 32, load ? 0 : memop_Store);
}
void Compiler::T_Comp_MemRegHalf()
{
int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op != 0;
int size = op != 1 ? 16 : 8;
bool signExtend = op & 1;
int flags = 0;
if (signExtend)
flags |= memop_SignExtend;
if (!load)
flags |= memop_Store;
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0),
size, flags);
}
void Compiler::T_Comp_MemImmHalf()
{
u32 offset = (CurInstr.Instr >> 5) & 0x3E;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
load ? 0 : memop_Store);
}
void Compiler::T_Comp_LoadPCRel()
{
u32 offset = (CurInstr.Instr & 0xFF) << 2;
u32 addr = (R15 & ~0x2) + offset;
if (!NDS.JIT.LiteralOptimizationsEnabled() || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
}
void Compiler::T_Comp_MemSPRel()
{
u32 offset = (CurInstr.Instr & 0xFF) * 4;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32,
load ? 0 : memop_Store);
}
void Compiler::T_Comp_PUSH_POP()
{
bool load = CurInstr.Instr & (1 << 11);
BitSet16 regs(CurInstr.Instr & 0xFF);
if (CurInstr.Instr & (1 << 8))
{
if (load)
regs[15] = true;
else
regs[14] = true;
}
OpArg sp = MapReg(13);
s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false, false);
if (offset)
ADD(32, sp, Imm8(offset)); // offset will be always be in range since PUSH accesses 9 regs max
}
void Compiler::T_Comp_LDMIA_STMIA()
{
BitSet16 regs(CurInstr.Instr & 0xFF);
OpArg rb = MapReg(CurInstr.T_Reg(8));
bool load = CurInstr.Instr & (1 << 11);
bool writeback = !load || !regs[CurInstr.T_Reg(8)];
s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false, load && writeback);
if (writeback && offset)
ADD(32, rb, Imm8(offset));
}
}

View File

@ -1,565 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "ARM_InstrInfo.h"
#include <stdio.h>
#include "ARMJIT.h"
namespace melonDS::ARMInstrInfo
{
#define ak(x) ((x) << 23)
enum {
A_Read0 = 1 << 0,
A_Read16 = 1 << 1,
A_Read8 = 1 << 2,
A_Read12 = 1 << 3,
A_Write12 = 1 << 4,
A_Write16 = 1 << 5,
A_MemWriteback = 1 << 6,
A_BranchAlways = 1 << 7,
// for STRD/LDRD
A_Read12Double = 1 << 8,
A_Write12Double = 1 << 9,
A_Link = 1 << 10,
A_UnkOnARM7 = 1 << 11,
A_SetNZ = 1 << 12,
A_SetCV = 1 << 13,
A_SetMaybeC = 1 << 14,
A_MulFlags = 1 << 15,
A_ReadC = 1 << 16,
A_RRXReadC = 1 << 17,
A_StaticShiftSetC = 1 << 18,
A_SetC = 1 << 19,
A_SetCImm = 1 << 20,
A_WriteMem = 1 << 21,
A_LoadMem = 1 << 22
};
#define A_BIOP A_Read16
#define A_MONOOP 0
#define A_ARITH_LSL_IMM A_SetCV
#define A_LOGIC_LSL_IMM A_StaticShiftSetC
#define A_ARITH_SHIFT_IMM A_SetCV
#define A_LOGIC_SHIFT_IMM A_SetC
#define A_ARITH_SHIFT_REG A_SetCV
#define A_LOGIC_SHIFT_REG A_SetMaybeC
#define A_ARITH_IMM A_SetCV
#define A_LOGIC_IMM A_SetCImm
#define A_IMPLEMENT_ALU_OP(x,k,a,c) \
const u32 A_##x##_IMM = A_Write12 | c | A_##k | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
const u32 A_##x##_REG_LSR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
const u32 A_##x##_REG_ASR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
const u32 A_##x##_REG_LSL_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
const u32 A_##x##_REG_LSR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
const u32 A_##x##_REG_ASR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
const u32 A_##x##_REG_ROR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); \
\
const u32 A_##x##_IMM_S = A_SetNZ | c | A_##a##_IMM | A_Write12 | A_##k | ak(ak_##x##_IMM_S); \
const u32 A_##x##_REG_LSL_IMM_S = A_SetNZ | c | A_##a##_LSL_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM_S); \
const u32 A_##x##_REG_LSR_IMM_S = A_SetNZ | c | A_##a##_SHIFT_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM_S); \
const u32 A_##x##_REG_ASR_IMM_S = A_SetNZ | c | A_##a##_SHIFT_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM_S); \
const u32 A_##x##_REG_ROR_IMM_S = A_RRXReadC | A_SetNZ | c | A_##a##_SHIFT_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM_S); \
const u32 A_##x##_REG_LSL_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG_S); \
const u32 A_##x##_REG_LSR_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG_S); \
const u32 A_##x##_REG_ASR_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG_S); \
const u32 A_##x##_REG_ROR_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG_S);
A_IMPLEMENT_ALU_OP(AND,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(EOR,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(SUB,BIOP,ARITH,0)
A_IMPLEMENT_ALU_OP(RSB,BIOP,ARITH,0)
A_IMPLEMENT_ALU_OP(ADD,BIOP,ARITH,0)
A_IMPLEMENT_ALU_OP(ADC,BIOP,ARITH,A_ReadC)
A_IMPLEMENT_ALU_OP(SBC,BIOP,ARITH,A_ReadC)
A_IMPLEMENT_ALU_OP(RSC,BIOP,ARITH,A_ReadC)
A_IMPLEMENT_ALU_OP(ORR,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(MOV,MONOOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(BIC,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(MVN,MONOOP,LOGIC,0)
const u32 A_MOV_REG_LSL_IMM_DBG = A_MOV_REG_LSL_IMM;
#define A_IMPLEMENT_ALU_TEST(x,a) \
const u32 A_##x##_IMM = A_SetNZ | A_Read16 | A_##a##_IMM | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL_IMM = A_SetNZ | A_Read16 | A_##a##_LSL_IMM | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
const u32 A_##x##_REG_LSR_IMM = A_SetNZ | A_Read16 | A_##a##_SHIFT_IMM | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
const u32 A_##x##_REG_ASR_IMM = A_SetNZ | A_Read16 | A_##a##_SHIFT_IMM | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_SetNZ | A_Read16 | A_##a##_SHIFT_IMM | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
const u32 A_##x##_REG_LSL_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
const u32 A_##x##_REG_LSR_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
const u32 A_##x##_REG_ASR_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
const u32 A_##x##_REG_ROR_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG);
A_IMPLEMENT_ALU_TEST(TST,LOGIC)
A_IMPLEMENT_ALU_TEST(TEQ,LOGIC)
A_IMPLEMENT_ALU_TEST(CMP,ARITH)
A_IMPLEMENT_ALU_TEST(CMN,ARITH)
const u32 A_MUL = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | ak(ak_MUL);
const u32 A_MLA = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_MLA);
const u32 A_UMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_UMULL);
const u32 A_UMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL);
const u32 A_SMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL);
const u32 A_SMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL);
const u32 A_SMLAxy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAxy);
const u32 A_SMLAWy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAWy);
const u32 A_SMULWy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULWy);
const u32 A_SMLALxy = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLALxy);
const u32 A_SMULxy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULxy);
const u32 A_CLZ = A_Write12 | A_Read0 | A_UnkOnARM7 | ak(ak_CLZ);
const u32 A_QADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QADD);
const u32 A_QSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QSUB);
const u32 A_QDADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDADD);
const u32 A_QDSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDSUB);
#define A_LDR A_Write12 | A_LoadMem
#define A_STR A_Read12 | A_WriteMem
#define A_IMPLEMENT_WB_LDRSTR(x,k) \
const u32 A_##x##_IMM = A_##k | A_Read16 | A_MemWriteback | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_LSL); \
const u32 A_##x##_REG_LSR = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_LSR); \
const u32 A_##x##_REG_ASR = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_ASR); \
const u32 A_##x##_REG_ROR = A_##k | A_RRXReadC | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_ROR); \
\
const u32 A_##x##_POST_IMM = A_##k | A_Read16 | A_Write16 | ak(ak_##x##_POST_IMM); \
const u32 A_##x##_POST_REG_LSL = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_LSL); \
const u32 A_##x##_POST_REG_LSR = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_LSR); \
const u32 A_##x##_POST_REG_ASR = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_ASR); \
const u32 A_##x##_POST_REG_ROR = A_##k | A_RRXReadC | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_ROR);
A_IMPLEMENT_WB_LDRSTR(STR,STR)
A_IMPLEMENT_WB_LDRSTR(STRB,STR)
A_IMPLEMENT_WB_LDRSTR(LDR,LDR)
A_IMPLEMENT_WB_LDRSTR(LDRB,LDR)
#define A_LDRD A_Write12Double | A_LoadMem
#define A_STRD A_Read12Double | A_WriteMem
#define A_IMPLEMENT_HD_LDRSTR(x,k) \
const u32 A_##x##_IMM = A_##k | A_Read16 | A_MemWriteback | ak(ak_##x##_IMM); \
const u32 A_##x##_REG = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG); \
const u32 A_##x##_POST_IMM = A_##k | A_Read16 | A_Write16 | ak(ak_##x##_POST_IMM); \
const u32 A_##x##_POST_REG = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG);
A_IMPLEMENT_HD_LDRSTR(STRH,STR)
A_IMPLEMENT_HD_LDRSTR(LDRD,LDRD)
A_IMPLEMENT_HD_LDRSTR(STRD,STRD)
A_IMPLEMENT_HD_LDRSTR(LDRH,LDR)
A_IMPLEMENT_HD_LDRSTR(LDRSB,LDR)
A_IMPLEMENT_HD_LDRSTR(LDRSH,LDR)
const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | A_LoadMem | A_WriteMem | ak(ak_SWP);
const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | A_LoadMem | A_WriteMem | ak(ak_SWPB);
const u32 A_LDM = A_Read16 | A_MemWriteback | A_LoadMem | ak(ak_LDM);
const u32 A_STM = A_Read16 | A_MemWriteback | A_WriteMem | ak(ak_STM);
const u32 A_B = A_BranchAlways | ak(ak_B);
const u32 A_BL = A_BranchAlways | A_Link | ak(ak_BL);
const u32 A_BLX_IMM = A_BranchAlways | A_Link | ak(ak_BLX_IMM);
const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX);
const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG);
const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK);
const u32 A_MSR_IMM = ak(ak_MSR_IMM);
const u32 A_MSR_REG = A_Read0 | ak(ak_MSR_REG);
const u32 A_MRS = A_Write12 | ak(ak_MRS);
const u32 A_MCR = A_Read12 | ak(ak_MCR);
const u32 A_MRC = A_Write12 | ak(ak_MRC);
const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC);
// THUMB
#define tk(x) ((x) << 22)
enum {
T_Read0 = 1 << 0,
T_Read3 = 1 << 1,
T_Read6 = 1 << 2,
T_Read8 = 1 << 3,
T_Write0 = 1 << 4,
T_Write8 = 1 << 5,
T_ReadHi0 = 1 << 6,
T_ReadHi3 = 1 << 7,
T_WriteHi0 = 1 << 8,
T_ReadR13 = 1 << 9,
T_WriteR13 = 1 << 10,
T_BranchAlways = 1 << 12,
T_ReadR14 = 1 << 13,
T_WriteR14 = 1 << 14,
T_SetNZ = 1 << 15,
T_SetCV = 1 << 16,
T_SetMaybeC = 1 << 17,
T_ReadC = 1 << 18,
T_SetC = 1 << 19,
T_WriteMem = 1 << 20,
T_LoadMem = 1 << 21,
};
const u32 T_LSL_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSL_IMM);
const u32 T_LSR_IMM = T_SetNZ | T_SetC | T_Write0 | T_Read3 | tk(tk_LSR_IMM);
const u32 T_ASR_IMM = T_SetNZ | T_SetC | T_Write0 | T_Read3 | tk(tk_ASR_IMM);
const u32 T_ADD_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_ADD_REG_);
const u32 T_SUB_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_SUB_REG_);
const u32 T_ADD_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_ADD_IMM_);
const u32 T_SUB_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_SUB_IMM_);
const u32 T_MOV_IMM = T_SetNZ | T_Write8 | tk(tk_MOV_IMM);
const u32 T_CMP_IMM = T_SetNZ | T_SetCV | T_Read8 | tk(tk_CMP_IMM);
const u32 T_ADD_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_ADD_IMM);
const u32 T_SUB_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_SUB_IMM);
const u32 T_AND_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_AND_REG);
const u32 T_EOR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_EOR_REG);
const u32 T_LSL_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSL_REG);
const u32 T_LSR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSR_REG);
const u32 T_ASR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ASR_REG);
const u32 T_ADC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_ADC_REG);
const u32 T_SBC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_SBC_REG);
const u32 T_ROR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ROR_REG);
const u32 T_TST_REG = T_SetNZ | T_Read0 | T_Read3 | tk(tk_TST_REG);
const u32 T_NEG_REG = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_NEG_REG);
const u32 T_CMP_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMP_REG);
const u32 T_CMN_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMN_REG);
const u32 T_ORR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_ORR_REG);
const u32 T_MUL_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_MUL_REG);
const u32 T_BIC_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_BIC_REG);
const u32 T_MVN_REG = T_SetNZ | T_Write0 | T_Read3 | tk(tk_MVN_REG);
const u32 T_ADD_HIREG = T_WriteHi0 | T_ReadHi0 | T_ReadHi3 | tk(tk_ADD_HIREG);
const u32 T_CMP_HIREG = T_SetNZ | T_SetCV | T_ReadHi0 | T_ReadHi3 | tk(tk_CMP_HIREG);
const u32 T_MOV_HIREG = T_WriteHi0 | T_ReadHi3 | tk(tk_MOV_HIREG);
const u32 T_ADD_PCREL = T_Write8 | tk(tk_ADD_PCREL);
const u32 T_ADD_SPREL = T_Write8 | T_ReadR13 | tk(tk_ADD_SPREL);
const u32 T_ADD_SP = T_WriteR13 | T_ReadR13 | tk(tk_ADD_SP);
const u32 T_LDR_PCREL = T_Write8 | T_LoadMem | tk(tk_LDR_PCREL);
const u32 T_STR_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STR_REG);
const u32 T_STRB_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRB_REG);
const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDR_REG);
const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRB_REG);
const u32 T_STRH_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRH_REG);
const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSB_REG);
const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRH_REG);
const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSH_REG);
const u32 T_STR_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STR_IMM);
const u32 T_LDR_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDR_IMM);
const u32 T_STRB_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRB_IMM);
const u32 T_LDRB_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRB_IMM);
const u32 T_STRH_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRH_IMM);
const u32 T_LDRH_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRH_IMM);
const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | T_WriteMem | tk(tk_STR_SPREL);
const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | T_LoadMem | tk(tk_LDR_SPREL);
const u32 T_PUSH = T_ReadR13 | T_WriteR13 | T_WriteMem | tk(tk_PUSH);
const u32 T_POP = T_ReadR13 | T_WriteR13 | T_LoadMem | tk(tk_POP);
const u32 T_LDMIA = T_Read8 | T_Write8 | T_LoadMem | tk(tk_LDMIA);
const u32 T_STMIA = T_Read8 | T_Write8 | T_WriteMem | tk(tk_STMIA);
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX);
const u32 T_BLX_REG = T_BranchAlways | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
const u32 T_B = T_BranchAlways | tk(tk_B);
const u32 T_BL_LONG_1 = T_WriteR14 | tk(tk_BL_LONG_1);
const u32 T_BL_LONG_2 = T_BranchAlways | T_ReadR14 | T_WriteR14 | tk(tk_BL_LONG_2);
const u32 T_UNK = T_BranchAlways | T_WriteR14 | tk(tk_UNK);
const u32 T_SVC = T_BranchAlways | T_WriteR14 | tk(tk_SVC);
#define INSTRFUNC_PROTO(x) u32 x
#include "ARM_InstrTable.h"
#undef INSTRFUNC_PROTO
Info Decode(bool thumb, u32 num, u32 instr, bool literaloptimizations)
{
const u8 FlagsReadPerCond[7] = {
flag_Z,
flag_C,
flag_N,
flag_V,
flag_C | flag_Z,
flag_N | flag_V,
flag_Z | flag_N | flag_V};
Info res = {0};
if (thumb)
{
u32 data = THUMBInstrTable[(instr >> 6) & 0x3FF];
res.Kind = (data >> 22) & 0x3F;
if (data & T_Read0)
res.SrcRegs |= 1 << (instr & 0x7);
if (data & T_Read3)
res.SrcRegs |= 1 << ((instr >> 3) & 0x7);
if (data & T_Read6)
res.SrcRegs |= 1 << ((instr >> 6) & 0x7);
if (data & T_Read8)
res.SrcRegs |= 1 << ((instr >> 8) & 0x7);
if (data & T_Write0)
res.DstRegs |= 1 << (instr & 0x7);
if (data & T_Write8)
res.DstRegs |= 1 << ((instr >> 8) & 0x7);
if (data & T_ReadHi0)
res.SrcRegs |= 1 << ((instr & 0x7) | ((instr >> 4) & 0x8));
if (data & T_ReadHi3)
res.SrcRegs |= 1 << ((instr >> 3) & 0xF);
if (data & T_WriteHi0)
res.DstRegs |= 1 << ((instr & 0x7) | ((instr >> 4) & 0x8));
if (data & T_ReadR13)
res.SrcRegs |= (1 << 13);
if (data & T_WriteR13)
res.DstRegs |= (1 << 13);
if (data & T_WriteR14)
res.DstRegs |= (1 << 14);
if (data & T_ReadR14)
res.SrcRegs |= (1 << 14);
if (data & T_BranchAlways)
res.DstRegs |= (1 << 15);
if (res.Kind == tk_POP && instr & (1 << 8))
res.DstRegs |= 1 << 15;
if (data & T_SetNZ)
res.WriteFlags |= flag_N | flag_Z;
if (data & T_SetCV)
res.WriteFlags |= flag_C | flag_V;
if (data & T_SetMaybeC)
res.WriteFlags |= flag_C << 4;
if (data & T_ReadC)
res.ReadFlags |= flag_C;
if (data & T_SetC)
res.WriteFlags |= flag_C;
if (data & T_WriteMem)
res.SpecialKind = special_WriteMem;
if (data & T_LoadMem)
{
if (res.Kind == tk_LDR_PCREL)
{
if (!literaloptimizations)
res.SrcRegs |= 1 << 15;
res.SpecialKind = special_LoadLiteral;
}
else
{
res.SpecialKind = special_LoadMem;
}
}
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
{
u32 set = (instr & 0xFF);
res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
res.DstRegs |= set;
}
if (res.Kind == tk_STMIA || res.Kind == tk_PUSH)
{
u32 set = (instr & 0xFF);
if (res.Kind == tk_PUSH && instr & (1 << 8))
set |= (1 << 14);
res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
res.SrcRegs |= set;
}
res.EndBlock |= res.Branches();
if (res.Kind == tk_BCOND)
res.ReadFlags |= FlagsReadPerCond[(instr >> 9) & 0x7];
return res;
}
else
{
u32 data = ARMInstrTable[((instr >> 4) & 0xF) | ((instr >> 16) & 0xFF0)];
if (num == 0 && (instr & 0xFE000000) == 0xFA000000)
data = A_BLX_IMM;
else if ((instr >> 28) == 0xF)
data = ak(ak_Nop);
if (data & A_UnkOnARM7 && num == 1)
data = A_UNK;
res.Kind = (data >> 23) & 0x1FF;
if (res.Kind >= ak_SMLAxy && res.Kind <= ak_SMULxy && num == 1)
{
data = ak(ak_Nop);
res.Kind = ak_Nop;
}
if (res.Kind == ak_MCR)
{
u32 cn = (instr >> 16) & 0xF;
u32 cm = instr & 0xF;
u32 cpinfo = (instr >> 5) & 0x7;
u32 id = (cn<<8)|(cm<<4)|cpinfo;
if (id == 0x704 || id == 0x782 || id == 0x750 || id == 0x751 || id == 0x752)
res.EndBlock |= true;
if (id == 0x704 || id == 0x782)
res.SpecialKind = special_WaitForInterrupt;
}
if (res.Kind == ak_MCR || res.Kind == ak_MRC)
{
u32 cp = ((instr >> 8) & 0xF);
if ((num == 0 && cp != 15) || (num == 1 && cp != 14))
{
data = A_UNK;
res.Kind = ak_UNK;
}
}
if (res.Kind == ak_MRS && !(instr & (1 << 22)))
res.ReadFlags |= flag_N | flag_Z | flag_C | flag_V;
if ((res.Kind == ak_MSR_IMM || res.Kind == ak_MSR_REG) && instr & (1 << 19))
res.WriteFlags |= flag_N | flag_Z | flag_C | flag_V;
if (data & A_Read0)
res.SrcRegs |= 1 << (instr & 0xF);
if (data & A_Read16)
res.SrcRegs |= 1 << ((instr >> 16) & 0xF);
if (data & A_Read8)
res.SrcRegs |= 1 << ((instr >> 8) & 0xF);
if (data & A_Read12)
res.SrcRegs |= 1 << ((instr >> 12) & 0xF);
if (data & A_Write12)
res.DstRegs |= 1 << ((instr >> 12) & 0xF);
if (data & A_Write16)
res.DstRegs |= 1 << ((instr >> 16) & 0xF);
if (data & A_MemWriteback && instr & (1 << 21))
res.DstRegs |= 1 << ((instr >> 16) & 0xF);
if (data & A_BranchAlways)
res.DstRegs |= 1 << 15;
if (data & A_Read12Double)
{
res.SrcRegs |= 1 << ((instr >> 12) & 0xF);
res.SrcRegs |= 1 << (((instr >> 12) & 0xF) + 1);
}
if (data & A_Write12Double)
{
res.DstRegs |= 1 << ((instr >> 12) & 0xF);
res.DstRegs |= 1 << (((instr >> 12) & 0xF) + 1);
}
if (data & A_Link)
res.DstRegs |= 1 << 14;
if (res.Kind == ak_LDM)
res.DstRegs |= instr & (1 << 15); // this is right
if (res.Kind == ak_STM)
res.SrcRegs |= instr & (1 << 15);
if (data & A_SetNZ)
res.WriteFlags |= flag_N | flag_Z;
if (data & A_SetCV)
res.WriteFlags |= flag_C | flag_V;
if (data & A_SetMaybeC)
res.WriteFlags |= flag_C << 4;
if ((data & A_MulFlags) && (instr & (1 << 20)))
res.WriteFlags |= flag_N | flag_Z;
if (data & A_ReadC)
res.ReadFlags |= flag_C;
if ((data & A_RRXReadC) && !((instr >> 7) & 0x1F))
res.ReadFlags |= flag_C;
if ((data & A_SetC)
|| ((data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F))
|| ((data & A_SetCImm) && ((instr >> 7) & 0x1E)))
res.WriteFlags |= flag_C;
if (data & A_WriteMem)
res.SpecialKind = special_WriteMem;
if (data & A_LoadMem)
{
if (res.SrcRegs == (1 << 15))
res.SpecialKind = special_LoadLiteral;
else
res.SpecialKind = special_LoadMem;
}
if (res.Kind == ak_LDM)
{
u16 set = (instr & 0xFFFF);
res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
res.DstRegs |= set;
// when the instruction is executed not in usermode a banked register in memory will be written to
// but the unbanked register will still be allocated, so it is expected to carry the proper value
// thus it is a source register
if (instr & (1<<22))
res.SrcRegs |= set & 0x7F00;
}
if (res.Kind == ak_STM)
{
u16 set = (instr & 0xFFFF);
res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
res.SrcRegs |= set;
}
if ((instr >> 28) < 0xE)
{
// make non conditional flag sets conditional
res.WriteFlags = (res.WriteFlags | (res.WriteFlags << 4)) & 0xF0;
res.ReadFlags |= FlagsReadPerCond[instr >> 29];
}
res.EndBlock |= res.Branches();
return res;
}
}
}

View File

@ -1,281 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMINSTRINFO_H
#define ARMINSTRINFO_H
#include "types.h"
namespace melonDS::ARMInstrInfo
{
// Instruction kinds, for faster dispatch
#define ak_ALU(n) \
ak_##n##_REG_LSL_IMM, \
ak_##n##_REG_LSR_IMM, \
ak_##n##_REG_ASR_IMM, \
ak_##n##_REG_ROR_IMM, \
\
ak_##n##_REG_LSL_REG, \
ak_##n##_REG_LSR_REG, \
ak_##n##_REG_ASR_REG, \
ak_##n##_REG_ROR_REG, \
\
ak_##n##_IMM, \
\
ak_##n##_REG_LSL_IMM_S, \
ak_##n##_REG_LSR_IMM_S, \
ak_##n##_REG_ASR_IMM_S, \
ak_##n##_REG_ROR_IMM_S, \
\
ak_##n##_REG_LSL_REG_S, \
ak_##n##_REG_LSR_REG_S, \
ak_##n##_REG_ASR_REG_S, \
ak_##n##_REG_ROR_REG_S, \
\
ak_##n##_IMM_S \
#define ak_Test(n) \
ak_##n##_REG_LSL_IMM, \
ak_##n##_REG_LSR_IMM, \
ak_##n##_REG_ASR_IMM, \
ak_##n##_REG_ROR_IMM, \
\
ak_##n##_REG_LSL_REG, \
ak_##n##_REG_LSR_REG, \
ak_##n##_REG_ASR_REG, \
ak_##n##_REG_ROR_REG, \
\
ak_##n##_IMM
#define ak_WB_LDRSTR(n) \
ak_##n##_REG_LSL, \
ak_##n##_REG_LSR, \
ak_##n##_REG_ASR, \
ak_##n##_REG_ROR, \
\
ak_##n##_IMM, \
\
ak_##n##_POST_REG_LSL, \
ak_##n##_POST_REG_LSR, \
ak_##n##_POST_REG_ASR, \
ak_##n##_POST_REG_ROR, \
\
ak_##n##_POST_IMM
#define ak_HD_LDRSTR(n) \
ak_##n##_REG, \
ak_##n##_IMM, \
\
ak_##n##_POST_REG, \
ak_##n##_POST_IMM
enum
{
ak_ALU(AND),
ak_ALU(EOR),
ak_ALU(SUB),
ak_ALU(RSB),
ak_ALU(ADD),
ak_ALU(ADC),
ak_ALU(SBC),
ak_ALU(RSC),
ak_ALU(ORR),
ak_ALU(MOV),
ak_ALU(BIC),
ak_ALU(MVN),
ak_Test(TST),
ak_Test(TEQ),
ak_Test(CMP),
ak_Test(CMN),
ak_MUL,
ak_MLA,
ak_UMULL,
ak_UMLAL,
ak_SMULL,
ak_SMLAL,
ak_SMLAxy,
ak_SMLAWy,
ak_SMULWy,
ak_SMLALxy,
ak_SMULxy,
ak_CLZ,
ak_QADD,
ak_QSUB,
ak_QDADD,
ak_QDSUB,
ak_WB_LDRSTR(STR),
ak_WB_LDRSTR(STRB),
ak_WB_LDRSTR(LDR),
ak_WB_LDRSTR(LDRB),
ak_HD_LDRSTR(STRH),
ak_HD_LDRSTR(LDRD),
ak_HD_LDRSTR(STRD),
ak_HD_LDRSTR(LDRH),
ak_HD_LDRSTR(LDRSB),
ak_HD_LDRSTR(LDRSH),
ak_SWP,
ak_SWPB,
ak_LDM,
ak_STM,
ak_B,
ak_BL,
ak_BLX_IMM,
ak_BX,
ak_BLX_REG,
ak_UNK,
ak_MSR_IMM,
ak_MSR_REG,
ak_MRS,
ak_MCR,
ak_MRC,
ak_SVC,
ak_Nop,
ak_Count,
tk_LSL_IMM = 0,
tk_LSR_IMM,
tk_ASR_IMM,
tk_ADD_REG_,
tk_SUB_REG_,
tk_ADD_IMM_,
tk_SUB_IMM_,
tk_MOV_IMM,
tk_CMP_IMM,
tk_ADD_IMM,
tk_SUB_IMM,
tk_AND_REG,
tk_EOR_REG,
tk_LSL_REG,
tk_LSR_REG,
tk_ASR_REG,
tk_ADC_REG,
tk_SBC_REG,
tk_ROR_REG,
tk_TST_REG,
tk_NEG_REG,
tk_CMP_REG,
tk_CMN_REG,
tk_ORR_REG,
tk_MUL_REG,
tk_BIC_REG,
tk_MVN_REG,
tk_ADD_HIREG,
tk_CMP_HIREG,
tk_MOV_HIREG,
tk_ADD_PCREL,
tk_ADD_SPREL,
tk_ADD_SP,
tk_LDR_PCREL,
tk_STR_REG,
tk_STRB_REG,
tk_LDR_REG,
tk_LDRB_REG,
tk_STRH_REG,
tk_LDRSB_REG,
tk_LDRH_REG,
tk_LDRSH_REG,
tk_STR_IMM,
tk_LDR_IMM,
tk_STRB_IMM,
tk_LDRB_IMM,
tk_STRH_IMM,
tk_LDRH_IMM,
tk_STR_SPREL,
tk_LDR_SPREL,
tk_PUSH,
tk_POP,
tk_LDMIA,
tk_STMIA,
tk_BCOND,
tk_BX,
tk_BLX_REG,
tk_B,
tk_BL_LONG_1,
tk_BL_LONG_2,
tk_UNK,
tk_SVC,
// not a real instruction
tk_BL_LONG,
tk_Count
};
enum
{
flag_N = 1 << 3,
flag_Z = 1 << 2,
flag_C = 1 << 1,
flag_V = 1 << 0,
};
enum
{
special_NotSpecialAtAll = 0,
special_WriteMem,
special_LoadMem,
special_WaitForInterrupt,
special_LoadLiteral
};
struct Info
{
u16 DstRegs, SrcRegs, NotStrictlyNeeded;
u16 Kind;
u8 SpecialKind;
u8 ReadFlags;
// lower 4 bits - set always
// upper 4 bits - might set flag
u8 WriteFlags;
bool EndBlock;
bool Branches() const
{
return DstRegs & (1 << 15);
}
};
Info Decode(bool thumb, u32 num, u32 instr, bool literaloptimizations);
}
#endif

View File

@ -1,5 +1,5 @@
/*
Copyright 2016-2024 melonDS team
Copyright 2016-2019 Arisotura
This file is part of melonDS.

View File

@ -1,140 +0,0 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef MELONDS_ARGS_H
#define MELONDS_ARGS_H
#include <array>
#include <optional>
#include <memory>
#include "NDSCart.h"
#include "GBACart.h"
#include "types.h"
#include "MemConstants.h"
#include "DSi_NAND.h"
#include "FATStorage.h"
#include "FreeBIOS.h"
#include "GPU3D_Soft.h"
#include "SPI_Firmware.h"
#include "SPU.h"
namespace melonDS
{
namespace NDSCart { class CartCommon; }
namespace GBACart { class CartCommon; }
template<size_t N>
constexpr std::array<u8, N> BrokenBIOS = []() constexpr {
std::array<u8, N> broken {};
for (int i = 0; i < 16; i++)
{
broken[i*4+0] = 0xE7;
broken[i*4+1] = 0xFF;
broken[i*4+2] = 0xDE;
broken[i*4+3] = 0xFF;
}
return broken;
}();
/// Arguments that configure the JIT.
/// Ignored in builds that don't have the JIT included.
struct JITArgs
{
unsigned MaxBlockSize = 32;
bool LiteralOptimizations = true;
bool BranchOptimizations = true;
/// Ignored in builds that have fast memory excluded
/// (even if the JIT is otherwise available).
/// Enabled by default, but frontends should disable this when debugging
/// so the constants segfaults don't hinder debugging.
bool FastMemory = true;
};
using ARM9BIOSImage = std::array<u8, ARM9BIOSSize>;
using ARM7BIOSImage = std::array<u8, ARM7BIOSSize>;
using DSiBIOSImage = std::array<u8, DSiBIOSSize>;
struct GDBArgs
{
u16 PortARM7 = 0;
u16 PortARM9 = 0;
bool ARM7BreakOnStartup = false;
bool ARM9BreakOnStartup = false;
};
/// Arguments to pass into the NDS constructor.
/// New fields here should have default values if possible.
struct NDSArgs
{
/// NDS ARM9 BIOS to install.
/// Defaults to FreeBIOS, which is not compatible with DSi mode.
std::unique_ptr<ARM9BIOSImage> ARM9BIOS = std::make_unique<ARM9BIOSImage>(bios_arm9_bin);
/// NDS ARM7 BIOS to install.
/// Defaults to FreeBIOS, which is not compatible with DSi mode.
std::unique_ptr<ARM7BIOSImage> ARM7BIOS = std::make_unique<ARM7BIOSImage>(bios_arm7_bin);
/// Firmware image to install.
/// Defaults to generated NDS firmware.
/// Generated firmware is not compatible with DSi mode.
melonDS::Firmware Firmware {0};
/// How the JIT should be configured when initializing.
/// Defaults to enabled, with default settings.
/// To disable the JIT, set this to std::nullopt.
/// Ignored in builds that don't have the JIT included.
std::optional<JITArgs> JIT = JITArgs();
AudioBitDepth BitDepth = AudioBitDepth::Auto;
AudioInterpolation Interpolation = AudioInterpolation::None;
/// How the GDB stub should be handled.
/// Defaults to disabled.
/// Ignored in builds that don't have the GDB stub included.
std::optional<GDBArgs> GDB = std::nullopt;
/// The 3D renderer to initialize the DS with.
/// Defaults to the software renderer.
/// Can be changed later at any time.
std::unique_ptr<melonDS::Renderer3D> Renderer3D = std::make_unique<SoftRenderer>();
};
/// Arguments to pass into the DSi constructor.
/// New fields here should have default values if possible.
/// Contains no virtual methods, so there's no vtable.
struct DSiArgs final : public NDSArgs
{
std::unique_ptr<DSiBIOSImage> ARM9iBIOS = std::make_unique<DSiBIOSImage>(BrokenBIOS<DSiBIOSSize>);
std::unique_ptr<DSiBIOSImage> ARM7iBIOS = std::make_unique<DSiBIOSImage>(BrokenBIOS<DSiBIOSSize>);
/// NAND image to install.
/// Required, there is no default value.
DSi_NAND::NANDImage NANDImage;
/// SD card to install.
/// Defaults to std::nullopt, which means no SD card.
std::optional<FATStorage> DSiSDCard;
bool FullBIOSBoot = false;
};
}
#endif //MELONDS_ARGS_H

View File

@ -1,214 +1,33 @@
set (CMAKE_CXX_STANDARD 17)
include(FixInterfaceIncludes)
project(core)
add_library(core STATIC
ARCodeFile.cpp
AREngine.cpp
ARM.cpp
ARM_InstrTable.h
ARMInterpreter.cpp
ARMInterpreter_ALU.cpp
ARMInterpreter_Branch.cpp
ARMInterpreter_LoadStore.cpp
CP15.cpp
CRC32.cpp
DMA.cpp
DMA_Timings.h
DMA_Timings.cpp
DSi.cpp
DSi_AES.cpp
DSi_Camera.cpp
DSi_DSP.cpp
DSi_I2C.cpp
DSi_NAND.cpp
DSi_NDMA.cpp
DSi_NWifi.cpp
DSi_SD.cpp
DSi_SPI_TSC.cpp
FATIO.cpp
FATStorage.cpp
FIFO.h
GBACart.cpp
GPU.cpp
GPU2D.cpp
GPU2D_Soft.cpp
GPU3D.cpp
GPU3D_Soft.cpp
GPU3D_Texcache.cpp
GPU3D_Texcache.h
melonDLDI.h
NDS.cpp
NDSCart.cpp
NDSCartR4.cpp
Platform.h
ROMList.h
ROMList.cpp
FreeBIOS.h
FreeBIOS.cpp
RTC.cpp
Savestate.cpp
SPI.cpp
SPI_Firmware.cpp
SPU.cpp
types.h
Utils.cpp
Utils.h
Wifi.cpp
WifiAP.cpp
fatfs/ff.c
fatfs/ffsystem.c
fatfs/ffunicode.c
fatfs/ffconf.h
sha1/sha1.c
tiny-AES-c/aes.c
xxhash/xxhash.c)
if (ENABLE_GDBSTUB)
message(NOTICE "Enabling GDB stub")
target_sources(core PRIVATE
debug/GdbStub.cpp
debug/GdbProto.cpp
debug/GdbCmds.cpp
)
endif()
if (ENABLE_OGLRENDERER)
target_sources(core PRIVATE
GPU_OpenGL.cpp
GPU_OpenGL_shaders.h
GPU3D_OpenGL.cpp
GPU3D_Compute.cpp
GPU3D_TexcacheOpenGL.cpp
GPU3D_TexcacheOpenGL.h
GPU3D_OpenGL_shaders.h
OpenGLSupport.cpp)
target_compile_definitions(core PUBLIC OGLRENDERER_ENABLED)
endif()
if (ENABLE_JIT)
enable_language(ASM)
target_sources(core PRIVATE
ARM_InstrInfo.cpp
ARMJIT.cpp
ARMJIT_Memory.cpp
ARMJIT_Global.cpp
dolphin/CommonFuncs.cpp)
if (WIN32)
# Required for memory mapping-related functions introduced in Windows 8
target_compile_definitions(core PRIVATE -D_WIN32_WINNT=_WIN32_WINNT_WIN8)
target_link_libraries(core PRIVATE onecore)
endif()
if (ARCHITECTURE STREQUAL x86_64)
target_sources(core PRIVATE
dolphin/x64ABI.cpp
dolphin/x64CPUDetect.cpp
dolphin/x64Emitter.cpp
ARMJIT_x64/ARMJIT_Compiler.cpp
ARMJIT_x64/ARMJIT_ALU.cpp
ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp
ARMJIT_x64/ARMJIT_Linkage.S)
endif()
if (ARCHITECTURE STREQUAL ARM64)
target_sources(core PRIVATE
dolphin/Arm64Emitter.cpp
dolphin/MathUtil.cpp
ARMJIT_A64/ARMJIT_Compiler.cpp
ARMJIT_A64/ARMJIT_ALU.cpp
ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp
ARMJIT_A64/ARMJIT_Linkage.S)
endif()
endif()
target_include_directories(core INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")
set(MELONDS_VERSION_SUFFIX "$ENV{MELONDS_VERSION_SUFFIX}" CACHE STRING "Suffix to add to displayed melonDS version")
option(MELONDS_EMBED_BUILD_INFO "Embed detailed build info into the binary" OFF)
set(MELONDS_GIT_BRANCH "$ENV{MELONDS_GIT_BRANCH}" CACHE STRING "The Git branch used for this build")
set(MELONDS_GIT_HASH "$ENV{MELONDS_GIT_HASH}" CACHE STRING "The hash of the Git commit")
set(MELONDS_BUILD_PROVIDER "$ENV{MELONDS_BUILD_PROVIDER}" CACHE STRING "The name of the provider of this build")
if (MELONDS_EMBED_BUILD_INFO)
target_compile_definitions(core PUBLIC MELONDS_EMBED_BUILD_INFO)
if (NOT MELONDS_GIT_BRANCH OR NOT MELONDS_GIT_HASH OR NOT MELONDS_BUILD_PROVIDER)
message(FATAL_ERROR "When embedding build information, all fields must be filled out. See src/CMakeLists.txt.")
endif()
endif()
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/version.h.in" "${CMAKE_CURRENT_BINARY_DIR}/version.h")
target_sources(core PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/version.h")
target_include_directories(core PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
set(BUILD_SHARED_LIBS OFF)
add_subdirectory(teakra EXCLUDE_FROM_ALL)
# Workaround for building teakra with -O0 on Windows either failing or hanging forever
target_compile_options(teakra PRIVATE "$<$<CONFIG:DEBUG>:-Og>")
target_link_libraries(core PRIVATE teakra)
if (NOT MSVC)
# MSVC has its own compiler flag syntax; if we ever support it,
# be sure to add equivalent flags here.
target_compile_options(core PUBLIC -fwrapv)
target_compile_options(core PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-Wno-invalid-offsetof>")
# These warnings are excessive, and are only triggered in the ARMJIT code
# (which is fundamentally non-portable, so this is fine)
endif()
find_library(m MATH_LIBRARY)
if (MATH_LIBRARY)
target_link_libraries(core PRIVATE ${MATH_LIBRARY})
endif()
if (ENABLE_JIT)
target_compile_definitions(core PUBLIC JIT_ENABLED)
if (ENABLE_JIT_PROFILING)
include(../cmake/FindVTune.cmake)
add_definitions(-DJIT_PROFILING_ENABLED)
endif()
endif()
ARM.cpp
ARMInterpreter.cpp
ARMInterpreter_ALU.cpp
ARMInterpreter_Branch.cpp
ARMInterpreter_LoadStore.cpp
Config.cpp
CP15.cpp
CRC32.cpp
DMA.cpp
GPU.cpp
GPU2D.cpp
GPU3D.cpp
GPU3D_OpenGL.cpp
GPU3D_Soft.cpp
NDS.cpp
NDSCart.cpp
OpenGLSupport.cpp
RTC.cpp
Savestate.cpp
SPI.cpp
SPU.cpp
Wifi.cpp
WifiAP.cpp
)
if (WIN32)
target_link_libraries(core PRIVATE ole32 comctl32 wsock32 ws2_32)
target_compile_definitions(core PUBLIC WIN32_LEAN_AND_MEAN NOMINMAX)
elseif(NOT APPLE AND NOT HAIKU)
check_library_exists(rt shm_open "" NEED_LIBRT)
if (NEED_LIBRT)
target_link_libraries(core PRIVATE rt)
endif()
elseif(HAIKU)
target_link_libraries(core PRIVATE network)
target_link_libraries(core ole32 comctl32 ws2_32 opengl32)
else()
target_link_libraries(core GL EGL)
endif()
if (ENABLE_JIT_PROFILING)
target_include_directories(core PRIVATE "${VTUNE_INCLUDE_DIR}")
target_link_libraries(core PRIVATE "${VTUNE_LIBRARY}")
endif()
#if(CMAKE_BUILD_TYPE MATCHES "Debug")
# set(
# CMAKE_C_FLAGS
# "${CMAKE_C_FLAGS} -fsanitize=undefined -fsanitize=address"
# )
# target_link_options(core
# BEFORE PUBLIC -fsanitize=undefined PUBLIC -fsanitize=address
# )
#endif()

Some files were not shown because too many files have changed in this diff Show More