Merge branch 'master' into feature/zip-support

This commit is contained in:
WaluigiWare64 2020-12-19 17:43:53 +00:00 committed by GitHub
commit df190b0400
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
65 changed files with 4480 additions and 2972 deletions

39
.github/workflows/build-macos.yml vendored Normal file
View File

@ -0,0 +1,39 @@
# CI workflow: configures and builds melonDS with CMake on a macOS runner and
# uploads the resulting melonDS.app bundle as a build artifact.
name: CMake Build (macOS x86-64)
# Triggered by pushes to master and by pull requests targeting master.
on:
push:
branches:
- master
pull_request:
branches:
- master
# BUILD_TYPE is forwarded to CMake as CMAKE_BUILD_TYPE in the Configure step.
env:
BUILD_TYPE: Release
jobs:
build:
runs-on: macos-latest
steps:
- uses: actions/checkout@v1
# Build dependencies are installed through Homebrew.
# NOTE(review): the README's macOS instructions additionally install
# pkg-config and libarchive - confirm whether this job needs them too.
- name: Install dependencies
working-directory: ${{runner.workspace}}
run: |
brew install cmake sdl2 qt5 libslirp
# The build happens in a separate directory under the runner workspace.
- name: Create build environment
run: mkdir ${{runner.workspace}}/build
# Qt5_DIR points CMake at the Qt5 package config inside the Homebrew qt5
# prefix (presumably because it is not on CMake's default search path).
- name: Configure
working-directory: ${{runner.workspace}}/build
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5
# Builds with one make job per CPU reported by sysctl, then stages
# melonDS.app into dist/ so only the bundle is uploaded.
- name: Make
working-directory: ${{runner.workspace}}/build
run: |
make -j$(sysctl -n hw.ncpu)
mkdir dist
cp -r melonDS.app dist
# Publishes the staged dist/ directory under the artifact name "melonDS.app".
- uses: actions/upload-artifact@v1
with:
name: melonDS.app
path: ${{runner.workspace}}/build/dist

View File

@ -29,6 +29,7 @@ jobs:
shell: bash shell: bash
working-directory: ${{runner.workspace}} working-directory: ${{runner.workspace}}
run: | run: |
sudo rm -f /etc/apt/sources.list.d/kubernetes.list
sudo dpkg --add-architecture arm64 sudo dpkg --add-architecture arm64
sudo sh -c "sed \"s|^deb \([a-z\.:/]*\) \([a-z\-]*\) \(.*\)$|deb [arch=amd64] \1 \2 \3\ndeb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports \2 \3|\" /etc/apt/sources.list > /etc/apt/sources.list.new" sudo sh -c "sed \"s|^deb \([a-z\.:/]*\) \([a-z\-]*\) \(.*\)$|deb [arch=amd64] \1 \2 \3\ndeb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports \2 \3|\" /etc/apt/sources.list > /etc/apt/sources.list.new"
sudo rm /etc/apt/sources.list sudo rm /etc/apt/sources.list

View File

@ -8,10 +8,6 @@ on:
branches: branches:
- master - master
env:
BUILD_TYPE: Release
CMAKE_VERSION: 3.15.2
jobs: jobs:
build: build:
@ -20,25 +16,21 @@ jobs:
steps: steps:
- uses: actions/checkout@v1 - uses: actions/checkout@v1
- name: Install dependencies - name: Install dependencies
shell: bash run: |
working-directory: ${{runner.workspace}} sudo rm -f /etc/apt/sources.list.d/dotnetdev.list /etc/apt/sources.list.d/microsoft-prod.list
run: | # Fetch a new version of CMake, because the default is too old. sudo apt update
sudo rm -f /etc/apt/sources.list.d/dotnetdev.list /etc/apt/sources.list.d/microsoft-prod.list \ sudo apt install cmake libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default libslirp0 libslirp-dev libarchive-dev --allow-downgrades
&& sudo apt update \
&& sudo apt install cmake libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default libslirp0=4.1.0-2ubuntu2.1 libslirp-dev libarchive-dev --allow-downgrades
- name: Create build environment - name: Create build environment
run: mkdir ${{runner.workspace}}/build run: mkdir ${{runner.workspace}}/build
- name: Configure - name: Configure
shell: bash
working-directory: ${{runner.workspace}}/build working-directory: ${{runner.workspace}}/build
run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE run: cmake $GITHUB_WORKSPACE
- name: Make - name: Make
shell: bash
working-directory: ${{runner.workspace}}/build working-directory: ${{runner.workspace}}/build
run: | run: |
make -j$(nproc --all) \ make -j$(nproc --all)
&& mkdir dist \ mkdir dist
&& cp melonDS dist cp melonDS dist
- uses: actions/upload-artifact@v1 - uses: actions/upload-artifact@v1
with: with:
name: melonDS name: melonDS

2
.gitignore vendored
View File

@ -11,3 +11,5 @@ cmake-build-debug
.idea .idea
*.exe *.exe
.DS_Store

View File

@ -1,20 +1,30 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.13)
include(CheckSymbolExists)
include(CheckLibraryExists)
cmake_policy(VERSION 3.13) cmake_policy(VERSION 3.13)
if (POLICY CMP0076) if (POLICY CMP0076)
cmake_policy(SET CMP0076 NEW) cmake_policy(SET CMP0076 NEW)
endif() endif()
set(CMAKE_CXX_STANDARD 14) set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version")
project(melonDS CXX)
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
project(melonDS) check_library_exists(m pow "" LIBM)
if(LIBM)
link_libraries(m)
endif()
if (NOT CMAKE_BUILD_TYPE) if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release) set(CMAKE_BUILD_TYPE Release)
endif() endif()
include(CheckSymbolExists)
function(detect_architecture symbol arch) function(detect_architecture symbol arch)
if (NOT DEFINED ARCHITECTURE) if (NOT DEFINED ARCHITECTURE)
set(CMAKE_REQUIRED_QUIET 1) set(CMAKE_REQUIRED_QUIET 1)
@ -65,15 +75,34 @@ if (CMAKE_BUILD_TYPE STREQUAL Release)
add_link_options(-s) add_link_options(-s)
endif() endif()
add_compile_options(-fno-pic)
add_link_options(-no-pie)
option(BUILD_QT_SDL "Build Qt/SDL frontend" ON)
if (WIN32) if (WIN32)
option(BUILD_STATIC "Statically link dependencies" OFF) option(BUILD_STATIC "Statically link dependencies" OFF)
endif() endif()
if (ENABLE_LTO)
if (WIN32)
add_compile_options(-flto)
add_link_options(-flto)
else()
add_compile_options(-flto -fPIC)
add_link_options(-flto -fuse-linker-plugin -pie)
endif()
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_AR "gcc-ar")
set(CMAKE_RANLIB "gcc-ranlib")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
find_program(LLD NAMES ld.lld ld64.lld lld-link)
if (NOT LLD STREQUAL "LLD-NOTFOUND")
add_link_options(-fuse-ld=lld)
endif()
set(CMAKE_AR "llvm-ar")
set(CMAKE_RANLIB "llvm-ranlib")
endif()
option(BUILD_QT_SDL "Build Qt/SDL frontend" ON)
add_subdirectory(src) add_subdirectory(src)
if (BUILD_QT_SDL) if (BUILD_QT_SDL)

View File

@ -38,7 +38,7 @@ As for the rest, the interface should be pretty straightforward. If you have a q
* Install dependencies: * Install dependencies:
```sh ```sh
sudo apt-get install cmake libgtk-3-dev libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtdeclarative5-dev libslirp-dev libarchive-dev sudo apt-get install cmake libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtdeclarative5-dev libslirp-dev libarchive-dev
``` ```
* Compile: * Compile:
@ -84,6 +84,21 @@ If everything went well, melonDS and the libraries it needs should now be in the
``` ```
If everything went well, melonDS should now be in the `dist` folder. If everything went well, melonDS should now be in the `dist` folder.
### macOS:
1. Install the [Homebrew Package Manager](https://brew.sh)
2. Install dependencies: `brew install git pkg-config cmake sdl2 qt5 libslirp libarchive`
3. Compile:
```zsh
git clone https://github.com/Arisotura/melonDS.git
cd melonDS
mkdir build && cd build
cmake .. -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5
make -j$(sysctl -n hw.ncpu)
mkdir dist && cp -r melonDS.app dist
```
If everything went well, melonDS.app should now be in the `dist` folder.
## TODO LIST ## TODO LIST
* DSi emulation * DSi emulation

BIN
melonDS.icns Normal file

Binary file not shown.

24
melonDS.plist Normal file
View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- Bundle metadata for the macOS melonDS.app: executable name, icon file,
     bundle identifier, version strings and display flags. -->
<plist version="1.0">
<dict>
<!-- Executable and icon names; presumably these must match the files the
     build places inside the bundle (TODO confirm against the CMake bundle setup). -->
<key>CFBundleExecutable</key>
<string>melonDS</string>
<key>CFBundleIconFile</key>
<string>melonDS.icns</string>
<key>CFBundleIdentifier</key>
<string>net.kuribo64.melonDS</string>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<!-- Both version keys are hard-coded to the same value here; update them
     together when the release version changes. -->
<key>CFBundleVersion</key>
<string>0.9</string>
<key>CFBundleShortVersionString</key>
<string>0.9</string>
<key>NSHumanReadableCopyright</key>
<string>Licensed under GPLv3</string>
<key>NSHighResolutionCapable</key>
<true/>
</dict>
</plist>

View File

@ -176,7 +176,7 @@ T SlowRead9(u32 addr, ARMv5* cpu)
} }
template <typename T, int ConsoleType> template <typename T, int ConsoleType>
void SlowWrite9(u32 addr, ARMv5* cpu, T val) void SlowWrite9(u32 addr, ARMv5* cpu, u32 val)
{ {
addr &= ~(sizeof(T) - 1); addr &= ~(sizeof(T) - 1);
@ -224,7 +224,7 @@ T SlowRead7(u32 addr)
} }
template <typename T, int ConsoleType> template <typename T, int ConsoleType>
void SlowWrite7(u32 addr, T val) void SlowWrite7(u32 addr, u32 val)
{ {
addr &= ~(sizeof(T) - 1); addr &= ~(sizeof(T) - 1);
@ -266,16 +266,16 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num)
#define INSTANTIATE_SLOWMEM(consoleType) \ #define INSTANTIATE_SLOWMEM(consoleType) \
template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \ template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \
template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \ template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u32); \
template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \ template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u32); \
\ \
template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \ template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \
template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \ template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \
template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \ template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \
\ \
template void SlowWrite7<u32, consoleType>(u32, u32); \ template void SlowWrite7<u32, consoleType>(u32, u32); \
template void SlowWrite7<u16, consoleType>(u32, u16); \ template void SlowWrite7<u16, consoleType>(u32, u32); \
template void SlowWrite7<u8, consoleType>(u32, u8); \ template void SlowWrite7<u8, consoleType>(u32, u32); \
\ \
template u32 SlowRead7<u32, consoleType>(u32); \ template u32 SlowRead7<u32, consoleType>(u32); \
template u16 SlowRead7<u16, consoleType>(u32); \ template u16 SlowRead7<u16, consoleType>(u32); \
@ -298,6 +298,7 @@ void Init()
void DeInit() void DeInit()
{ {
ResetBlockCache();
ARMJIT_Memory::DeInit(); ARMJIT_Memory::DeInit();
delete JITCompiler; delete JITCompiler;
@ -594,7 +595,8 @@ void CompileBlock(ARM* cpu)
u32 r15 = cpu->R[15]; u32 r15 = cpu->R[15];
u32 addressRanges[Config::JIT_MaxBlockSize]; u32 addressRanges[Config::JIT_MaxBlockSize];
u32 addressMasks[Config::JIT_MaxBlockSize] = {0}; u32 addressMasks[Config::JIT_MaxBlockSize];
memset(addressMasks, 0, Config::JIT_MaxBlockSize * sizeof(u32));
u32 numAddressRanges = 0; u32 numAddressRanges = 0;
u32 numLiterals = 0; u32 numLiterals = 0;
@ -1116,6 +1118,7 @@ void ResetBlockCache()
range->Blocks.Clear(); range->Blocks.Clear();
range->Code = 0; range->Code = 0;
} }
delete block;
} }
JitBlocks9.clear(); JitBlocks9.clear();
JitBlocks7.clear(); JitBlocks7.clear();

View File

@ -1,5 +1,11 @@
#include "ARMJIT_Compiler.h"
#include "../ARMJIT_Internal.h"
#include "../ARMInterpreter.h"
#include "../Config.h"
#ifdef __SWITCH__ #ifdef __SWITCH__
#include "../switch/compat_switch.h" #include <switch.h>
extern char __start__; extern char __start__;
#else #else
@ -7,13 +13,7 @@ extern char __start__;
#include <unistd.h> #include <unistd.h>
#endif #endif
#include "ARMJIT_Compiler.h" #include <stdlib.h>
#include "../ARMJIT_Internal.h"
#include "../ARMInterpreter.h"
#include "../Config.h"
#include <malloc.h>
using namespace Arm64Gen; using namespace Arm64Gen;
@ -68,6 +68,11 @@ void Compiler::A_Comp_MRS()
MOV(rd, RCPSR); MOV(rd, RCPSR);
} }
// Free-function thunk that forwards to ARM::UpdateMode on the given CPU.
//
// A_Comp_MSR passes this function's address (cast to void*) to
// QuickCallFunction, so JIT-generated code calls an ordinary function with
// the CPU object as an explicit first argument rather than taking the
// address of the member function ARM::UpdateMode directly.
//
//   arm     - CPU whose mode is being switched
//   oldmode - mode value before the switch, forwarded unchanged
//   newmode - mode value after the switch, forwarded unchanged
void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
{
arm->UpdateMode(oldmode, newmode);
}
void Compiler::A_Comp_MSR() void Compiler::A_Comp_MSR()
{ {
Comp_AddCycles_C(); Comp_AddCycles_C();
@ -139,7 +144,7 @@ void Compiler::A_Comp_MSR()
PushRegs(true); PushRegs(true);
QuickCallFunction(X3, (void*)&ARM::UpdateMode); QuickCallFunction(X3, (void*)&UpdateModeTrampoline);
PopRegs(true); PopRegs(true);
} }
@ -179,7 +184,7 @@ void Compiler::PopRegs(bool saveHiRegs)
Compiler::Compiler() Compiler::Compiler()
{ {
#ifdef __SWITCH__ #ifdef __SWITCH__
JitRWBase = memalign(0x1000, JitMemSize); JitRWBase = aligned_alloc(0x1000, JitMemSize);
JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000; JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
JitRWStart = virtmemReserve(JitMemSize); JitRWStart = virtmemReserve(JitMemSize);

View File

@ -187,6 +187,7 @@ public:
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs); void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr); bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
enum enum
{ {
memop_Writeback = 1 << 0, memop_Writeback = 1 << 0,
@ -213,8 +214,8 @@ public:
return (u8*)entry - GetRXBase(); return (u8*)entry - GetRXBase();
} }
bool IsJITFault(u64 pc); bool IsJITFault(u8* pc);
s64 RewriteMemAccess(u64 pc); u8* RewriteMemAccess(u8* pc);
void SwapCodeRegion() void SwapCodeRegion()
{ {

View File

@ -9,37 +9,34 @@ using namespace Arm64Gen;
namespace ARMJIT namespace ARMJIT
{ {
bool Compiler::IsJITFault(u64 pc) bool Compiler::IsJITFault(u8* pc)
{ {
return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
} }
s64 Compiler::RewriteMemAccess(u64 pc) u8* Compiler::RewriteMemAccess(u8* pc)
{ {
ptrdiff_t pcOffset = pc - (u64)GetRXBase(); ptrdiff_t pcOffset = pc - GetRXBase();
auto it = LoadStorePatches.find(pcOffset); auto it = LoadStorePatches.find(pcOffset);
if (it != LoadStorePatches.end()) if (it != LoadStorePatches.end())
{ {
LoadStorePatch patch = it->second; LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
ptrdiff_t curCodeOffset = GetCodeOffset(); ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset); SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc); BL(patch.PatchFunc);
for (int i = 0; i < patch.PatchSize / 4 - 1; i++) for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP); HINT(HINT_NOP);
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr()); FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset); SetCodePtrUnsafe(curCodeOffset);
LoadStorePatches.erase(it); return pc + (ptrdiff_t)patch.PatchOffset;
return patch.PatchOffset;
} }
printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc)); printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
abort(); abort();
@ -192,7 +189,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
else else
{ {
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7); LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
if (size == 32) if (size == 32 && !addrIsStatic)
{ {
UBFIZ(W0, W0, 3, 2); UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0); RORV(rdMapped, rdMapped, W0);

View File

@ -216,9 +216,9 @@ template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset); void LinkBlock(ARM* cpu, u32 codeOffset);
template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu); template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val); template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val);
template <typename T, int ConsoleType> T SlowRead7(u32 addr); template <typename T, int ConsoleType> T SlowRead7(u32 addr);
template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val); template <typename T, int ConsoleType> void SlowWrite7(u32 addr, u32 val);
template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu); template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num); template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);

View File

@ -1,5 +1,6 @@
#if defined(__SWITCH__) #if defined(__SWITCH__)
#include "switch/compat_switch.h" #include <switch.h>
#include "frontend/switch/FaultHandler.h"
#elif defined(_WIN32) #elif defined(_WIN32)
#include <windows.h> #include <windows.h>
#else #else
@ -10,6 +11,12 @@
#include <signal.h> #include <signal.h>
#endif #endif
#if defined(__ANDROID__)
#include <dlfcn.h>
#include <linux/ashmem.h>
#include <sys/ioctl.h>
#endif
#include "ARMJIT_Memory.h" #include "ARMJIT_Memory.h"
#include "ARMJIT_Internal.h" #include "ARMJIT_Internal.h"
@ -22,7 +29,7 @@
#include "NDSCart.h" #include "NDSCart.h"
#include "SPU.h" #include "SPU.h"
#include <malloc.h> #include <stdlib.h>
/* /*
We're handling fastmem here. We're handling fastmem here.
@ -40,7 +47,8 @@
We handle this by only mapping those regions which are actually We handle this by only mapping those regions which are actually
used and by praying the games don't go wild. used and by praying the games don't go wild.
Beware, this file is full of platform specific code. Beware, this file is full of platform specific code and copied
from Dolphin, so enjoy the copied comments!
*/ */
@ -49,12 +57,16 @@ namespace ARMJIT_Memory
struct FaultDescription struct FaultDescription
{ {
u32 EmulatedFaultAddr; u32 EmulatedFaultAddr;
u64 FaultPC; u8* FaultPC;
}; };
bool FaultHandler(FaultDescription* faultDesc, s32& offset); bool FaultHandler(FaultDescription& faultDesc);
} }
#if defined(__ANDROID__)
#define ASHMEM_DEVICE "/dev/ashmem"
#endif
#if defined(__SWITCH__) #if defined(__SWITCH__)
// with LTO the symbols seem to be not properly overriden // with LTO the symbols seem to be not properly overriden
// if they're somewhere else // if they're somewhere else
@ -75,7 +87,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
ARMJIT_Memory::FaultDescription desc; ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea;
desc.FaultPC = ctx->pc.x; desc.FaultPC = (u8*)ctx->pc.x;
u64 integerRegisters[33]; u64 integerRegisters[33];
memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29);
@ -84,23 +96,14 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
integerRegisters[31] = ctx->sp.x; integerRegisters[31] = ctx->sp.x;
integerRegisters[32] = ctx->pc.x; integerRegisters[32] = ctx->pc.x;
s32 offset; if (ARMJIT_Memory::FaultHandler(desc))
if (ARMJIT_Memory::FaultHandler(&desc, offset))
{ {
integerRegisters[32] += offset; integerRegisters[32] = (u64)desc.FaultPC;
ARM_RestoreContext(integerRegisters); ARM_RestoreContext(integerRegisters);
} }
if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start) HandleFault(ctx->pc.x, ctx->lr.x, ctx->fp.x, ctx->far.x, ctx->error_desc);
{
printf("unintentional fault in .text at 0x%x (type %d) (trying to access 0x%x?)\n",
ctx->pc.x - (u64)&__start__, ctx->error_desc, ctx->far.x);
}
else
{
printf("unintentional fault somewhere in deep (address) space at %x (type %d)\n", ctx->pc.x, ctx->error_desc);
}
} }
} }
@ -117,12 +120,11 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
ARMJIT_Memory::FaultDescription desc; ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea;
desc.FaultPC = exceptionInfo->ContextRecord->Rip; desc.FaultPC = (u8*)exceptionInfo->ContextRecord->Rip;
s32 offset = 0; if (ARMJIT_Memory::FaultHandler(desc))
if (ARMJIT_Memory::FaultHandler(&desc, offset))
{ {
exceptionInfo->ContextRecord->Rip += offset; exceptionInfo->ContextRecord->Rip = (u64)desc.FaultPC;
return EXCEPTION_CONTINUE_EXECUTION; return EXCEPTION_CONTINUE_EXECUTION;
} }
@ -131,50 +133,84 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
#else #else
struct sigaction NewSa; static struct sigaction OldSaSegv;
struct sigaction OldSa; static struct sigaction OldSaBus;
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext)
{ {
if (sig != SIGSEGV && sig != SIGBUS)
{
// We are not interested in other signals - handle it as usual.
return;
}
if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR)
{
// Huh? Return.
return;
}
ucontext_t* context = (ucontext_t*)rawContext; ucontext_t* context = (ucontext_t*)rawContext;
ARMJIT_Memory::FaultDescription desc; ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
#ifdef __x86_64__ #ifdef __x86_64__
desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea;
desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; #ifdef __APPLE__
desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip;
#else
desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP];
#endif
#else
#ifdef __APPLE__
desc.EmulatedFaultAddr = (u8*)context->uc_mcontext->__es.__far - curArea;
desc.FaultPC = (u8*)context->uc_mcontext->__ss.__pc;
#else #else
desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea;
desc.FaultPC = context->uc_mcontext.pc; desc.FaultPC = (u8*)context->uc_mcontext.pc;
#endif
#endif #endif
s32 offset = 0; if (ARMJIT_Memory::FaultHandler(desc))
if (ARMJIT_Memory::FaultHandler(&desc, offset))
{ {
#ifdef __x86_64__ #ifdef __x86_64__
context->uc_mcontext.gregs[REG_RIP] += offset; #ifdef __APPLE__
context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC;
#else #else
context->uc_mcontext.pc += offset; context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC;
#endif
#else
#ifdef __APPLE__
context->uc_mcontext->__ss.__pc = (u64)desc.FaultPC;
#else
context->uc_mcontext.pc = (u64)desc.FaultPC;
#endif
#endif #endif
return; return;
} }
if (OldSa.sa_flags & SA_SIGINFO) struct sigaction* oldSa;
if (sig == SIGSEGV)
oldSa = &OldSaSegv;
else
oldSa = &OldSaBus;
if (oldSa->sa_flags & SA_SIGINFO)
{ {
OldSa.sa_sigaction(sig, info, rawContext); oldSa->sa_sigaction(sig, info, rawContext);
return; return;
} }
if (OldSa.sa_handler == SIG_DFL) if (oldSa->sa_handler == SIG_DFL)
{ {
signal(sig, SIG_DFL); signal(sig, SIG_DFL);
return; return;
} }
if (OldSa.sa_handler == SIG_IGN) if (oldSa->sa_handler == SIG_IGN)
{ {
// Ignore signal // Ignore signal
return; return;
} }
OldSa.sa_handler(sig); oldSa->sa_handler(sig);
} }
#endif #endif
@ -231,7 +267,7 @@ enum
{ {
memstate_Unmapped, memstate_Unmapped,
memstate_MappedRW, memstate_MappedRW,
// on switch this is unmapped as well // on Switch this is unmapped as well
memstate_MappedProtected, memstate_MappedProtected,
}; };
@ -314,14 +350,16 @@ struct Mapping
void Unmap(int region) void Unmap(int region)
{ {
u32 dtcmStart = NDS::ARM9->DTCMBase;
u32 dtcmSize = NDS::ARM9->DTCMSize;
bool skipDTCM = Num == 0 && region != memregion_DTCM; bool skipDTCM = Num == 0 && region != memregion_DTCM;
u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7; u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7;
u32 offset = 0; u32 offset = 0;
while (offset < Size) while (offset < Size)
{ {
if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase) if (skipDTCM && Addr + offset == dtcmStart)
{ {
offset += NDS::ARM9->DTCMSize; offset += dtcmSize;
} }
else else
{ {
@ -329,7 +367,7 @@ struct Mapping
u8 status = statuses[(Addr + offset) >> 12]; u8 status = statuses[(Addr + offset) >> 12];
while (statuses[(Addr + offset) >> 12] == status while (statuses[(Addr + offset) >> 12] == status
&& offset < Size && offset < Size
&& (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase)) && (!skipDTCM || Addr + offset != dtcmStart))
{ {
assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped);
statuses[(Addr + offset) >> 12] = memstate_Unmapped; statuses[(Addr + offset) >> 12] = memstate_Unmapped;
@ -347,9 +385,33 @@ struct Mapping
#endif #endif
} }
} }
#ifndef __SWITCH__ #ifndef __SWITCH__
#ifndef _WIN32
u32 dtcmEnd = dtcmStart + dtcmSize;
if (Num == 0
&& dtcmEnd >= Addr
&& dtcmStart < Addr + Size)
{
bool success;
if (dtcmStart > Addr)
{
success = UnmapFromRange(Addr, 0, OffsetsPerRegion[region] + LocalOffset, dtcmStart - Addr);
assert(success);
}
if (dtcmEnd < Addr + Size)
{
u32 offset = dtcmStart - Addr + dtcmSize;
success = UnmapFromRange(dtcmEnd, 0, OffsetsPerRegion[region] + LocalOffset + offset, Size - offset);
assert(success);
}
}
else
#endif
{
bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
assert(succeded); assert(succeded);
}
#endif #endif
} }
}; };
@ -418,10 +480,10 @@ void RemapDTCM(u32 newBase, u32 newSize)
printf("unmapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset); printf("unmapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset);
bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start); bool overlap = (NDS::ARM9->DTCMSize > 0 && oldDTCMBase < end && oldDTCBEnd > start)
bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start); || (newSize > 0 && newBase < end && newEnd > start);
if (mapping.Num == 0 && (oldOverlap || newOverlap)) if (mapping.Num == 0 && overlap)
{ {
mapping.Unmap(region); mapping.Unmap(region);
Mappings[region].Remove(i); Mappings[region].Remove(i);
@ -445,8 +507,8 @@ void RemapNWRAM(int num)
for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;) for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;)
{ {
Mapping& mapping = Mappings[memregion_SharedWRAM][i]; Mapping& mapping = Mappings[memregion_SharedWRAM][i];
if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size if (DSi::NWRAMStart[mapping.Num][num] < mapping.Addr + mapping.Size
|| DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr)) && DSi::NWRAMEnd[mapping.Num][num] > mapping.Addr)
{ {
mapping.Unmap(memregion_SharedWRAM); mapping.Unmap(memregion_SharedWRAM);
Mappings[memregion_SharedWRAM].Remove(i); Mappings[memregion_SharedWRAM].Remove(i);
@ -469,7 +531,7 @@ void RemapSWRAM()
for (int i = 0; i < Mappings[memregion_WRAM7].Length;) for (int i = 0; i < Mappings[memregion_WRAM7].Length;)
{ {
Mapping& mapping = Mappings[memregion_WRAM7][i]; Mapping& mapping = Mappings[memregion_WRAM7][i];
if (mapping.Addr + mapping.Size < 0x03800000) if (mapping.Addr + mapping.Size <= 0x03800000)
{ {
mapping.Unmap(memregion_WRAM7); mapping.Unmap(memregion_WRAM7);
Mappings[memregion_WRAM7].Remove(i); Mappings[memregion_WRAM7].Remove(i);
@ -501,26 +563,53 @@ bool MapAtAddress(u32 addr)
return false; return false;
u8* states = num == 0 ? MappingStatus9 : MappingStatus7; u8* states = num == 0 ? MappingStatus9 : MappingStatus7;
printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); printf("mapping mirror %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num);
bool isExecutable = ARMJIT::CodeMemRegions[region]; bool isExecutable = ARMJIT::CodeMemRegions[region];
u32 dtcmStart = NDS::ARM9->DTCMBase;
u32 dtcmSize = NDS::ARM9->DTCMSize;
u32 dtcmEnd = dtcmStart + dtcmSize;
#ifndef __SWITCH__ #ifndef __SWITCH__
#ifndef _WIN32
if (num == 0
&& dtcmEnd >= mirrorStart
&& dtcmStart < mirrorStart + mirrorSize)
{
bool success;
if (dtcmStart > mirrorStart)
{
success = MapIntoRange(mirrorStart, 0, OffsetsPerRegion[region] + memoryOffset, dtcmStart - mirrorStart);
assert(success);
}
if (dtcmEnd < mirrorStart + mirrorSize)
{
u32 offset = dtcmStart - mirrorStart + dtcmSize;
success = MapIntoRange(dtcmEnd, 0, OffsetsPerRegion[region] + memoryOffset + offset, mirrorSize - offset);
assert(success);
}
}
else
#endif
{
bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
assert(succeded); assert(succeded);
}
#endif #endif
ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512;
// this overcomplicated piece of code basically just finds whole pieces of code memory // this overcomplicated piece of code basically just finds whole pieces of code memory
// which can be mapped // which can be mapped/protected
u32 offset = 0; u32 offset = 0;
bool skipDTCM = num == 0 && region != memregion_DTCM; bool skipDTCM = num == 0 && region != memregion_DTCM;
while (offset < mirrorSize) while (offset < mirrorSize)
{ {
if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase) if (skipDTCM && mirrorStart + offset == dtcmStart)
{ {
SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0); #ifdef _WIN32
offset += NDS::ARM9->DTCMSize; SetCodeProtectionRange(dtcmStart, dtcmSize, 0, 0);
#endif
offset += dtcmSize;
} }
else else
{ {
@ -557,37 +646,36 @@ bool MapAtAddress(u32 addr)
Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num}; Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num};
Mappings[region].Add(mapping); Mappings[region].Add(mapping);
printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); //printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1);
return true; return true;
} }
bool FaultHandler(FaultDescription* faultDesc, s32& offset) bool FaultHandler(FaultDescription& faultDesc)
{ {
if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC))
{ {
bool rewriteToSlowPath = true; bool rewriteToSlowPath = true;
u32 addr = faultDesc->EmulatedFaultAddr; u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7;
if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped)
rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr);
if (rewriteToSlowPath) if (rewriteToSlowPath)
{ faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC);
offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC);
}
return true; return true;
} }
return false; return false;
} }
void Init()
{
const u64 AddrSpaceSize = 0x100000000; const u64 AddrSpaceSize = 0x100000000;
void Init()
{
#if defined(__SWITCH__) #if defined(__SWITCH__)
MemoryBase = (u8*)memalign(0x1000, MemoryTotalSize); MemoryBase = (u8*)aligned_alloc(0x1000, MemoryTotalSize);
MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize); MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize);
bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem,
@ -624,22 +712,52 @@ void Init()
u8* basePtr = MemoryBase; u8* basePtr = MemoryBase;
#else #else
FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); // this used to be allocated with three different mmaps
FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); // The idea was to give the OS more freedom where to position the buffers,
// but something was bad about this so instead we take this vmem eating monster
// which seems to work better.
MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
munmap(MemoryBase, AddrSpaceSize*4);
FastMem9Start = MemoryBase;
FastMem7Start = MemoryBase + AddrSpaceSize;
MemoryBase = MemoryBase + AddrSpaceSize*2;
MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); #if defined(__ANDROID__)
static void* libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
using type_ASharedMemory_create = int(*)(const char* name, size_t size);
static void* symbol = dlsym(libandroid, "ASharedMemory_create");
static auto shared_memory_create = reinterpret_cast<type_ASharedMemory_create>(symbol);
if (shared_memory_create)
{
MemoryFile = shared_memory_create("melondsfastmem", MemoryTotalSize);
}
else
{
int fd = open(ASHMEM_DEVICE, O_RDWR);
ioctl(fd, ASHMEM_SET_NAME, "melondsfastmem");
ioctl(fd, ASHMEM_SET_SIZE, MemoryTotalSize);
MemoryFile = fd;
}
#elif defined(__APPLE__)
char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1];
sprintf(fastmemPidName, "melondsfastmem%d", getpid());
MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600);
delete[] fastmemPidName;
#else
MemoryFile = memfd_create("melondsfastmem", 0); MemoryFile = memfd_create("melondsfastmem", 0);
#endif
ftruncate(MemoryFile, MemoryTotalSize); ftruncate(MemoryFile, MemoryTotalSize);
NewSa.sa_flags = SA_SIGINFO; struct sigaction sa;
sigemptyset(&NewSa.sa_mask); sa.sa_handler = nullptr;
NewSa.sa_sigaction = SigsegvHandler; sa.sa_sigaction = &SigsegvHandler;
sigaction(SIGSEGV, &NewSa, &OldSa); sa.sa_flags = SA_SIGINFO;
sigemptyset(&sa.sa_mask);
munmap(MemoryBase, MemoryTotalSize); sigaction(SIGSEGV, &sa, &OldSaSegv);
munmap(FastMem9Start, AddrSpaceSize); #ifdef __APPLE__
munmap(FastMem7Start, AddrSpaceSize); sigaction(SIGBUS, &sa, &OldSaBus);
#endif
mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
@ -657,17 +775,30 @@ void Init()
void DeInit() void DeInit()
{ {
#if defined(__SWITCH__) #if defined(__SWITCH__)
virtmemFree(FastMem9Start, 0x100000000); virtmemFree(FastMem9Start, AddrSpaceSize);
virtmemFree(FastMem7Start, 0x100000000); virtmemFree(FastMem7Start, AddrSpaceSize);
svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize);
virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); virtmemFree(MemoryBaseCodeMem, MemoryTotalSize);
free(MemoryBase); free(MemoryBase);
#elif defined(__APPLE__)
char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1];
sprintf(fastmemPidName, "melondsfastmem%d", getpid());
shm_unlink(fastmemPidName);
delete[] fastmemPidName;
#elif defined(_WIN32) #elif defined(_WIN32)
assert(UnmapViewOfFile(MemoryBase)); assert(UnmapViewOfFile(MemoryBase));
CloseHandle(MemoryFile); CloseHandle(MemoryFile);
RemoveVectoredExceptionHandler(ExceptionHandlerHandle); RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
#else
sigaction(SIGSEGV, &OldSaSegv, nullptr);
#ifdef __APPLE__
sigaction(SIGBUS, &OldSaBus, nullptr);
#endif
munmap(MemoryBase, MemoryTotalSize);
close(MemoryFile);
#endif #endif
} }
@ -997,10 +1128,12 @@ int ClassifyAddress7(u32 addr)
case 0x06000000: case 0x06000000:
case 0x06800000: case 0x06800000:
return memregion_VWRAM; return memregion_VWRAM;
}
} default:
return memregion_Other; return memregion_Other;
} }
}
}
void WifiWrite32(u32 addr, u32 val) void WifiWrite32(u32 addr, u32 val)
{ {

View File

@ -130,6 +130,16 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
} }
// Plain-function shim that forwards to arm->JumpTo(addr, restorecpsr).
// Compiler::Comp_JumpTo hands this function's address to CALL, so the
// generated code targets an ordinary function instead of a casted
// pointer-to-member.
void ARMv4JumpToTrampoline(ARMv4* arm, u32 addr, bool restorecpsr)
{
arm->JumpTo(addr, restorecpsr);
}
// Plain-function shim that forwards to arm->JumpTo(addr, restorecpsr).
// Compiler::Comp_JumpTo hands this function's address to CALL, so the
// generated code targets an ordinary function instead of a casted
// pointer-to-member.
void ARMv5JumpToTrampoline(ARMv5* arm, u32 addr, bool restorecpsr)
{
arm->JumpTo(addr, restorecpsr);
}
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{ {
IrregularCycles = true; IrregularCycles = true;
@ -146,9 +156,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
else else
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
if (Num == 0) if (Num == 0)
CALL((void*)&ARMv5::JumpTo); CALL((void*)&ARMv5JumpToTrampoline);
else else
CALL((void*)&ARMv4::JumpTo); CALL((void*)&ARMv4JumpToTrampoline);
PopRegs(restoreCPSR); PopRegs(restoreCPSR);

View File

@ -101,6 +101,11 @@ void Compiler::A_Comp_MRS()
MOV(32, rd, R(RCPSR)); MOV(32, rd, R(RCPSR));
} }
// Plain-function shim that forwards to arm->UpdateMode(oldmode, newmode).
// Compiler::A_Comp_MSR passes this function's address to CALL, so the
// generated code targets an ordinary function instead of a casted
// pointer-to-member.
void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
{
arm->UpdateMode(oldmode, newmode);
}
void Compiler::A_Comp_MSR() void Compiler::A_Comp_MSR()
{ {
Comp_AddCycles_C(); Comp_AddCycles_C();
@ -185,7 +190,7 @@ void Compiler::A_Comp_MSR()
MOV(32, R(ABI_PARAM3), R(RCPSR)); MOV(32, R(ABI_PARAM3), R(RCPSR));
MOV(32, R(ABI_PARAM2), R(RSCRATCH3)); MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU)); MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((void*)&ARM::UpdateMode); CALL((void*)&UpdateModeTrampoline);
PopRegs(true); PopRegs(true);
} }
@ -216,6 +221,8 @@ Compiler::Compiler()
#ifdef _WIN32 #ifdef _WIN32
DWORD dummy; DWORD dummy;
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
#elif defined(__APPLE__)
pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0);
#else #else
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif #endif
@ -612,9 +619,9 @@ void Compiler::Reset()
LoadStorePatches.clear(); LoadStorePatches.clear();
} }
bool Compiler::IsJITFault(u64 addr) bool Compiler::IsJITFault(u8* addr)
{ {
return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory); return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize;
} }
void Compiler::Comp_SpecialBranchBehaviour(bool taken) void Compiler::Comp_SpecialBranchBehaviour(bool taken)
@ -896,5 +903,4 @@ void Compiler::Comp_AddCycles_CD()
else else
ConstantCycles += cycles; ConstantCycles += cycles;
} }
} }

View File

@ -208,9 +208,9 @@ public:
SetCodePtr(FarCode); SetCodePtr(FarCode);
} }
bool IsJITFault(u64 addr); bool IsJITFault(u8* addr);
s32 RewriteMemAccess(u64 pc); u8* RewriteMemAccess(u8* pc);
u8* FarCode; u8* FarCode;
u8* NearCode; u8* NearCode;

View File

@ -29,8 +29,13 @@
.p2align 4,,15 .p2align 4,,15
#ifdef __APPLE__
.global _ARM_Dispatch
_ARM_Dispatch:
#else
.global ARM_Dispatch .global ARM_Dispatch
ARM_Dispatch: ARM_Dispatch:
#endif
#ifdef WIN64 #ifdef WIN64
push rdi push rdi
push rsi push rsi
@ -54,8 +59,13 @@ ARM_Dispatch:
.p2align 4,,15 .p2align 4,,15
#ifdef __APPLE__
.global _ARM_Ret
_ARM_Ret:
#else
.global ARM_Ret .global ARM_Ret
ARM_Ret: ARM_Ret:
#endif
mov [RCPU + ARM_CPSR_offset], RCPSR mov [RCPU + ARM_CPSR_offset], RCPSR
#ifdef WIN64 #ifdef WIN64

View File

@ -15,28 +15,24 @@ int squeezePointer(T* ptr)
return truncated; return truncated;
} }
s32 Compiler::RewriteMemAccess(u64 pc) u8* Compiler::RewriteMemAccess(u8* pc)
{ {
auto it = LoadStorePatches.find((u8*)pc); auto it = LoadStorePatches.find(pc);
if (it != LoadStorePatches.end()) if (it != LoadStorePatches.end())
{ {
LoadStorePatch patch = it->second; LoadStorePatch patch = it->second;
LoadStorePatches.erase(it); LoadStorePatches.erase(it);
u8* curCodePtr = GetWritableCodePtr(); //printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size);
u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
SetCodePtr(rewritePtr);
CALL(patch.PatchFunc); XEmitter emitter(pc + (ptrdiff_t)patch.Offset);
u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr); emitter.CALL(patch.PatchFunc);
ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5;
assert(remainingSize >= 0);
if (remainingSize > 0) if (remainingSize > 0)
NOP(remainingSize); emitter.NOP(remainingSize);
//printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size); return pc + (ptrdiff_t)patch.Offset;
SetCodePtr(curCodePtr);
return patch.Offset;
} }
printf("this is a JIT bug %llx\n", pc); printf("this is a JIT bug %llx\n", pc);
@ -192,6 +188,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
u8* memopStart = GetWritableCodePtr(); u8* memopStart = GetWritableCodePtr();
LoadStorePatch patch; LoadStorePatch patch;
assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16);
patch.PatchFunc = flags & memop_Store patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()] ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
: PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()]; : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];

View File

@ -27,6 +27,7 @@ add_library(core STATIC
GBACart.cpp GBACart.cpp
GPU.cpp GPU.cpp
GPU2D.cpp GPU2D.cpp
GPU2D_Soft.cpp
GPU3D.cpp GPU3D.cpp
GPU3D_Soft.cpp GPU3D_Soft.cpp
melonDLDI.h melonDLDI.h
@ -80,9 +81,8 @@ if (ENABLE_JIT)
ARMJIT_x64/ARMJIT_LoadStore.cpp ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp ARMJIT_x64/ARMJIT_Branch.cpp
ARMJIT_x64/ARMJIT_Linkage.s ARMJIT_x64/ARMJIT_Linkage.S
) )
set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif() endif()
if (ARCHITECTURE STREQUAL ARM64) if (ARCHITECTURE STREQUAL ARM64)
target_sources(core PRIVATE target_sources(core PRIVATE
@ -94,15 +94,21 @@ if (ENABLE_JIT)
ARMJIT_A64/ARMJIT_LoadStore.cpp ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp ARMJIT_A64/ARMJIT_Branch.cpp
ARMJIT_A64/ARMJIT_Linkage.s ARMJIT_A64/ARMJIT_Linkage.S
) )
set_source_files_properties(ARMJIT_A64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif() endif()
endif() endif()
if (APPLE)
target_include_directories(core PUBLIC /usr/local/include)
target_link_directories(core PUBLIC /usr/local/lib)
endif()
if (ENABLE_OGLRENDERER) if (ENABLE_OGLRENDERER)
if (WIN32) if (WIN32)
target_link_libraries(core ole32 comctl32 ws2_32 opengl32) target_link_libraries(core ole32 comctl32 ws2_32 opengl32)
elseif (APPLE)
target_link_libraries(core "-framework OpenGL")
else() else()
target_link_libraries(core GL EGL) target_link_libraries(core GL EGL)
endif() endif()

View File

@ -73,8 +73,12 @@ ConfigEntry ConfigFile[] =
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0}, {"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 1, NULL, 0}, {"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 1, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0}, {"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
#ifdef __APPLE__
{"JIT_FastMemory", 0, &JIT_FastMemory, 0, NULL, 0},
#else
{"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0}, {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0},
#endif #endif
#endif
{"", -1, NULL, 0, NULL, 0} {"", -1, NULL, 0, NULL, 0}
}; };

View File

@ -77,21 +77,6 @@ void DMA::Reset()
Running = false; Running = false;
InProgress = false; InProgress = false;
if (NDS::ConsoleType == 1)
{
BusRead16 = (CPU==0) ? DSi::ARM9Read16 : DSi::ARM7Read16;
BusRead32 = (CPU==0) ? DSi::ARM9Read32 : DSi::ARM7Read32;
BusWrite16 = (CPU==0) ? DSi::ARM9Write16 : DSi::ARM7Write16;
BusWrite32 = (CPU==0) ? DSi::ARM9Write32 : DSi::ARM7Write32;
}
else
{
BusRead16 = (CPU==0) ? NDS::ARM9Read16 : NDS::ARM7Read16;
BusRead32 = (CPU==0) ? NDS::ARM9Read32 : NDS::ARM7Read32;
BusWrite16 = (CPU==0) ? NDS::ARM9Write16 : NDS::ARM7Write16;
BusWrite32 = (CPU==0) ? NDS::ARM9Write32 : NDS::ARM7Write32;
}
} }
void DMA::DoSavestate(Savestate* file) void DMA::DoSavestate(Savestate* file)
@ -198,13 +183,7 @@ void DMA::Start()
NDS::StopCPU(CPU, 1<<Num); NDS::StopCPU(CPU, 1<<Num);
} }
void DMA::Run() template <int ConsoleType>
{
if (!Running) return;
if (CPU == 0) return Run9();
else return Run7();
}
void DMA::Run9() void DMA::Run9()
{ {
if (NDS::ARM9Timestamp >= NDS::ARM9Target) return; if (NDS::ARM9Timestamp >= NDS::ARM9Target) return;
@ -242,7 +221,10 @@ void DMA::Run9()
{ {
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
BusWrite16(CurDstAddr, BusRead16(CurSrcAddr)); if (ConsoleType == 1)
DSi::ARM9Write16(CurDstAddr, DSi::ARM9Read16(CurSrcAddr));
else
NDS::ARM9Write16(CurDstAddr, NDS::ARM9Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1; CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1; CurDstAddr += DstAddrInc<<1;
@ -278,7 +260,10 @@ void DMA::Run9()
{ {
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
BusWrite32(CurDstAddr, BusRead32(CurSrcAddr)); if (ConsoleType == 1)
DSi::ARM9Write32(CurDstAddr, DSi::ARM9Read32(CurSrcAddr));
else
NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2; CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2; CurDstAddr += DstAddrInc<<2;
@ -317,6 +302,7 @@ void DMA::Run9()
NDS::ResumeCPU(0, 1<<Num); NDS::ResumeCPU(0, 1<<Num);
} }
template <int ConsoleType>
void DMA::Run7() void DMA::Run7()
{ {
if (NDS::ARM7Timestamp >= NDS::ARM7Target) return; if (NDS::ARM7Timestamp >= NDS::ARM7Target) return;
@ -354,7 +340,10 @@ void DMA::Run7()
{ {
NDS::ARM7Timestamp += unitcycles; NDS::ARM7Timestamp += unitcycles;
BusWrite16(CurDstAddr, BusRead16(CurSrcAddr)); if (ConsoleType == 1)
DSi::ARM7Write16(CurDstAddr, DSi::ARM7Read16(CurSrcAddr));
else
NDS::ARM7Write16(CurDstAddr, NDS::ARM7Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1; CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1; CurDstAddr += DstAddrInc<<1;
@ -390,7 +379,10 @@ void DMA::Run7()
{ {
NDS::ARM7Timestamp += unitcycles; NDS::ARM7Timestamp += unitcycles;
BusWrite32(CurDstAddr, BusRead32(CurSrcAddr)); if (ConsoleType == 1)
DSi::ARM7Write32(CurDstAddr, DSi::ARM7Read32(CurSrcAddr));
else
NDS::ARM7Write32(CurDstAddr, NDS::ARM7Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2; CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2; CurDstAddr += DstAddrInc<<2;
@ -425,3 +417,14 @@ void DMA::Run7()
InProgress = false; InProgress = false;
NDS::ResumeCPU(1, 1<<Num); NDS::ResumeCPU(1, 1<<Num);
} }
// Entry point for advancing this DMA channel. Does nothing unless the
// channel is running; otherwise dispatches to the ARM9 or ARM7 worker
// (selected by CPU), forwarding the compile-time ConsoleType.
template <int ConsoleType>
void DMA::Run()
{
    if (Running)
    {
        if (CPU != 0)
            Run7<ConsoleType>();
        else
            Run9<ConsoleType>();
    }
}
template void DMA::Run<0>();
template void DMA::Run<1>();

View File

@ -34,9 +34,12 @@ public:
void WriteCnt(u32 val); void WriteCnt(u32 val);
void Start(); void Start();
template <int ConsoleType>
void Run(); void Run();
template <int ConsoleType>
void Run9(); void Run9();
template <int ConsoleType>
void Run7(); void Run7();
bool IsInMode(u32 mode) bool IsInMode(u32 mode)
@ -86,11 +89,6 @@ private:
bool Stall; bool Stall;
bool IsGXFIFODMA; bool IsGXFIFODMA;
u16 (*BusRead16)(u32 addr);
u32 (*BusRead32)(u32 addr);
void (*BusWrite16)(u32 addr, u16 val);
void (*BusWrite32)(u32 addr, u32 val);
}; };
#endif #endif

View File

@ -35,6 +35,7 @@
#include "DSi_I2C.h" #include "DSi_I2C.h"
#include "DSi_SD.h" #include "DSi_SD.h"
#include "DSi_AES.h" #include "DSi_AES.h"
#include "DSi_Camera.h"
#include "tiny-AES-c/aes.hpp" #include "tiny-AES-c/aes.hpp"
@ -542,15 +543,15 @@ void MapNWRAM_A(u32 num, u8 val)
return; return;
} }
#ifdef JIT_ENABLED
ARMJIT_Memory::RemapNWRAM(0);
#endif
int mbkn = 0, mbks = 8*num; int mbkn = 0, mbks = 8*num;
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return; if (oldval == val) return;
#ifdef JIT_ENABLED
ARMJIT_Memory::RemapNWRAM(0);
#endif
MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks); MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn]; MBK[1][mbkn] = MBK[0][mbkn];
@ -577,15 +578,15 @@ void MapNWRAM_B(u32 num, u8 val)
return; return;
} }
#ifdef JIT_ENABLED
ARMJIT_Memory::RemapNWRAM(1);
#endif
int mbkn = 1+(num>>2), mbks = 8*(num&3); int mbkn = 1+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return; if (oldval == val) return;
#ifdef JIT_ENABLED
ARMJIT_Memory::RemapNWRAM(1);
#endif
MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks); MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn]; MBK[1][mbkn] = MBK[0][mbkn];
@ -616,15 +617,15 @@ void MapNWRAM_C(u32 num, u8 val)
return; return;
} }
#ifdef JIT_ENABLED
ARMJIT_Memory::RemapNWRAM(2);
#endif
int mbkn = 3+(num>>2), mbks = 8*(num&3); int mbkn = 3+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return; if (oldval == val) return;
#ifdef JIT_ENABLED
ARMJIT_Memory::RemapNWRAM(2);
#endif
MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks); MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn]; MBK[1][mbkn] = MBK[0][mbkn];
@ -1406,6 +1407,12 @@ u8 ARM9IORead8(u32 addr)
CASE_READ8_32BIT(0x04004060, MBK[0][8]) CASE_READ8_32BIT(0x04004060, MBK[0][8])
} }
if ((addr & 0xFFFFFF00) == 0x04004200)
{
if (!(SCFG_EXT[0] & (1<<17))) return 0;
return DSi_Camera::Read8(addr);
}
return NDS::ARM9IORead8(addr); return NDS::ARM9IORead8(addr);
} }
@ -1428,6 +1435,12 @@ u16 ARM9IORead16(u32 addr)
CASE_READ16_32BIT(0x04004060, MBK[0][8]) CASE_READ16_32BIT(0x04004060, MBK[0][8])
} }
if ((addr & 0xFFFFFF00) == 0x04004200)
{
if (!(SCFG_EXT[0] & (1<<17))) return 0;
return DSi_Camera::Read16(addr);
}
return NDS::ARM9IORead16(addr); return NDS::ARM9IORead16(addr);
} }
@ -1480,6 +1493,12 @@ u32 ARM9IORead32(u32 addr)
case 0x04004170: return NDMAs[3]->Cnt; case 0x04004170: return NDMAs[3]->Cnt;
} }
if ((addr & 0xFFFFFF00) == 0x04004200)
{
if (!(SCFG_EXT[0] & (1<<17))) return 0;
return DSi_Camera::Read32(addr);
}
return NDS::ARM9IORead32(addr); return NDS::ARM9IORead32(addr);
} }
@ -1519,6 +1538,12 @@ void ARM9IOWrite8(u32 addr, u8 val)
case 0x04004053: MapNWRAM_C(7, val); return; case 0x04004053: MapNWRAM_C(7, val); return;
} }
if ((addr & 0xFFFFFF00) == 0x04004200)
{
if (!(SCFG_EXT[0] & (1<<17))) return;
return DSi_Camera::Write8(addr, val);
}
return NDS::ARM9IOWrite8(addr, val); return NDS::ARM9IOWrite8(addr, val);
} }
@ -1572,6 +1597,12 @@ void ARM9IOWrite16(u32 addr, u16 val)
return; return;
} }
if ((addr & 0xFFFFFF00) == 0x04004200)
{
if (!(SCFG_EXT[0] & (1<<17))) return;
return DSi_Camera::Write16(addr, val);
}
return NDS::ARM9IOWrite16(addr, val); return NDS::ARM9IOWrite16(addr, val);
} }
@ -1678,6 +1709,12 @@ void ARM9IOWrite32(u32 addr, u32 val)
case 0x04004170: NDMAs[3]->WriteCnt(val); return; case 0x04004170: NDMAs[3]->WriteCnt(val); return;
} }
if ((addr & 0xFFFFFF00) == 0x04004200)
{
if (!(SCFG_EXT[0] & (1<<17))) return;
return DSi_Camera::Write32(addr, val);
}
return NDS::ARM9IOWrite32(addr, val); return NDS::ARM9IOWrite32(addr, val);
} }

View File

@ -18,12 +18,28 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "DSi.h"
#include "DSi_Camera.h" #include "DSi_Camera.h"
DSi_Camera* DSi_Camera0; // 78 / facing outside DSi_Camera* DSi_Camera0; // 78 / facing outside
DSi_Camera* DSi_Camera1; // 7A / selfie cam DSi_Camera* DSi_Camera1; // 7A / selfie cam
u16 DSi_Camera::ModuleCnt;
u16 DSi_Camera::Cnt;
u8 DSi_Camera::FrameBuffer[640*480*4];
u32 DSi_Camera::FrameLength;
u32 DSi_Camera::TransferPos;
// note on camera data/etc intervals
// on hardware those are likely affected by several factors
// namely, how long cameras take to process frames
// camera IRQ is fired at roughly 15FPS with default config
const u32 kIRQInterval = 1120000; // ~30 FPS
const u32 kTransferStart = 60000;
bool DSi_Camera::Init() bool DSi_Camera::Init()
{ {
@ -43,6 +59,87 @@ void DSi_Camera::Reset()
{ {
DSi_Camera0->ResetCam(); DSi_Camera0->ResetCam();
DSi_Camera1->ResetCam(); DSi_Camera1->ResetCam();
ModuleCnt = 0; // CHECKME
Cnt = 0;
memset(FrameBuffer, 0, 640*480*4);
TransferPos = 0;
FrameLength = 256*192*2; // TODO: make it check frame size, data type, etc
NDS::ScheduleEvent(NDS::Event_DSi_CamIRQ, true, kIRQInterval, IRQ, 0);
}
// Handler for the recurring Event_DSi_CamIRQ event.
// If one of the two cameras is activated, a frame is requested from it,
// the camera IRQ is raised when bit 11 of Cnt is set, and a transfer event
// is scheduled when bit 15 of Cnt is set. The handler always schedules
// itself again after kIRQInterval. `param` is not used.
void DSi_Camera::IRQ(u32 param)
{
    // Camera 0 is tested first, so it is the one picked if both are activated.
    // TODO: check which camera has priority if both are activated
    // (or does it just jumble both data sources together, like it
    // does for, say, overlapping VRAM?)
    DSi_Camera* source = DSi_Camera0->IsActivated() ? DSi_Camera0
                       : (DSi_Camera1->IsActivated() ? DSi_Camera1 : nullptr);

    if (source != nullptr)
    {
        RequestFrame(source->Num);

        if ((Cnt & (1<<11)) != 0)
        {
            NDS::SetIRQ(0, NDS::IRQ_DSi_Camera);
        }

        if ((Cnt & (1<<15)) != 0)
        {
            NDS::ScheduleEvent(NDS::Event_DSi_CamTransfer, false, kTransferStart, Transfer, 0);
        }
    }

    // queue the next occurrence of this handler
    NDS::ScheduleEvent(NDS::Event_DSi_CamIRQ, true, kIRQInterval, IRQ, 0);
}
// Fills the start of FrameBuffer with a 256x192 test pattern of 16-bit
// pixels: 8x8 squares alternating between 0x8000 and a value that varies
// with the row. `cam` is currently not used.
void DSi_Camera::RequestFrame(u32 cam)
{
    const bool bit13Clear = (Cnt & (1<<13)) == 0;
    if (bit13Clear)
        printf("CAMERA: !! REQUESTING YUV FRAME\n");

    // TODO: picture size, data type, cropping, etc

    // generate test pattern
    // TODO: get picture from platform (actual camera, video file, whatever source)
    u16* out = (u16*)&FrameBuffer[0];
    for (u32 row = 0; row < 192; row++)
    {
        // value used for the "other" squares on this row
        const u16 shade = 0xFC00 | ((row >> 3) << 5);

        for (u32 col = 0; col < 256; col++)
        {
            // bit 3 flips every 8 pixels in each direction
            const bool dark = ((col ^ row) & 0x8) != 0;
            *out++ = dark ? 0x8000 : shade;
        }
    }
}
void DSi_Camera::Transfer(u32 pos)
{
u32 numscan = (Cnt & 0x000F) + 1;
u32 numpix = numscan * 256; // CHECKME
// TODO: present data
//printf("CAM TRANSFER POS=%d/%d\n", pos, 0x6000*2);
DSi::CheckNDMAs(0, 0x0B);
pos += numpix;
if (pos >= 0x6000*2) // HACK
{
// transfer done
}
else
{
// keep going
// TODO: must be tweaked such that each block has enough time to transfer
u32 delay = numpix*2 + 16;
NDS::ScheduleEvent(NDS::Event_DSi_CamTransfer, false, delay, Transfer, pos);
}
} }
@ -62,16 +159,28 @@ void DSi_Camera::ResetCam()
RegAddr = 0; RegAddr = 0;
RegData = 0; RegData = 0;
PLLCnt = 0; PLLDiv = 0x0366;
PLLPDiv = 0x00F5;
PLLCnt = 0x21F9;
ClocksCnt = 0;
StandbyCnt = 0x4029; // checkme StandbyCnt = 0x4029; // checkme
MiscCnt = 0;
}
bool DSi_Camera::IsActivated()
{
if (StandbyCnt & (1<<14)) return false; // standby
if (!(MiscCnt & (1<<9))) return false; // data transfer not enabled
return true;
} }
void DSi_Camera::Start() void DSi_Camera::I2C_Start()
{ {
} }
u8 DSi_Camera::Read(bool last) u8 DSi_Camera::I2C_Read(bool last)
{ {
u8 ret; u8 ret;
@ -89,7 +198,7 @@ u8 DSi_Camera::Read(bool last)
} }
else else
{ {
RegData = ReadReg(RegAddr); RegData = I2C_ReadReg(RegAddr);
ret = RegData >> 8; ret = RegData >> 8;
} }
} }
@ -100,7 +209,7 @@ u8 DSi_Camera::Read(bool last)
return ret; return ret;
} }
void DSi_Camera::Write(u8 val, bool last) void DSi_Camera::I2C_Write(u8 val, bool last)
{ {
if (DataPos < 2) if (DataPos < 2)
{ {
@ -116,7 +225,7 @@ void DSi_Camera::Write(u8 val, bool last)
if (DataPos & 0x1) if (DataPos & 0x1)
{ {
RegData |= val; RegData |= val;
WriteReg(RegAddr, RegData); I2C_WriteReg(RegAddr, RegData);
RegAddr += 2; // checkme RegAddr += 2; // checkme
} }
else else
@ -129,38 +238,172 @@ void DSi_Camera::Write(u8 val, bool last)
else DataPos++; else DataPos++;
} }
u16 DSi_Camera::ReadReg(u16 addr) u16 DSi_Camera::I2C_ReadReg(u16 addr)
{ {
switch (addr) switch (addr)
{ {
case 0x0000: return 0x2280; // chip ID case 0x0000: return 0x2280; // chip ID
case 0x0010: return PLLDiv;
case 0x0012: return PLLPDiv;
case 0x0014: return PLLCnt; case 0x0014: return PLLCnt;
case 0x0016: return ClocksCnt;
case 0x0018: return StandbyCnt; case 0x0018: return StandbyCnt;
case 0x001A: return MiscCnt;
case 0x301A: return ((~StandbyCnt) & 0x4000) >> 12; case 0x301A: return ((~StandbyCnt) & 0x4000) >> 12;
} }
//printf("DSi_Camera%d: unknown read %04X\n", Num, addr); if(Num==1)printf("DSi_Camera%d: unknown read %04X\n", Num, addr);
return 0; return 0;
} }
void DSi_Camera::WriteReg(u16 addr, u16 val) void DSi_Camera::I2C_WriteReg(u16 addr, u16 val)
{ {
switch (addr) switch (addr)
{ {
case 0x0010:
PLLDiv = val & 0x3FFF;
return;
case 0x0012:
PLLPDiv = val & 0xBFFF;
return;
case 0x0014: case 0x0014:
// shouldn't be instant either? // shouldn't be instant either?
val &= 0x7FFF; val &= 0x7FFF;
val |= ((val & 0x0002) << 14); val |= ((val & 0x0002) << 14);
PLLCnt = val; PLLCnt = val;
return; return;
case 0x0016:
ClocksCnt = val;
printf("ClocksCnt=%04X\n", val);
return;
case 0x0018: case 0x0018:
// TODO: this shouldn't be instant, but uh // TODO: this shouldn't be instant, but uh
val &= 0x003F; val &= 0x003F;
val |= ((val & 0x0001) << 14); val |= ((val & 0x0001) << 14);
StandbyCnt = val; StandbyCnt = val;
printf("CAM%d STBCNT=%04X (%04X)\n", Num, StandbyCnt, val);
return;
case 0x001A:
MiscCnt = val & 0x0B7B;
printf("CAM%d MISCCNT=%04X (%04X)\n", Num, MiscCnt, val);
return; return;
} }
//printf("DSi_Camera%d: unknown write %04X %04X\n", Num, addr, val); if(Num==1)printf("DSi_Camera%d: unknown write %04X %04X\n", Num, addr, val);
}
// 8-bit read from the camera I/O range. No register is handled at this
// width yet: every access is logged as unknown and reads back as 0.
u8 DSi_Camera::Read8(u32 addr)
{
//
printf("unknown DSi cam read8 %08X\n", addr);
return 0;
}
// 16-bit read from the camera I/O range.
// 0x04004200 returns ModuleCnt and 0x04004202 returns Cnt; any other
// address is logged as unknown and reads back as 0. Every call is also
// logged together with the value of NDS::GetPC(0).
u16 DSi_Camera::Read16(u32 addr)
{
    printf("CAM READ %08X %08X\n", addr, NDS::GetPC(0));

    if (addr == 0x04004200) return ModuleCnt;
    if (addr == 0x04004202) return Cnt;

    printf("unknown DSi cam read16 %08X\n", addr);
    return 0;
}
u32 dorp = 0;
// 32-bit read from the camera I/O range.
// Only 0x04004204 is handled; any other address is logged as unknown and
// reads back as 0.
u32 DSi_Camera::Read32(u32 addr)
{
switch (addr)
{
case 0x04004204:
{
// NOTE(review): this unconditional return makes the rest of this case
// unreachable, so the register currently always reads 0xFC00801F.
// Presumably a temporary stub -- confirm before relying on the code below.
return 0xFC00801F;
// Unreachable as written: would return 0 while bit 15 of Cnt is clear.
if (!(Cnt & (1<<15))) return 0; // CHECKME
// Unreachable as written: would return the next 4 bytes of FrameBuffer and
// advance TransferPos, wrapping to 0 once FrameLength is reached.
u32 ret = *(u32*)&FrameBuffer[TransferPos];
TransferPos += 4;
if (TransferPos >= FrameLength) TransferPos = 0;
dorp += 4;
//if (dorp >= (256*4*2))
// On wrap-around the counter is reset and bit 4 of Cnt is cleared.
if (TransferPos == 0)
{
dorp = 0;
Cnt &= ~(1<<4);
}
return ret;
}
}
printf("unknown DSi cam read32 %08X\n", addr);
return 0;
}
// 8-bit write to the camera I/O range. No register is handled at this
// width yet: every access is logged as unknown and otherwise ignored.
void DSi_Camera::Write8(u32 addr, u8 val)
{
//
printf("unknown DSi cam write8 %08X %02X\n", addr, val);
}
// 16-bit write to the camera I/O range.
// 0x04004200 updates ModuleCnt and 0x04004202 updates Cnt; any other
// address is logged as unknown. Every call is also logged together with
// the value of NDS::GetPC(0).
void DSi_Camera::Write16(u32 addr, u16 val)
{printf("CAM WRITE %08X %04X %08X\n", addr, val, NDS::GetPC(0));
switch (addr)
{
case 0x04004200:
{
// keep the previous value so rising edges of individual bits can be detected
u16 oldcnt = ModuleCnt;
ModuleCnt = val;
// bit 1 going from 0 to 1 clears Cnt
if ((ModuleCnt & (1<<1)) && !(oldcnt & (1<<1)))
{
// reset shit to zero
// CHECKME
Cnt = 0;
}
// bit 5 going from 0 to 1: nothing is done yet
if ((ModuleCnt & (1<<5)) && !(oldcnt & (1<<5)))
{
// TODO: reset I2C??
}
}
return;
case 0x04004202:
{
// checkme
// Which bits of the write are accepted, and which bits of the old Cnt are
// kept, depends on whether bit 15 of Cnt is currently set.
u16 oldmask;
if (Cnt & 0x8000)
{
val &= 0x8F20;
oldmask = 0x601F;
}
else
{
val &= 0xEF2F;
oldmask = 0x0010;
}
// bit 5 of the written value is never stored in Cnt...
Cnt = (Cnt & oldmask) | (val & ~0x0020);
// ...writing it as 1 clears bit 4 of Cnt instead
if (val & (1<<5)) Cnt &= ~(1<<4);
// NOTE(review): Cnt was already updated above and neither oldmask keeps
// bit 15, so bit 15 of Cnt always equals bit 15 of val here and this
// condition can never be true. Detecting a 0->1 transition would need the
// value of Cnt from before the update.
if ((val & (1<<15)) && !(Cnt & (1<<15)))
{
// start transfer
//DSi::CheckNDMAs(0, 0x0B);
}
}
return;
}
printf("unknown DSi cam write16 %08X %04X\n", addr, val);
}
void DSi_Camera::Write32(u32 addr, u32 val)
{
//
printf("unknown DSi cam write32 %08X %08X\n", addr, val);
} }

View File

@ -28,27 +28,56 @@ public:
static void DeInit(); static void DeInit();
static void Reset(); static void Reset();
static void IRQ(u32 param);
static void RequestFrame(u32 cam);
static void Transfer(u32 pos);
DSi_Camera(u32 num); DSi_Camera(u32 num);
~DSi_Camera(); ~DSi_Camera();
void ResetCam(); void ResetCam();
bool IsActivated();
void Start(); void I2C_Start();
u8 Read(bool last); u8 I2C_Read(bool last);
void Write(u8 val, bool last); void I2C_Write(u8 val, bool last);
static u8 Read8(u32 addr);
static u16 Read16(u32 addr);
static u32 Read32(u32 addr);
static void Write8(u32 addr, u8 val);
static void Write16(u32 addr, u16 val);
static void Write32(u32 addr, u32 val);
private:
u32 Num; u32 Num;
private:
u32 DataPos; u32 DataPos;
u32 RegAddr; u32 RegAddr;
u16 RegData; u16 RegData;
u16 ReadReg(u16 addr); u16 I2C_ReadReg(u16 addr);
void WriteReg(u16 addr, u16 val); void I2C_WriteReg(u16 addr, u16 val);
u16 PLLDiv;
u16 PLLPDiv;
u16 PLLCnt; u16 PLLCnt;
u16 ClocksCnt;
u16 StandbyCnt; u16 StandbyCnt;
u16 MiscCnt;
u16 MCUAddr;
u16* MCUData;
u8 MCURegs[0x8000];
static u16 ModuleCnt;
static u16 Cnt;
static u8 FrameBuffer[640*480*4];
static u32 TransferPos;
static u32 FrameLength;
}; };

View File

@ -50,7 +50,7 @@ void Reset()
Registers[0x10] = 0x00; // power btn Registers[0x10] = 0x00; // power btn
Registers[0x11] = 0x00; // reset Registers[0x11] = 0x00; // reset
Registers[0x12] = 0x00; // power btn tap Registers[0x12] = 0x00; // power btn tap
Registers[0x20] = 0x83; // battery Registers[0x20] = 0x8F; // battery
Registers[0x21] = 0x07; Registers[0x21] = 0x07;
Registers[0x30] = 0x13; Registers[0x30] = 0x13;
Registers[0x31] = 0x00; // camera power Registers[0x31] = 0x00; // camera power
@ -187,8 +187,10 @@ void WriteCnt(u8 val)
switch (Device) switch (Device)
{ {
case 0x4A: Data = DSi_BPTWL::Read(islast); break; case 0x4A: Data = DSi_BPTWL::Read(islast); break;
case 0x78: Data = DSi_Camera0->Read(islast); break; case 0x78: Data = DSi_Camera0->I2C_Read(islast); break;
case 0x7A: Data = DSi_Camera1->Read(islast); break; case 0x7A: Data = DSi_Camera1->I2C_Read(islast); break;
case 0xA0:
case 0xE0: Data = 0xFF; break;
default: default:
printf("I2C: read on unknown device %02X, cnt=%02X, data=%02X, last=%d\n", Device, val, 0, islast); printf("I2C: read on unknown device %02X, cnt=%02X, data=%02X, last=%d\n", Device, val, 0, islast);
Data = 0xFF; Data = 0xFF;
@ -211,8 +213,10 @@ void WriteCnt(u8 val)
switch (Device) switch (Device)
{ {
case 0x4A: DSi_BPTWL::Start(); break; case 0x4A: DSi_BPTWL::Start(); break;
case 0x78: DSi_Camera0->Start(); break; case 0x78: DSi_Camera0->I2C_Start(); break;
case 0x7A: DSi_Camera1->Start(); break; case 0x7A: DSi_Camera1->I2C_Start(); break;
case 0xA0:
case 0xE0: ack = false; break;
default: default:
printf("I2C: %s start on unknown device %02X\n", (Data&0x01)?"read":"write", Device); printf("I2C: %s start on unknown device %02X\n", (Data&0x01)?"read":"write", Device);
ack = false; ack = false;
@ -226,8 +230,10 @@ void WriteCnt(u8 val)
switch (Device) switch (Device)
{ {
case 0x4A: DSi_BPTWL::Write(Data, islast); break; case 0x4A: DSi_BPTWL::Write(Data, islast); break;
case 0x78: DSi_Camera0->Write(Data, islast); break; case 0x78: DSi_Camera0->I2C_Write(Data, islast); break;
case 0x7A: DSi_Camera1->Write(Data, islast); break; case 0x7A: DSi_Camera1->I2C_Write(Data, islast); break;
case 0xA0:
case 0xE0: ack = false; break;
default: default:
printf("I2C: write on unknown device %02X, cnt=%02X, data=%02X, last=%d\n", Device, val, Data, islast); printf("I2C: write on unknown device %02X, cnt=%02X, data=%02X, last=%d\n", Device, val, Data, islast);
ack = false; ack = false;

View File

@ -101,7 +101,7 @@ void DSi_NDMA::WriteCnt(u32 val)
Start(); Start();
if (StartMode != 0x10 && StartMode != 0x30 && if (StartMode != 0x10 && StartMode != 0x30 &&
StartMode != 0x04 && StartMode != 0x06 && StartMode != 0x07 && StartMode != 0x08 && StartMode != 0x09 && StartMode != 0x04 && StartMode != 0x06 && StartMode != 0x07 && StartMode != 0x08 && StartMode != 0x09 && StartMode != 0x0B &&
StartMode != 0x24 && StartMode != 0x26 && StartMode != 0x28 && StartMode != 0x29 && StartMode != 0x2A && StartMode != 0x2B) StartMode != 0x24 && StartMode != 0x26 && StartMode != 0x28 && StartMode != 0x29 && StartMode != 0x2A && StartMode != 0x2B)
printf("UNIMPLEMENTED ARM%d NDMA%d START MODE %02X, %08X->%08X LEN=%d BLK=%d CNT=%08X\n", printf("UNIMPLEMENTED ARM%d NDMA%d START MODE %02X, %08X->%08X LEN=%d BLK=%d CNT=%08X\n",
CPU?7:9, Num, StartMode, SrcAddr, DstAddr, TotalLength, BlockLength, Cnt); CPU?7:9, Num, StartMode, SrcAddr, DstAddr, TotalLength, BlockLength, Cnt);

View File

@ -778,6 +778,23 @@ void DSi_MMCStorage::SendCMD(u8 cmd, u32 param)
Host->SendResponse(CSR, true); Host->SendResponse(CSR, true);
return; return;
case 1: // SEND_OP_COND
// CHECKME!!
// also TODO: it's different for the SD card
if (Internal)
{
param &= ~(1<<30);
OCR &= 0xBF000000;
OCR |= (param & 0x40FFFFFF);
Host->SendResponse(OCR, true);
SetState(0x01);
}
else
{
printf("CMD1 on SD card!!\n");
}
return;
case 2: case 2:
case 10: // get CID case 10: // get CID
Host->SendResponse(*(u32*)&CID[12], false); Host->SendResponse(*(u32*)&CID[12], false);
@ -801,6 +818,11 @@ void DSi_MMCStorage::SendCMD(u8 cmd, u32 param)
} }
return; return;
case 6: // MMC: 'SWITCH'
// TODO!
Host->SendResponse(CSR, true);
return;
case 7: // select card (by RCA) case 7: // select card (by RCA)
Host->SendResponse(CSR, true); Host->SendResponse(CSR, true);
return; return;

View File

@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024];
u8 VRAM_G[ 16*1024]; u8 VRAM_G[ 16*1024];
u8 VRAM_H[ 32*1024]; u8 VRAM_H[ 32*1024];
u8 VRAM_I[ 16*1024]; u8 VRAM_I[ 16*1024];
u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
u8 VRAMCNT[9]; u8 VRAMCNT[9];
u8 VRAMSTAT; u8 VRAMSTAT;
@ -85,11 +85,67 @@ bool Accelerated;
GPU2D* GPU2D_A; GPU2D* GPU2D_A;
GPU2D* GPU2D_B; GPU2D* GPU2D_B;
/*
VRAM invalidation tracking
- we want to know when a VRAM region used for graphics changed
- for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and
we don't want to completely invalidate them every time they're unmapped and remapped
For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank
with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions
like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags.
This is more or less a description of VRAMTrackingSet::DeriveState
Each time before the memory is read two things could have happened
to each 16kb piece (16kb is the smallest unit in which mappings can
be made thus also the size VRAMMap_* use):
- this piece was remapped compared to last time we checked,
which means this location in memory is invalid.
- this piece wasn't remapped, which means we need to check whether
it was changed. This can be achieved by checking VRAMDirty.
VRAMDirty needs to be reset for the respective VRAM bank.
*/
// Per-region tracking sets. Template arguments are <region size, mapping slot size>;
// each set remembers the bank mapping last seen for every slot of its region
// (see VRAMTrackingSet::DeriveState).
VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
// Flags set by the WriteVRAM_* functions for writes that go through a mapped
// region; SyncDirtyFlags() transfers them into VRAMDirty and clears them.
NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
// Per-bank dirty flags, indexed by bank number; one flag per
// VRAMDirtyGranularity bytes of the bank.
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
// Linear copies of each region, refreshed chunk by chunk by the
// MakeVRAMFlat_*Coherent functions.
u8 VRAMFlat_ABG[512*1024];
u8 VRAMFlat_BBG[128*1024];
u8 VRAMFlat_AOBJ[256*1024];
u8 VRAMFlat_BOBJ[128*1024];
u8 VRAMFlat_ABGExtPal[32*1024];
u8 VRAMFlat_BBGExtPal[32*1024];
u8 VRAMFlat_AOBJExtPal[8*1024];
u8 VRAMFlat_BOBJExtPal[8*1024];
u8 VRAMFlat_Texture[512*1024];
u8 VRAMFlat_TexPal[128*1024];
bool Init() bool Init()
{ {
GPU2D_A = new GPU2D(0); GPU2D_A = new GPU2D_Soft(0);
GPU2D_B = new GPU2D(1); GPU2D_B = new GPU2D_Soft(1);
if (!GPU3D::Init()) return false; if (!GPU3D::Init()) return false;
FrontBuffer = 0; FrontBuffer = 0;
@ -113,6 +169,34 @@ void DeInit()
if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; if (Framebuffer[1][1]) delete[] Framebuffer[1][1];
} }
// Drops all cached VRAM state: clears the per-bank dirty flags, resets every
// per-region tracking set and zero-fills every flat VRAM copy.
void ResetVRAMCache()
{
    // replace each bank's flags with a freshly constructed bitfield
    for (auto& bankFlags : VRAMDirty)
        bankFlags = NonStupidBitField<128*1024/VRAMDirtyGranularity>();

    // forget the mappings remembered for each region
    VRAMDirty_ABG.Reset();
    VRAMDirty_BBG.Reset();
    VRAMDirty_AOBJ.Reset();
    VRAMDirty_BOBJ.Reset();
    VRAMDirty_ABGExtPal.Reset();
    VRAMDirty_BBGExtPal.Reset();
    VRAMDirty_AOBJExtPal.Reset();
    VRAMDirty_BOBJExtPal.Reset();
    VRAMDirty_Texture.Reset();
    VRAMDirty_TexPal.Reset();

    // wipe the linear copies
    memset(VRAMFlat_ABG,        0, sizeof(VRAMFlat_ABG));
    memset(VRAMFlat_BBG,        0, sizeof(VRAMFlat_BBG));
    memset(VRAMFlat_AOBJ,       0, sizeof(VRAMFlat_AOBJ));
    memset(VRAMFlat_BOBJ,       0, sizeof(VRAMFlat_BOBJ));
    memset(VRAMFlat_ABGExtPal,  0, sizeof(VRAMFlat_ABGExtPal));
    memset(VRAMFlat_BBGExtPal,  0, sizeof(VRAMFlat_BBGExtPal));
    memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal));
    memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal));
    memset(VRAMFlat_Texture,    0, sizeof(VRAMFlat_Texture));
    memset(VRAMFlat_TexPal,     0, sizeof(VRAMFlat_TexPal));
}
void Reset() void Reset()
{ {
VCount = 0; VCount = 0;
@ -186,6 +270,8 @@ void Reset()
GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]);
ResetRenderer(); ResetRenderer();
ResetVRAMCache();
} }
void Stop() void Stop()
@ -261,6 +347,8 @@ void DoSavestate(Savestate* file)
GPU2D_A->DoSavestate(file); GPU2D_A->DoSavestate(file);
GPU2D_B->DoSavestate(file); GPU2D_B->DoSavestate(file);
GPU3D::DoSavestate(file); GPU3D::DoSavestate(file);
ResetVRAMCache();
} }
void AssignFramebuffers() void AssignFramebuffers()
@ -411,18 +499,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
u8* GetUniqueBankPtr(u32 mask, u32 offset) u8* GetUniqueBankPtr(u32 mask, u32 offset)
{ {
if (!mask) return NULL; if (!mask || (mask & (mask - 1)) != 0) return NULL;
int num = __builtin_ctz(mask);
int num = 0;
if (!(mask & 0xFF)) { mask >>= 8; num += 8; }
else
{
if (!(mask & 0xF)) { mask >>= 4; num += 4; }
if (!(mask & 0x3)) { mask >>= 2; num += 2; }
if (!(mask & 0x1)) { mask >>= 1; num += 1; }
}
if (mask != 1) return NULL;
return &VRAM[num][offset & VRAMMask[num]]; return &VRAM[num][offset & VRAMMask[num]];
} }
@ -606,8 +684,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
UNMAP_RANGE(ABGExtPal, 0, 4); UNMAP_RANGE(ABGExtPal, 0, 4);
GPU2D_A->BGExtPalDirty(0);
GPU2D_A->BGExtPalDirty(2);
break; break;
} }
} }
@ -634,8 +710,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
MAP_RANGE(ABGExtPal, 0, 4); MAP_RANGE(ABGExtPal, 0, 4);
GPU2D_A->BGExtPalDirty(0);
GPU2D_A->BGExtPalDirty(2);
break; break;
} }
} }
@ -687,12 +761,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask; VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask;
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask; VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask;
GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1);
break; break;
case 5: // AOBJ ext palette case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal &= ~bankmask; VRAMMap_AOBJExtPal &= ~bankmask;
GPU2D_A->OBJExtPalDirty();
break; break;
} }
} }
@ -732,12 +804,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask; VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask;
VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask; VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask;
GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1);
break; break;
case 5: // AOBJ ext palette case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal |= bankmask; VRAMMap_AOBJExtPal |= bankmask;
GPU2D_A->OBJExtPalDirty();
break; break;
} }
} }
@ -773,8 +843,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette case 2: // BBG ext palette
UNMAP_RANGE(BBGExtPal, 0, 4); UNMAP_RANGE(BBGExtPal, 0, 4);
GPU2D_B->BGExtPalDirty(0);
GPU2D_B->BGExtPalDirty(2);
break; break;
} }
} }
@ -800,8 +868,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette case 2: // BBG ext palette
MAP_RANGE(BBGExtPal, 0, 4); MAP_RANGE(BBGExtPal, 0, 4);
GPU2D_B->BGExtPalDirty(0);
GPU2D_B->BGExtPalDirty(2);
break; break;
} }
} }
@ -841,7 +907,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal &= ~bankmask; VRAMMap_BOBJExtPal &= ~bankmask;
GPU2D_B->OBJExtPalDirty();
break; break;
} }
} }
@ -871,7 +936,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal |= bankmask; VRAMMap_BOBJExtPal |= bankmask;
GPU2D_B->OBJExtPalDirty();
break; break;
} }
} }
@ -937,6 +1001,8 @@ void StartHBlank(u32 line)
DispStat[0] |= (1<<1); DispStat[0] |= (1<<1);
DispStat[1] |= (1<<1); DispStat[1] |= (1<<1);
SyncDirtyFlags();
if (VCount < 192) if (VCount < 192)
{ {
// draw // draw
@ -1096,4 +1162,224 @@ void SetVCount(u16 val)
NextVCount = val; NextVCount = val;
} }
// Works out which VRAMDirtyGranularity-sized chunks of this region have to be
// re-read, given the region's current bank mappings.
//
// currentMappings: one bank bitmask per mapping slot; Size / MappingGranularity
//                  entries are read.
// Returns a bitfield with one flag per chunk of the region.
//
// Side effects: Mapping[] is updated to the current mappings, and the VRAMDirty
// flags of every bank that appears in currentMappings are cleared.
template <u32 Size, u32 MappingGranularity>
NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings)
{
NonStupidBitField<Size/VRAMDirtyGranularity> result;
// union of all bank bits encountered; their VRAMDirty flags are cleared at the end
u16 banksToBeZeroed = 0;
for (u32 i = 0; i < Size / MappingGranularity; i++)
{
if (currentMappings[i] != Mapping[i])
{
// slot was remapped since the last call: flag the whole slot and remember the new mapping
// (presumably this constructor yields a bitfield with that range of flags set -- confirm in NonStupidBitfield.h)
result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
banksToBeZeroed |= currentMappings[i];
Mapping[i] = currentMappings[i];
}
else
{
// mapping unchanged: OR in the dirty flags of every bank mapped to this slot
u32 mapping = Mapping[i];
banksToBeZeroed |= mapping;
while (mapping != 0)
{
// take the lowest set bank bit and remove it from the mask
u32 num = __builtin_ctz(mapping);
mapping &= ~(1 << num);
// hack for **speed**
// this could probably be done less ugly but then we would rely
// on the compiler for vectorisation
static_assert(VRAMDirtyGranularity == 512);
if (MappingGranularity == 16*1024)
{
// 16KB slot = 32 flags = one u32 word; the slot index wraps at the bank's size in 16KB units
u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)];
((u32*)result.Data)[i] |= dirty;
}
else if (MappingGranularity == 8*1024)
{
// 8KB slot = 16 flags = one u16 word; the slot index wraps at the bank's size in 8KB units
u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
((u16*)result.Data)[i] |= dirty;
}
else if (MappingGranularity == 128*1024)
{
// 128KB slot = 256 flags = four u64 words, i.e. all of the bank's flags
((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0];
((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
}
else
{
// welp
abort();
}
}
}
}
// the flags of every bank seen above are now accounted for in result (or the
// slot was fully invalidated), so clear them
while (banksToBeZeroed != 0)
{
u32 num = __builtin_ctz(banksToBeZeroed);
banksToBeZeroed &= ~(1 << num);
memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data));
}
return result;
}
// Explicit instantiations of DeriveState, one per <Size, MappingGranularity>
// combination used by the VRAMDirty_* tracking sets. The header only declares
// DeriveState, so other translation units rely on these.
template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*);
template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*);
template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*);
template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*);
template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*);
template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*);
// Transfers the write flags recorded for one mapped region into the per-bank
// VRAMDirty flags, then clears the region's write flags.
//
// mappings:     bank bitmask for each 16KB slot of the region
// writtenFlags: one flag per VRAMDirtyGranularity bytes of the region
template <u32 Size>
void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags)
{
    const u32 flagsPerSlot = 16*1024/VRAMDirtyGranularity;

    typename NonStupidBitField<Size>::Iterator cur = writtenFlags.Begin();
    while (cur != writtenFlags.End())
    {
        // mark the matching chunk dirty in every bank mapped to this slot
        for (u32 banks = mappings[*cur / flagsPerSlot]; banks != 0; )
        {
            u32 bank = __builtin_ctz(banks);
            // the chunk index wraps at the bank's own size
            VRAMDirty[bank][*cur & (VRAMMask[bank] / VRAMDirtyGranularity)] = true;
            banks &= ~(1 << bank);
        }
        cur++;
    }

    memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
}
// Flushes the write flags of every mapped region (ABG, AOBJ, BBG, BOBJ, ARM7)
// into the per-bank VRAMDirty flags.
void SyncDirtyFlags()
{
SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG);
SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ);
SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG);
SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ);
// NOTE(review): the templated helper indexes the mapping table per 16KB slot,
// while WriteVRAM_ARM7 indexes VRAMMap_ARM7 by address bit 17 (128KB slots) --
// confirm this table really has an entry per 16KB slot.
SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7);
}
// Refreshes every flagged chunk of a flat VRAM copy from the banks mapped there.
//
// flat:       destination buffer covering the whole region
// mappings:   bank bitmask for each MappingGranularity-sized slot
// dirty:      one flag per VRAMDirtyGranularity bytes; only flagged chunks are copied
// slowAccess: reads 8 bytes at a region offset; used when GetUniqueBankPtr
//             returns no direct pointer for the slot
// Returns true if at least one chunk was flagged.
template <u32 MappingGranularity, u32 Size>
inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr))
{
    const u32 flagsPerSlot = MappingGranularity / VRAMDirtyGranularity;

    bool copiedAny = false;
    for (typename NonStupidBitField<Size>::Iterator chunk = dirty.Begin(); chunk != dirty.End(); chunk++)
    {
        u32 byteOffset = *chunk * VRAMDirtyGranularity;
        u8* target = flat + byteOffset;
        u8* direct = GetUniqueBankPtr(mappings[*chunk / flagsPerSlot], byteOffset);

        if (!direct)
        {
            // no direct pointer: assemble the chunk 8 bytes at a time
            for (u32 pos = 0; pos < VRAMDirtyGranularity; pos += 8)
                *(u64*)&target[pos] = slowAccess(byteOffset + pos);
        }
        else
        {
            memcpy(target, direct, VRAMDirtyGranularity);
        }

        copiedAny = true;
    }

    return copiedAny;
}
// Each MakeVRAMFlat_*Coherent function below refreshes the chunks flagged in
// `dirty` of the matching VRAMFlat_* buffer via CopyLinearVRAM and returns
// whether any chunk was flagged. The template argument is the size of one
// mapping slot of that region.
bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture<u64>);
}
bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal<u64>);
}
bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG<u64>);
}
bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG<u64>);
}
bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ<u64>);
}
bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ<u64>);
}
// Reads a T from the ABG ext palette region at `addr`, OR-ing together the
// data of every bank (E, F, G) mapped to the addressed 8KB slot.
// Returns 0 when no bank is mapped there.
template<typename T>
T ReadVRAM_ABGExtPal(u32 addr)
{
    const u32 banks = VRAMMap_ABGExtPal[(addr >> 13) & 0x3];

    T value = 0;
    if (banks & (1<<4))
        value |= *(T*)&VRAM_E[addr & 0x7FFF];
    if (banks & (1<<5))
        value |= *(T*)&VRAM_F[addr & 0x3FFF];
    if (banks & (1<<6))
        value |= *(T*)&VRAM_G[addr & 0x3FFF];

    return value;
}
// Reads a T from the BBG ext palette region at `addr`; the data comes from
// bank H when it is mapped to the addressed 8KB slot, otherwise 0 is returned.
template<typename T>
T ReadVRAM_BBGExtPal(u32 addr)
{
    const u32 banks = VRAMMap_BBGExtPal[(addr >> 13) & 0x3];

    T value = 0;
    if (banks & (1<<7))
        value |= *(T*)&VRAM_H[addr & 0x7FFF];

    return value;
}
// Reads a T from the AOBJ ext palette region at `addr`, OR-ing together the
// data of every bank (F, G) mapped there. Returns 0 when no bank is mapped.
template<typename T>
T ReadVRAM_AOBJExtPal(u32 addr)
{
    u32 mask = VRAMMap_AOBJExtPal;

    T ret = 0;
    // Bank bits follow the VRAM[] order (A=0 ... I=8): F is bit 5 and G is bit 6,
    // as in ReadVRAM_ABGExtPal. Testing bits 4/5 here dropped bank F's data and
    // keyed bank G on F's bit.
    if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x1FFF];
    if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x1FFF];

    return ret;
}
// Reads a T from the BOBJ ext palette region at `addr`; the data comes from
// bank I when it is mapped there, otherwise 0 is returned.
template<typename T>
T ReadVRAM_BOBJExtPal(u32 addr)
{
    const u32 banks = VRAMMap_BOBJExtPal;

    T value = 0;
    if (banks & (1<<8))
        value |= *(T*)&VRAM_I[addr & 0x1FFF];

    return value;
}
// Ext palette variants: each function refreshes the chunks flagged in `dirty`
// of the matching VRAMFlat_*ExtPal buffer via CopyLinearVRAM (8KB mapping
// slots) and returns whether any chunk was flagged.
bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal<u64>);
}
bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal<u64>);
}
// The OBJ ext palette maps are single values rather than arrays, so their
// address is passed as a one-entry mapping table.
bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal<u64>);
}
bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal<u64>);
}
} }

View File

@ -20,6 +20,7 @@
#define GPU_H #define GPU_H
#include "GPU2D.h" #include "GPU2D.h"
#include "NonStupidBitfield.h"
namespace GPU namespace GPU
{ {
@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024];
extern u8 VRAM_H[ 32*1024]; extern u8 VRAM_H[ 32*1024];
extern u8 VRAM_I[ 16*1024]; extern u8 VRAM_I[ 16*1024];
extern u8* VRAM[9]; extern u8* const VRAM[9];
extern u32 VRAMMap_LCDC; extern u32 VRAMMap_LCDC;
extern u32 VRAMMap_ABG[0x20]; extern u32 VRAMMap_ABG[0x20];
@ -73,6 +74,78 @@ extern GPU2D* GPU2D_B;
extern int Renderer; extern int Renderer;
const u32 VRAMDirtyGranularity = 512;
extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
// Remembers which banks were mapped to each slot of a VRAM region the last
// time the region was checked, so that DeriveState can tell remapped slots
// apart from slots whose mapping is unchanged.
//   Size               - size of the region in bytes
//   MappingGranularity - size of one mapping slot in bytes
template <u32 Size, u32 MappingGranularity>
struct VRAMTrackingSet
{
    // bank bitmask last seen for each mapping slot
    u16 Mapping[Size / MappingGranularity];

    // number of dirty flags that cover one mapping slot
    const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;

    // Forgets every remembered mapping, so the next DeriveState call sees a
    // mismatch on every slot.
    void Reset()
    {
        // unsigned index: the bound is a u32 expression (same as the loop in DeriveState)
        for (u32 i = 0; i < Size / MappingGranularity; i++)
        {
            // this is not a real VRAM bank
            // so it will always be a mismatch => the bank will be completely invalidated
            Mapping[i] = 0x8000;
        }
    }

    // Returns the chunks of the region that need to be re-read for the given
    // current mappings (one bank bitmask per mapping slot).
    NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings);
};
extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
extern u8 VRAMFlat_ABG[512*1024];
extern u8 VRAMFlat_BBG[128*1024];
extern u8 VRAMFlat_AOBJ[256*1024];
extern u8 VRAMFlat_BOBJ[128*1024];
extern u8 VRAMFlat_ABGExtPal[32*1024];
extern u8 VRAMFlat_BBGExtPal[32*1024];
extern u8 VRAMFlat_AOBJExtPal[8*1024];
extern u8 VRAMFlat_BOBJExtPal[8*1024];
extern u8 VRAMFlat_Texture[512*1024];
extern u8 VRAMFlat_TexPal[128*1024];
bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
void SyncDirtyFlags();
typedef struct typedef struct
{ {
@ -233,7 +306,11 @@ void WriteVRAM_LCDC(u32 addr, T val)
default: return; default: return;
} }
if (VRAMMap_LCDC & (1<<bank)) *(T*)&VRAM[bank][addr] = val; if (VRAMMap_LCDC & (1<<bank))
{
*(T*)&VRAM[bank][addr] = val;
VRAMDirty[bank][addr / VRAMDirtyGranularity] = true;
}
} }
@ -262,6 +339,8 @@ void WriteVRAM_ABG(u32 addr, T val)
{ {
u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F];
VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
@ -295,6 +374,8 @@ void WriteVRAM_AOBJ(u32 addr, T val)
{ {
u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF];
VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val; if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val;
@ -324,6 +405,8 @@ void WriteVRAM_BBG(u32 addr, T val)
{ {
u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7];
VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val; if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
@ -350,11 +433,12 @@ void WriteVRAM_BOBJ(u32 addr, T val)
{ {
u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7];
VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
} }
template<typename T> template<typename T>
T ReadVRAM_ARM7(u32 addr) T ReadVRAM_ARM7(u32 addr)
{ {
@ -372,6 +456,8 @@ void WriteVRAM_ARM7(u32 addr, T val)
{ {
u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1];
VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
} }

File diff suppressed because it is too large Load Diff

View File

@ -19,11 +19,14 @@
#ifndef GPU2D_H #ifndef GPU2D_H
#define GPU2D_H #define GPU2D_H
#include "types.h"
#include "Savestate.h"
class GPU2D class GPU2D
{ {
public: public:
GPU2D(u32 num); GPU2D(u32 num);
~GPU2D(); virtual ~GPU2D() {}
void Reset(); void Reset();
@ -31,7 +34,7 @@ public:
void SetEnabled(bool enable) { Enabled = enable; } void SetEnabled(bool enable) { Enabled = enable; }
void SetFramebuffer(u32* buf); void SetFramebuffer(u32* buf);
void SetRenderSettings(bool accel); virtual void SetRenderSettings(bool accel) = 0;
u8 Read8(u32 addr); u8 Read8(u32 addr);
u16 Read16(u32 addr); u16 Read16(u32 addr);
@ -52,36 +55,24 @@ public:
void SampleFIFO(u32 offset, u32 num); void SampleFIFO(u32 offset, u32 num);
void DrawScanline(u32 line); virtual void DrawScanline(u32 line) = 0;
void DrawSprites(u32 line); virtual void DrawSprites(u32 line) = 0;
void VBlank(); void VBlank();
void VBlankEnd(); virtual void VBlankEnd();
void CheckWindows(u32 line); void CheckWindows(u32 line);
void BGExtPalDirty(u32 base);
void OBJExtPalDirty();
u16* GetBGExtPal(u32 slot, u32 pal); u16* GetBGExtPal(u32 slot, u32 pal);
u16* GetOBJExtPal(); u16* GetOBJExtPal();
private: void GetBGVRAM(u8*& data, u32& mask);
void GetOBJVRAM(u8*& data, u32& mask);
protected:
u32 Num; u32 Num;
bool Enabled; bool Enabled;
u32* Framebuffer; u32* Framebuffer;
bool Accelerated;
u32 BGOBJLine[256*3] __attribute__((aligned (8)));
u32* _3DLine;
u8 WindowMask[256] __attribute__((aligned (8)));
u32 OBJLine[256] __attribute__((aligned (8)));
u8 OBJWindow[256] __attribute__((aligned (8)));
u8 OBJIndex[256] __attribute__((aligned (8)));
u32 NumSprites;
u16 DispFIFO[16]; u16 DispFIFO[16];
u32 DispFIFOReadPtr; u32 DispFIFOReadPtr;
u32 DispFIFOWritePtr; u32 DispFIFOWritePtr;
@ -114,23 +105,54 @@ private:
u8 BGMosaicY, BGMosaicYMax; u8 BGMosaicY, BGMosaicYMax;
u8 OBJMosaicYCount, OBJMosaicY, OBJMosaicYMax; u8 OBJMosaicYCount, OBJMosaicY, OBJMosaicYMax;
u8 MosaicTable[16][256];
u8* CurBGXMosaicTable;
u8* CurOBJXMosaicTable;
u16 BlendCnt; u16 BlendCnt;
u16 BlendAlpha; u16 BlendAlpha;
u8 EVA, EVB; u8 EVA, EVB;
u8 EVY; u8 EVY;
bool CaptureLatch;
u32 CaptureCnt; u32 CaptureCnt;
u16 MasterBrightness; u16 MasterBrightness;
u16 BGExtPalCache[4][16*256]; u8 WindowMask[256] __attribute__((aligned (8)));
u16 OBJExtPalCache[16*256]; u8 OBJWindow[256] __attribute__((aligned (8)));
u32 BGExtPalStatus[4];
u32 OBJExtPalStatus; void UpdateMosaicCounters(u32 line);
void CalculateWindowMask(u32 line);
virtual void MosaicXSizeChanged() = 0;
};
class GPU2D_Soft : public GPU2D
{
public:
GPU2D_Soft(u32 num);
~GPU2D_Soft() override {}
void SetRenderSettings(bool accel) override;
void DrawScanline(u32 line) override;
void DrawSprites(u32 line) override;
void VBlankEnd() override;
protected:
void MosaicXSizeChanged() override;
private:
bool Accelerated;
u32 BGOBJLine[256*3] __attribute__((aligned (8)));
u32* _3DLine;
u32 OBJLine[256] __attribute__((aligned (8)));
u8 OBJIndex[256] __attribute__((aligned (8)));
u32 NumSprites;
u8 MosaicTable[16][256];
u8* CurBGXMosaicTable;
u8* CurOBJXMosaicTable;
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
u32 ColorBlend5(u32 val1, u32 val2); u32 ColorBlend5(u32 val1, u32 val2);
@ -138,8 +160,6 @@ private:
u32 ColorBrightnessDown(u32 val, u32 factor); u32 ColorBrightnessDown(u32 val, u32 factor);
u32 ColorComposite(int i, u32 val1, u32 val2); u32 ColorComposite(int i, u32 val1, u32 val2);
void UpdateMosaicCounters(u32 line);
template<u32 bgmode> void DrawScanlineBGMode(u32 line); template<u32 bgmode> void DrawScanlineBGMode(u32 line);
void DrawScanlineBGMode6(u32 line); void DrawScanlineBGMode6(u32 line);
void DrawScanlineBGMode7(u32 line); void DrawScanlineBGMode7(u32 line);
@ -147,22 +167,22 @@ private:
static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
void (*DrawPixel)(u32* dst, u16 color, u32 flag);
typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
void DrawBG_3D(); void DrawBG_3D();
template<bool mosaic> void DrawBG_Text(u32 line, u32 bgnum); template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum);
template<bool mosaic> void DrawBG_Affine(u32 line, u32 bgnum); template<bool mosaic, DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum);
template<bool mosaic> void DrawBG_Extended(u32 line, u32 bgnum); template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum);
template<bool mosaic> void DrawBG_Large(u32 line); template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line);
void ApplySpriteMosaicX(); void ApplySpriteMosaicX();
template<DrawPixel drawPixel>
void InterleaveSprites(u32 prio); void InterleaveSprites(u32 prio);
template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
void DoCapture(u32 line, u32 width); void DoCapture(u32 line, u32 width);
void CalculateWindowMask(u32 line);
}; };
#endif #endif

2215
src/GPU2D_Soft.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -179,6 +179,10 @@ u8 RenderFogDensityTable[34];
u32 RenderClearAttr1, RenderClearAttr2; u32 RenderClearAttr1, RenderClearAttr2;
bool RenderFrameIdentical;
u16 RenderXPos;
u32 ZeroDotWLimit; u32 ZeroDotWLimit;
u32 GXStat; u32 GXStat;
@ -383,6 +387,8 @@ void Reset()
FlushAttributes = 0; FlushAttributes = 0;
ResetRenderingState(); ResetRenderingState();
RenderXPos = 0;
} }
void DoSavestate(Savestate* file) void DoSavestate(Savestate* file)
@ -428,6 +434,8 @@ void DoSavestate(Savestate* file)
file->Var32(&RenderClearAttr1); file->Var32(&RenderClearAttr1);
file->Var32(&RenderClearAttr2); file->Var32(&RenderClearAttr2);
file->Var16(&RenderXPos);
file->Var32(&ZeroDotWLimit); file->Var32(&ZeroDotWLimit);
file->Var32(&GXStat); file->Var32(&GXStat);
@ -585,8 +593,6 @@ void DoSavestate(Savestate* file)
} }
} }
// probably not worth storing the vblank-latched Renderxxxxxx variables
CmdStallQueue->DoSavestate(file); CmdStallQueue->DoSavestate(file);
file->Var32((u32*)&VertexPipeline); file->Var32((u32*)&VertexPipeline);
file->Var32((u32*)&NormalPipeline); file->Var32((u32*)&NormalPipeline);
@ -606,6 +612,22 @@ void DoSavestate(Savestate* file)
// might cause a blank frame but atleast it won't shit itself // might cause a blank frame but atleast it won't shit itself
RenderNumPolygons = 0; RenderNumPolygons = 0;
} }
file->VarArray(CurVertex, sizeof(s16)*3);
file->VarArray(VertexColor, sizeof(u8)*3);
file->VarArray(TexCoords, sizeof(s16)*2);
file->VarArray(RawTexCoords, sizeof(s16)*2);
file->VarArray(Normal, sizeof(s16)*3);
file->VarArray(LightDirection, sizeof(s16)*4*3);
file->VarArray(LightColor, sizeof(u8)*4*3);
file->VarArray(MatDiffuse, sizeof(u8)*3);
file->VarArray(MatAmbient, sizeof(u8)*3);
file->VarArray(MatSpecular, sizeof(u8)*3);
file->VarArray(MatEmission, sizeof(u8)*3);
file->Bool32(&UseShininessTable);
file->VarArray(ShininessTable, 128*sizeof(u8));
} }
@ -2491,6 +2513,19 @@ void VBlank()
} }
RenderNumPolygons = NumPolygons; RenderNumPolygons = NumPolygons;
RenderFrameIdentical = false;
}
else
{
RenderFrameIdentical = RenderDispCnt == DispCnt
&& RenderAlphaRef == AlphaRef
&& RenderClearAttr1 == ClearAttr1
&& RenderClearAttr2 == ClearAttr2
&& RenderFogColor == FogColor
&& RenderFogOffset == FogOffset * 0x200
&& memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0
&& memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0
&& memcmp(RenderToonTable, ToonTable, 32*2) == 0;
} }
RenderDispCnt = DispCnt; RenderDispCnt = DispCnt;
@ -2533,14 +2568,46 @@ void VCount215()
#endif #endif
} }
// Stores the low 9 bits of `xpos` in RenderXPos.
// The value is ignored while RenderingEnabled is false.
void SetRenderXPos(u16 xpos)
{
    if (RenderingEnabled)
    {
        RenderXPos = xpos & 0x01FF;
    }
}
u32 ScrolledLine[256];
u32* GetLine(int line) u32* GetLine(int line)
{ {
if (GPU::Renderer == 0) return SoftRenderer::GetLine(line); u32* rawline = NULL;
if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line);
#ifdef OGLRENDERER_ENABLED #ifdef OGLRENDERER_ENABLED
else return GLRenderer::GetLine(line); else rawline = GLRenderer::GetLine(line);
#else
return NULL;
#endif #endif
if (RenderXPos == 0) return rawline;
// apply X scroll
if (RenderXPos & 0x100)
{
int i = 0, j = RenderXPos;
for (; j < 512; i++, j++)
ScrolledLine[i] = 0;
for (j = 0; i < 256; i++, j++)
ScrolledLine[i] = rawline[j];
}
else
{
int i = 0, j = RenderXPos;
for (; j < 256; i++, j++)
ScrolledLine[i] = rawline[j];
for (; i < 256; i++)
ScrolledLine[i] = 0;
}
return ScrolledLine;
} }

View File

@ -87,6 +87,10 @@ extern u8 RenderFogDensityTable[34];
extern u32 RenderClearAttr1, RenderClearAttr2; extern u32 RenderClearAttr1, RenderClearAttr2;
extern bool RenderFrameIdentical;
extern u16 RenderXPos;
extern std::array<Polygon*,2048> RenderPolygonRAM; extern std::array<Polygon*,2048> RenderPolygonRAM;
extern u32 RenderNumPolygons; extern u32 RenderNumPolygons;
@ -112,6 +116,8 @@ void CheckFIFODMA();
void VCount144(); void VCount144();
void VBlank(); void VBlank();
void VCount215(); void VCount215();
void SetRenderXPos(u16 xpos);
u32* GetLine(int line); u32* GetLine(int line);
void WriteToGXFIFO(u32 val); void WriteToGXFIFO(u32 val);

View File

@ -53,17 +53,18 @@ GLuint CurShaderID = -1;
GLuint FinalPassEdgeShader[3]; GLuint FinalPassEdgeShader[3];
GLuint FinalPassFogShader[3]; GLuint FinalPassFogShader[3];
// std140 compliant structure
struct struct
{ {
float uScreenSize[2]; float uScreenSize[2]; // vec2 0 / 2
u32 uDispCnt; u32 uDispCnt; // int 2 / 1
u32 __pad0; u32 __pad0;
float uToonColors[32][4]; float uToonColors[32][4]; // vec4[32] 4 / 128
float uEdgeColors[8][4]; float uEdgeColors[8][4]; // vec4[8] 132 / 32
float uFogColor[4]; float uFogColor[4]; // vec4 164 / 4
float uFogDensity[34][4]; float uFogDensity[34][4]; // float[34] 168 / 136
u32 uFogOffset; u32 uFogOffset; // int 304 / 1
u32 uFogShift; u32 uFogShift; // int 305 / 1
} ShaderConfig; } ShaderConfig;
@ -74,11 +75,11 @@ typedef struct
Polygon* PolyData; Polygon* PolyData;
u32 NumIndices; u32 NumIndices;
u16* Indices; u32 IndicesOffset;
GLuint PrimType; GLuint PrimType;
u32 NumEdgeIndices; u32 NumEdgeIndices;
u16* EdgeIndices; u32 EdgeIndicesOffset;
u32 RenderKey; u32 RenderKey;
@ -107,7 +108,11 @@ u32 VertexBuffer[10240 * 7];
u32 NumVertices; u32 NumVertices;
GLuint VertexArrayID; GLuint VertexArrayID;
GLuint IndexBufferID;
u16 IndexBuffer[2048 * 40]; u16 IndexBuffer[2048 * 40];
u32 NumIndices, NumEdgeIndices;
const u32 EdgeIndicesOffset = 2048 * 30;
GLuint TexMemID; GLuint TexMemID;
GLuint TexPalMemID; GLuint TexPalMemID;
@ -280,7 +285,7 @@ bool Init()
glGenBuffers(1, &ShaderConfigUBO); glGenBuffers(1, &ShaderConfigUBO);
glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO); glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO);
glBufferData(GL_UNIFORM_BUFFER, sizeof(ShaderConfig), &ShaderConfig, GL_STATIC_DRAW); glBufferData(GL_UNIFORM_BUFFER, (sizeof(ShaderConfig) + 15) & ~15, &ShaderConfig, GL_STATIC_DRAW);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, ShaderConfigUBO); glBindBufferBase(GL_UNIFORM_BUFFER, 0, ShaderConfigUBO);
@ -320,6 +325,9 @@ bool Init()
glEnableVertexAttribArray(3); // attrib glEnableVertexAttribArray(3); // attrib
glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4)); glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4));
glGenBuffers(1, &IndexBufferID);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, IndexBufferID);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), NULL, GL_DYNAMIC_DRAW);
glGenFramebuffers(4, &FramebufferID[0]); glGenFramebuffers(4, &FramebufferID[0]);
glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]); glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
@ -563,15 +571,15 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
u32* vptr = &VertexBuffer[0]; u32* vptr = &VertexBuffer[0];
u32 vidx = 0; u32 vidx = 0;
u16* iptr = &IndexBuffer[0]; u32 iidx = 0;
u16* eiptr = &IndexBuffer[2048*30]; u32 eidx = EdgeIndicesOffset;
for (int i = 0; i < npolys; i++) for (int i = 0; i < npolys; i++)
{ {
RendererPolygon* rp = &polygons[i]; RendererPolygon* rp = &polygons[i];
Polygon* poly = rp->PolyData; Polygon* poly = rp->PolyData;
rp->Indices = iptr; rp->IndicesOffset = iidx;
rp->NumIndices = 0; rp->NumIndices = 0;
u32 vidx_first = vidx; u32 vidx_first = vidx;
@ -606,7 +614,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
vptr = SetupVertex(poly, j, vtx, vtxattr, vptr); vptr = SetupVertex(poly, j, vtx, vtxattr, vptr);
*iptr++ = vidx; IndexBuffer[iidx++] = vidx;
rp->NumIndices++; rp->NumIndices++;
vidx++; vidx++;
@ -627,9 +635,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
} }
// build a triangle // build a triangle
*iptr++ = vidx_first; IndexBuffer[iidx++] = vidx_first;
*iptr++ = vidx - 2; IndexBuffer[iidx++] = vidx - 2;
*iptr++ = vidx - 1; IndexBuffer[iidx++] = vidx - 1;
rp->NumIndices += 3; rp->NumIndices += 3;
} }
else // quad, pentagon, etc else // quad, pentagon, etc
@ -649,9 +657,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
if (j >= 2) if (j >= 2)
{ {
// build a triangle // build a triangle
*iptr++ = vidx_first; IndexBuffer[iidx++] = vidx_first;
*iptr++ = vidx - 1; IndexBuffer[iidx++] = vidx - 1;
*iptr++ = vidx; IndexBuffer[iidx++] = vidx;
rp->NumIndices += 3; rp->NumIndices += 3;
} }
@ -743,46 +751,50 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
if (j >= 1) if (j >= 1)
{ {
// build a triangle // build a triangle
*iptr++ = vidx_first; IndexBuffer[iidx++] = vidx_first;
*iptr++ = vidx - 1; IndexBuffer[iidx++] = vidx - 1;
*iptr++ = vidx; IndexBuffer[iidx++] = vidx;
rp->NumIndices += 3; rp->NumIndices += 3;
} }
vidx++; vidx++;
} }
*iptr++ = vidx_first; IndexBuffer[iidx++] = vidx_first;
*iptr++ = vidx - 1; IndexBuffer[iidx++] = vidx - 1;
*iptr++ = vidx_first + 1; IndexBuffer[iidx++] = vidx_first + 1;
rp->NumIndices += 3; rp->NumIndices += 3;
} }
} }
rp->EdgeIndices = eiptr; rp->EdgeIndicesOffset = eidx;
rp->NumEdgeIndices = 0; rp->NumEdgeIndices = 0;
u32 vidx_cur = vidx_first; u32 vidx_cur = vidx_first;
for (int j = 1; j < poly->NumVertices; j++) for (int j = 1; j < poly->NumVertices; j++)
{ {
*eiptr++ = vidx_cur; IndexBuffer[eidx++] = vidx_cur;
*eiptr++ = vidx_cur + 1; IndexBuffer[eidx++] = vidx_cur + 1;
vidx_cur++; vidx_cur++;
rp->NumEdgeIndices += 2; rp->NumEdgeIndices += 2;
} }
*eiptr++ = vidx_cur; IndexBuffer[eidx++] = vidx_cur;
*eiptr++ = vidx_first; IndexBuffer[eidx++] = vidx_first;
rp->NumEdgeIndices += 2; rp->NumEdgeIndices += 2;
} }
NumVertices = vidx; NumVertices = vidx;
NumIndices = iidx;
NumEdgeIndices = eidx - EdgeIndicesOffset;
} }
void RenderSinglePolygon(int i) int RenderSinglePolygon(int i)
{ {
RendererPolygon* rp = &PolygonList[i]; RendererPolygon* rp = &PolygonList[i];
glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, rp->Indices); glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2));
return 1;
} }
int RenderPolygonBatch(int i) int RenderPolygonBatch(int i)
@ -803,7 +815,7 @@ int RenderPolygonBatch(int i)
numindices += cur_rp->NumIndices; numindices += cur_rp->NumIndices;
} }
glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, rp->Indices); glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2));
return numpolys; return numpolys;
} }
@ -823,7 +835,7 @@ int RenderPolygonEdgeBatch(int i)
numindices += cur_rp->NumEdgeIndices; numindices += cur_rp->NumEdgeIndices;
} }
glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, rp->EdgeIndices); glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->EdgeIndicesOffset * 2));
return numpolys; return numpolys;
} }
@ -857,6 +869,7 @@ void RenderSceneChunk(int y, int h)
RendererPolygon* rp = &PolygonList[i]; RendererPolygon* rp = &PolygonList[i];
if (rp->PolyData->IsShadowMask) { i++; continue; } if (rp->PolyData->IsShadowMask) { i++; continue; }
if (rp->PolyData->Translucent) { i++; continue; }
if (rp->PolyData->Attr & (1<<14)) if (rp->PolyData->Attr & (1<<14))
glDepthFunc(GL_LEQUAL); glDepthFunc(GL_LEQUAL);
@ -874,7 +887,8 @@ void RenderSceneChunk(int y, int h)
} }
// if edge marking is enabled, mark all opaque edges // if edge marking is enabled, mark all opaque edges
if (RenderDispCnt & (1<<5)) // TODO BETTER EDGE MARKING!!! THIS SUCKS
/*if (RenderDispCnt & (1<<5))
{ {
UseRenderShader(flags | RenderFlag_Edge); UseRenderShader(flags | RenderFlag_Edge);
glLineWidth(1.5); glLineWidth(1.5);
@ -899,7 +913,7 @@ void RenderSceneChunk(int y, int h)
} }
glDepthMask(GL_TRUE); glDepthMask(GL_TRUE);
} }*/
glEnable(GL_BLEND); glEnable(GL_BLEND);
glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX);
@ -944,15 +958,32 @@ void RenderSceneChunk(int y, int h)
} }
else if (rp->PolyData->Translucent) else if (rp->PolyData->Translucent)
{ {
UseRenderShader(flags | RenderFlag_Trans); bool needopaque = ((rp->PolyData->Attr & 0x001F0000) == 0x001F0000);
if (rp->PolyData->Attr & (1<<14)) u32 polyattr = rp->PolyData->Attr;
u32 polyid = (polyattr >> 24) & 0x3F;
if (polyattr & (1<<14))
glDepthFunc(GL_LEQUAL); glDepthFunc(GL_LEQUAL);
else else
glDepthFunc(GL_LESS); glDepthFunc(GL_LESS);
u32 polyattr = rp->PolyData->Attr; if (needopaque)
u32 polyid = (polyattr >> 24) & 0x3F; {
UseRenderShader(flags);
glDisable(GL_BLEND);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glColorMaski(1, GL_TRUE, GL_TRUE, fogenable, GL_FALSE);
glStencilFunc(GL_ALWAYS, polyid, 0xFF);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilMask(0xFF);
RenderSinglePolygon(i);
}
UseRenderShader(flags | RenderFlag_Trans);
GLboolean transfog; GLboolean transfog;
if (!(polyattr & (1<<15))) transfog = fogenable; if (!(polyattr & (1<<15))) transfog = fogenable;
@ -975,7 +1006,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE); if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE); else glDepthMask(GL_FALSE);
i += RenderPolygonBatch(i); i += needopaque ? RenderSinglePolygon(i) : RenderPolygonBatch(i);
} }
else else
{ {
@ -989,7 +1020,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE); if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE); else glDepthMask(GL_FALSE);
i += RenderPolygonBatch(i); i += needopaque ? RenderSinglePolygon(i) : RenderPolygonBatch(i);
} }
} }
else else
@ -1030,20 +1061,37 @@ void RenderSceneChunk(int y, int h)
} }
else if (rp->PolyData->Translucent) else if (rp->PolyData->Translucent)
{ {
UseRenderShader(flags | RenderFlag_Trans); bool needopaque = ((rp->PolyData->Attr & 0x001F0000) == 0x001F0000);
u32 polyattr = rp->PolyData->Attr; u32 polyattr = rp->PolyData->Attr;
u32 polyid = (polyattr >> 24) & 0x3F; u32 polyid = (polyattr >> 24) & 0x3F;
GLboolean transfog; if (polyattr & (1<<14))
if (!(polyattr & (1<<15))) transfog = fogenable;
else transfog = GL_FALSE;
if (rp->PolyData->Attr & (1<<14))
glDepthFunc(GL_LEQUAL); glDepthFunc(GL_LEQUAL);
else else
glDepthFunc(GL_LESS); glDepthFunc(GL_LESS);
if (needopaque)
{
UseRenderShader(flags);
glDisable(GL_BLEND);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glColorMaski(1, GL_TRUE, GL_TRUE, fogenable, GL_FALSE);
glStencilFunc(GL_ALWAYS, polyid, 0xFF);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilMask(0xFF);
RenderSinglePolygon(i);
}
UseRenderShader(flags | RenderFlag_Trans);
GLboolean transfog;
if (!(polyattr & (1<<15))) transfog = fogenable;
else transfog = GL_FALSE;
if (rp->PolyData->IsShadow) if (rp->PolyData->IsShadow)
{ {
glDisable(GL_BLEND); glDisable(GL_BLEND);
@ -1067,8 +1115,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE); if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE); else glDepthMask(GL_FALSE);
RenderSinglePolygon(i); i += RenderSinglePolygon(i);
i++;
} }
else else
{ {
@ -1083,7 +1130,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE); if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE); else glDepthMask(GL_FALSE);
i += RenderPolygonBatch(i); i += needopaque ? RenderSinglePolygon(i) : RenderPolygonBatch(i);
} }
} }
else else
@ -1320,6 +1367,11 @@ void RenderFrame()
glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID); glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID);
glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer); glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer);
// bind to access the index buffer
glBindVertexArray(VertexArrayID);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset);
RenderSceneChunk(0, 192); RenderSceneChunk(0, 192);
} }

View File

@ -58,15 +58,17 @@ bool PrevIsShadowMask;
bool Enabled; bool Enabled;
bool FrameIdentical;
// threading // threading
bool Threaded; bool Threaded;
void* RenderThread; Platform::Thread* RenderThread;
bool RenderThreadRunning; bool RenderThreadRunning;
bool RenderThreadRendering; bool RenderThreadRendering;
void* Sema_RenderStart; Platform::Semaphore* Sema_RenderStart;
void* Sema_RenderDone; Platform::Semaphore* Sema_RenderDone;
void* Sema_ScanlineCount; Platform::Semaphore* Sema_ScanlineCount;
void RenderThreadFunc(); void RenderThreadFunc();
@ -550,6 +552,16 @@ typedef struct
RendererPolygon PolygonList[2048]; RendererPolygon PolygonList[2048];
// Reads one value of type T from GPU::VRAMFlat_Texture.
// addr is reduced to its low 19 bits (mask 0x7FFFF) and used as the index
// into that buffer; no alignment adjustment is made.
// NOTE(review): presumably callers pass addresses aligned for T - confirm.
template <typename T>
inline T ReadVRAM_Texture(u32 addr)
{
    const u32 offset = addr & 0x7FFFF;
    return *reinterpret_cast<const T*>(&GPU::VRAMFlat_Texture[offset]);
}
// Reads one value of type T from GPU::VRAMFlat_TexPal.
// addr is reduced to its low 17 bits (mask 0x1FFFF) and used as the index
// into that buffer; no alignment adjustment is made.
// NOTE(review): presumably callers pass addresses aligned for T - confirm.
template <typename T>
inline T ReadVRAM_TexPal(u32 addr)
{
    const u32 offset = addr & 0x1FFFF;
    return *reinterpret_cast<const T*>(&GPU::VRAMFlat_TexPal[offset]);
}
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{ {
@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 1: // A3I5 case 1: // A3I5
{ {
vramaddr += ((t * width) + s); vramaddr += ((t * width) + s);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1)); *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
} }
break; break;
@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 2: // 4-color case 2: // 4-color
{ {
vramaddr += (((t * width) + s) >> 2); vramaddr += (((t * width) + s) >> 2);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
pixel >>= ((s & 0x3) << 1); pixel >>= ((s & 0x3) << 1);
pixel &= 0x3; pixel &= 0x3;
texpal <<= 3; texpal <<= 3;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31; *alpha = (pixel==0) ? alpha0 : 31;
} }
break; break;
@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 3: // 16-color case 3: // 16-color
{ {
vramaddr += (((t * width) + s) >> 1); vramaddr += (((t * width) + s) >> 1);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
if (s & 0x1) pixel >>= 4; if (s & 0x1) pixel >>= 4;
else pixel &= 0xF; else pixel &= 0xF;
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31; *alpha = (pixel==0) ? alpha0 : 31;
} }
break; break;
@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 4: // 256-color case 4: // 256-color
{ {
vramaddr += ((t * width) + s); vramaddr += ((t * width) + s);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31; *alpha = (pixel==0) ? alpha0 : 31;
} }
break; break;
@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
if (vramaddr >= 0x40000) if (vramaddr >= 0x40000)
slot1addr += 0x10000; slot1addr += 0x10000;
u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 val = ReadVRAM_Texture<u8>(vramaddr);
val >>= (2 * (s & 0x3)); val >>= (2 * (s & 0x3));
u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr); u16 palinfo = ReadVRAM_Texture<u16>(slot1addr);
u32 paloffset = (palinfo & 0x3FFF) << 2; u32 paloffset = (palinfo & 0x3FFF) << 2;
texpal <<= 4; texpal <<= 4;
switch (val & 0x3) switch (val & 0x3)
{ {
case 0: case 0:
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); *color = ReadVRAM_TexPal<u16>(texpal + paloffset);
*alpha = 31; *alpha = 31;
break; break;
case 1: case 1:
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
*alpha = 31; *alpha = 31;
break; break;
case 2: case 2:
if ((palinfo >> 14) == 1) if ((palinfo >> 14) == 1)
{ {
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F; u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0; u32 g0 = color0 & 0x03E0;
@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
} }
else if ((palinfo >> 14) == 3) else if ((palinfo >> 14) == 3)
{ {
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F; u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0; u32 g0 = color0 & 0x03E0;
@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
*color = r | g | b; *color = r | g | b;
} }
else else
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 4); *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
*alpha = 31; *alpha = 31;
break; break;
case 3: case 3:
if ((palinfo >> 14) == 2) if ((palinfo >> 14) == 2)
{ {
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 6); *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
*alpha = 31; *alpha = 31;
} }
else if ((palinfo >> 14) == 3) else if ((palinfo >> 14) == 3)
{ {
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F; u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0; u32 g0 = color0 & 0x03E0;
@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 6: // A5I3 case 6: // A5I3
{ {
vramaddr += ((t * width) + s); vramaddr += ((t * width) + s);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1)); *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
*alpha = (pixel >> 3); *alpha = (pixel >> 3);
} }
break; break;
@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 7: // direct color case 7: // direct color
{ {
vramaddr += (((t * width) + s) << 1); vramaddr += (((t * width) + s) << 1);
*color = GPU::ReadVRAM_Texture<u16>(vramaddr); *color = ReadVRAM_Texture<u16>(vramaddr);
*alpha = (*color & 0x8000) ? 31 : 0; *alpha = (*color & 0x8000) ? 31 : 0;
} }
break; break;
@ -2007,8 +2019,8 @@ void ClearBuffers()
{ {
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1)); u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1)); u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
// TODO: confirm color conversion // TODO: confirm color conversion
u32 r = (val2 << 1) & 0x3E; if (r) r++; u32 r = (val2 << 1) & 0x3E; if (r) r++;
@ -2088,11 +2100,19 @@ void VCount144()
void RenderFrame() void RenderFrame()
{ {
auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty);
bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty);
FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical;
if (RenderThreadRunning) if (RenderThreadRunning)
{ {
Platform::Semaphore_Post(Sema_RenderStart); Platform::Semaphore_Post(Sema_RenderStart);
} }
else else if (!FrameIdentical)
{ {
ClearBuffers(); ClearBuffers();
RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons); RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons);
@ -2107,8 +2127,15 @@ void RenderThreadFunc()
if (!RenderThreadRunning) return; if (!RenderThreadRunning) return;
RenderThreadRendering = true; RenderThreadRendering = true;
if (FrameIdentical)
{
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
}
else
{
ClearBuffers(); ClearBuffers();
RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
}
Platform::Semaphore_Post(Sema_RenderDone); Platform::Semaphore_Post(Sema_RenderDone);
RenderThreadRendering = false; RenderThreadRendering = false;

View File

@ -36,6 +36,7 @@ int ScreenH, ScreenW;
GLuint CompShader[1][3]; GLuint CompShader[1][3];
GLuint CompScaleLoc[1]; GLuint CompScaleLoc[1];
GLuint Comp3DXPosLoc[1];
GLuint CompVertexBufferID; GLuint CompVertexBufferID;
GLuint CompVertexArrayID; GLuint CompVertexArrayID;
@ -64,6 +65,7 @@ bool Init()
return false; return false;
CompScaleLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DScale"); CompScaleLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DScale");
Comp3DXPosLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DXPos");
glUseProgram(CompShader[i][2]); glUseProgram(CompShader[i][2]);
uni_id = glGetUniformLocation(CompShader[i][2], "ScreenTex"); uni_id = glGetUniformLocation(CompShader[i][2], "ScreenTex");
@ -180,6 +182,9 @@ void RenderFrame()
OpenGL::UseShaderProgram(CompShader[0]); OpenGL::UseShaderProgram(CompShader[0]);
glUniform1ui(CompScaleLoc[0], Scale); glUniform1ui(CompScaleLoc[0], Scale);
// TODO: support setting this midframe, if ever needed
glUniform1i(Comp3DXPosLoc[0], ((int)GPU3D::RenderXPos << 23) >> 23);
int frontbuf = GPU::FrontBuffer; int frontbuf = GPU::FrontBuffer;
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); glBindTexture(GL_TEXTURE_2D, CompScreenInputTex);

View File

@ -40,6 +40,7 @@ void main()
const char* kCompositorFS_Nearest = R"(#version 140 const char* kCompositorFS_Nearest = R"(#version 140
uniform uint u3DScale; uniform uint u3DScale;
uniform int u3DXPos;
uniform usampler2D ScreenTex; uniform usampler2D ScreenTex;
uniform sampler2D _3DTex; uniform sampler2D _3DTex;
@ -52,6 +53,8 @@ void main()
{ {
ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0));
float _3dxpos = float(u3DXPos);
ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0));
int dispmode = mbright.b & 0x3; int dispmode = mbright.b & 0x3;
@ -68,7 +71,7 @@ void main()
{ {
// 3D on top, blending // 3D on top, blending
float xpos = val3.r + fract(fTexcoord.x); float xpos = fTexcoord.x + _3dxpos;
float ypos = mod(fTexcoord.y, 192); float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31)); * vec4(63,63,63,31));
@ -89,7 +92,7 @@ void main()
{ {
// 3D on bottom, blending // 3D on bottom, blending
float xpos = val3.r + fract(fTexcoord.x); float xpos = fTexcoord.x + _3dxpos;
float ypos = mod(fTexcoord.y, 192); float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31)); * vec4(63,63,63,31));
@ -109,7 +112,7 @@ void main()
{ {
// 3D on top, normal/fade // 3D on top, normal/fade
float xpos = val3.r + fract(fTexcoord.x); float xpos = fTexcoord.x + _3dxpos;
float ypos = mod(fTexcoord.y, 192); float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31)); * vec4(63,63,63,31));

View File

@ -210,13 +210,13 @@ bool Init()
void DeInit() void DeInit()
{ {
delete ARM9;
delete ARM7;
#ifdef JIT_ENABLED #ifdef JIT_ENABLED
ARMJIT::DeInit(); ARMJIT::DeInit();
#endif #endif
delete ARM9;
delete ARM7;
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
delete DMAs[i]; delete DMAs[i];
@ -908,7 +908,7 @@ void RunSystem(u64 timestamp)
} }
} }
template <bool EnableJIT> template <bool EnableJIT, int ConsoleType>
u32 RunFrame() u32 RunFrame()
{ {
FrameStartTimestamp = SysTimestamp; FrameStartTimestamp = SysTimestamp;
@ -934,10 +934,10 @@ u32 RunFrame()
} }
else if (CPUStop & 0x0FFF) else if (CPUStop & 0x0FFF)
{ {
DMAs[0]->Run(); DMAs[0]->Run<ConsoleType>();
if (!(CPUStop & 0x80000000)) DMAs[1]->Run(); if (!(CPUStop & 0x80000000)) DMAs[1]->Run<ConsoleType>();
if (!(CPUStop & 0x80000000)) DMAs[2]->Run(); if (!(CPUStop & 0x80000000)) DMAs[2]->Run<ConsoleType>();
if (!(CPUStop & 0x80000000)) DMAs[3]->Run(); if (!(CPUStop & 0x80000000)) DMAs[3]->Run<ConsoleType>();
if (ConsoleType == 1) DSi::RunNDMAs(0); if (ConsoleType == 1) DSi::RunNDMAs(0);
} }
else else
@ -962,10 +962,10 @@ u32 RunFrame()
if (CPUStop & 0x0FFF0000) if (CPUStop & 0x0FFF0000)
{ {
DMAs[4]->Run(); DMAs[4]->Run<ConsoleType>();
DMAs[5]->Run(); DMAs[5]->Run<ConsoleType>();
DMAs[6]->Run(); DMAs[6]->Run<ConsoleType>();
DMAs[7]->Run(); DMAs[7]->Run<ConsoleType>();
if (ConsoleType == 1) DSi::RunNDMAs(1); if (ConsoleType == 1) DSi::RunNDMAs(1);
} }
else else
@ -999,6 +999,9 @@ u32 RunFrame()
ARM7Timestamp-SysTimestamp, ARM7Timestamp-SysTimestamp,
GPU3D::Timestamp-SysTimestamp); GPU3D::Timestamp-SysTimestamp);
#endif #endif
SPU::TransferOutput();
NDSCart::FlushSRAMFile();
NumFrames++; NumFrames++;
@ -1009,10 +1012,14 @@ u32 RunFrame()
{ {
#ifdef JIT_ENABLED #ifdef JIT_ENABLED
if (Config::JIT_Enable) if (Config::JIT_Enable)
return RunFrame<true>(); return NDS::ConsoleType == 1
? RunFrame<true, 1>()
: RunFrame<true, 0>();
else else
#endif #endif
return RunFrame<false>(); return NDS::ConsoleType == 1
? RunFrame<false, 1>()
: RunFrame<false, 0>();
} }
void Reschedule(u64 target) void Reschedule(u64 target)
@ -1470,7 +1477,7 @@ void HandleTimerOverflow(u32 tid)
{ {
Timer* timer = &Timers[tid]; Timer* timer = &Timers[tid];
timer->Counter += timer->Reload << 16; timer->Counter += (timer->Reload << 10);
if (timer->Cnt & (1<<6)) if (timer->Cnt & (1<<6))
SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3)); SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
@ -1486,11 +1493,11 @@ void HandleTimerOverflow(u32 tid)
if ((timer->Cnt & 0x84) != 0x84) if ((timer->Cnt & 0x84) != 0x84)
break; break;
timer->Counter += 0x10000; timer->Counter += (1 << 10);
if (timer->Counter >> 16) if (!(timer->Counter >> 26))
break; break;
timer->Counter = timer->Reload << 16; timer->Counter = timer->Reload << 10;
if (timer->Cnt & (1<<6)) if (timer->Cnt & (1<<6))
SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3)); SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
@ -1505,9 +1512,14 @@ void RunTimer(u32 tid, s32 cycles)
u32 oldcount = timer->Counter; u32 oldcount = timer->Counter;
timer->Counter += (cycles << timer->CycleShift); timer->Counter += (cycles << timer->CycleShift);
if (timer->Counter < oldcount) //if (timer->Counter < oldcount)
// HandleTimerOverflow(tid);
while (timer->Counter >> 26)
{
timer->Counter -= (1 << 26);
HandleTimerOverflow(tid); HandleTimerOverflow(tid);
} }
}
void RunTimers(u32 cpu) void RunTimers(u32 cpu)
{ {
@ -1623,7 +1635,7 @@ u16 TimerGetCounter(u32 timer)
RunTimers(timer>>2); RunTimers(timer>>2);
u32 ret = Timers[timer].Counter; u32 ret = Timers[timer].Counter;
return ret >> 16; return ret >> 10;
} }
void TimerStart(u32 id, u16 cnt) void TimerStart(u32 id, u16 cnt)
@ -1633,11 +1645,11 @@ void TimerStart(u32 id, u16 cnt)
u16 newstart = cnt & (1<<7); u16 newstart = cnt & (1<<7);
timer->Cnt = cnt; timer->Cnt = cnt;
timer->CycleShift = 16 - TimerPrescaler[cnt & 0x03]; timer->CycleShift = 10 - TimerPrescaler[cnt & 0x03];
if ((!curstart) && newstart) if ((!curstart) && newstart)
{ {
timer->Counter = timer->Reload << 16; timer->Counter = timer->Reload << 10;
/*if ((cnt & 0x84) == 0x80) /*if ((cnt & 0x84) == 0x80)
{ {
@ -1824,14 +1836,14 @@ void debug(u32 param)
fclose(shit);*/ fclose(shit);*/
FILE* FILE*
shit = fopen("debug/picto9.bin", "wb"); shit = fopen("debug/power9.bin", "wb");
for (u32 i = 0x02000000; i < 0x04000000; i+=4) for (u32 i = 0x02000000; i < 0x04000000; i+=4)
{ {
u32 val = DSi::ARM9Read32(i); u32 val = DSi::ARM9Read32(i);
fwrite(&val, 4, 1, shit); fwrite(&val, 4, 1, shit);
} }
fclose(shit); fclose(shit);
shit = fopen("debug/picto7.bin", "wb"); shit = fopen("debug/power7.bin", "wb");
for (u32 i = 0x02000000; i < 0x04000000; i+=4) for (u32 i = 0x02000000; i < 0x04000000; i+=4)
{ {
u32 val = DSi::ARM7Read32(i); u32 val = DSi::ARM7Read32(i);
@ -3001,6 +3013,7 @@ u32 ARM9IORead32(u32 addr)
case 0x04000130: return (KeyInput & 0xFFFF) | (KeyCnt << 16); case 0x04000130: return (KeyInput & 0xFFFF) | (KeyCnt << 16);
case 0x04000180: return IPCSync9; case 0x04000180: return IPCSync9;
case 0x04000184: return ARM9IORead16(addr);
case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16); case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
case 0x040001A4: return NDSCart::ROMCnt; case 0x040001A4: return NDSCart::ROMCnt;
@ -3115,6 +3128,10 @@ void ARM9IOWrite8(u32 addr, u8 val)
NDSCart::WriteSPIData(val); NDSCart::WriteSPIData(val);
return; return;
case 0x04000188:
ARM9IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24));
return;
case 0x040001A8: NDSCart::ROMCommand[0] = val; return; case 0x040001A8: NDSCart::ROMCommand[0] = val; return;
case 0x040001A9: NDSCart::ROMCommand[1] = val; return; case 0x040001A9: NDSCart::ROMCommand[1] = val; return;
case 0x040001AA: NDSCart::ROMCommand[2] = val; return; case 0x040001AA: NDSCart::ROMCommand[2] = val; return;
@ -3228,7 +3245,11 @@ void ARM9IOWrite16(u32 addr, u16 val)
SetIRQ(0, IRQ_IPCRecv); SetIRQ(0, IRQ_IPCRecv);
if (val & 0x4000) if (val & 0x4000)
IPCFIFOCnt9 &= ~0x4000; IPCFIFOCnt9 &= ~0x4000;
IPCFIFOCnt9 = val & 0x8404; IPCFIFOCnt9 = (val & 0x8404) | (IPCFIFOCnt9 & 0x4000);
return;
case 0x04000188:
ARM9IOWrite32(addr, val | (val << 16));
return; return;
case 0x040001A0: case 0x040001A0:
@ -3378,10 +3399,11 @@ void ARM9IOWrite32(u32 addr, u32 val)
case 0x04000130: case 0x04000130:
KeyCnt = val >> 16; KeyCnt = val >> 16;
return; return;
case 0x04000180: case 0x04000180:
case 0x04000184:
ARM9IOWrite16(addr, val); ARM9IOWrite16(addr, val);
return; return;
case 0x04000188: case 0x04000188:
if (IPCFIFOCnt9 & 0x8000) if (IPCFIFOCnt9 & 0x8000)
{ {
@ -3640,6 +3662,7 @@ u32 ARM7IORead32(u32 addr)
case 0x04000138: return RTC::Read(); case 0x04000138: return RTC::Read();
case 0x04000180: return IPCSync7; case 0x04000180: return IPCSync7;
case 0x04000184: return ARM7IORead16(addr);
case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16); case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
case 0x040001A4: return NDSCart::ROMCnt; case 0x040001A4: return NDSCart::ROMCnt;
@ -3716,6 +3739,10 @@ void ARM7IOWrite8(u32 addr, u8 val)
case 0x04000138: RTC::Write(val, true); return; case 0x04000138: RTC::Write(val, true); return;
case 0x04000188:
ARM7IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24));
return;
case 0x040001A0: case 0x040001A0:
if (ExMemCnt[0] & (1<<11)) if (ExMemCnt[0] & (1<<11))
{ {
@ -3821,7 +3848,11 @@ void ARM7IOWrite16(u32 addr, u16 val)
SetIRQ(1, IRQ_IPCRecv); SetIRQ(1, IRQ_IPCRecv);
if (val & 0x4000) if (val & 0x4000)
IPCFIFOCnt7 &= ~0x4000; IPCFIFOCnt7 &= ~0x4000;
IPCFIFOCnt7 = val & 0x8404; IPCFIFOCnt7 = (val & 0x8404) | (IPCFIFOCnt7 & 0x4000);
return;
case 0x04000188:
ARM7IOWrite32(addr, val | (val << 16));
return; return;
case 0x040001A0: case 0x040001A0:
@ -3940,6 +3971,7 @@ void ARM7IOWrite32(u32 addr, u32 val)
case 0x04000138: RTC::Write(val & 0xFFFF, false); return; case 0x04000138: RTC::Write(val & 0xFFFF, false); return;
case 0x04000180: case 0x04000180:
case 0x04000184:
ARM7IOWrite16(addr, val); ARM7IOWrite16(addr, val);
return; return;
case 0x04000188: case 0x04000188:
@ -3984,6 +4016,11 @@ void ARM7IOWrite32(u32 addr, u32 val)
case 0x040001B0: *(u32*)&ROMSeed0[8] = val; return; case 0x040001B0: *(u32*)&ROMSeed0[8] = val; return;
case 0x040001B4: *(u32*)&ROMSeed1[8] = val; return; case 0x040001B4: *(u32*)&ROMSeed1[8] = val; return;
case 0x040001C0:
SPI::WriteCnt(val & 0xFFFF);
SPI::WriteData((val >> 16) & 0xFF);
return;
case 0x04000208: IME[1] = val & 0x1; UpdateIRQ(1); return; case 0x04000208: IME[1] = val & 0x1; UpdateIRQ(1); return;
case 0x04000210: IE[1] = val; UpdateIRQ(1); return; case 0x04000210: IE[1] = val; UpdateIRQ(1); return;
case 0x04000214: IF[1] &= ~val; UpdateIRQ(1); return; case 0x04000214: IF[1] &= ~val; UpdateIRQ(1); return;

View File

@ -46,6 +46,8 @@ enum
Event_DSi_SDMMCTransfer, Event_DSi_SDMMCTransfer,
Event_DSi_SDIOTransfer, Event_DSi_SDIOTransfer,
Event_DSi_NWifi, Event_DSi_NWifi,
Event_DSi_CamIRQ,
Event_DSi_CamTransfer,
Event_DSi_RAMSizeChange, Event_DSi_RAMSizeChange,
@ -82,7 +84,7 @@ enum
IRQ_IPCSendDone, IRQ_IPCSendDone,
IRQ_IPCRecv, IRQ_IPCRecv,
IRQ_CartSendDone, // TODO: less misleading name IRQ_CartSendDone, // TODO: less misleading name
IRQ_CartIREQMC, // IRQ triggered by game cart (example: Pok<EFBFBD>mon Typing Adventure, BT controller) IRQ_CartIREQMC, // IRQ triggered by game cart (example: Pokémon Typing Adventure, BT controller)
IRQ_GXFIFO, IRQ_GXFIFO,
IRQ_LidOpen, IRQ_LidOpen,
IRQ_SPI, IRQ_SPI,

View File

@ -37,6 +37,7 @@ u8* SRAM;
u32 SRAMLength; u32 SRAMLength;
char SRAMPath[1024]; char SRAMPath[1024];
bool SRAMFileDirty;
void (*WriteFunc)(u8 val, bool islast); void (*WriteFunc)(u8 val, bool islast);
@ -445,8 +446,16 @@ void Write(u8 val, u32 hold)
break; break;
} }
if (islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0)) SRAMFileDirty |= islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0);
}
void FlushSRAMFile()
{ {
if (!SRAMFileDirty)
return;
SRAMFileDirty = false;
FILE* f = Platform::OpenFile(SRAMPath, "wb"); FILE* f = Platform::OpenFile(SRAMPath, "wb");
if (f) if (f)
{ {
@ -454,7 +463,6 @@ void Write(u8 val, u32 hold)
fclose(f); fclose(f);
} }
} }
}
} }
@ -1034,6 +1042,11 @@ void RelocateSave(const char* path, bool write)
NDSCart_SRAM::RelocateSave(path, write); NDSCart_SRAM::RelocateSave(path, write);
} }
// Forwards to NDSCart_SRAM::FlushSRAMFile(), which returns immediately
// unless SRAMFileDirty is set and otherwise clears the flag and opens
// SRAMPath for writing.
void FlushSRAMFile()
{
NDSCart_SRAM::FlushSRAMFile();
}
int ImportSRAM(const u8* data, u32 length) int ImportSRAM(const u8* data, u32 length)
{ {
memcpy(NDSCart_SRAM::SRAM, data, std::min(length, NDSCart_SRAM::SRAMLength)); memcpy(NDSCart_SRAM::SRAM, data, std::min(length, NDSCart_SRAM::SRAMLength));

View File

@ -46,6 +46,9 @@ void DoSavestate(Savestate* file);
void DecryptSecureArea(u8* out); void DecryptSecureArea(u8* out);
bool LoadROM(const char* path, const char* sram, bool direct); bool LoadROM(const char* path, const char* sram, bool direct);
void FlushSRAMFile();
void RelocateSave(const char* path, bool write); void RelocateSave(const char* path, bool write);
int ImportSRAM(const u8* data, u32 length); int ImportSRAM(const u8* data, u32 length);

149
src/NonStupidBitfield.h Normal file
View File

@ -0,0 +1,149 @@
#ifndef NONSTUPIDBITFIELD_H
#define NONSTUPIDBITFIELD_H
#include "types.h"
#include <memory.h>
#include <initializer_list>
#include <algorithm>
// like std::bitset but less stupid and optimised for
// our use case (keeping track of memory invalidations)
//
// Layout: bit i is stored in Data[i >> 3] at bit position (i & 7).
// Iteration visits the indices of all set bits in ascending order and
// scans the storage in the widest word (8/4/2/1 bytes) that evenly
// divides DataLength, so sparse bitfields are skipped quickly.
template <u32 Size>
struct NonStupidBitField
{
    static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");

    // Number of bytes backing the bitfield.
    static const u32 DataLength = Size / 8;

    // Width in bytes of the words the iterator scans with.
    static const u32 WordBytes =
        (DataLength % 8) == 0 ? 8 :
        (DataLength % 4) == 0 ? 4 :
        (DataLength % 2) == 0 ? 2 : 1;

    u8 Data[DataLength];

    // Assembles `bytes` (<= 8) consecutive bytes starting at src into a
    // word in which byte k occupies bits [8k, 8k+7]. Building the word
    // byte by byte avoids misaligned/aliasing pointer casts and keeps
    // the bit numbering identical on every byte order.
    static u64 LoadWord(const u8* src, u32 bytes)
    {
        u64 word = 0;
        for (u32 i = 0; i < bytes; i++)
            word |= (u64)src[i] << (i * 8);
        return word;
    }

    // Proxy returned by operator[] so a single bit can be read/assigned.
    struct Ref
    {
        NonStupidBitField<Size>& BitField;
        u32 Idx;

        // Reads the referenced bit.
        operator bool() const
        {
            return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7));
        }

        // Writes the referenced bit (clear first, then set if requested).
        Ref& operator=(bool set)
        {
            BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
            BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
            return *this;
        }
    };

    // Forward iterator over the indices of set bits.
    //   DataIdx       - byte offset of the word currently being scanned
    //   BitIdx        - position, inside that word, of the current bit
    //   RemainingBits - set bits of the current word not yet visited
    // The end iterator is {DataLength, 0, 0}.
    struct Iterator
    {
        NonStupidBitField<Size>& BitField;
        u32 DataIdx;
        u32 BitIdx;
        u64 RemainingBits;

        // Index of the bit the iterator currently points at.
        u32 operator*() const { return DataIdx * 8 + BitIdx; }

        // Positions are compared in full (word and bit), so two iterators
        // inside the same word no longer compare equal by accident.
        bool operator==(const Iterator& other) const
        {
            return other.DataIdx == DataIdx && other.BitIdx == BitIdx;
        }
        bool operator!=(const Iterator& other) const
        {
            return !(*this == other);
        }

        // Advances to the next set bit, scanning sizeof(T) bytes at a time.
        // When no set bit is left the iterator parks exactly on End():
        // nothing is read past the end of Data and __builtin_ctzll is
        // never called with 0 (which would be undefined).
        template <typename T>
        void Next()
        {
            while (RemainingBits == 0)
            {
                if (DataIdx < DataLength)
                    DataIdx += sizeof(T);

                if (DataIdx >= DataLength)
                {
                    DataIdx = DataLength;
                    BitIdx = 0;
                    return;
                }

                RemainingBits = LoadWord(&BitField.Data[DataIdx], sizeof(T));
            }

            BitIdx = __builtin_ctzll(RemainingBits);
            RemainingBits &= ~(1ULL << BitIdx);
        }

        Iterator operator++(int)
        {
            Iterator prev(*this);
            ++*this;
            return prev;
        }

        Iterator& operator++()
        {
            if (WordBytes == 8)
                Next<u64>();
            else if (WordBytes == 4)
                Next<u32>();
            else if (WordBytes == 2)
                Next<u16>();
            else
                Next<u8>();

            return *this;
        }
    };

    // Creates a bitfield with exactly the bits [start, start + size) set.
    // A range reaching past Size is clamped instead of writing out of bounds.
    NonStupidBitField(u32 start, u32 size)
    {
        memset(Data, 0, sizeof(Data));

        if (size == 0 || start >= Size)
            return;
        if (size > Size - start)
            size = Size - start;

        const u32 last = start + size - 1; // index of the last bit to set
        const u32 firstByte = start >> 3;
        const u32 lastByte = last >> 3;

        // headMask keeps bits from (start & 7) upwards in the first byte,
        // tailMask keeps bits up to and including (last & 7) in the last.
        const u8 headMask = (u8)(0xFF << (start & 0x7));
        const u8 tailMask = (u8)(0xFF >> (7 - (last & 0x7)));

        if (firstByte == lastByte)
        {
            // the whole range sits inside a single byte
            Data[firstByte] = headMask & tailMask;
            return;
        }

        Data[firstByte] = headMask;
        Data[lastByte] = tailMask;

        // every byte strictly between the two partial ones is fully set
        if (lastByte - firstByte > 1)
            memset(Data + firstByte + 1, 0xFF, lastByte - firstByte - 1);
    }

    // Creates a bitfield with every bit cleared.
    NonStupidBitField()
    {
        memset(Data, 0, sizeof(Data));
    }

    // One-past-the-end iterator.
    Iterator End()
    {
        return Iterator{*this, DataLength, 0, 0};
    }

    // Iterator at the first set bit, or End() if no bit is set.
    Iterator Begin()
    {
        Iterator it{*this, 0, 0, LoadWord(Data, WordBytes)};
        ++it;
        return it;
    }

    // Read/write access to a single bit; idx is not range checked.
    Ref operator[](u32 idx)
    {
        return Ref{*this, idx};
    }

    // Bitwise OR with another bitfield of the same size.
    NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
    {
        for (u32 i = 0; i < DataLength; i++)
        {
            Data[i] |= other.Data[i];
        }
        return *this;
    }

    // Bitwise AND with another bitfield of the same size.
    NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
    {
        for (u32 i = 0; i < DataLength; i++)
        {
            Data[i] &= other.Data[i];
        }
        return *this;
    }
};
#endif

View File

@ -23,8 +23,13 @@
#include <string.h> #include <string.h>
// TODO: different includes for each platform // TODO: different includes for each platform
#ifdef __APPLE__
#include <OpenGL/gl3.h>
#include <OpenGL/gl3ext.h>
#else
#include <GL/gl.h> #include <GL/gl.h>
#include <GL/glext.h> #include <GL/glext.h>
#endif
#include "Platform.h" #include "Platform.h"
@ -61,6 +66,11 @@
#endif #endif
#ifdef __APPLE__
#define DO_PROCLIST(func)
#else
#define DO_PROCLIST(func) \ #define DO_PROCLIST(func) \
DO_PROCLIST_1_3(func) \ DO_PROCLIST_1_3(func) \
@ -128,6 +138,7 @@
\ \
func(GLGETSTRINGI, glGetStringi); \ func(GLGETSTRINGI, glGetStringi); \
#endif
namespace OpenGL namespace OpenGL
{ {

View File

@ -67,15 +67,24 @@ inline bool LocalFileExists(const char* name)
return true; return true;
} }
void* Thread_Create(void (*func)()); struct Thread;
void Thread_Free(void* thread); Thread* Thread_Create(void (*func)());
void Thread_Wait(void* thread); void Thread_Free(Thread* thread);
void Thread_Wait(Thread* thread);
void* Semaphore_Create(); struct Semaphore;
void Semaphore_Free(void* sema); Semaphore* Semaphore_Create();
void Semaphore_Reset(void* sema); void Semaphore_Free(Semaphore* sema);
void Semaphore_Wait(void* sema); void Semaphore_Reset(Semaphore* sema);
void Semaphore_Post(void* sema); void Semaphore_Wait(Semaphore* sema);
void Semaphore_Post(Semaphore* sema, int count = 1);
struct Mutex;
Mutex* Mutex_Create();
void Mutex_Free(Mutex* mutex);
void Mutex_Lock(Mutex* mutex);
void Mutex_Unlock(Mutex* mutex);
bool Mutex_TryLock(Mutex* mutex);
void* GL_GetProcAddress(const char* proc); void* GL_GetProcAddress(const char* proc);

View File

@ -1143,7 +1143,7 @@ ROMListEntry ROMList[] =
{0x454A4943, 0x00800000, 0x00000001}, {0x454A4943, 0x00800000, 0x00000001},
{0x454A4956, 0x04000000, 0x00000003}, {0x454A4956, 0x04000000, 0x00000003},
{0x454A4A42, 0x01000000, 0x00000001}, {0x454A4A42, 0x01000000, 0x00000001},
{0x454A4A43, 0x00800000, 0x00000001}, {0x454A4A43, 0x00800000, 0x00000002},
{0x454A4C41, 0x01000000, 0x00000001}, {0x454A4C41, 0x01000000, 0x00000001},
{0x454A4C42, 0x04000000, 0x00000001}, {0x454A4C42, 0x04000000, 0x00000001},
{0x454A4C43, 0x08000000, 0x00000002}, {0x454A4C43, 0x08000000, 0x00000002},

View File

@ -18,6 +18,7 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "Platform.h"
#include "NDS.h" #include "NDS.h"
#include "DSi.h" #include "DSi.h"
#include "SPU.h" #include "SPU.h"
@ -61,13 +62,15 @@ const s16 PSGTable[8][8] =
{-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF} {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF}
}; };
const u32 kSamplesPerRun = 1; const u32 OutputBufferSize = 2*2048;
s16 OutputBackbuffer[2 * OutputBufferSize];
u32 OutputBackbufferWritePosition;
const u32 OutputBufferSize = 2*1024; s16 OutputFrontBuffer[2 * OutputBufferSize];
s16 OutputBuffer[2 * OutputBufferSize]; u32 OutputFrontBufferWritePosition;
volatile u32 OutputReadOffset; u32 OutputFrontBufferReadPosition;
volatile u32 OutputWriteOffset;
Platform::Mutex* AudioLock;
u16 Cnt; u16 Cnt;
u8 MasterVolume; u8 MasterVolume;
@ -85,6 +88,8 @@ bool Init()
Capture[0] = new CaptureUnit(0); Capture[0] = new CaptureUnit(0);
Capture[1] = new CaptureUnit(1); Capture[1] = new CaptureUnit(1);
AudioLock = Platform::Mutex_Create();
return true; return true;
} }
@ -95,6 +100,8 @@ void DeInit()
delete Capture[0]; delete Capture[0];
delete Capture[1]; delete Capture[1];
Platform::Mutex_Free(AudioLock);
} }
void Reset() void Reset()
@ -111,15 +118,18 @@ void Reset()
Capture[0]->Reset(); Capture[0]->Reset();
Capture[1]->Reset(); Capture[1]->Reset();
NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun); NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0);
} }
void Stop() void Stop()
{ {
memset(OutputBuffer, 0, 2*OutputBufferSize*2); Platform::Mutex_Lock(AudioLock);
memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2);
OutputReadOffset = 0; OutputBackbufferWritePosition = 0;
OutputWriteOffset = 0; OutputFrontBufferReadPosition = 0;
OutputFrontBufferWritePosition = 0;
Platform::Mutex_Unlock(AudioLock);
} }
void DoSavestate(Savestate* file) void DoSavestate(Savestate* file)
@ -416,11 +426,11 @@ void Channel::NextSample_Noise()
} }
template<u32 type> template<u32 type>
void Channel::Run(s32* buf, u32 samples) s32 Channel::Run()
{ {
if (!(Cnt & (1<<31))) return; if (!(Cnt & (1<<31))) return 0;
if ((type < 3) && ((Length+LoopPos) < 16)) return; if ((type < 3) && ((Length+LoopPos) < 16)) return 0;
if (KeyOn) if (KeyOn)
{ {
@ -428,8 +438,6 @@ void Channel::Run(s32* buf, u32 samples)
KeyOn = false; KeyOn = false;
} }
for (u32 s = 0; s < samples; s++)
{
Timer += 512; // 1 sample = 512 cycles at 16MHz Timer += 512; // 1 sample = 512 cycles at 16MHz
while (Timer >> 16) while (Timer >> 16)
@ -449,24 +457,13 @@ void Channel::Run(s32* buf, u32 samples)
s32 val = (s32)CurSample; s32 val = (s32)CurSample;
val <<= VolumeShift; val <<= VolumeShift;
val *= Volume; val *= Volume;
buf[s] = val; return val;
if (!(Cnt & (1<<31))) break;
}
} }
void Channel::PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf) void Channel::PanOutput(s32 in, s32& left, s32& right)
{ {
for (u32 s = 0; s < samples; s++) left += ((s64)in * (128-Pan)) >> 10;
{ right += ((s64)in * Pan) >> 10;
s32 val = (s32)inbuf[s];
s32 l = ((s64)val * (128-Pan)) >> 10;
s32 r = ((s64)val * Pan) >> 10;
leftbuf[s] += l;
rightbuf[s] += r;
}
} }
@ -602,39 +599,31 @@ void CaptureUnit::Run(s32 sample)
} }
void Mix(u32 samples) void Mix(u32 dummy)
{ {
s32 channelbuf[32]; s32 left = 0, right = 0;
s32 leftbuf[32], rightbuf[32]; s32 leftoutput = 0, rightoutput = 0;
s32 ch0buf[32], ch1buf[32], ch2buf[32], ch3buf[32];
s32 leftoutput[32], rightoutput[32];
for (u32 s = 0; s < samples; s++)
{
leftbuf[s] = 0; rightbuf[s] = 0;
leftoutput[s] = 0; rightoutput[s] = 0;
}
if (Cnt & (1<<15)) if (Cnt & (1<<15))
{ {
Channels[0]->DoRun(ch0buf, samples); s32 ch0 = Channels[0]->DoRun();
Channels[1]->DoRun(ch1buf, samples); s32 ch1 = Channels[1]->DoRun();
Channels[2]->DoRun(ch2buf, samples); s32 ch2 = Channels[2]->DoRun();
Channels[3]->DoRun(ch3buf, samples); s32 ch3 = Channels[3]->DoRun();
// TODO: addition from capture registers // TODO: addition from capture registers
Channels[0]->PanOutput(ch0buf, samples, leftbuf, rightbuf); Channels[0]->PanOutput(ch0, left, right);
Channels[2]->PanOutput(ch2buf, samples, leftbuf, rightbuf); Channels[2]->PanOutput(ch2, left, right);
if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1buf, samples, leftbuf, rightbuf); if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1, left, right);
if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3buf, samples, leftbuf, rightbuf); if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3, left, right);
for (int i = 4; i < 16; i++) for (int i = 4; i < 16; i++)
{ {
Channel* chan = Channels[i]; Channel* chan = Channels[i];
chan->DoRun(channelbuf, samples); s32 channel = chan->DoRun();
chan->PanOutput(channelbuf, samples, leftbuf, rightbuf); chan->PanOutput(channel, left, right);
} }
// sound capture // sound capture
@ -642,32 +631,24 @@ void Mix(u32 samples)
if (Capture[0]->Cnt & (1<<7)) if (Capture[0]->Cnt & (1<<7))
{ {
for (u32 s = 0; s < samples; s++) s32 val = left;
{
s32 val = leftbuf[s];
val >>= 8; val >>= 8;
if (val < -0x8000) val = -0x8000; if (val < -0x8000) val = -0x8000;
else if (val > 0x7FFF) val = 0x7FFF; else if (val > 0x7FFF) val = 0x7FFF;
Capture[0]->Run(val); Capture[0]->Run(val);
if (!(Capture[0]->Cnt & (1<<7))) break;
}
} }
if (Capture[1]->Cnt & (1<<7)) if (Capture[1]->Cnt & (1<<7))
{ {
for (u32 s = 0; s < samples; s++) s32 val = right;
{
s32 val = rightbuf[s];
val >>= 8; val >>= 8;
if (val < -0x8000) val = -0x8000; if (val < -0x8000) val = -0x8000;
else if (val > 0x7FFF) val = 0x7FFF; else if (val > 0x7FFF) val = 0x7FFF;
Capture[1]->Run(val); Capture[1]->Run(val);
if (!(Capture[1]->Cnt & (1<<7))) break;
}
} }
// final output // final output
@ -675,31 +656,25 @@ void Mix(u32 samples)
switch (Cnt & 0x0300) switch (Cnt & 0x0300)
{ {
case 0x0000: // left mixer case 0x0000: // left mixer
{ leftoutput = left;
for (u32 s = 0; s < samples; s++)
leftoutput[s] = leftbuf[s];
}
break; break;
case 0x0100: // channel 1 case 0x0100: // channel 1
{ {
s32 pan = 128 - Channels[1]->Pan; s32 pan = 128 - Channels[1]->Pan;
for (u32 s = 0; s < samples; s++) leftoutput = ((s64)ch1 * pan) >> 10;
leftoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
} }
break; break;
case 0x0200: // channel 3 case 0x0200: // channel 3
{ {
s32 pan = 128 - Channels[3]->Pan; s32 pan = 128 - Channels[3]->Pan;
for (u32 s = 0; s < samples; s++) leftoutput = ((s64)ch3 * pan) >> 10;
leftoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
} }
break; break;
case 0x0300: // channel 1+3 case 0x0300: // channel 1+3
{ {
s32 pan1 = 128 - Channels[1]->Pan; s32 pan1 = 128 - Channels[1]->Pan;
s32 pan3 = 128 - Channels[3]->Pan; s32 pan3 = 128 - Channels[3]->Pan;
for (u32 s = 0; s < samples; s++) leftoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10);
leftoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
} }
break; break;
} }
@ -707,105 +682,122 @@ void Mix(u32 samples)
switch (Cnt & 0x0C00) switch (Cnt & 0x0C00)
{ {
case 0x0000: // right mixer case 0x0000: // right mixer
{ rightoutput = right;
for (u32 s = 0; s < samples; s++)
rightoutput[s] = rightbuf[s];
}
break; break;
case 0x0400: // channel 1 case 0x0400: // channel 1
{ {
s32 pan = Channels[1]->Pan; s32 pan = Channels[1]->Pan;
for (u32 s = 0; s < samples; s++) rightoutput = ((s64)ch1 * pan) >> 10;
rightoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
} }
break; break;
case 0x0800: // channel 3 case 0x0800: // channel 3
{ {
s32 pan = Channels[3]->Pan; s32 pan = Channels[3]->Pan;
for (u32 s = 0; s < samples; s++) rightoutput = ((s64)ch3 * pan) >> 10;
rightoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
} }
break; break;
case 0x0C00: // channel 1+3 case 0x0C00: // channel 1+3
{ {
s32 pan1 = Channels[1]->Pan; s32 pan1 = Channels[1]->Pan;
s32 pan3 = Channels[3]->Pan; s32 pan3 = Channels[3]->Pan;
for (u32 s = 0; s < samples; s++) rightoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10);
rightoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
} }
break; break;
} }
} }
for (u32 s = 0; s < samples; s++) leftoutput = ((s64)leftoutput * MasterVolume) >> 7;
rightoutput = ((s64)rightoutput * MasterVolume) >> 7;
leftoutput >>= 8;
if (leftoutput < -0x8000) leftoutput = -0x8000;
else if (leftoutput > 0x7FFF) leftoutput = 0x7FFF;
rightoutput >>= 8;
if (rightoutput < -0x8000) rightoutput = -0x8000;
else if (rightoutput > 0x7FFF) rightoutput = 0x7FFF;
// OutputBackbuffer can never get full because it's
// transferred to OutputFrontBuffer at the end of the frame
OutputBackbuffer[OutputBackbufferWritePosition ] = leftoutput >> 1;
OutputBackbuffer[OutputBackbufferWritePosition + 1] = rightoutput >> 1;
OutputBackbufferWritePosition += 2;
NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0);
}
void TransferOutput()
{ {
s32 l = leftoutput[s]; Platform::Mutex_Lock(AudioLock);
s32 r = rightoutput[s]; for (u32 i = 0; i < OutputBackbufferWritePosition; i += 2)
{
l = ((s64)l * MasterVolume) >> 7; OutputFrontBuffer[OutputFrontBufferWritePosition ] = OutputBackbuffer[i ];
r = ((s64)r * MasterVolume) >> 7; OutputFrontBuffer[OutputFrontBufferWritePosition + 1] = OutputBackbuffer[i + 1];
l >>= 8; OutputFrontBufferWritePosition += 2;
if (l < -0x8000) l = -0x8000; OutputFrontBufferWritePosition &= OutputBufferSize*2-1;
else if (l > 0x7FFF) l = 0x7FFF; if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition)
r >>= 8;
if (r < -0x8000) r = -0x8000;
else if (r > 0x7FFF) r = 0x7FFF;
OutputBuffer[OutputWriteOffset ] = l >> 1;
OutputBuffer[OutputWriteOffset + 1] = r >> 1;
OutputWriteOffset += 2;
OutputWriteOffset &= ((2*OutputBufferSize)-1);
if (OutputWriteOffset == OutputReadOffset)
{ {
//printf("!! SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1);
// advance the read position too, to avoid losing the entire FIFO // advance the read position too, to avoid losing the entire FIFO
OutputReadOffset += 2; OutputFrontBufferReadPosition += 2;
OutputReadOffset &= ((2*OutputBufferSize)-1); OutputFrontBufferReadPosition &= OutputBufferSize*2-1;
} }
} }
OutputBackbufferWritePosition = 0;
NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun); Platform::Mutex_Unlock(AudioLock);
} }
void TrimOutput() void TrimOutput()
{ {
Platform::Mutex_Lock(AudioLock);
const int halflimit = (OutputBufferSize / 2); const int halflimit = (OutputBufferSize / 2);
int readpos = OutputWriteOffset - (halflimit*2); int readpos = OutputFrontBufferWritePosition - (halflimit*2);
if (readpos < 0) readpos += (OutputBufferSize*2); if (readpos < 0) readpos += (OutputBufferSize*2);
OutputReadOffset = readpos; OutputFrontBufferReadPosition = readpos;
Platform::Mutex_Unlock(AudioLock);
} }
void DrainOutput() void DrainOutput()
{ {
OutputReadOffset = 0; Platform::Mutex_Lock(AudioLock);
OutputWriteOffset = 0; OutputFrontBufferWritePosition = 0;
OutputFrontBufferReadPosition = 0;
Platform::Mutex_Unlock(AudioLock);
} }
void InitOutput() void InitOutput()
{ {
memset(OutputBuffer, 0, 2*OutputBufferSize*2); Platform::Mutex_Lock(AudioLock);
OutputReadOffset = 0; memset(OutputBackbuffer, 0, 2*OutputBufferSize*2);
OutputWriteOffset = OutputBufferSize; memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2);
OutputFrontBufferReadPosition = 0;
OutputFrontBufferWritePosition = 0;
Platform::Mutex_Unlock(AudioLock);
} }
int GetOutputSize() int GetOutputSize()
{ {
Platform::Mutex_Lock(AudioLock);
int ret; int ret;
if (OutputWriteOffset >= OutputReadOffset) if (OutputFrontBufferWritePosition >= OutputFrontBufferReadPosition)
ret = OutputWriteOffset - OutputReadOffset; ret = OutputFrontBufferWritePosition - OutputFrontBufferReadPosition;
else else
ret = (OutputBufferSize*2) - OutputReadOffset + OutputWriteOffset; ret = (OutputBufferSize*2) - OutputFrontBufferReadPosition + OutputFrontBufferWritePosition;
ret >>= 1; ret >>= 1;
Platform::Mutex_Unlock(AudioLock);
return ret; return ret;
} }
void Sync(bool wait) void Sync(bool wait)
{ {
// this function is currently not used anywhere
// depending on the usage context the thread safety measures could be made
// a lot faster
// sync to audio output in case the core is running too fast // sync to audio output in case the core is running too fast
// * wait=true: wait until enough audio data has been played // * wait=true: wait until enough audio data has been played
// * wait=false: merely skip some audio data to avoid a FIFO overflow // * wait=false: merely skip some audio data to avoid a FIFO overflow
@ -819,32 +811,42 @@ void Sync(bool wait)
} }
else if (GetOutputSize() > halflimit) else if (GetOutputSize() > halflimit)
{ {
int readpos = OutputWriteOffset - (halflimit*2); Platform::Mutex_Lock(AudioLock);
int readpos = OutputFrontBufferWritePosition - (halflimit*2);
if (readpos < 0) readpos += (OutputBufferSize*2); if (readpos < 0) readpos += (OutputBufferSize*2);
OutputReadOffset = readpos; OutputFrontBufferReadPosition = readpos;
Platform::Mutex_Unlock(AudioLock);
} }
} }
int ReadOutput(s16* data, int samples) int ReadOutput(s16* data, int samples)
{ {
if (OutputReadOffset == OutputWriteOffset) Platform::Mutex_Lock(AudioLock);
if (OutputFrontBufferReadPosition == OutputFrontBufferWritePosition)
{
Platform::Mutex_Unlock(AudioLock);
return 0; return 0;
}
for (int i = 0; i < samples; i++) for (int i = 0; i < samples; i++)
{ {
*data++ = OutputBuffer[OutputReadOffset]; *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition];
*data++ = OutputBuffer[OutputReadOffset + 1]; *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition + 1];
//if (OutputReadOffset != OutputWriteOffset) OutputFrontBufferReadPosition += 2;
OutputFrontBufferReadPosition &= ((2*OutputBufferSize)-1);
if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition)
{ {
OutputReadOffset += 2; Platform::Mutex_Unlock(AudioLock);
OutputReadOffset &= ((2*OutputBufferSize)-1);
}
if (OutputReadOffset == OutputWriteOffset)
return i+1; return i+1;
} }
}
Platform::Mutex_Unlock(AudioLock);
return samples; return samples;
} }

View File

@ -33,7 +33,7 @@ void DoSavestate(Savestate* file);
void SetBias(u16 bias); void SetBias(u16 bias);
void Mix(u32 samples); void Mix(u32 dummy);
void TrimOutput(); void TrimOutput();
void DrainOutput(); void DrainOutput();
@ -41,6 +41,7 @@ void InitOutput();
int GetOutputSize(); int GetOutputSize();
void Sync(bool wait); void Sync(bool wait);
int ReadOutput(s16* data, int samples); int ReadOutput(s16* data, int samples);
void TransferOutput();
u8 Read8(u32 addr); u8 Read8(u32 addr);
u16 Read16(u32 addr); u16 Read16(u32 addr);
@ -123,26 +124,24 @@ public:
void NextSample_PSG(); void NextSample_PSG();
void NextSample_Noise(); void NextSample_Noise();
template<u32 type> void Run(s32* buf, u32 samples); template<u32 type> s32 Run();
void DoRun(s32* buf, u32 samples) s32 DoRun()
{ {
for (u32 s = 0; s < samples; s++)
buf[s] = 0;
switch ((Cnt >> 29) & 0x3) switch ((Cnt >> 29) & 0x3)
{ {
case 0: Run<0>(buf, samples); break; case 0: return Run<0>(); break;
case 1: Run<1>(buf, samples); break; case 1: return Run<1>(); break;
case 2: Run<2>(buf, samples); break; case 2: return Run<2>(); break;
case 3: case 3:
if (Num >= 14) Run<4>(buf, samples); if (Num >= 14) return Run<4>();
else if (Num >= 8) Run<3>(buf, samples); else if (Num >= 8) return Run<3>();
break; default:
return 0;
} }
} }
void PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf); void PanOutput(s32 in, s32& left, s32& right);
private: private:
u32 (*BusRead32)(u32 addr); u32 (*BusRead32)(u32 addr);

View File

@ -22,7 +22,7 @@
#include <stdio.h> #include <stdio.h>
#include "types.h" #include "types.h"
#define SAVESTATE_MAJOR 6 #define SAVESTATE_MAJOR 7
#define SAVESTATE_MINOR 0 #define SAVESTATE_MINOR 0
class Savestate class Savestate

View File

@ -0,0 +1,13 @@
#ifndef SHAREDCONFIG_H
#define SHAREDCONFIG_H
// Declarations only: each variable is `extern`, so the definitions are
// provided by another translation unit.
// NOTE(review): presumably the subset of frontend settings that shared
// frontend code needs without including a platform-specific config header
// -- confirm against the includes of this file.
namespace Config
{
extern int ConsoleType;
extern int DirectBoot;
extern int SavestateRelocSRAM;
}
#endif

View File

@ -21,7 +21,7 @@
#include "FrontendUtil.h" #include "FrontendUtil.h"
#include "Config.h" #include "Config.h"
#include "qt_sdl/PlatformConfig.h" // FIXME!!! #include "SharedConfig.h"
#include "Platform.h" #include "Platform.h"
#include "NDS.h" #include "NDS.h"

View File

@ -100,6 +100,19 @@ if (PORTABLE)
add_definitions(-DPORTABLE) add_definitions(-DPORTABLE)
endif() endif()
# macOS only: build the melonDS target as an application bundle, using
# melonDS.plist from the top of the source tree as its Info.plist.
if (APPLE)
set_target_properties(melonDS PROPERTIES
MACOSX_BUNDLE true
MACOSX_BUNDLE_INFO_PLIST ${CMAKE_SOURCE_DIR}/melonDS.plist
OUTPUT_NAME melonDS
)
# Copy icon into the bundle: the .icns file is added as a source of the
# target and MACOSX_PACKAGE_LOCATION places it in the bundle's Resources.
target_sources(melonDS PRIVATE "${CMAKE_SOURCE_DIR}/melonDS.icns")
set_source_files_properties("${CMAKE_SOURCE_DIR}/melonDS.icns" PROPERTIES MACOSX_PACKAGE_LOCATION Resources)
endif()
install(FILES ../../../net.kuribo64.melonDS.desktop DESTINATION ${CMAKE_INSTALL_PREFIX}/share/applications) install(FILES ../../../net.kuribo64.melonDS.desktop DESTINATION ${CMAKE_INSTALL_PREFIX}/share/applications)
install(FILES ../../../icon/melon_16x16.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/16x16/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_16x16.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/16x16/apps RENAME net.kuribo64.melonDS.png)
install(FILES ../../../icon/melon_32x32.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/32x32/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_32x32.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/32x32/apps RENAME net.kuribo64.melonDS.png)
@ -107,4 +120,4 @@ install(FILES ../../../icon/melon_48x48.png DESTINATION ${CMAKE_INSTALL_PREFIX}/
install(FILES ../../../icon/melon_64x64.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/64x64/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_64x64.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/64x64/apps RENAME net.kuribo64.melonDS.png)
install(FILES ../../../icon/melon_128x128.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/128x128/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_128x128.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/128x128/apps RENAME net.kuribo64.melonDS.png)
install(FILES ../../../icon/melon_256x256.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/256x256/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_256x256.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/256x256/apps RENAME net.kuribo64.melonDS.png)
install(TARGETS melonDS RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) install(TARGETS melonDS BUNDLE DESTINATION ${CMAKE_BINARY_DIR} RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)

View File

@ -65,6 +65,9 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new
ui->chkJITBranchOptimisations->setChecked(Config::JIT_BranchOptimisations != 0); ui->chkJITBranchOptimisations->setChecked(Config::JIT_BranchOptimisations != 0);
ui->chkJITLiteralOptimisations->setChecked(Config::JIT_LiteralOptimisations != 0); ui->chkJITLiteralOptimisations->setChecked(Config::JIT_LiteralOptimisations != 0);
ui->chkJITFastMemory->setChecked(Config::JIT_FastMemory != 0); ui->chkJITFastMemory->setChecked(Config::JIT_FastMemory != 0);
#ifdef __APPLE__
ui->chkJITFastMemory->setDisabled(true);
#endif
ui->spnJITMaximumBlockSize->setValue(Config::JIT_MaxBlockSize); ui->spnJITMaximumBlockSize->setValue(Config::JIT_MaxBlockSize);
#else #else
ui->chkEnableJIT->setDisabled(true); ui->chkEnableJIT->setDisabled(true);
@ -329,6 +332,8 @@ void EmuSettingsDialog::on_chkEnableJIT_toggled()
bool disabled = !ui->chkEnableJIT->isChecked(); bool disabled = !ui->chkEnableJIT->isChecked();
ui->chkJITBranchOptimisations->setDisabled(disabled); ui->chkJITBranchOptimisations->setDisabled(disabled);
ui->chkJITLiteralOptimisations->setDisabled(disabled); ui->chkJITLiteralOptimisations->setDisabled(disabled);
#ifndef __APPLE__
ui->chkJITFastMemory->setDisabled(disabled); ui->chkJITFastMemory->setDisabled(disabled);
#endif
ui->spnJITMaximumBlockSize->setDisabled(disabled); ui->spnJITMaximumBlockSize->setDisabled(disabled);
} }

View File

@ -216,6 +216,7 @@ KeyMapButton::KeyMapButton(int* mapping, bool hotkey) : QPushButton()
setCheckable(true); setCheckable(true);
setText(mappingText()); setText(mappingText());
setFocusPolicy(Qt::StrongFocus); //Fixes binding keys in macOS
connect(this, &KeyMapButton::clicked, this, &KeyMapButton::onClick); connect(this, &KeyMapButton::clicked, this, &KeyMapButton::onClick);
} }

View File

@ -33,7 +33,11 @@
#include <sys/types.h> #include <sys/types.h>
#include <ifaddrs.h> #include <ifaddrs.h>
#include <netinet/in.h> #include <netinet/in.h>
#ifdef __linux__
#include <linux/if_packet.h> #include <linux/if_packet.h>
#else
#include <net/if_dl.h>
#endif
#endif #endif
@ -66,6 +70,9 @@ const char* PCapLibNames[] =
#ifdef __WIN32__ #ifdef __WIN32__
// TODO: name for npcap in non-WinPCap mode // TODO: name for npcap in non-WinPCap mode
"wpcap.dll", "wpcap.dll",
#elif defined(__APPLE__)
"libpcap.A.dylib",
"libpcap.dylib",
#else #else
// Linux lib names // Linux lib names
"libpcap.so.1", "libpcap.so.1",
@ -276,6 +283,7 @@ bool Init(bool open_adapter)
struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr; struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr;
memcpy(adata->IP_v4, &sa->sin_addr, 4); memcpy(adata->IP_v4, &sa->sin_addr, 4);
} }
#ifdef __linux__
else if (af == AF_PACKET) else if (af == AF_PACKET)
{ {
struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr; struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr;
@ -284,7 +292,16 @@ bool Init(bool open_adapter)
else else
memcpy(adata->MAC, sa->sll_addr, 6); memcpy(adata->MAC, sa->sll_addr, 6);
} }
#else
else if (af == AF_LINK)
{
struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr;
if (sa->sdl_alen != 6)
printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name);
else
memcpy(adata->MAC, LLADDR(sa), 6);
}
#endif
curaddr = curaddr->ifa_next; curaddr = curaddr->ifa_next;
} }
} }

View File

@ -23,6 +23,7 @@
#include <QDir> #include <QDir>
#include <QThread> #include <QThread>
#include <QSemaphore> #include <QSemaphore>
#include <QMutex>
#include <QOpenGLContext> #include <QOpenGLContext>
#include "Platform.h" #include "Platform.h"
@ -187,53 +188,77 @@ FILE* OpenLocalFile(const char* path, const char* mode)
return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w'); return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w');
} }
void* Thread_Create(void (* func)()) Thread* Thread_Create(void (* func)())
{ {
QThread* t = QThread::create(func); QThread* t = QThread::create(func);
t->start(); t->start();
return (void*) t; return (Thread*) t;
} }
void Thread_Free(void* thread) void Thread_Free(Thread* thread)
{ {
QThread* t = (QThread*) thread; QThread* t = (QThread*) thread;
t->terminate(); t->terminate();
delete t; delete t;
} }
void Thread_Wait(void* thread) void Thread_Wait(Thread* thread)
{ {
((QThread*) thread)->wait(); ((QThread*) thread)->wait();
} }
void* Semaphore_Create() Semaphore* Semaphore_Create()
{ {
return new QSemaphore(); return (Semaphore*)new QSemaphore();
} }
void Semaphore_Free(void* sema) void Semaphore_Free(Semaphore* sema)
{ {
delete (QSemaphore*) sema; delete (QSemaphore*) sema;
} }
void Semaphore_Reset(void* sema) void Semaphore_Reset(Semaphore* sema)
{ {
QSemaphore* s = (QSemaphore*) sema; QSemaphore* s = (QSemaphore*) sema;
s->acquire(s->available()); s->acquire(s->available());
} }
void Semaphore_Wait(void* sema) void Semaphore_Wait(Semaphore* sema)
{ {
((QSemaphore*) sema)->acquire(); ((QSemaphore*) sema)->acquire();
} }
void Semaphore_Post(void* sema) void Semaphore_Post(Semaphore* sema, int count)
{ {
((QSemaphore*) sema)->release(); ((QSemaphore*) sema)->release(count);
} }
// Allocates a QMutex and returns it behind the opaque Mutex handle type.
// The caller owns the handle and releases it with Mutex_Free().
Mutex* Mutex_Create()
{
    QMutex* impl = new QMutex();
    return reinterpret_cast<Mutex*>(impl);
}
// Destroys the QMutex that the given handle stands for.
void Mutex_Free(Mutex* mutex)
{
    QMutex* impl = reinterpret_cast<QMutex*>(mutex);
    delete impl;
}
// Locks the QMutex behind the handle (QMutex::lock).
void Mutex_Lock(Mutex* mutex)
{
    QMutex* impl = reinterpret_cast<QMutex*>(mutex);
    impl->lock();
}
// Unlocks the QMutex behind the handle (QMutex::unlock).
void Mutex_Unlock(Mutex* mutex)
{
    QMutex* impl = reinterpret_cast<QMutex*>(mutex);
    impl->unlock();
}
// Calls QMutex::try_lock on the mutex behind the handle and returns its result.
bool Mutex_TryLock(Mutex* mutex)
{
    QMutex* impl = reinterpret_cast<QMutex*>(mutex);
    const bool acquired = impl->try_lock();
    return acquired;
}
void* GL_GetProcAddress(const char* proc) void* GL_GetProcAddress(const char* proc)
{ {

View File

@ -120,7 +120,7 @@ ConfigEntry PlatformConfigFile[] =
{"HKJoy_Reset", 0, &HKJoyMapping[HK_Reset], -1, NULL, 0}, {"HKJoy_Reset", 0, &HKJoyMapping[HK_Reset], -1, NULL, 0},
{"HKJoy_FastForward", 0, &HKJoyMapping[HK_FastForward], -1, NULL, 0}, {"HKJoy_FastForward", 0, &HKJoyMapping[HK_FastForward], -1, NULL, 0},
{"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FastForwardToggle], -1, NULL, 0}, {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FastForwardToggle], -1, NULL, 0},
{"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0}, {"HKJoy_FullscreenToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0},
{"HKJoy_SolarSensorDecrease", 0, &HKJoyMapping[HK_SolarSensorDecrease], -1, NULL, 0}, {"HKJoy_SolarSensorDecrease", 0, &HKJoyMapping[HK_SolarSensorDecrease], -1, NULL, 0},
{"HKJoy_SolarSensorIncrease", 0, &HKJoyMapping[HK_SolarSensorIncrease], -1, NULL, 0}, {"HKJoy_SolarSensorIncrease", 0, &HKJoyMapping[HK_SolarSensorIncrease], -1, NULL, 0},

View File

@ -54,7 +54,7 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne
LAN_Socket::Init(); LAN_Socket::Init();
haspcap = LAN_PCap::Init(false); haspcap = LAN_PCap::Init(false);
ui->cbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)"); ui->rbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)");
ui->cbBindAnyAddr->setChecked(Config::SocketBindAnyAddr != 0); ui->cbBindAnyAddr->setChecked(Config::SocketBindAnyAddr != 0);
ui->cbRandomizeMAC->setChecked(Config::RandomizeMAC != 0); ui->cbRandomizeMAC->setChecked(Config::RandomizeMAC != 0);
@ -71,8 +71,9 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne
} }
ui->cbxDirectAdapter->setCurrentIndex(sel); ui->cbxDirectAdapter->setCurrentIndex(sel);
ui->cbDirectMode->setChecked(Config::DirectLAN != 0); ui->rbDirectMode->setChecked(Config::DirectLAN != 0);
if (!haspcap) ui->cbDirectMode->setEnabled(false); ui->rbIndirectMode->setChecked(Config::DirectLAN == 0);
if (!haspcap) ui->rbDirectMode->setEnabled(false);
updateAdapterControls(); updateAdapterControls();
} }
@ -101,7 +102,7 @@ void WifiSettingsDialog::done(int r)
Config::SocketBindAnyAddr = ui->cbBindAnyAddr->isChecked() ? 1:0; Config::SocketBindAnyAddr = ui->cbBindAnyAddr->isChecked() ? 1:0;
Config::RandomizeMAC = randommac; Config::RandomizeMAC = randommac;
Config::DirectLAN = ui->cbDirectMode->isChecked() ? 1:0; Config::DirectLAN = ui->rbDirectMode->isChecked() ? 1:0;
int sel = ui->cbxDirectAdapter->currentIndex(); int sel = ui->cbxDirectAdapter->currentIndex();
if (sel < 0 || sel >= LAN_PCap::NumAdapters) sel = 0; if (sel < 0 || sel >= LAN_PCap::NumAdapters) sel = 0;
@ -125,11 +126,14 @@ void WifiSettingsDialog::done(int r)
closeDlg(); closeDlg();
} }
void WifiSettingsDialog::on_cbDirectMode_stateChanged(int state) void WifiSettingsDialog::on_rbDirectMode_clicked()
{
updateAdapterControls();
}
void WifiSettingsDialog::on_rbIndirectMode_clicked()
{ {
updateAdapterControls(); updateAdapterControls();
} }
void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel) void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel)
{ {
if (!haspcap) return; if (!haspcap) return;
@ -153,7 +157,7 @@ void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel)
void WifiSettingsDialog::updateAdapterControls() void WifiSettingsDialog::updateAdapterControls()
{ {
bool enable = haspcap && ui->cbDirectMode->isChecked(); bool enable = haspcap && ui->rbDirectMode->isChecked();
ui->cbxDirectAdapter->setEnabled(enable); ui->cbxDirectAdapter->setEnabled(enable);
ui->lblAdapterMAC->setEnabled(enable); ui->lblAdapterMAC->setEnabled(enable);

View File

@ -55,7 +55,8 @@ public:
private slots: private slots:
void done(int r); void done(int r);
void on_cbDirectMode_stateChanged(int state); void on_rbDirectMode_clicked();
void on_rbIndirectMode_clicked();
void on_cbxDirectAdapter_currentIndexChanged(int sel); void on_cbxDirectAdapter_currentIndexChanged(int sel);
private: private:

View File

@ -6,8 +6,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>479</width> <width>572</width>
<height>240</height> <height>296</height>
</rect> </rect>
</property> </property>
<property name="sizePolicy"> <property name="sizePolicy">
@ -58,24 +58,20 @@
<string>Online</string> <string>Online</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout_2"> <layout class="QGridLayout" name="gridLayout_2">
<item row="2" column="0"> <item row="3" column="0" rowspan="3" colspan="2">
<widget class="QLabel" name="label_2"> <widget class="QGroupBox" name="groupBox_3">
<property name="title">
<string>Direct Mode Settings</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text"> <property name="text">
<string>MAC address:</string> <string>Network adapter:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="0" column="0" colspan="2"> <item row="0" column="1">
<widget class="QCheckBox" name="cbDirectMode">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Direct mode directly routes network traffic to the host network. It is the most reliable, but requires an ethernet connection.&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Direct mode [TEXT PLACEHOLDER]</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="cbxDirectAdapter"> <widget class="QComboBox" name="cbxDirectAdapter">
<property name="sizePolicy"> <property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed"> <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
@ -85,7 +81,7 @@
</property> </property>
<property name="minimumSize"> <property name="minimumSize">
<size> <size>
<width>350</width> <width>300</width>
<height>0</height> <height>0</height>
</size> </size>
</property> </property>
@ -95,13 +91,20 @@
</widget> </widget>
</item> </item>
<item row="1" column="0"> <item row="1" column="0">
<widget class="QLabel" name="label"> <widget class="QLabel" name="label_2">
<property name="text"> <property name="text">
<string>Network adapter:</string> <string>MAC address:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0"> <item row="1" column="1">
<widget class="QLabel" name="lblAdapterMAC">
<property name="text">
<string>[PLACEHOLDER]</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_3"> <widget class="QLabel" name="label_3">
<property name="text"> <property name="text">
<string>IP address:</string> <string>IP address:</string>
@ -109,16 +112,32 @@
</widget> </widget>
</item> </item>
<item row="2" column="1"> <item row="2" column="1">
<widget class="QLabel" name="lblAdapterMAC"> <widget class="QLabel" name="lblAdapterIP">
<property name="text"> <property name="text">
<string>[PLACEHOLDER]</string> <string>[PLACEHOLDER]</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="1"> </layout>
<widget class="QLabel" name="lblAdapterIP"> </widget>
</item>
<item row="1" column="0">
<widget class="QRadioButton" name="rbIndirectMode">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Indirect mode uses libslirp. It requires no extra setup and is easy to use.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text"> <property name="text">
<string>[PLACEHOLDER]</string> <string>Indirect Mode (uses libslirp, recommended)</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QRadioButton" name="rbDirectMode">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Direct mode directly routes network traffic to the host network. It is the most reliable, but requires an ethernet connection.&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Direct mode [TEXT PLACEHOLDER]</string>
</property> </property>
</widget> </widget>
</item> </item>

View File

@ -274,6 +274,7 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent)
connect(this, SIGNAL(windowEmuStop()), mainWindow, SLOT(onEmuStop())); connect(this, SIGNAL(windowEmuStop()), mainWindow, SLOT(onEmuStop()));
connect(this, SIGNAL(windowEmuPause()), mainWindow->actPause, SLOT(trigger())); connect(this, SIGNAL(windowEmuPause()), mainWindow->actPause, SLOT(trigger()));
connect(this, SIGNAL(windowEmuReset()), mainWindow->actReset, SLOT(trigger())); connect(this, SIGNAL(windowEmuReset()), mainWindow->actReset, SLOT(trigger()));
connect(this, SIGNAL(windowLimitFPSChange()), mainWindow->actLimitFramerate, SLOT(trigger()));
connect(this, SIGNAL(screenLayoutChange()), mainWindow->panel, SLOT(onScreenLayoutChanged())); connect(this, SIGNAL(screenLayoutChange()), mainWindow->panel, SLOT(onScreenLayoutChanged()));
connect(this, SIGNAL(windowFullscreenToggle()), mainWindow, SLOT(onFullscreenToggled())); connect(this, SIGNAL(windowFullscreenToggle()), mainWindow, SLOT(onFullscreenToggled()));
@ -363,10 +364,10 @@ void EmuThread::run()
Input::Init(); Input::Init();
u32 nframes = 0; u32 nframes = 0;
u32 starttick = SDL_GetTicks(); double perfCountsSec = 1.0 / SDL_GetPerformanceFrequency();
u32 lasttick = starttick; double lastTime = SDL_GetPerformanceCounter() * perfCountsSec;
u32 lastmeasuretick = lasttick; double frameLimitError = 0.0;
u32 fpslimitcount = 0; double lastMeasureTime = lastTime;
char melontitle[100]; char melontitle[100];
@ -500,49 +501,43 @@ void EmuThread::run()
SDL_UnlockMutex(audioSyncLock); SDL_UnlockMutex(audioSyncLock);
} }
float framerate = (1000.0f * nlines) / (60.0f * 263.0f); double frametimeStep = nlines / (60.0 * 263.0);
{ {
u32 curtick = SDL_GetTicks();
u32 delay = curtick - lasttick;
bool limitfps = Config::LimitFPS && !fastforward; bool limitfps = Config::LimitFPS && !fastforward;
if (limitfps)
{
float wantedtickF = starttick + (framerate * (fpslimitcount+1));
u32 wantedtick = (u32)ceil(wantedtickF);
if (curtick < wantedtick) SDL_Delay(wantedtick - curtick);
lasttick = SDL_GetTicks(); double practicalFramelimit = limitfps ? frametimeStep : 1.0 / 1000.0;
fpslimitcount++;
if ((abs(wantedtickF - (float)wantedtick) < 0.001312) || (fpslimitcount > 60)) double curtime = SDL_GetPerformanceCounter() * perfCountsSec;
frameLimitError += practicalFramelimit - (curtime - lastTime);
if (frameLimitError < -practicalFramelimit)
frameLimitError = -practicalFramelimit;
if (frameLimitError > practicalFramelimit)
frameLimitError = practicalFramelimit;
if (round(frameLimitError * 1000.0) > 0.0)
{ {
fpslimitcount = 0; SDL_Delay(round(frameLimitError * 1000.0));
starttick = lasttick; double timeBeforeSleep = curtime;
} curtime = SDL_GetPerformanceCounter() * perfCountsSec;
} frameLimitError -= curtime - timeBeforeSleep;
else
{
if (delay < 1) SDL_Delay(1);
lasttick = SDL_GetTicks();
} }
lastTime = curtime;
} }
nframes++; nframes++;
if (nframes >= 30) if (nframes >= 30)
{ {
u32 tick = SDL_GetTicks(); double time = SDL_GetPerformanceCounter() * perfCountsSec;
u32 diff = tick - lastmeasuretick; double dt = time - lastMeasureTime;
lastmeasuretick = tick; lastMeasureTime = time;
u32 fps; u32 fps = round(nframes / dt);
if (diff < 1) fps = 77777;
else fps = (nframes * 1000) / diff;
nframes = 0; nframes = 0;
float fpstarget; float fpstarget = 1.0/frametimeStep;
if (framerate < 1) fpstarget = 999;
else fpstarget = 1000.0f/framerate;
sprintf(melontitle, "[%d/%.0f] melonDS " MELONDS_VERSION, fps, fpstarget); sprintf(melontitle, "[%d/%.0f] melonDS " MELONDS_VERSION, fps, fpstarget);
changeWindowTitle(melontitle); changeWindowTitle(melontitle);
@ -552,10 +547,8 @@ void EmuThread::run()
{ {
// paused // paused
nframes = 0; nframes = 0;
lasttick = SDL_GetTicks(); lastTime = SDL_GetPerformanceCounter() * perfCountsSec;
starttick = lasttick; lastMeasureTime = lastTime;
lastmeasuretick = lasttick;
fpslimitcount = 0;
emit windowUpdate(); emit windowUpdate();
@ -1339,6 +1332,7 @@ void MainWindow::keyPressEvent(QKeyEvent* event)
{ {
if (event->isAutoRepeat()) return; if (event->isAutoRepeat()) return;
// TODO!! REMOVE ME IN RELEASE BUILDS!!
if (event->key() == Qt::Key_F11) NDS::debug(0); if (event->key() == Qt::Key_F11) NDS::debug(0);
Input::KeyPress(event); Input::KeyPress(event);
@ -1362,7 +1356,7 @@ void MainWindow::dragEnterEvent(QDragEnterEvent* event)
QString filename = urls.at(0).toLocalFile(); QString filename = urls.at(0).toLocalFile();
QString ext = filename.right(3); QString ext = filename.right(3);
if (ext == "nds" || ext == "srl" || ext == "dsi" || (ext == "gba" && RunningSomething)) if (ext == "nds" || ext == "srl" || ext == "dsi" || ext == "gba")
event->acceptProposedAction(); event->acceptProposedAction();
} }