diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml
new file mode 100644
index 00000000..e2b942ab
--- /dev/null
+++ b/.github/workflows/build-macos.yml
@@ -0,0 +1,39 @@
+name: CMake Build (macOS x86-64)
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+env:
+  BUILD_TYPE: Release  # forwarded to CMake as CMAKE_BUILD_TYPE in the Configure step
+
+jobs:
+  build:
+
+    runs-on: macos-latest
+
+    steps:
+    - uses: actions/checkout@v1
+    - name: Install dependencies
+      working-directory: ${{runner.workspace}}
+      run: |
+        brew install pkg-config cmake sdl2 qt5 libslirp libarchive  # build deps, kept in line with the README's macOS section
+    - name: Create build environment
+      run: mkdir ${{runner.workspace}}/build
+    - name: Configure
+      working-directory: ${{runner.workspace}}/build
+      run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5  # point CMake at Homebrew's Qt5
+    - name: Make
+      working-directory: ${{runner.workspace}}/build
+      run: |
+        make -j$(sysctl -n hw.ncpu)
+        mkdir dist
+        cp -r melonDS.app dist
+    - uses: actions/upload-artifact@v1
+      with:
+        name: melonDS.app
+        path: ${{runner.workspace}}/build/dist
diff --git a/.github/workflows/build-ubuntu-aarch64.yml b/.github/workflows/build-ubuntu-aarch64.yml
index c5ce2eb2..6ea78ea8 100644
--- a/.github/workflows/build-ubuntu-aarch64.yml
+++ b/.github/workflows/build-ubuntu-aarch64.yml
@@ -29,6 +29,7 @@ jobs:
shell: bash
working-directory: ${{runner.workspace}}
run: |
+ sudo rm -f /etc/apt/sources.list.d/kubernetes.list
sudo dpkg --add-architecture arm64
sudo sh -c "sed \"s|^deb \([a-z\.:/]*\) \([a-z\-]*\) \(.*\)$|deb [arch=amd64] \1 \2 \3\ndeb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports \2 \3|\" /etc/apt/sources.list > /etc/apt/sources.list.new"
sudo rm /etc/apt/sources.list
diff --git a/.github/workflows/build-ubuntu.yml b/.github/workflows/build-ubuntu.yml
index 97825f05..d2070d8b 100644
--- a/.github/workflows/build-ubuntu.yml
+++ b/.github/workflows/build-ubuntu.yml
@@ -8,10 +8,6 @@ on:
branches:
- master
-env:
- BUILD_TYPE: Release
- CMAKE_VERSION: 3.15.2
-
jobs:
build:
@@ -20,25 +16,21 @@ jobs:
steps:
- uses: actions/checkout@v1
- name: Install dependencies
- shell: bash
- working-directory: ${{runner.workspace}}
- run: | # Fetch a new version of CMake, because the default is too old.
- sudo rm -f /etc/apt/sources.list.d/dotnetdev.list /etc/apt/sources.list.d/microsoft-prod.list \
- && sudo apt update \
- && sudo apt install cmake libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default libslirp0=4.1.0-2ubuntu2.1 libslirp-dev libarchive-dev --allow-downgrades
+ run: |
+ sudo rm -f /etc/apt/sources.list.d/dotnetdev.list /etc/apt/sources.list.d/microsoft-prod.list
+ sudo apt update
+ sudo apt install cmake libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qt5-default libslirp0 libslirp-dev libarchive-dev --allow-downgrades
- name: Create build environment
run: mkdir ${{runner.workspace}}/build
- name: Configure
- shell: bash
working-directory: ${{runner.workspace}}/build
- run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE
+ run: cmake $GITHUB_WORKSPACE
- name: Make
- shell: bash
working-directory: ${{runner.workspace}}/build
run: |
- make -j$(nproc --all) \
- && mkdir dist \
- && cp melonDS dist
+ make -j$(nproc --all)
+ mkdir dist
+ cp melonDS dist
- uses: actions/upload-artifact@v1
with:
name: melonDS
diff --git a/.gitignore b/.gitignore
index 3c877403..a38b5a38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ cmake-build-debug
.idea
*.exe
+
+.DS_Store
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cecc50d..59a3f2d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,20 +1,30 @@
cmake_minimum_required(VERSION 3.13)
+include(CheckSymbolExists)
+include(CheckLibraryExists)
+
cmake_policy(VERSION 3.13)
if (POLICY CMP0076)
cmake_policy(SET CMP0076 NEW)
endif()
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version")
+
+project(melonDS CXX)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
-project(melonDS)
-
-if (NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE Release)
+check_library_exists(m pow "" LIBM)
+if(LIBM)
+ link_libraries(m)
+endif()
+
+if (NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
endif()
-include(CheckSymbolExists)
function(detect_architecture symbol arch)
if (NOT DEFINED ARCHITECTURE)
set(CMAKE_REQUIRED_QUIET 1)
@@ -65,17 +75,36 @@ if (CMAKE_BUILD_TYPE STREQUAL Release)
add_link_options(-s)
endif()
-add_compile_options(-fno-pic)
-add_link_options(-no-pie)
-
-option(BUILD_QT_SDL "Build Qt/SDL frontend" ON)
-
if (WIN32)
option(BUILD_STATIC "Statically link dependencies" OFF)
endif()
+if (ENABLE_LTO)
+ if (WIN32)
+ add_compile_options(-flto)
+ add_link_options(-flto)
+ else()
+ add_compile_options(-flto -fPIC)
+ add_link_options(-flto -fuse-linker-plugin -pie)
+ endif()
+endif()
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ set(CMAKE_AR "gcc-ar")
+ set(CMAKE_RANLIB "gcc-ranlib")
+elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ find_program(LLD NAMES ld.lld ld64.lld lld-link)
+ if (NOT LLD STREQUAL "LLD-NOTFOUND")
+ add_link_options(-fuse-ld=lld)
+ endif()
+ set(CMAKE_AR "llvm-ar")
+ set(CMAKE_RANLIB "llvm-ranlib")
+endif()
+
+option(BUILD_QT_SDL "Build Qt/SDL frontend" ON)
+
add_subdirectory(src)
if (BUILD_QT_SDL)
add_subdirectory(src/frontend/qt_sdl)
-endif()
+endif()
\ No newline at end of file
diff --git a/README.md b/README.md
index ec218dd7..8df34df2 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ As for the rest, the interface should be pretty straightforward. If you have a q
* Install dependencies:
```sh
-sudo apt-get install cmake libgtk-3-dev libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtdeclarative5-dev libslirp-dev libarchive-dev
+sudo apt-get install cmake libcurl4-gnutls-dev libpcap0.8-dev libsdl2-dev qtbase5-dev qtdeclarative5-dev libslirp-dev libarchive-dev
```
* Compile:
@@ -84,6 +84,21 @@ If everything went well, melonDS and the libraries it needs should now be in the
```
If everything went well, melonDS should now be in the `dist` folder.
+### macOS:
+1. Install the [Homebrew Package Manager](https://brew.sh)
+2. Install dependencies: `brew install git pkg-config cmake sdl2 qt5 libslirp libarchive`
+3. Compile:
+ ```zsh
+ git clone https://github.com/Arisotura/melonDS.git
+ cd melonDS
+ mkdir build && cd build
+ cmake .. -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5
+ make -j$(sysctl -n hw.ncpu)
+ mkdir dist && cp -r melonDS.app dist
+ ```
+If everything went well, melonDS.app should now be in the `dist` folder.
+
+
## TODO LIST
* DSi emulation
diff --git a/melonDS.icns b/melonDS.icns
new file mode 100644
index 00000000..b4f37335
Binary files /dev/null and b/melonDS.icns differ
diff --git a/melonDS.plist b/melonDS.plist
new file mode 100644
index 00000000..1328777e
--- /dev/null
+++ b/melonDS.plist
@@ -0,0 +1,24 @@
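+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>CFBundleExecutable</key>
+    <string>melonDS</string>
+    <key>CFBundleIconFile</key>
+    <string>melonDS.icns</string>
+    <key>CFBundleIdentifier</key>
+    <string>net.kuribo64.melonDS</string>
+    <key>CFBundleDevelopmentRegion</key>
+    <string>English</string>
+    <key>CFBundlePackageType</key>
+    <string>APPL</string>
+    <key>CFBundleVersion</key>
+    <string>0.9</string>
+    <key>CFBundleShortVersionString</key>
+    <string>0.9</string>
+    <key>NSHumanReadableCopyright</key>
+    <string>Licensed under GPLv3</string>
+    <key>NSHighResolutionCapable</key>
+    <true/>
+</dict>
+</plist>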
\ No newline at end of file
diff --git a/src/ARM.cpp b/src/ARM.cpp
index 7eeacb7f..29110e56 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -592,7 +592,7 @@ void ARMv5::Execute()
else
AddCycles_C();
}
-
+
// TODO optimize this shit!!!
if (Halted)
{
@@ -651,7 +651,7 @@ void ARMv5::ExecuteJIT()
return;
}
- ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(0, FastBlockLookup,
+ ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(0, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
@@ -802,7 +802,7 @@ void ARMv4::ExecuteJIT()
return;
}
- ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(1, FastBlockLookup,
+ ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(1, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
@@ -879,4 +879,4 @@ void ARMv4::FillPipeline()
NextInstr[0] = CodeRead32(R[15] - 4);
NextInstr[1] = CodeRead32(R[15]);
}
-}
\ No newline at end of file
+}
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index c9d2b623..1921f132 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -176,7 +176,7 @@ T SlowRead9(u32 addr, ARMv5* cpu)
}
template
-void SlowWrite9(u32 addr, ARMv5* cpu, T val)
+void SlowWrite9(u32 addr, ARMv5* cpu, u32 val)
{
addr &= ~(sizeof(T) - 1);
@@ -224,7 +224,7 @@ T SlowRead7(u32 addr)
}
template
-void SlowWrite7(u32 addr, T val)
+void SlowWrite7(u32 addr, u32 val)
{
addr &= ~(sizeof(T) - 1);
@@ -266,16 +266,16 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num)
#define INSTANTIATE_SLOWMEM(consoleType) \
template void SlowWrite9(u32, ARMv5*, u32); \
- template void SlowWrite9(u32, ARMv5*, u16); \
- template void SlowWrite9(u32, ARMv5*, u8); \
+ template void SlowWrite9(u32, ARMv5*, u32); \
+ template void SlowWrite9(u32, ARMv5*, u32); \
\
template u32 SlowRead9(u32, ARMv5*); \
template u16 SlowRead9(u32, ARMv5*); \
template u8 SlowRead9(u32, ARMv5*); \
\
template void SlowWrite7(u32, u32); \
- template void SlowWrite7(u32, u16); \
- template void SlowWrite7(u32, u8); \
+ template void SlowWrite7(u32, u32); \
+ template void SlowWrite7(u32, u32); \
\
template u32 SlowRead7(u32); \
template u16 SlowRead7(u32); \
@@ -298,6 +298,7 @@ void Init()
void DeInit()
{
+ ResetBlockCache();
ARMJIT_Memory::DeInit();
delete JITCompiler;
@@ -594,7 +595,8 @@ void CompileBlock(ARM* cpu)
u32 r15 = cpu->R[15];
u32 addressRanges[Config::JIT_MaxBlockSize];
- u32 addressMasks[Config::JIT_MaxBlockSize] = {0};
+ u32 addressMasks[Config::JIT_MaxBlockSize];
+ memset(addressMasks, 0, Config::JIT_MaxBlockSize * sizeof(u32));
u32 numAddressRanges = 0;
u32 numLiterals = 0;
@@ -1116,6 +1118,7 @@ void ResetBlockCache()
range->Blocks.Clear();
range->Code = 0;
}
+ delete block;
}
JitBlocks9.clear();
JitBlocks7.clear();
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
index 80c7f041..5fe3fe77 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -1,5 +1,11 @@
+#include "ARMJIT_Compiler.h"
+
+#include "../ARMJIT_Internal.h"
+#include "../ARMInterpreter.h"
+#include "../Config.h"
+
#ifdef __SWITCH__
-#include "../switch/compat_switch.h"
+#include
extern char __start__;
#else
@@ -7,13 +13,7 @@ extern char __start__;
#include
#endif
-#include "ARMJIT_Compiler.h"
-
-#include "../ARMJIT_Internal.h"
-#include "../ARMInterpreter.h"
-#include "../Config.h"
-
-#include
+#include
using namespace Arm64Gen;
@@ -68,6 +68,11 @@ void Compiler::A_Comp_MRS()
MOV(rd, RCPSR);
}
+// Free-function wrapper around ARM::UpdateMode. The JIT hands the address of
+// this function to QuickCallFunction, so generated code calls a plain
+// function rather than the member function pointer &ARM::UpdateMode cast to
+// void*.
+void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
+{
+ arm->UpdateMode(oldmode, newmode);
+}
+
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
@@ -139,7 +144,7 @@ void Compiler::A_Comp_MSR()
PushRegs(true);
- QuickCallFunction(X3, (void*)&ARM::UpdateMode);
+ QuickCallFunction(X3, (void*)&UpdateModeTrampoline);
PopRegs(true);
}
@@ -179,7 +184,7 @@ void Compiler::PopRegs(bool saveHiRegs)
Compiler::Compiler()
{
#ifdef __SWITCH__
- JitRWBase = memalign(0x1000, JitMemSize);
+ JitRWBase = aligned_alloc(0x1000, JitMemSize);
JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
JitRWStart = virtmemReserve(JitMemSize);
@@ -915,4 +920,4 @@ void Compiler::Comp_AddCycles_CD()
ConstantCycles += cycles;
}
-}
\ No newline at end of file
+}
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h
index af7497a3..a79e9daf 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.h
@@ -187,6 +187,7 @@ public:
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
+
enum
{
memop_Writeback = 1 << 0,
@@ -213,8 +214,8 @@ public:
return (u8*)entry - GetRXBase();
}
- bool IsJITFault(u64 pc);
- s64 RewriteMemAccess(u64 pc);
+ bool IsJITFault(u8* pc);
+ u8* RewriteMemAccess(u8* pc);
void SwapCodeRegion()
{
diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.S
similarity index 100%
rename from src/ARMJIT_A64/ARMJIT_Linkage.s
rename to src/ARMJIT_A64/ARMJIT_Linkage.S
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
index 86e257a3..2c14dc6a 100644
--- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -9,37 +9,34 @@ using namespace Arm64Gen;
namespace ARMJIT
{
-bool Compiler::IsJITFault(u64 pc)
+bool Compiler::IsJITFault(u8* pc)
{
- return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
+ return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
}
-s64 Compiler::RewriteMemAccess(u64 pc)
+u8* Compiler::RewriteMemAccess(u8* pc)
{
- ptrdiff_t pcOffset = pc - (u64)GetRXBase();
+ ptrdiff_t pcOffset = pc - GetRXBase();
auto it = LoadStorePatches.find(pcOffset);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
+ LoadStorePatches.erase(it);
ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc);
-
for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP);
-
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset);
- LoadStorePatches.erase(it);
-
- return patch.PatchOffset;
+ return pc + (ptrdiff_t)patch.PatchOffset;
}
printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
abort();
@@ -192,7 +189,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
else
{
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
- if (size == 32)
+ if (size == 32 && !addrIsStatic)
{
UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0);
diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h
index 42444701..b1e35f58 100644
--- a/src/ARMJIT_Internal.h
+++ b/src/ARMJIT_Internal.h
@@ -216,9 +216,9 @@ template
void LinkBlock(ARM* cpu, u32 codeOffset);
template T SlowRead9(u32 addr, ARMv5* cpu);
-template void SlowWrite9(u32 addr, ARMv5* cpu, T val);
+template void SlowWrite9(u32 addr, ARMv5* cpu, u32 val);
template T SlowRead7(u32 addr);
-template void SlowWrite7(u32 addr, T val);
+template void SlowWrite7(u32 addr, u32 val);
template void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp
index d321d2f1..33d6bcfe 100644
--- a/src/ARMJIT_Memory.cpp
+++ b/src/ARMJIT_Memory.cpp
@@ -1,5 +1,6 @@
#if defined(__SWITCH__)
-#include "switch/compat_switch.h"
+#include
+#include "frontend/switch/FaultHandler.h"
#elif defined(_WIN32)
#include
#else
@@ -10,6 +11,12 @@
#include
#endif
+#if defined(__ANDROID__)
+#include
+#include
+#include
+#endif
+
#include "ARMJIT_Memory.h"
#include "ARMJIT_Internal.h"
@@ -22,7 +29,7 @@
#include "NDSCart.h"
#include "SPU.h"
-#include
+#include
/*
We're handling fastmem here.
@@ -40,7 +47,8 @@
We handle this by only mapping those regions which are actually
used and by praying the games don't go wild.
- Beware, this file is full of platform specific code.
+ Beware, this file is full of platform-specific code, and parts of it
+ were copied from Dolphin, so enjoy the copied comments!
*/
@@ -49,12 +57,16 @@ namespace ARMJIT_Memory
struct FaultDescription
{
u32 EmulatedFaultAddr;
- u64 FaultPC;
+ u8* FaultPC;
};
-bool FaultHandler(FaultDescription* faultDesc, s32& offset);
+bool FaultHandler(FaultDescription& faultDesc);
}
+#if defined(__ANDROID__)
+#define ASHMEM_DEVICE "/dev/ashmem"
+#endif
+
#if defined(__SWITCH__)
// with LTO the symbols seem to be not properly overriden
// if they're somewhere else
@@ -75,7 +87,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea;
- desc.FaultPC = ctx->pc.x;
+ desc.FaultPC = (u8*)ctx->pc.x;
u64 integerRegisters[33];
memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29);
@@ -84,23 +96,14 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
integerRegisters[31] = ctx->sp.x;
integerRegisters[32] = ctx->pc.x;
- s32 offset;
- if (ARMJIT_Memory::FaultHandler(&desc, offset))
+ if (ARMJIT_Memory::FaultHandler(desc))
{
- integerRegisters[32] += offset;
+ integerRegisters[32] = (u64)desc.FaultPC;
ARM_RestoreContext(integerRegisters);
}
- if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start)
- {
- printf("unintentional fault in .text at 0x%x (type %d) (trying to access 0x%x?)\n",
- ctx->pc.x - (u64)&__start__, ctx->error_desc, ctx->far.x);
- }
- else
- {
- printf("unintentional fault somewhere in deep (address) space at %x (type %d)\n", ctx->pc.x, ctx->error_desc);
- }
+ HandleFault(ctx->pc.x, ctx->lr.x, ctx->fp.x, ctx->far.x, ctx->error_desc);
}
}
@@ -117,12 +120,11 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea;
- desc.FaultPC = exceptionInfo->ContextRecord->Rip;
+ desc.FaultPC = (u8*)exceptionInfo->ContextRecord->Rip;
- s32 offset = 0;
- if (ARMJIT_Memory::FaultHandler(&desc, offset))
+ if (ARMJIT_Memory::FaultHandler(desc))
{
- exceptionInfo->ContextRecord->Rip += offset;
+ exceptionInfo->ContextRecord->Rip = (u64)desc.FaultPC;
return EXCEPTION_CONTINUE_EXECUTION;
}
@@ -131,50 +133,84 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
#else
-struct sigaction NewSa;
-struct sigaction OldSa;
+static struct sigaction OldSaSegv;
+static struct sigaction OldSaBus;
+// Signal handler installed for SIGSEGV (and SIGBUS). It builds a
+// FaultDescription from the signal context and lets
+// ARMJIT_Memory::FaultHandler try to resolve the fault. On success the
+// (possibly updated) FaultPC is written back into the saved program counter;
+// otherwise the handler that was previously installed for this signal is
+// invoked.
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext)
{
+ if (sig != SIGSEGV && sig != SIGBUS)
+ {
+ // We are not interested in other signals - handle it as usual.
+ return;
+ }
+ // NOTE(review): for SIGBUS this still compares si_code against the SEGV_*
+ // codes - confirm the corresponding BUS_* values line up on the targeted
+ // platforms.
+ if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR)
+ {
+ // Huh? Return.
+ return;
+ }
+
ucontext_t* context = (ucontext_t*)rawContext;
-
+
ARMJIT_Memory::FaultDescription desc;
+ // EmulatedFaultAddr is the faulting host address expressed as an offset
+ // from the start of the current CPU's fastmem area.
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
#ifdef __x86_64__
desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea;
- desc.FaultPC = context->uc_mcontext.gregs[REG_RIP];
+ #ifdef __APPLE__
+ desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip;
+ #else
+ desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP];
+ #endif
+
#else
- desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea;
- desc.FaultPC = context->uc_mcontext.pc;
+ #ifdef __APPLE__
+ desc.EmulatedFaultAddr = (u8*)context->uc_mcontext->__es.__far - curArea;
+ desc.FaultPC = (u8*)context->uc_mcontext->__ss.__pc;
+ #else
+ desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea;
+ desc.FaultPC = (u8*)context->uc_mcontext.pc;
+ #endif
#endif
- s32 offset = 0;
- if (ARMJIT_Memory::FaultHandler(&desc, offset))
+ if (ARMJIT_Memory::FaultHandler(desc))
{
+ // Handled: resume at the PC the fault handler left in desc.FaultPC.
#ifdef __x86_64__
- context->uc_mcontext.gregs[REG_RIP] += offset;
+ #ifdef __APPLE__
+ context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC;
+ #else
+ context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC;
+ #endif
#else
- context->uc_mcontext.pc += offset;
+ #ifdef __APPLE__
+ context->uc_mcontext->__ss.__pc = (u64)desc.FaultPC;
+ #else
+ context->uc_mcontext.pc = (u64)desc.FaultPC;
+ #endif
#endif
return;
}
- if (OldSa.sa_flags & SA_SIGINFO)
+ // Not handled: chain to whatever was installed for this signal before.
+ struct sigaction* oldSa;
+ if (sig == SIGSEGV)
+ oldSa = &OldSaSegv;
+ else
+ oldSa = &OldSaBus;
+
+ if (oldSa->sa_flags & SA_SIGINFO)
{
- OldSa.sa_sigaction(sig, info, rawContext);
+ oldSa->sa_sigaction(sig, info, rawContext);
return;
}
- if (OldSa.sa_handler == SIG_DFL)
+ if (oldSa->sa_handler == SIG_DFL)
{
signal(sig, SIG_DFL);
return;
}
- if (OldSa.sa_handler == SIG_IGN)
+ if (oldSa->sa_handler == SIG_IGN)
{
// Ignore signal
return;
}
- OldSa.sa_handler(sig);
+ oldSa->sa_handler(sig);
}
#endif
@@ -231,7 +267,7 @@ enum
{
memstate_Unmapped,
memstate_MappedRW,
- // on switch this is unmapped as well
+ // on Switch this is unmapped as well
memstate_MappedProtected,
};
@@ -314,14 +350,16 @@ struct Mapping
void Unmap(int region)
{
+ u32 dtcmStart = NDS::ARM9->DTCMBase;
+ u32 dtcmSize = NDS::ARM9->DTCMSize;
bool skipDTCM = Num == 0 && region != memregion_DTCM;
u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7;
u32 offset = 0;
while (offset < Size)
{
- if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase)
+ if (skipDTCM && Addr + offset == dtcmStart)
{
- offset += NDS::ARM9->DTCMSize;
+ offset += dtcmSize;
}
else
{
@@ -329,7 +367,7 @@ struct Mapping
u8 status = statuses[(Addr + offset) >> 12];
while (statuses[(Addr + offset) >> 12] == status
&& offset < Size
- && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase))
+ && (!skipDTCM || Addr + offset != dtcmStart))
{
assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped);
statuses[(Addr + offset) >> 12] = memstate_Unmapped;
@@ -347,9 +385,33 @@ struct Mapping
#endif
}
}
+
#ifndef __SWITCH__
- bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
- assert(succeded);
+#ifndef _WIN32
+ u32 dtcmEnd = dtcmStart + dtcmSize;
+ if (Num == 0
+ && dtcmEnd >= Addr
+ && dtcmStart < Addr + Size)
+ {
+ bool success;
+ if (dtcmStart > Addr)
+ {
+ success = UnmapFromRange(Addr, 0, OffsetsPerRegion[region] + LocalOffset, dtcmStart - Addr);
+ assert(success);
+ }
+ if (dtcmEnd < Addr + Size)
+ {
+ u32 offset = dtcmStart - Addr + dtcmSize;
+ success = UnmapFromRange(dtcmEnd, 0, OffsetsPerRegion[region] + LocalOffset + offset, Size - offset);
+ assert(success);
+ }
+ }
+ else
+#endif
+ {
+ bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
+ assert(succeded);
+ }
#endif
}
};
@@ -418,10 +480,10 @@ void RemapDTCM(u32 newBase, u32 newSize)
printf("unmapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset);
- bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start);
- bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start);
+ bool overlap = (NDS::ARM9->DTCMSize > 0 && oldDTCMBase < end && oldDTCBEnd > start)
+ || (newSize > 0 && newBase < end && newEnd > start);
- if (mapping.Num == 0 && (oldOverlap || newOverlap))
+ if (mapping.Num == 0 && overlap)
{
mapping.Unmap(region);
Mappings[region].Remove(i);
@@ -445,8 +507,8 @@ void RemapNWRAM(int num)
for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;)
{
Mapping& mapping = Mappings[memregion_SharedWRAM][i];
- if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size
- || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr))
+ if (DSi::NWRAMStart[mapping.Num][num] < mapping.Addr + mapping.Size
+ && DSi::NWRAMEnd[mapping.Num][num] > mapping.Addr)
{
mapping.Unmap(memregion_SharedWRAM);
Mappings[memregion_SharedWRAM].Remove(i);
@@ -469,7 +531,7 @@ void RemapSWRAM()
for (int i = 0; i < Mappings[memregion_WRAM7].Length;)
{
Mapping& mapping = Mappings[memregion_WRAM7][i];
- if (mapping.Addr + mapping.Size < 0x03800000)
+ if (mapping.Addr + mapping.Size <= 0x03800000)
{
mapping.Unmap(memregion_WRAM7);
Mappings[memregion_WRAM7].Remove(i);
@@ -501,26 +563,53 @@ bool MapAtAddress(u32 addr)
return false;
u8* states = num == 0 ? MappingStatus9 : MappingStatus7;
- printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num);
+ printf("mapping mirror %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num);
bool isExecutable = ARMJIT::CodeMemRegions[region];
+ u32 dtcmStart = NDS::ARM9->DTCMBase;
+ u32 dtcmSize = NDS::ARM9->DTCMSize;
+ u32 dtcmEnd = dtcmStart + dtcmSize;
#ifndef __SWITCH__
- bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
- assert(succeded);
+#ifndef _WIN32
+ if (num == 0
+ && dtcmEnd >= mirrorStart
+ && dtcmStart < mirrorStart + mirrorSize)
+ {
+ bool success;
+ if (dtcmStart > mirrorStart)
+ {
+ success = MapIntoRange(mirrorStart, 0, OffsetsPerRegion[region] + memoryOffset, dtcmStart - mirrorStart);
+ assert(success);
+ }
+ if (dtcmEnd < mirrorStart + mirrorSize)
+ {
+ u32 offset = dtcmStart - mirrorStart + dtcmSize;
+ success = MapIntoRange(dtcmEnd, 0, OffsetsPerRegion[region] + memoryOffset + offset, mirrorSize - offset);
+ assert(success);
+ }
+ }
+ else
+#endif
+ {
+ bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
+ assert(succeded);
+ }
#endif
ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512;
// this overcomplicated piece of code basically just finds whole pieces of code memory
- // which can be mapped
+ // which can be mapped/protected
u32 offset = 0;
bool skipDTCM = num == 0 && region != memregion_DTCM;
while (offset < mirrorSize)
{
- if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase)
+ if (skipDTCM && mirrorStart + offset == dtcmStart)
{
- SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0);
- offset += NDS::ARM9->DTCMSize;
+#ifdef _WIN32
+ SetCodeProtectionRange(dtcmStart, dtcmSize, 0, 0);
+#endif
+ offset += dtcmSize;
}
else
{
@@ -557,37 +646,36 @@ bool MapAtAddress(u32 addr)
Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num};
Mappings[region].Add(mapping);
- printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1);
+ //printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1);
return true;
}
-bool FaultHandler(FaultDescription* faultDesc, s32& offset)
+// Tries to resolve a fault described by faultDesc.
+// Returns true if the faulting PC lies inside JIT code (the fault is then
+// treated as handled) and false otherwise. When the access has to be
+// rewritten to the slow path, faultDesc.FaultPC is replaced with the address
+// returned by RewriteMemAccess.
+bool FaultHandler(FaultDescription& faultDesc)
{
- if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC))
+ if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC))
{
bool rewriteToSlowPath = true;
- u32 addr = faultDesc->EmulatedFaultAddr;
+ u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7;
- if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped)
- rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr);
+ // The page is not mapped yet: try to map it, and only fall back to the
+ // slow path if mapping fails.
+ if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped)
+ rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr);
if (rewriteToSlowPath)
- {
- offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC);
- }
+ faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC);
+
return true;
}
return false;
}
+const u64 AddrSpaceSize = 0x100000000;
+
void Init()
{
- const u64 AddrSpaceSize = 0x100000000;
-
#if defined(__SWITCH__)
- MemoryBase = (u8*)memalign(0x1000, MemoryTotalSize);
+ MemoryBase = (u8*)aligned_alloc(0x1000, MemoryTotalSize);
MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize);
bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem,
@@ -624,22 +712,52 @@ void Init()
u8* basePtr = MemoryBase;
#else
- FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
- FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+ // This used to be allocated with three separate mmap calls.
+ // The idea was to give the OS more freedom in where to place the buffers,
+ // but that caused problems, so instead we reserve one large contiguous
+ // range of virtual address space (AddrSpaceSize*4), which seems to work better.
+ MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+ munmap(MemoryBase, AddrSpaceSize*4);
+ FastMem9Start = MemoryBase;
+ FastMem7Start = MemoryBase + AddrSpaceSize;
+ MemoryBase = MemoryBase + AddrSpaceSize*2;
- MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+#if defined(__ANDROID__)
+ static void* libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
+ using type_ASharedMemory_create = int(*)(const char* name, size_t size);
+ static void* symbol = dlsym(libandroid, "ASharedMemory_create");
+ static auto shared_memory_create = reinterpret_cast(symbol);
+ if (shared_memory_create)
+ {
+ MemoryFile = shared_memory_create("melondsfastmem", MemoryTotalSize);
+ }
+ else
+ {
+ int fd = open(ASHMEM_DEVICE, O_RDWR);
+ ioctl(fd, ASHMEM_SET_NAME, "melondsfastmem");
+ ioctl(fd, ASHMEM_SET_SIZE, MemoryTotalSize);
+ MemoryFile = fd;
+ }
+#elif defined(__APPLE__)
+ char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1];
+ sprintf(fastmemPidName, "melondsfastmem%d", getpid());
+ MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600);
+ delete[] fastmemPidName;
+#else
MemoryFile = memfd_create("melondsfastmem", 0);
+#endif
ftruncate(MemoryFile, MemoryTotalSize);
- NewSa.sa_flags = SA_SIGINFO;
- sigemptyset(&NewSa.sa_mask);
- NewSa.sa_sigaction = SigsegvHandler;
- sigaction(SIGSEGV, &NewSa, &OldSa);
-
- munmap(MemoryBase, MemoryTotalSize);
- munmap(FastMem9Start, AddrSpaceSize);
- munmap(FastMem7Start, AddrSpaceSize);
+ struct sigaction sa;
+ sa.sa_handler = nullptr;
+ sa.sa_sigaction = &SigsegvHandler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGSEGV, &sa, &OldSaSegv);
+#ifdef __APPLE__
+ sigaction(SIGBUS, &sa, &OldSaBus);
+#endif
mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
@@ -657,17 +775,30 @@ void Init()
+// Releases what Init() set up: the fastmem address space / backing memory
+// and, on the platforms that install one, the fault handler.
void DeInit()
{
#if defined(__SWITCH__)
- virtmemFree(FastMem9Start, 0x100000000);
- virtmemFree(FastMem7Start, 0x100000000);
+ virtmemFree(FastMem9Start, AddrSpaceSize);
+ virtmemFree(FastMem7Start, AddrSpaceSize);
svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize);
virtmemFree(MemoryBaseCodeMem, MemoryTotalSize);
free(MemoryBase);
#elif defined(_WIN32)
assert(UnmapViewOfFile(MemoryBase));
CloseHandle(MemoryFile);
RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
+#else
+ // POSIX teardown. macOS has to take this path as well: a separate
+ // __APPLE__ branch ahead of it would skip restoring the signal handlers,
+ // the munmap and the close below.
+ sigaction(SIGSEGV, &OldSaSegv, nullptr);
+#ifdef __APPLE__
+ // Init() also hooks SIGBUS on macOS.
+ sigaction(SIGBUS, &OldSaBus, nullptr);
+#endif
+
+ munmap(MemoryBase, MemoryTotalSize);
+ close(MemoryFile);
+#ifdef __APPLE__
+ // Remove the per-process shared memory object created with shm_open() in Init().
+ char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1];
+ sprintf(fastmemPidName, "melondsfastmem%d", getpid());
+ shm_unlink(fastmemPidName);
+ delete[] fastmemPidName;
+#endif
#endif
}
@@ -997,9 +1128,11 @@ int ClassifyAddress7(u32 addr)
case 0x06000000:
case 0x06800000:
return memregion_VWRAM;
+
+ default:
+ return memregion_Other;
}
}
- return memregion_Other;
}
void WifiWrite32(u32 addr, u32 val)
@@ -1176,4 +1309,4 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
return NULL;
}
-}
\ No newline at end of file
+}
diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp
index 819fe3cd..70ec781c 100644
--- a/src/ARMJIT_x64/ARMJIT_Branch.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp
@@ -130,6 +130,16 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
}
+// Free-function wrappers around ARMv4::JumpTo and ARMv5::JumpTo. Generated
+// code CALLs these instead of a member function pointer cast to void*.
+void ARMv4JumpToTrampoline(ARMv4* arm, u32 addr, bool restorecpsr)
+{
+ arm->JumpTo(addr, restorecpsr);
+}
+
+void ARMv5JumpToTrampoline(ARMv5* arm, u32 addr, bool restorecpsr)
+{
+ arm->JumpTo(addr, restorecpsr);
+}
+
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
IrregularCycles = true;
@@ -146,9 +156,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
else
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
if (Num == 0)
- CALL((void*)&ARMv5::JumpTo);
+ CALL((void*)&ARMv5JumpToTrampoline);
else
- CALL((void*)&ARMv4::JumpTo);
+ CALL((void*)&ARMv4JumpToTrampoline);
PopRegs(restoreCPSR);
@@ -269,4 +279,4 @@ void Compiler::T_Comp_BL_Merged()
Comp_JumpTo(target);
}
-}
\ No newline at end of file
+}
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index c6419c90..cc4ad800 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -101,6 +101,11 @@ void Compiler::A_Comp_MRS()
MOV(32, rd, R(RCPSR));
}
+// Free-function wrapper around ARM::UpdateMode, used as the CALL target in
+// A_Comp_MSR in place of a member-function pointer cast to void*.
+void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
+{
+ arm->UpdateMode(oldmode, newmode);
+}
+
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
@@ -185,7 +190,7 @@ void Compiler::A_Comp_MSR()
MOV(32, R(ABI_PARAM3), R(RCPSR));
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU));
- CALL((void*)&ARM::UpdateMode);
+ CALL((void*)&UpdateModeTrampoline);
PopRegs(true);
}
@@ -216,6 +221,8 @@ Compiler::Compiler()
#ifdef _WIN32
DWORD dummy;
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
+ #elif defined(__APPLE__)
+ pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0);
#else
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
@@ -340,7 +347,7 @@ Compiler::Compiler()
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9); break;
case 33: ABI_CallFunction(SlowWrite7); break;
@@ -352,7 +359,7 @@ Compiler::Compiler()
}
else
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9); break;
case 33: ABI_CallFunction(SlowWrite7); break;
@@ -375,7 +382,7 @@ Compiler::Compiler()
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9); break;
case 33: ABI_CallFunction(SlowRead7); break;
@@ -387,7 +394,7 @@ Compiler::Compiler()
}
else
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9); break;
case 33: ABI_CallFunction(SlowRead7); break;
@@ -612,9 +619,9 @@ void Compiler::Reset()
LoadStorePatches.clear();
}
-bool Compiler::IsJITFault(u64 addr)
+bool Compiler::IsJITFault(u8* addr)
{
- return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory);
+ // true when addr falls inside [ResetStart, ResetStart + CodeMemSize)
+ return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize;
}
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
@@ -896,5 +903,4 @@ void Compiler::Comp_AddCycles_CD()
else
ConstantCycles += cycles;
}
-
-}
\ No newline at end of file
+}
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index 3e900c33..57aab7b5 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -208,9 +208,9 @@ public:
SetCodePtr(FarCode);
}
- bool IsJITFault(u64 addr);
+ bool IsJITFault(u8* addr);
- s32 RewriteMemAccess(u64 pc);
+ u8* RewriteMemAccess(u8* pc);
u8* FarCode;
u8* NearCode;
diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.S
similarity index 89%
rename from src/ARMJIT_x64/ARMJIT_Linkage.s
rename to src/ARMJIT_x64/ARMJIT_Linkage.S
index 0a84df07..8cc0b5f9 100644
--- a/src/ARMJIT_x64/ARMJIT_Linkage.s
+++ b/src/ARMJIT_x64/ARMJIT_Linkage.S
@@ -29,8 +29,13 @@
.p2align 4,,15
+#ifdef __APPLE__
+.global _ARM_Dispatch
+_ARM_Dispatch:
+#else
.global ARM_Dispatch
ARM_Dispatch:
+#endif
#ifdef WIN64
push rdi
push rsi
@@ -54,8 +59,13 @@ ARM_Dispatch:
.p2align 4,,15
+#ifdef __APPLE__
+.global _ARM_Ret
+_ARM_Ret:
+#else
.global ARM_Ret
ARM_Ret:
+#endif
mov [RCPU + ARM_CPSR_offset], RCPSR
#ifdef WIN64
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index 8b4e8fe9..d80b25b5 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -15,28 +15,24 @@ int squeezePointer(T* ptr)
return truncated;
}
-s32 Compiler::RewriteMemAccess(u64 pc)
+u8* Compiler::RewriteMemAccess(u8* pc)
{
- auto it = LoadStorePatches.find((u8*)pc);
+ auto it = LoadStorePatches.find(pc);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
- u8* curCodePtr = GetWritableCodePtr();
- u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
- SetCodePtr(rewritePtr);
+ //printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size);
- CALL(patch.PatchFunc);
- u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr);
+ XEmitter emitter(pc + (ptrdiff_t)patch.Offset);
+ emitter.CALL(patch.PatchFunc);
+ ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5;
+ assert(remainingSize >= 0);
if (remainingSize > 0)
- NOP(remainingSize);
+ emitter.NOP(remainingSize);
- //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size);
-
- SetCodePtr(curCodePtr);
-
- return patch.Offset;
+ return pc + (ptrdiff_t)patch.Offset;
}
printf("this is a JIT bug %llx\n", pc);
@@ -192,6 +188,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
u8* memopStart = GetWritableCodePtr();
LoadStorePatch patch;
+ assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16);
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
: PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d6c38971..9f07cea6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -27,6 +27,7 @@ add_library(core STATIC
GBACart.cpp
GPU.cpp
GPU2D.cpp
+ GPU2D_Soft.cpp
GPU3D.cpp
GPU3D_Soft.cpp
melonDLDI.h
@@ -80,9 +81,8 @@ if (ENABLE_JIT)
ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp
- ARMJIT_x64/ARMJIT_Linkage.s
+ ARMJIT_x64/ARMJIT_Linkage.S
)
- set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
if (ARCHITECTURE STREQUAL ARM64)
target_sources(core PRIVATE
@@ -94,16 +94,22 @@ if (ENABLE_JIT)
ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp
- ARMJIT_A64/ARMJIT_Linkage.s
+ ARMJIT_A64/ARMJIT_Linkage.S
)
- set_source_files_properties(ARMJIT_A64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
endif()
+if (APPLE)
+ target_include_directories(core PUBLIC /usr/local/include)
+ target_link_directories(core PUBLIC /usr/local/lib)
+endif()
+
if (ENABLE_OGLRENDERER)
if (WIN32)
target_link_libraries(core ole32 comctl32 ws2_32 opengl32)
- else()
+ elseif (APPLE)
+ target_link_libraries(core "-framework OpenGL")
+ else()
target_link_libraries(core GL EGL)
endif()
else()
diff --git a/src/Config.cpp b/src/Config.cpp
index 341b14c3..f7db2528 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -73,7 +73,11 @@ ConfigEntry ConfigFile[] =
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 1, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
- {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0},
+ #ifdef __APPLE__
+ {"JIT_FastMemory", 0, &JIT_FastMemory, 0, NULL, 0},
+ #else
+ {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0},
+ #endif
#endif
{"", -1, NULL, 0, NULL, 0}
diff --git a/src/DMA.cpp b/src/DMA.cpp
index 18b8a2f1..cd3465f6 100644
--- a/src/DMA.cpp
+++ b/src/DMA.cpp
@@ -77,21 +77,6 @@ void DMA::Reset()
Running = false;
InProgress = false;
-
- if (NDS::ConsoleType == 1)
- {
- BusRead16 = (CPU==0) ? DSi::ARM9Read16 : DSi::ARM7Read16;
- BusRead32 = (CPU==0) ? DSi::ARM9Read32 : DSi::ARM7Read32;
- BusWrite16 = (CPU==0) ? DSi::ARM9Write16 : DSi::ARM7Write16;
- BusWrite32 = (CPU==0) ? DSi::ARM9Write32 : DSi::ARM7Write32;
- }
- else
- {
- BusRead16 = (CPU==0) ? NDS::ARM9Read16 : NDS::ARM7Read16;
- BusRead32 = (CPU==0) ? NDS::ARM9Read32 : NDS::ARM7Read32;
- BusWrite16 = (CPU==0) ? NDS::ARM9Write16 : NDS::ARM7Write16;
- BusWrite32 = (CPU==0) ? NDS::ARM9Write32 : NDS::ARM7Write32;
- }
}
void DMA::DoSavestate(Savestate* file)
@@ -198,13 +183,7 @@ void DMA::Start()
NDS::StopCPU(CPU, 1<
void DMA::Run9()
{
if (NDS::ARM9Timestamp >= NDS::ARM9Target) return;
@@ -242,7 +221,10 @@ void DMA::Run9()
{
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
- BusWrite16(CurDstAddr, BusRead16(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM9Write16(CurDstAddr, DSi::ARM9Read16(CurSrcAddr));
+ else
+ NDS::ARM9Write16(CurDstAddr, NDS::ARM9Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
@@ -278,7 +260,10 @@ void DMA::Run9()
{
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
- BusWrite32(CurDstAddr, BusRead32(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM9Write32(CurDstAddr, DSi::ARM9Read32(CurSrcAddr));
+ else
+ NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
@@ -317,6 +302,7 @@ void DMA::Run9()
NDS::ResumeCPU(0, 1<
void DMA::Run7()
{
if (NDS::ARM7Timestamp >= NDS::ARM7Target) return;
@@ -354,7 +340,10 @@ void DMA::Run7()
{
NDS::ARM7Timestamp += unitcycles;
- BusWrite16(CurDstAddr, BusRead16(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM7Write16(CurDstAddr, DSi::ARM7Read16(CurSrcAddr));
+ else
+ NDS::ARM7Write16(CurDstAddr, NDS::ARM7Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
@@ -390,7 +379,10 @@ void DMA::Run7()
{
NDS::ARM7Timestamp += unitcycles;
- BusWrite32(CurDstAddr, BusRead32(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM7Write32(CurDstAddr, DSi::ARM7Read32(CurSrcAddr));
+ else
+ NDS::ARM7Write32(CurDstAddr, NDS::ARM7Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
@@ -425,3 +417,14 @@ void DMA::Run7()
InProgress = false;
NDS::ResumeCPU(1, 1<
+// Runs this DMA channel if it is active, dispatching to the ARM9 loop when
+// CPU == 0 and to the ARM7 loop otherwise. ConsoleType is forwarded so the
+// per-CPU loops can choose between the DSi:: and NDS:: bus accessors.
+// NOTE(review): template parameter restored as `int ConsoleType` -- confirm
+// the parameter type against the declaration in DMA.h.
+template <int ConsoleType>
+void DMA::Run()
+{
+ if (!Running) return;
+ if (CPU == 0) return Run9<ConsoleType>();
+ else return Run7<ConsoleType>();
+}
+
+template void DMA::Run<0>();
+template void DMA::Run<1>();
diff --git a/src/DMA.h b/src/DMA.h
index 0344fbac..b0b4ab2a 100644
--- a/src/DMA.h
+++ b/src/DMA.h
@@ -34,9 +34,12 @@ public:
void WriteCnt(u32 val);
void Start();
+ // Run/Run9/Run7 are templated on the console type; DMA.cpp explicitly
+ // instantiates Run for 0 and 1.
+ template <int ConsoleType>
void Run();
+ template <int ConsoleType>
void Run9();
+ template <int ConsoleType>
void Run7();
bool IsInMode(u32 mode)
@@ -86,11 +89,6 @@ private:
bool Stall;
bool IsGXFIFODMA;
-
- u16 (*BusRead16)(u32 addr);
- u32 (*BusRead32)(u32 addr);
- void (*BusWrite16)(u32 addr, u16 val);
- void (*BusWrite32)(u32 addr, u32 val);
};
#endif
diff --git a/src/DSi.cpp b/src/DSi.cpp
index e8b12315..bcc1f925 100644
--- a/src/DSi.cpp
+++ b/src/DSi.cpp
@@ -35,6 +35,7 @@
#include "DSi_I2C.h"
#include "DSi_SD.h"
#include "DSi_AES.h"
+#include "DSi_Camera.h"
#include "tiny-AES-c/aes.hpp"
@@ -542,15 +543,15 @@ void MapNWRAM_A(u32 num, u8 val)
return;
}
-#ifdef JIT_ENABLED
- ARMJIT_Memory::RemapNWRAM(0);
-#endif
-
int mbkn = 0, mbks = 8*num;
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return;
+#ifdef JIT_ENABLED
+ ARMJIT_Memory::RemapNWRAM(0);
+#endif
+
MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn];
@@ -577,15 +578,15 @@ void MapNWRAM_B(u32 num, u8 val)
return;
}
-#ifdef JIT_ENABLED
- ARMJIT_Memory::RemapNWRAM(1);
-#endif
-
int mbkn = 1+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return;
+#ifdef JIT_ENABLED
+ ARMJIT_Memory::RemapNWRAM(1);
+#endif
+
MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn];
@@ -616,15 +617,15 @@ void MapNWRAM_C(u32 num, u8 val)
return;
}
-#ifdef JIT_ENABLED
- ARMJIT_Memory::RemapNWRAM(2);
-#endif
-
int mbkn = 3+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return;
+#ifdef JIT_ENABLED
+ ARMJIT_Memory::RemapNWRAM(2);
+#endif
+
MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn];
@@ -1406,6 +1407,12 @@ u8 ARM9IORead8(u32 addr)
CASE_READ8_32BIT(0x04004060, MBK[0][8])
}
+ if ((addr & 0xFFFFFF00) == 0x04004200)
+ {
+ if (!(SCFG_EXT[0] & (1<<17))) return 0;
+ return DSi_Camera::Read8(addr);
+ }
+
return NDS::ARM9IORead8(addr);
}
@@ -1428,6 +1435,12 @@ u16 ARM9IORead16(u32 addr)
CASE_READ16_32BIT(0x04004060, MBK[0][8])
}
+ if ((addr & 0xFFFFFF00) == 0x04004200)
+ {
+ if (!(SCFG_EXT[0] & (1<<17))) return 0;
+ return DSi_Camera::Read16(addr);
+ }
+
return NDS::ARM9IORead16(addr);
}
@@ -1480,6 +1493,12 @@ u32 ARM9IORead32(u32 addr)
case 0x04004170: return NDMAs[3]->Cnt;
}
+ if ((addr & 0xFFFFFF00) == 0x04004200)
+ {
+ if (!(SCFG_EXT[0] & (1<<17))) return 0;
+ return DSi_Camera::Read32(addr);
+ }
+
return NDS::ARM9IORead32(addr);
}
@@ -1519,6 +1538,12 @@ void ARM9IOWrite8(u32 addr, u8 val)
case 0x04004053: MapNWRAM_C(7, val); return;
}
+ if ((addr & 0xFFFFFF00) == 0x04004200)
+ {
+ if (!(SCFG_EXT[0] & (1<<17))) return;
+ return DSi_Camera::Write8(addr, val);
+ }
+
return NDS::ARM9IOWrite8(addr, val);
}
@@ -1572,6 +1597,12 @@ void ARM9IOWrite16(u32 addr, u16 val)
return;
}
+ if ((addr & 0xFFFFFF00) == 0x04004200)
+ {
+ if (!(SCFG_EXT[0] & (1<<17))) return;
+ return DSi_Camera::Write16(addr, val);
+ }
+
return NDS::ARM9IOWrite16(addr, val);
}
@@ -1678,6 +1709,12 @@ void ARM9IOWrite32(u32 addr, u32 val)
case 0x04004170: NDMAs[3]->WriteCnt(val); return;
}
+ if ((addr & 0xFFFFFF00) == 0x04004200)
+ {
+ if (!(SCFG_EXT[0] & (1<<17))) return;
+ return DSi_Camera::Write32(addr, val);
+ }
+
return NDS::ARM9IOWrite32(addr, val);
}
diff --git a/src/DSi_Camera.cpp b/src/DSi_Camera.cpp
index 56cba1cb..79cfe3fd 100644
--- a/src/DSi_Camera.cpp
+++ b/src/DSi_Camera.cpp
@@ -18,12 +18,28 @@
#include
#include
+#include "DSi.h"
#include "DSi_Camera.h"
DSi_Camera* DSi_Camera0; // 78 / facing outside
DSi_Camera* DSi_Camera1; // 7A / selfie cam
+u16 DSi_Camera::ModuleCnt;
+u16 DSi_Camera::Cnt;
+
+u8 DSi_Camera::FrameBuffer[640*480*4];
+u32 DSi_Camera::FrameLength;
+u32 DSi_Camera::TransferPos;
+
+// note on camera data/etc intervals
+// on hardware those are likely affected by several factors
+// namely, how long cameras take to process frames
+// camera IRQ is fired at roughly 15FPS with default config
+
+const u32 kIRQInterval = 1120000; // ~30 FPS
+const u32 kTransferStart = 60000;
+
bool DSi_Camera::Init()
{
@@ -43,6 +59,87 @@ void DSi_Camera::Reset()
{
DSi_Camera0->ResetCam();
DSi_Camera1->ResetCam();
+
+ ModuleCnt = 0; // CHECKME
+ Cnt = 0;
+
+ memset(FrameBuffer, 0, 640*480*4);
+ TransferPos = 0;
+ FrameLength = 256*192*2; // TODO: make it check frame size, data type, etc
+
+ NDS::ScheduleEvent(NDS::Event_DSi_CamIRQ, true, kIRQInterval, IRQ, 0);
+}
+
+
+// Recurring camera event handler. When one of the two cameras is activated it
+// generates a frame, raises the camera IRQ if Cnt bit 11 is set, and schedules
+// the transfer event if Cnt bit 15 is set. It always reschedules itself with a
+// delay of kIRQInterval. The param argument is not used.
+void DSi_Camera::IRQ(u32 param)
+{
+ DSi_Camera* activecam = nullptr;
+
+ // TODO: check which camera has priority if both are activated
+ // (or does it just jumble both data sources together, like it
+ // does for, say, overlapping VRAM?)
+ // as written, camera 0 wins when both are activated
+ if (DSi_Camera0->IsActivated()) activecam = DSi_Camera0;
+ else if (DSi_Camera1->IsActivated()) activecam = DSi_Camera1;
+
+ if (activecam)
+ {
+ RequestFrame(activecam->Num);
+
+ if (Cnt & (1<<11))
+ NDS::SetIRQ(0, NDS::IRQ_DSi_Camera);
+
+ if (Cnt & (1<<15))
+ NDS::ScheduleEvent(NDS::Event_DSi_CamTransfer, false, kTransferStart, Transfer, 0);
+ }
+
+ NDS::ScheduleEvent(NDS::Event_DSi_CamIRQ, true, kIRQInterval, IRQ, 0);
+}
+
+// Fills the start of FrameBuffer with a 256x192 test pattern of 16-bit pixels.
+// The cam argument is currently ignored; no real picture source is used yet.
+void DSi_Camera::RequestFrame(u32 cam)
+{
+ // only a log message: the pattern below is written regardless of Cnt bit 13
+ if (!(Cnt & (1<<13))) printf("CAMERA: !! REQUESTING YUV FRAME\n");
+
+ // TODO: picture size, data type, cropping, etc
+ // generate test pattern
+ // TODO: get picture from platform (actual camera, video file, whatever source)
+ for (u32 y = 0; y < 192; y++)
+ {
+ for (u32 x = 0; x < 256; x++)
+ {
+ u16* px = (u16*)&FrameBuffer[((y*256) + x) * 2];
+
+ // 8x8 checkerboard: cells where bit 3 of x and y differ get 0x8000,
+ // the others get a value that varies with the row block (y >> 3)
+ if ((x & 0x8) ^ (y & 0x8))
+ *px = 0x8000;
+ else
+ *px = 0xFC00 | ((y >> 3) << 5);
+ }
+ }
+}
+
+// Transfer event handler: processes one block of the frame per invocation.
+// pos is the progress counter carried through the event parameter. Each call
+// triggers the NDMA check for start mode 0x0B, advances pos by the block size,
+// and reschedules itself until pos reaches the hard-coded end value.
+void DSi_Camera::Transfer(u32 pos)
+{
+ // block size comes from the low 4 bits of Cnt (1..16 units), 256 pixels per unit
+ u32 numscan = (Cnt & 0x000F) + 1;
+ u32 numpix = numscan * 256; // CHECKME
+
+ // TODO: present data
+ //printf("CAM TRANSFER POS=%d/%d\n", pos, 0x6000*2);
+
+ DSi::CheckNDMAs(0, 0x0B);
+
+ pos += numpix;
+ if (pos >= 0x6000*2) // HACK
+ {
+ // transfer done
+ }
+ else
+ {
+ // keep going
+
+ // TODO: must be tweaked such that each block has enough time to transfer
+ u32 delay = numpix*2 + 16;
+
+ NDS::ScheduleEvent(NDS::Event_DSi_CamTransfer, false, delay, Transfer, pos);
+ }
}
@@ -62,16 +159,28 @@ void DSi_Camera::ResetCam()
RegAddr = 0;
RegData = 0;
- PLLCnt = 0;
+ PLLDiv = 0x0366;
+ PLLPDiv = 0x00F5;
+ PLLCnt = 0x21F9;
+ ClocksCnt = 0;
StandbyCnt = 0x4029; // checkme
+ MiscCnt = 0;
+}
+
+// Reports whether this camera is active: it must be out of standby
+// (StandbyCnt bit 14 clear) and have data transfer enabled (MiscCnt bit 9 set).
+bool DSi_Camera::IsActivated()
+{
+ const bool standby = (StandbyCnt & (1<<14)) != 0;
+ const bool transferEnabled = (MiscCnt & (1<<9)) != 0;
+
+ return !standby && transferEnabled;
}
-void DSi_Camera::Start()
+void DSi_Camera::I2C_Start()
{
}
-u8 DSi_Camera::Read(bool last)
+u8 DSi_Camera::I2C_Read(bool last)
{
u8 ret;
@@ -89,7 +198,7 @@ u8 DSi_Camera::Read(bool last)
}
else
{
- RegData = ReadReg(RegAddr);
+ RegData = I2C_ReadReg(RegAddr);
ret = RegData >> 8;
}
}
@@ -100,7 +209,7 @@ u8 DSi_Camera::Read(bool last)
return ret;
}
-void DSi_Camera::Write(u8 val, bool last)
+void DSi_Camera::I2C_Write(u8 val, bool last)
{
if (DataPos < 2)
{
@@ -116,7 +225,7 @@ void DSi_Camera::Write(u8 val, bool last)
if (DataPos & 0x1)
{
RegData |= val;
- WriteReg(RegAddr, RegData);
+ I2C_WriteReg(RegAddr, RegData);
RegAddr += 2; // checkme
}
else
@@ -129,38 +238,172 @@ void DSi_Camera::Write(u8 val, bool last)
else DataPos++;
}
-u16 DSi_Camera::ReadReg(u16 addr)
+u16 DSi_Camera::I2C_ReadReg(u16 addr)
{
switch (addr)
{
case 0x0000: return 0x2280; // chip ID
+ case 0x0010: return PLLDiv;
+ case 0x0012: return PLLPDiv;
case 0x0014: return PLLCnt;
+ case 0x0016: return ClocksCnt;
case 0x0018: return StandbyCnt;
+ case 0x001A: return MiscCnt;
case 0x301A: return ((~StandbyCnt) & 0x4000) >> 12;
}
- //printf("DSi_Camera%d: unknown read %04X\n", Num, addr);
+ if(Num==1)printf("DSi_Camera%d: unknown read %04X\n", Num, addr);
return 0;
}
-void DSi_Camera::WriteReg(u16 addr, u16 val)
+void DSi_Camera::I2C_WriteReg(u16 addr, u16 val)
{
switch (addr)
{
+ case 0x0010:
+ PLLDiv = val & 0x3FFF;
+ return;
+ case 0x0012:
+ PLLPDiv = val & 0xBFFF;
+ return;
case 0x0014:
// shouldn't be instant either?
val &= 0x7FFF;
val |= ((val & 0x0002) << 14);
PLLCnt = val;
return;
+ case 0x0016:
+ ClocksCnt = val;
+ printf("ClocksCnt=%04X\n", val);
+ return;
case 0x0018:
// TODO: this shouldn't be instant, but uh
val &= 0x003F;
val |= ((val & 0x0001) << 14);
StandbyCnt = val;
+ printf("CAM%d STBCNT=%04X (%04X)\n", Num, StandbyCnt, val);
+ return;
+ case 0x001A:
+ MiscCnt = val & 0x0B7B;
+ printf("CAM%d MISCCNT=%04X (%04X)\n", Num, MiscCnt, val);
return;
}
- //printf("DSi_Camera%d: unknown write %04X %04X\n", Num, addr, val);
+ if(Num==1)printf("DSi_Camera%d: unknown write %04X %04X\n", Num, addr, val);
+}
+
+
+// 8-bit read from the camera interface. No register is handled at this width
+// yet: every access is logged as unknown and reads back as 0.
+u8 DSi_Camera::Read8(u32 addr)
+{
+ //
+
+ printf("unknown DSi cam read8 %08X\n", addr);
+ return 0;
+}
+
+// 16-bit read from the camera interface: 0x04004200 returns ModuleCnt and
+// 0x04004202 returns Cnt. Any other address is logged as unknown and reads 0.
+// NOTE(review): the printf on the opening-brace line logs every access along
+// with NDS::GetPC(0); it looks like leftover debug tracing -- confirm.
+u16 DSi_Camera::Read16(u32 addr)
+{printf("CAM READ %08X %08X\n", addr, NDS::GetPC(0));
+ switch (addr)
+ {
+ case 0x04004200: return ModuleCnt;
+ case 0x04004202: return Cnt;
+ }
+
+ printf("unknown DSi cam read16 %08X\n", addr);
+ return 0;
+}
+// byte counter advanced alongside TransferPos in Read32 below; it is only
+// touched by code that is currently unreachable
+u32 dorp = 0;
+// 32-bit read from the camera interface. Only 0x04004204 is handled; any other
+// address is logged as unknown and reads 0.
+u32 DSi_Camera::Read32(u32 addr)
+{
+ switch (addr)
+ {
+ case 0x04004204:
+ {
+ // NOTE(review): this unconditional return makes the rest of the case
+ // unreachable, so the FrameBuffer is never actually read here. It looks
+ // like a temporary stub -- confirm before relying on the code below.
+ return 0xFC00801F;
+ if (!(Cnt & (1<<15))) return 0; // CHECKME
+ u32 ret = *(u32*)&FrameBuffer[TransferPos];
+ TransferPos += 4;
+ if (TransferPos >= FrameLength) TransferPos = 0;
+ dorp += 4;
+ //if (dorp >= (256*4*2))
+ if (TransferPos == 0)
+ {
+ dorp = 0;
+ Cnt &= ~(1<<4);
+ }
+ return ret;
+ }
+ }
+
+ printf("unknown DSi cam read32 %08X\n", addr);
+ return 0;
+}
+
+// 8-bit write to the camera interface. No register is handled at this width
+// yet: every access is logged as unknown and otherwise ignored.
+void DSi_Camera::Write8(u32 addr, u8 val)
+{
+ //
+
+ printf("unknown DSi cam write8 %08X %02X\n", addr, val);
+}
+
+// 16-bit write to the camera interface.
+// 0x04004200: stores ModuleCnt; a 0->1 transition of bit 1 clears Cnt.
+// 0x04004202: updates Cnt; the writable bits depend on whether Cnt bit 15 is
+// currently set, and writing bit 5 clears bit 4.
+// Any other address is logged as unknown.
+// NOTE(review): the printf on the opening-brace line logs every access along
+// with NDS::GetPC(0); it looks like leftover debug tracing -- confirm.
+void DSi_Camera::Write16(u32 addr, u16 val)
+{printf("CAM WRITE %08X %04X %08X\n", addr, val, NDS::GetPC(0));
+ switch (addr)
+ {
+ case 0x04004200:
+ {
+ u16 oldcnt = ModuleCnt;
+ ModuleCnt = val;
+
+ if ((ModuleCnt & (1<<1)) && !(oldcnt & (1<<1)))
+ {
+ // reset shit to zero
+ // CHECKME
+
+ Cnt = 0;
+ }
+
+ if ((ModuleCnt & (1<<5)) && !(oldcnt & (1<<5)))
+ {
+ // TODO: reset I2C??
+ }
+ }
+ return;
+
+ case 0x04004202:
+ {
+ // checkme
+ // remember the pre-write value for the bit 15 edge check below
+ u16 prevcnt = Cnt;
+ u16 oldmask;
+ if (Cnt & 0x8000)
+ {
+ val &= 0x8F20;
+ oldmask = 0x601F;
+ }
+ else
+ {
+ val &= 0xEF2F;
+ oldmask = 0x0010;
+ }
+
+ Cnt = (Cnt & oldmask) | (val & ~0x0020);
+ if (val & (1<<5)) Cnt &= ~(1<<4);
+
+ // compare against prevcnt: Cnt already contains the new bit 15 here,
+ // so testing Cnt itself could never detect the 0->1 transition
+ if ((val & (1<<15)) && !(prevcnt & (1<<15)))
+ {
+ // start transfer
+ //DSi::CheckNDMAs(0, 0x0B);
+ }
+ }
+ return;
+ }
+
+ printf("unknown DSi cam write16 %08X %04X\n", addr, val);
+}
+
+// 32-bit write to the camera interface. No register is handled at this width
+// yet: every access is logged as unknown and otherwise ignored.
+void DSi_Camera::Write32(u32 addr, u32 val)
+{
+ //
+
+ printf("unknown DSi cam write32 %08X %08X\n", addr, val);
}
diff --git a/src/DSi_Camera.h b/src/DSi_Camera.h
index 844a4d28..108d76a5 100644
--- a/src/DSi_Camera.h
+++ b/src/DSi_Camera.h
@@ -28,27 +28,56 @@ public:
static void DeInit();
static void Reset();
+ static void IRQ(u32 param);
+ static void RequestFrame(u32 cam);
+
+ static void Transfer(u32 pos);
+
DSi_Camera(u32 num);
~DSi_Camera();
void ResetCam();
+ bool IsActivated();
- void Start();
- u8 Read(bool last);
- void Write(u8 val, bool last);
+ void I2C_Start();
+ u8 I2C_Read(bool last);
+ void I2C_Write(u8 val, bool last);
+
+ static u8 Read8(u32 addr);
+ static u16 Read16(u32 addr);
+ static u32 Read32(u32 addr);
+ static void Write8(u32 addr, u8 val);
+ static void Write16(u32 addr, u16 val);
+ static void Write32(u32 addr, u32 val);
-private:
u32 Num;
+private:
u32 DataPos;
u32 RegAddr;
u16 RegData;
- u16 ReadReg(u16 addr);
- void WriteReg(u16 addr, u16 val);
+ u16 I2C_ReadReg(u16 addr);
+ void I2C_WriteReg(u16 addr, u16 val);
+ u16 PLLDiv;
+ u16 PLLPDiv;
u16 PLLCnt;
+ u16 ClocksCnt;
u16 StandbyCnt;
+ u16 MiscCnt;
+
+ u16 MCUAddr;
+ u16* MCUData;
+
+ u8 MCURegs[0x8000];
+
+ static u16 ModuleCnt;
+ static u16 Cnt;
+
+ static u8 FrameBuffer[640*480*4];
+ static u32 TransferPos;
+ static u32 FrameLength;
};
diff --git a/src/DSi_I2C.cpp b/src/DSi_I2C.cpp
index d58a38cd..76664e5e 100644
--- a/src/DSi_I2C.cpp
+++ b/src/DSi_I2C.cpp
@@ -50,7 +50,7 @@ void Reset()
Registers[0x10] = 0x00; // power btn
Registers[0x11] = 0x00; // reset
Registers[0x12] = 0x00; // power btn tap
- Registers[0x20] = 0x83; // battery
+ Registers[0x20] = 0x8F; // battery
Registers[0x21] = 0x07;
Registers[0x30] = 0x13;
Registers[0x31] = 0x00; // camera power
@@ -187,8 +187,10 @@ void WriteCnt(u8 val)
switch (Device)
{
case 0x4A: Data = DSi_BPTWL::Read(islast); break;
- case 0x78: Data = DSi_Camera0->Read(islast); break;
- case 0x7A: Data = DSi_Camera1->Read(islast); break;
+ case 0x78: Data = DSi_Camera0->I2C_Read(islast); break;
+ case 0x7A: Data = DSi_Camera1->I2C_Read(islast); break;
+ case 0xA0:
+ case 0xE0: Data = 0xFF; break;
default:
printf("I2C: read on unknown device %02X, cnt=%02X, data=%02X, last=%d\n", Device, val, 0, islast);
Data = 0xFF;
@@ -211,8 +213,10 @@ void WriteCnt(u8 val)
switch (Device)
{
case 0x4A: DSi_BPTWL::Start(); break;
- case 0x78: DSi_Camera0->Start(); break;
- case 0x7A: DSi_Camera1->Start(); break;
+ case 0x78: DSi_Camera0->I2C_Start(); break;
+ case 0x7A: DSi_Camera1->I2C_Start(); break;
+ case 0xA0:
+ case 0xE0: ack = false; break;
default:
printf("I2C: %s start on unknown device %02X\n", (Data&0x01)?"read":"write", Device);
ack = false;
@@ -226,8 +230,10 @@ void WriteCnt(u8 val)
switch (Device)
{
case 0x4A: DSi_BPTWL::Write(Data, islast); break;
- case 0x78: DSi_Camera0->Write(Data, islast); break;
- case 0x7A: DSi_Camera1->Write(Data, islast); break;
+ case 0x78: DSi_Camera0->I2C_Write(Data, islast); break;
+ case 0x7A: DSi_Camera1->I2C_Write(Data, islast); break;
+ case 0xA0:
+ case 0xE0: ack = false; break;
default:
printf("I2C: write on unknown device %02X, cnt=%02X, data=%02X, last=%d\n", Device, val, Data, islast);
ack = false;
diff --git a/src/DSi_NDMA.cpp b/src/DSi_NDMA.cpp
index 707c777b..d6d289d4 100644
--- a/src/DSi_NDMA.cpp
+++ b/src/DSi_NDMA.cpp
@@ -101,7 +101,7 @@ void DSi_NDMA::WriteCnt(u32 val)
Start();
if (StartMode != 0x10 && StartMode != 0x30 &&
- StartMode != 0x04 && StartMode != 0x06 && StartMode != 0x07 && StartMode != 0x08 && StartMode != 0x09 &&
+ StartMode != 0x04 && StartMode != 0x06 && StartMode != 0x07 && StartMode != 0x08 && StartMode != 0x09 && StartMode != 0x0B &&
StartMode != 0x24 && StartMode != 0x26 && StartMode != 0x28 && StartMode != 0x29 && StartMode != 0x2A && StartMode != 0x2B)
printf("UNIMPLEMENTED ARM%d NDMA%d START MODE %02X, %08X->%08X LEN=%d BLK=%d CNT=%08X\n",
CPU?7:9, Num, StartMode, SrcAddr, DstAddr, TotalLength, BlockLength, Cnt);
diff --git a/src/DSi_SD.cpp b/src/DSi_SD.cpp
index 45a597b7..de82edb5 100644
--- a/src/DSi_SD.cpp
+++ b/src/DSi_SD.cpp
@@ -778,6 +778,23 @@ void DSi_MMCStorage::SendCMD(u8 cmd, u32 param)
Host->SendResponse(CSR, true);
return;
+ case 1: // SEND_OP_COND
+ // CHECKME!!
+ // also TODO: it's different for the SD card
+ if (Internal)
+ {
+ param &= ~(1<<30);
+ OCR &= 0xBF000000;
+ OCR |= (param & 0x40FFFFFF);
+ Host->SendResponse(OCR, true);
+ SetState(0x01);
+ }
+ else
+ {
+ printf("CMD1 on SD card!!\n");
+ }
+ return;
+
case 2:
case 10: // get CID
Host->SendResponse(*(u32*)&CID[12], false);
@@ -801,6 +818,11 @@ void DSi_MMCStorage::SendCMD(u8 cmd, u32 param)
}
return;
+ case 6: // MMC: 'SWITCH'
+ // TODO!
+ Host->SendResponse(CSR, true);
+ return;
+
case 7: // select card (by RCA)
Host->SendResponse(CSR, true);
return;
diff --git a/src/GPU.cpp b/src/GPU.cpp
index 7989750a..35ebaba1 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024];
u8 VRAM_G[ 16*1024];
u8 VRAM_H[ 32*1024];
u8 VRAM_I[ 16*1024];
-u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
-u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
+u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
+u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
u8 VRAMCNT[9];
u8 VRAMSTAT;
@@ -85,11 +85,67 @@ bool Accelerated;
GPU2D* GPU2D_A;
GPU2D* GPU2D_B;
+/*
+ VRAM invalidation tracking
+
+ - we want to know when a VRAM region used for graphics changed
+ - for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and
+ we don't want to completely invalidate them every time they're unmapped and remapped
+
+ For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank
+ with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions
+ like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags.
+
+ This is more or less a description of VRAMTrackingSet::DeriveState
+ Each time before the memory is read two things could have happened
+ to each 16kb piece (16kb is the smallest unit in which mappings can
+ be made thus also the size VRAMMap_* use):
+ - this piece was remapped compared to last time we checked,
+ which means this location in memory is invalid.
+ - this piece wasn't remapped, which means we need to check whether
+ it was changed. This can be achieved by checking VRAMDirty.
+ VRAMDirty needs to be reset for the respective VRAM bank.
+*/
+
+VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
+VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
+
+VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
+VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
+VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
+VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
+
+VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
+
+
+NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
+NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
+NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
+
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
+
+u8 VRAMFlat_ABG[512*1024];
+u8 VRAMFlat_BBG[128*1024];
+u8 VRAMFlat_AOBJ[256*1024];
+u8 VRAMFlat_BOBJ[128*1024];
+
+u8 VRAMFlat_ABGExtPal[32*1024];
+u8 VRAMFlat_BBGExtPal[32*1024];
+u8 VRAMFlat_AOBJExtPal[8*1024];
+u8 VRAMFlat_BOBJExtPal[8*1024];
+
+u8 VRAMFlat_Texture[512*1024];
+u8 VRAMFlat_TexPal[128*1024];
bool Init()
{
- GPU2D_A = new GPU2D(0);
- GPU2D_B = new GPU2D(1);
+ GPU2D_A = new GPU2D_Soft(0);
+ GPU2D_B = new GPU2D_Soft(1);
if (!GPU3D::Init()) return false;
FrontBuffer = 0;
@@ -113,6 +169,34 @@ void DeInit()
if (Framebuffer[1][1]) delete[] Framebuffer[1][1];
}
+// Discards all VRAM dirty-tracking state: clears the per-bank VRAMDirty
+// bitfields, resets every VRAMDirty_* tracking set, and zero-fills all of the
+// VRAMFlat_* buffers.
+void ResetVRAMCache()
+{
+ // one dirty bitfield per VRAM bank (9 entries)
+ for (int i = 0; i < 9; i++)
+ VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>();
+
+ VRAMDirty_ABG.Reset();
+ VRAMDirty_BBG.Reset();
+ VRAMDirty_AOBJ.Reset();
+ VRAMDirty_BOBJ.Reset();
+ VRAMDirty_ABGExtPal.Reset();
+ VRAMDirty_BBGExtPal.Reset();
+ VRAMDirty_AOBJExtPal.Reset();
+ VRAMDirty_BOBJExtPal.Reset();
+ VRAMDirty_Texture.Reset();
+ VRAMDirty_TexPal.Reset();
+
+ memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG));
+ memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG));
+ memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ));
+ memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ));
+ memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal));
+ memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal));
+ memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal));
+ memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal));
+ memset(VRAMFlat_Texture, 0, sizeof(VRAMFlat_Texture));
+ memset(VRAMFlat_TexPal, 0, sizeof(VRAMFlat_TexPal));
+}
+
void Reset()
{
VCount = 0;
@@ -186,6 +270,8 @@ void Reset()
GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]);
ResetRenderer();
+
+ ResetVRAMCache();
}
void Stop()
@@ -261,6 +347,8 @@ void DoSavestate(Savestate* file)
GPU2D_A->DoSavestate(file);
GPU2D_B->DoSavestate(file);
GPU3D::DoSavestate(file);
+
+ ResetVRAMCache();
}
void AssignFramebuffers()
@@ -411,18 +499,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
+// Returns a pointer into the single VRAM bank selected by mask, at offset
+// wrapped to that bank's size. Returns NULL when mask is zero or has more
+// than one bit set.
u8* GetUniqueBankPtr(u32 mask, u32 offset)
{
- if (!mask) return NULL;
-
- int num = 0;
- if (!(mask & 0xFF)) { mask >>= 8; num += 8; }
- else
- {
- if (!(mask & 0xF)) { mask >>= 4; num += 4; }
- if (!(mask & 0x3)) { mask >>= 2; num += 2; }
- if (!(mask & 0x1)) { mask >>= 1; num += 1; }
- }
- if (mask != 1) return NULL;
-
+ // mask & (mask - 1) is nonzero exactly when more than one bit is set
+ if (!mask || (mask & (mask - 1)) != 0) return NULL;
+ int num = __builtin_ctz(mask);
return &VRAM[num][offset & VRAMMask[num]];
}
@@ -606,8 +684,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette
UNMAP_RANGE(ABGExtPal, 0, 4);
- GPU2D_A->BGExtPalDirty(0);
- GPU2D_A->BGExtPalDirty(2);
break;
}
}
@@ -634,8 +710,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette
MAP_RANGE(ABGExtPal, 0, 4);
- GPU2D_A->BGExtPalDirty(0);
- GPU2D_A->BGExtPalDirty(2);
break;
}
}
@@ -687,12 +761,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask;
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask;
- GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1);
break;
case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal &= ~bankmask;
- GPU2D_A->OBJExtPalDirty();
break;
}
}
@@ -732,12 +804,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette
VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask;
VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask;
- GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1);
break;
case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal |= bankmask;
- GPU2D_A->OBJExtPalDirty();
break;
}
}
@@ -773,8 +843,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette
UNMAP_RANGE(BBGExtPal, 0, 4);
- GPU2D_B->BGExtPalDirty(0);
- GPU2D_B->BGExtPalDirty(2);
break;
}
}
@@ -800,8 +868,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette
MAP_RANGE(BBGExtPal, 0, 4);
- GPU2D_B->BGExtPalDirty(0);
- GPU2D_B->BGExtPalDirty(2);
break;
}
}
@@ -841,7 +907,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal &= ~bankmask;
- GPU2D_B->OBJExtPalDirty();
break;
}
}
@@ -871,7 +936,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal |= bankmask;
- GPU2D_B->OBJExtPalDirty();
break;
}
}
@@ -937,6 +1001,8 @@ void StartHBlank(u32 line)
DispStat[0] |= (1<<1);
DispStat[1] |= (1<<1);
+ SyncDirtyFlags();
+
if (VCount < 192)
{
// draw
@@ -1096,4 +1162,224 @@ void SetVCount(u16 val)
NextVCount = val;
}
+// Computes which VRAMDirtyGranularity-sized chunks of this address region
+// need refreshing, then clears the per-bank dirty flags that were consumed.
+//
+// currentMappings: one bank bitmask per MappingGranularity-sized slot (bit n
+// set = bank n contributes to that slot); Size / MappingGranularity entries
+// are read.
+//
+// A slot whose mapping differs from the cached Mapping[] entry is reported
+// as entirely dirty and the cache is updated. Otherwise the dirty bits of
+// every bank mapped into the slot are ORed into the result.
+// VRAMDirty[] is zeroed for every bank that appears in currentMappings.
+template <u32 Size, u32 MappingGranularity>
+NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings)
+{
+    NonStupidBitField<Size/VRAMDirtyGranularity> result;
+    u16 banksToBeZeroed = 0;
+    for (u32 i = 0; i < Size / MappingGranularity; i++)
+    {
+        if (currentMappings[i] != Mapping[i])
+        {
+            // mapping changed: mark the whole slot dirty and remember the new mapping
+            result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
+            banksToBeZeroed |= currentMappings[i];
+            Mapping[i] = currentMappings[i];
+        }
+        else
+        {
+            u32 mapping = Mapping[i];
+
+            banksToBeZeroed |= mapping;
+
+            // visit every bank mapped into this slot, lowest bit first
+            while (mapping != 0)
+            {
+                u32 num = __builtin_ctz(mapping);
+                mapping &= ~(1 << num);
+
+                // hack for **speed**
+                // this could probably be done less ugly but then we would rely
+                // on the compiler for vectorisation
+                // With 512-byte chunks one slot covers 32 bits (16KB),
+                // 16 bits (8KB) or 256 bits (128KB) of dirty flags, so a
+                // slot can be merged with whole-word ORs.
+                static_assert(VRAMDirtyGranularity == 512);
+                if (MappingGranularity == 16*1024)
+                {
+                    // the slot index is wrapped with the bank's offset mask, in 16KB units
+                    u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)];
+                    ((u32*)result.Data)[i] |= dirty;
+                }
+                else if (MappingGranularity == 8*1024)
+                {
+                    // same as above, in 8KB units
+                    u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
+                    ((u16*)result.Data)[i] |= dirty;
+                }
+                else if (MappingGranularity == 128*1024)
+                {
+                    // a 128KB slot takes the bank's full 256-bit dirty field
+                    ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0];
+                    ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
+                    ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
+                    ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
+                }
+                else
+                {
+                    // welp
+                    // unsupported mapping granularity
+                    abort();
+                }
+            }
+        }
+    }
+
+    // the flags of every bank seen above are now reflected in the result
+    while (banksToBeZeroed != 0)
+    {
+        u32 num = __builtin_ctz(banksToBeZeroed);
+        banksToBeZeroed &= ~(1 << num);
+        memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data));
+    }
+
+    return result;
 }
+
+// Explicit instantiations of DeriveState, one per (Size, MappingGranularity)
+// combination declared for the VRAMDirty_* tracking sets in GPU.h. The member
+// is defined in this file only, so other translation units rely on these.
+template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*);
+template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*);
+template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*);
+template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*);
+template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*);
+template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*);
+
+// Transfers the CPU-write flags of one address region into the per-bank
+// dirty bitfields (VRAMDirty), then clears the write flags.
+//
+// MappingGranularity: bytes covered by one entry of `mappings` (16KB unless
+// stated otherwise). It must match how the table is indexed by its readers
+// and writers, otherwise the lookup below runs past the end of the table.
+// mappings: bank bitmask per MappingGranularity-sized slot.
+// writtenFlags: one bit per VRAMDirtyGranularity bytes of the region.
+template <u32 MappingGranularity = 16*1024, u32 Size>
+void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags)
+{
+    const u32 VRAMWrittenBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
+
+    for (typename NonStupidBitField<Size>::Iterator it = writtenFlags.Begin(); it != writtenFlags.End(); it++)
+    {
+        // banks backing the slot that contains this written chunk
+        u32 mapping = mappings[*it / VRAMWrittenBitsPerMapping];
+        while (mapping != 0)
+        {
+            u32 num = __builtin_ctz(mapping);
+
+            // chunk index wrapped with the bank's offset mask
+            VRAMDirty[num][*it & (VRAMMask[num] / VRAMDirtyGranularity)] = true;
+
+            mapping &= ~(1 << num);
+        }
+    }
+    memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
+}
+
+// Flushes the write tracking of all CPU-visible VRAM regions into VRAMDirty.
+void SyncDirtyFlags()
+{
+    SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG);
+    SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ);
+    SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG);
+    SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ);
+    // VRAMMap_ARM7 is indexed per 128KB ((addr >> 17) & 0x1), not per 16KB;
+    // using the 16KB default would index beyond its entries
+    SyncDirtyFlags<128*1024>(VRAMMap_ARM7, VRAMWritten_ARM7);
+}
+
+// Refreshes the dirty chunks of a flat (linear) VRAM copy from the banks.
+//
+// MappingGranularity: bytes covered by one entry of `mappings`.
+// flat: destination buffer, addressed by region offset.
+// mappings: bank bitmask per MappingGranularity-sized slot.
+// dirty: chunks (VRAMDirtyGranularity bytes each) to refresh; not modified.
+// slowAccess: reads 8 bytes at a region offset; used when no direct bank
+// pointer is available for the chunk.
+//
+// Returns true if at least one chunk was copied.
+template <u32 MappingGranularity, u32 Size>
+inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr))
+{
+    const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
+
+    bool change = false;
+
+    typename NonStupidBitField<Size>::Iterator it = dirty.Begin();
+    while (it != dirty.End())
+    {
+        u32 offset = *it * VRAMDirtyGranularity;
+        u8* dst = flat + offset;
+        // direct pointer into the backing bank, or null if there is none
+        u8* fastAccess = GetUniqueBankPtr(mappings[*it / VRAMBitsPerMapping], offset);
+        if (fastAccess)
+        {
+            memcpy(dst, fastAccess, VRAMDirtyGranularity);
+        }
+        else
+        {
+            // go through the regular read path, 8 bytes at a time
+            for (u32 i = 0; i < VRAMDirtyGranularity; i += 8)
+                *(u64*)&dst[i] = slowAccess(offset + i);
+        }
+        change = true;
+        it++;
+    }
+    return change;
+}
+
+// Brings VRAMFlat_Texture up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
+{
+    // texture mappings are tracked per 128KB slot
+    constexpr u32 kSlotBytes = 128*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture);
+    return refreshed;
+}
+// Brings VRAMFlat_TexPal up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
+{
+    // texture palette mappings are tracked per 16KB slot
+    constexpr u32 kSlotBytes = 16*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal);
+    return refreshed;
+}
+
+// Brings VRAMFlat_ABG up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
+{
+    // engine A BG mappings are tracked per 16KB slot
+    constexpr u32 kSlotBytes = 16*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG);
+    return refreshed;
+}
+// Brings VRAMFlat_BBG up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
+{
+    // engine B BG mappings are tracked per 16KB slot
+    constexpr u32 kSlotBytes = 16*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG);
+    return refreshed;
+}
+
+// Brings VRAMFlat_AOBJ up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty)
+{
+    // engine A OBJ mappings are tracked per 16KB slot
+    constexpr u32 kSlotBytes = 16*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ);
+    return refreshed;
+}
+// Brings VRAMFlat_BOBJ up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
+{
+    // engine B OBJ mappings are tracked per 16KB slot
+    constexpr u32 kSlotBytes = 16*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ);
+    return refreshed;
+}
+
+// Reads a T from the engine A BG extended palette region.
+// The 8KB slot is selected by addr bits 13-14; the result is the OR of the
+// data of every bank (E, F, G) mapped into that slot, or 0 if none is.
+template <typename T>
+T ReadVRAM_ABGExtPal(u32 addr)
+{
+    u32 mask = VRAMMap_ABGExtPal[(addr >> 13) & 0x3];
+
+    T ret = 0;
+    if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0x7FFF];
+    if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF];
+    if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF];
+
+    return ret;
+}
+
+// Reads a T from the engine B BG extended palette region.
+// The 8KB slot is selected by addr bits 13-14; returns bank H's data when H
+// is mapped into that slot, 0 otherwise.
+template <typename T>
+T ReadVRAM_BBGExtPal(u32 addr)
+{
+    u32 mask = VRAMMap_BBGExtPal[(addr >> 13) & 0x3];
+
+    T ret = 0;
+    if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF];
+
+    return ret;
+}
+
+// Reads a T from the engine A OBJ extended palette region (8KB).
+// The result is the OR of the data of every bank (F, G) mapped there, or 0
+// if none is.
+template <typename T>
+T ReadVRAM_AOBJExtPal(u32 addr)
+{
+    u32 mask = VRAMMap_AOBJExtPal;
+
+    T ret = 0;
+    // bank bits follow the bank index, as in ReadVRAM_ABGExtPal: F = 5, G = 6
+    // (MapVRAM_FG stores the same bankmask into both maps)
+    if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x1FFF];
+    if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x1FFF];
+
+    return ret;
+}
+
+// Reads a T from the engine B OBJ extended palette region (8KB).
+// Returns bank I's data when I is mapped there, 0 otherwise.
+template <typename T>
+T ReadVRAM_BOBJExtPal(u32 addr)
+{
+    u32 mask = VRAMMap_BOBJExtPal;
+
+    T ret = 0;
+    if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x1FFF];
+
+    return ret;
+}
+
+// Brings VRAMFlat_ABGExtPal up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
+{
+    // extended palette mappings are tracked per 8KB slot
+    constexpr u32 kSlotBytes = 8*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal);
+    return refreshed;
+}
+// Brings VRAMFlat_BBGExtPal up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
+{
+    // extended palette mappings are tracked per 8KB slot
+    constexpr u32 kSlotBytes = 8*1024;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal);
+    return refreshed;
+}
+
+// Brings VRAMFlat_AOBJExtPal up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
+{
+    // the region is a single 8KB slot, so the lone mask acts as a one-entry table
+    constexpr u32 kSlotBytes = 8*1024;
+    u32* const singleSlot = &VRAMMap_AOBJExtPal;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_AOBJExtPal, singleSlot, dirty, ReadVRAM_AOBJExtPal);
+    return refreshed;
+}
+// Brings VRAMFlat_BOBJExtPal up to date for the chunks flagged in `dirty`;
+// returns whether anything was copied.
+bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
+{
+    // the region is a single 8KB slot, so the lone mask acts as a one-entry table
+    constexpr u32 kSlotBytes = 8*1024;
+    u32* const singleSlot = &VRAMMap_BOBJExtPal;
+    const bool refreshed = CopyLinearVRAM<kSlotBytes>(VRAMFlat_BOBJExtPal, singleSlot, dirty, ReadVRAM_BOBJExtPal);
+    return refreshed;
+}
+
+}
\ No newline at end of file
diff --git a/src/GPU.h b/src/GPU.h
index 1564ef7f..cc62e1ea 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -20,6 +20,7 @@
#define GPU_H
#include "GPU2D.h"
+#include "NonStupidBitfield.h"
namespace GPU
{
@@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024];
extern u8 VRAM_H[ 32*1024];
extern u8 VRAM_I[ 16*1024];
-extern u8* VRAM[9];
+extern u8* const VRAM[9];
extern u32 VRAMMap_LCDC;
extern u32 VRAMMap_ABG[0x20];
@@ -73,6 +74,78 @@ extern GPU2D* GPU2D_B;
extern int Renderer;
+// Size in bytes of one dirty-tracking chunk; every bitfield below holds one
+// bit per chunk.
+const u32 VRAMDirtyGranularity = 512;
+
+// Write flags per CPU-visible address region. The matching WriteVRAM_*
+// function sets the bit of the chunk it touches; SyncDirtyFlags() moves the
+// flags into VRAMDirty and clears them.
+extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
+extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
+extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
+
+// Dirty flags per VRAM bank (index = bank number), each sized for 128KB.
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
+
+// Remembers the bank mapping last observed for one VRAM address region so
+// that DeriveState() can tell which chunks have to be refreshed.
+//
+// Size: size of the region in bytes.
+// MappingGranularity: bytes covered by one mapping slot.
+template <u32 Size, u32 MappingGranularity>
+struct VRAMTrackingSet
+{
+    // bank bitmask last seen for each slot
+    u16 Mapping[Size / MappingGranularity];
+
+    // number of dirty bits covered by one mapping slot
+    const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
+
+    // Forgets all cached mappings so the next DeriveState() call reports the
+    // whole region as dirty.
+    void Reset()
+    {
+        for (u32 i = 0; i < Size / MappingGranularity; i++)
+        {
+            // this is not a real VRAM bank
+            // so it will always be a mismatch => the bank will be completely invalidated
+            Mapping[i] = 0x8000;
+        }
+    }
+    // Returns the chunks that changed since the previous call; defined in
+    // GPU.cpp.
+    NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings);
+};
+
+// Tracking sets (region size, mapping slot size) for the BG/OBJ regions of
+// both 2D engines.
+extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
+extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
+
+// Tracking sets for the extended palette regions (8KB slots).
+extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
+extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
+extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
+extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
+
+// Tracking sets for the texture and texture palette regions.
+extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
+
+// Flat (linear) copies of each region; the MakeVRAMFlat_*Coherent functions
+// below copy dirty chunks into them.
+extern u8 VRAMFlat_ABG[512*1024];
+extern u8 VRAMFlat_BBG[128*1024];
+extern u8 VRAMFlat_AOBJ[256*1024];
+extern u8 VRAMFlat_BOBJ[128*1024];
+
+extern u8 VRAMFlat_ABGExtPal[32*1024];
+extern u8 VRAMFlat_BBGExtPal[32*1024];
+
+extern u8 VRAMFlat_AOBJExtPal[8*1024];
+extern u8 VRAMFlat_BOBJExtPal[8*1024];
+
+extern u8 VRAMFlat_Texture[512*1024];
+extern u8 VRAMFlat_TexPal[128*1024];
+
+// Each function refreshes the chunks flagged in `dirty` of the matching
+// VRAMFlat_* buffer and returns true if at least one chunk was copied.
+bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+// Moves the VRAMWritten_* flags into the per-bank VRAMDirty bitfields;
+// called at the start of each HBlank (see StartHBlank in GPU.cpp).
+void SyncDirtyFlags();
typedef struct
{
@@ -233,7 +306,11 @@ void WriteVRAM_LCDC(u32 addr, T val)
default: return;
}
- if (VRAMMap_LCDC & (1<> 14) & 0x1F];
+ VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
@@ -295,6 +374,8 @@ void WriteVRAM_AOBJ(u32 addr, T val)
{
u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF];
+ VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val;
@@ -324,6 +405,8 @@ void WriteVRAM_BBG(u32 addr, T val)
{
u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7];
+ VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
@@ -350,11 +433,12 @@ void WriteVRAM_BOBJ(u32 addr, T val)
{
u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7];
+ VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
}
-
template
T ReadVRAM_ARM7(u32 addr)
{
@@ -372,6 +456,8 @@ void WriteVRAM_ARM7(u32 addr, T val)
{
u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1];
+ VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
}
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
index 7774c650..fa05e795 100644
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@@ -84,20 +84,6 @@
GPU2D::GPU2D(u32 num)
{
Num = num;
-
- // initialize mosaic table
- for (int m = 0; m < 16; m++)
- {
- for (int x = 0; x < 256; x++)
- {
- int offset = x % (m+1);
- MosaicTable[m][x] = offset;
- }
- }
-}
-
-GPU2D::~GPU2D()
-{
}
void GPU2D::Reset()
@@ -131,8 +117,6 @@ void GPU2D::Reset()
BGMosaicYMax = 0;
OBJMosaicY = 0;
OBJMosaicYMax = 0;
- CurBGXMosaicTable = MosaicTable[0];
- CurOBJXMosaicTable = MosaicTable[0];
BlendCnt = 0;
EVA = 16;
@@ -149,11 +133,7 @@ void GPU2D::Reset()
MasterBrightness = 0;
- BGExtPalStatus[0] = 0;
- BGExtPalStatus[1] = 0;
- BGExtPalStatus[2] = 0;
- BGExtPalStatus[3] = 0;
- OBJExtPalStatus = 0;
+ MosaicXSizeChanged();
}
void GPU2D::DoSavestate(Savestate* file)
@@ -206,18 +186,7 @@ void GPU2D::DoSavestate(Savestate* file)
file->Var32(&Win0Active);
file->Var32(&Win1Active);
- if (!file->Saving)
- {
- // refresh those
- BGExtPalStatus[0] = 0;
- BGExtPalStatus[1] = 0;
- BGExtPalStatus[2] = 0;
- BGExtPalStatus[3] = 0;
- OBJExtPalStatus = 0;
-
- CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
- CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
- }
+ MosaicXSizeChanged();
}
void GPU2D::SetFramebuffer(u32* buf)
@@ -225,15 +194,6 @@ void GPU2D::SetFramebuffer(u32* buf)
Framebuffer = buf;
}
-void GPU2D::SetRenderSettings(bool accel)
-{
- Accelerated = accel;
-
- if (Accelerated) DrawPixel = DrawPixel_Accel;
- else DrawPixel = DrawPixel_Normal;
-}
-
-
u8 GPU2D::Read8(u32 addr)
{
switch (addr & 0x00000FFF)
@@ -328,6 +288,13 @@ void GPU2D::Write8(u32 addr, u8 val)
DispCnt = (DispCnt & 0x00FFFFFF) | (val << 24);
if (Num) DispCnt &= 0xC0B1FFF7;
return;
+
+ case 0x10:
+ if (!Num) GPU3D::SetRenderXPos((GPU3D::RenderXPos & 0xFF00) | val);
+ break;
+ case 0x11:
+ if (!Num) GPU3D::SetRenderXPos((GPU3D::RenderXPos & 0x00FF) | (val << 8));
+ break;
}
if (!Enabled) return;
@@ -378,12 +345,12 @@ void GPU2D::Write8(u32 addr, u8 val)
case 0x04C:
BGMosaicSize[0] = val & 0xF;
BGMosaicSize[1] = val >> 4;
- CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
+ MosaicXSizeChanged();
return;
case 0x04D:
OBJMosaicSize[0] = val & 0xF;
OBJMosaicSize[1] = val >> 4;
- CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
+ MosaicXSizeChanged();
return;
case 0x050: BlendCnt = (BlendCnt & 0x3F00) | val; return;
@@ -420,6 +387,10 @@ void GPU2D::Write16(u32 addr, u16 val)
if (Num) DispCnt &= 0xC0B1FFF7;
return;
+ case 0x010:
+ if (!Num) GPU3D::SetRenderXPos(val);
+ break;
+
case 0x068:
DispFIFO[DispFIFOWritePtr] = val;
return;
@@ -526,10 +497,9 @@ void GPU2D::Write16(u32 addr, u16 val)
case 0x04C:
BGMosaicSize[0] = val & 0xF;
BGMosaicSize[1] = (val >> 4) & 0xF;
- CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
OBJMosaicSize[0] = (val >> 8) & 0xF;
OBJMosaicSize[1] = val >> 12;
- CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
+ MosaicXSizeChanged();
return;
case 0x050: BlendCnt = val & 0x3FFF; return;
@@ -603,138 +573,6 @@ void GPU2D::Write32(u32 addr, u32 val)
Write16(addr+2, val>>16);
}
-
-u32 GPU2D::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb)
-{
- u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4;
- u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 4) & 0x007F00;
- u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 4) & 0x7F0000;
-
- if (r > 0x00003F) r = 0x00003F;
- if (g > 0x003F00) g = 0x003F00;
- if (b > 0x3F0000) b = 0x3F0000;
-
- return r | g | b | 0xFF000000;
-}
-
-u32 GPU2D::ColorBlend5(u32 val1, u32 val2)
-{
- u32 eva = ((val1 >> 24) & 0x1F) + 1;
- u32 evb = 32 - eva;
-
- if (eva == 32) return val1;
-
- u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 5;
- u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 5) & 0x007F00;
- u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 5) & 0x7F0000;
-
- if (eva <= 16)
- {
- r += 0x000001;
- g += 0x000100;
- b += 0x010000;
- }
-
- if (r > 0x00003F) r = 0x00003F;
- if (g > 0x003F00) g = 0x003F00;
- if (b > 0x3F0000) b = 0x3F0000;
-
- return r | g | b | 0xFF000000;
-}
-
-u32 GPU2D::ColorBrightnessUp(u32 val, u32 factor)
-{
- u32 rb = val & 0x3F003F;
- u32 g = val & 0x003F00;
-
- rb += ((((0x3F003F - rb) * factor) >> 4) & 0x3F003F);
- g += ((((0x003F00 - g) * factor) >> 4) & 0x003F00);
-
- return rb | g | 0xFF000000;
-}
-
-u32 GPU2D::ColorBrightnessDown(u32 val, u32 factor)
-{
- u32 rb = val & 0x3F003F;
- u32 g = val & 0x003F00;
-
- rb -= (((rb * factor) >> 4) & 0x3F003F);
- g -= (((g * factor) >> 4) & 0x003F00);
-
- return rb | g | 0xFF000000;
-}
-
-u32 GPU2D::ColorComposite(int i, u32 val1, u32 val2)
-{
- u32 coloreffect = 0;
- u32 eva, evb;
-
- u32 flag1 = val1 >> 24;
- u32 flag2 = val2 >> 24;
-
- u32 target2;
- if (flag2 & 0x80) target2 = 0x1000;
- else if (flag2 & 0x40) target2 = 0x0100;
- else target2 = flag2 << 8;
-
- if ((flag1 & 0x80) && (BlendCnt & target2))
- {
- // sprite blending
-
- coloreffect = 1;
-
- if (flag1 & 0x40)
- {
- eva = flag1 & 0x1F;
- evb = 16 - eva;
- }
- else
- {
- eva = EVA;
- evb = EVB;
- }
- }
- else if ((flag1 & 0x40) && (BlendCnt & target2))
- {
- // 3D layer blending
-
- coloreffect = 4;
- }
- else
- {
- if (flag1 & 0x80) flag1 = 0x10;
- else if (flag1 & 0x40) flag1 = 0x01;
-
- if ((BlendCnt & flag1) && (WindowMask[i] & 0x20))
- {
- coloreffect = (BlendCnt >> 6) & 0x3;
-
- if (coloreffect == 1)
- {
- if (BlendCnt & target2)
- {
- eva = EVA;
- evb = EVB;
- }
- else
- coloreffect = 0;
- }
- }
- }
-
- switch (coloreffect)
- {
- case 0: return val1;
- case 1: return ColorBlend4(val1, val2, eva, evb);
- case 2: return ColorBrightnessUp(val1, EVY);
- case 3: return ColorBrightnessDown(val1, EVY);
- case 4: return ColorBlend5(val1, val2);
- }
-
- return val1;
-}
-
-
void GPU2D::UpdateMosaicCounters(u32 line)
{
// Y mosaic uses incrementing 4-bit counters
@@ -752,183 +590,13 @@ void GPU2D::UpdateMosaicCounters(u32 line)
}
}
-
-void GPU2D::DrawScanline(u32 line)
-{
- int stride = Accelerated ? (256*3 + 1) : 256;
- u32* dst = &Framebuffer[stride * line];
-
- int n3dline = line;
- line = GPU::VCount;
-
- bool forceblank = false;
-
- // scanlines that end up outside of the GPU drawing range
- // (as a result of writing to VCount) are filled white
- if (line > 192) forceblank = true;
-
- // GPU B can be completely disabled by POWCNT1
- // oddly that's not the case for GPU A
- if (Num && !Enabled) forceblank = true;
-
- if (forceblank)
- {
- for (int i = 0; i < 256; i++)
- dst[i] = 0xFFFFFFFF;
-
- if (Accelerated)
- {
- dst[256*3] = 0;
- }
- return;
- }
-
- u32 dispmode = DispCnt >> 16;
- dispmode &= (Num ? 0x1 : 0x3);
-
- if (Num == 0)
- {
- if (!Accelerated)
- _3DLine = GPU3D::GetLine(n3dline);
- else if ((CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1))
- {
- _3DLine = GPU3D::GetLine(n3dline);
- //GPU3D::GLRenderer::PrepareCaptureFrame();
- }
- }
-
- // always render regular graphics
- DrawScanline_BGOBJ(line);
- UpdateMosaicCounters(line);
-
- switch (dispmode)
- {
- case 0: // screen off
- {
- for (int i = 0; i < 256; i++)
- dst[i] = 0x003F3F3F;
- }
- break;
-
- case 1: // regular display
- {
- int i = 0;
- for (; i < (stride & ~1); i+=2)
- *(u64*)&dst[i] = *(u64*)&BGOBJLine[i];
- }
- break;
-
- case 2: // VRAM display
- {
- u32 vrambank = (DispCnt >> 18) & 0x3;
- if (GPU::VRAMMap_LCDC & (1<> 4;
- u8 b = (color & 0x7C00) >> 9;
-
- dst[i] = r | (g << 8) | (b << 16);
- }
- }
- else
- {
- for (int i = 0; i < 256; i++)
- {
- dst[i] = 0;
- }
- }
- }
- break;
-
- case 3: // FIFO display
- {
- for (int i = 0; i < 256; i++)
- {
- u16 color = DispFIFOBuffer[i];
- u8 r = (color & 0x001F) << 1;
- u8 g = (color & 0x03E0) >> 4;
- u8 b = (color & 0x7C00) >> 9;
-
- dst[i] = r | (g << 8) | (b << 16);
- }
- }
- break;
- }
-
- // capture
- if ((Num == 0) && (CaptureCnt & (1<<31)))
- {
- u32 capwidth, capheight;
- switch ((CaptureCnt >> 20) & 0x3)
- {
- case 0: capwidth = 128; capheight = 128; break;
- case 1: capwidth = 256; capheight = 64; break;
- case 2: capwidth = 256; capheight = 128; break;
- case 3: capwidth = 256; capheight = 192; break;
- }
-
- if (line < capheight)
- DoCapture(line, capwidth);
- }
-
- if (Accelerated)
- {
- dst[256*3] = MasterBrightness | (DispCnt & 0x30000);
- return;
- }
-
- // master brightness
- if (dispmode != 0)
- {
- if ((MasterBrightness >> 14) == 1)
- {
- // up
- u32 factor = MasterBrightness & 0x1F;
- if (factor > 16) factor = 16;
-
- for (int i = 0; i < 256; i++)
- {
- dst[i] = ColorBrightnessUp(dst[i], factor);
- }
- }
- else if ((MasterBrightness >> 14) == 2)
- {
- // down
- u32 factor = MasterBrightness & 0x1F;
- if (factor > 16) factor = 16;
-
- for (int i = 0; i < 256; i++)
- {
- dst[i] = ColorBrightnessDown(dst[i], factor);
- }
- }
- }
-
- // convert to 32-bit BGRA
- // note: 32-bit RGBA would be more straightforward, but
- // BGRA seems to be more compatible (Direct2D soft, cairo...)
- for (int i = 0; i < 256; i+=2)
- {
- u64 c = *(u64*)&dst[i];
-
- u64 r = (c << 18) & 0xFC000000FC0000;
- u64 g = (c << 2) & 0xFC000000FC00;
- u64 b = (c >> 14) & 0xFC000000FC;
- c = r | g | b;
-
- *(u64*)&dst[i] = c | ((c & 0x00C0C0C000C0C0C0) >> 6) | 0xFF000000FF000000;
- }
-}
-
void GPU2D::VBlank()
{
- CaptureCnt &= ~(1<<31);
+ if (CaptureLatch)
+ {
+ CaptureCnt &= ~(1<<31);
+ CaptureLatch = false;
+ }
DispFIFOReadPtr = 0;
DispFIFOWritePtr = 0;
@@ -948,235 +616,6 @@ void GPU2D::VBlankEnd()
//OBJMosaicYMax = OBJMosaicSize[1];
//OBJMosaicY = 0;
//OBJMosaicYCount = 0;
-
-#ifdef OGLRENDERER_ENABLED
- if (Accelerated)
- {
- if ((Num == 0) && (CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1))
- {
- GPU3D::GLRenderer::PrepareCaptureFrame();
- }
- }
-#endif
-}
-
-
-void GPU2D::DoCapture(u32 line, u32 width)
-{
- u32 dstvram = (CaptureCnt >> 16) & 0x3;
-
- // TODO: confirm this
- // it should work like VRAM display mode, which requires VRAM to be mapped to LCDC
- if (!(GPU::VRAMMap_LCDC & (1<> 18) & 0x3) << 14) + (line * width);
-
- // TODO: handle 3D in accelerated mode!!
-
- u32* srcA;
- if (CaptureCnt & (1<<24))
- {
- srcA = _3DLine;
- }
- else
- {
- srcA = BGOBJLine;
- if (Accelerated)
- {
- // in accelerated mode, compositing is normally done on the GPU
- // but when doing display capture, we do need the composited output
- // so we do it here
-
- for (int i = 0; i < 256; i++)
- {
- u32 val1 = BGOBJLine[i];
- u32 val2 = BGOBJLine[256+i];
- u32 val3 = BGOBJLine[512+i];
-
- u32 compmode = (val3 >> 24) & 0xF;
-
- if (compmode == 4)
- {
- // 3D on top, blending
-
- u32 _3dval = _3DLine[val3 & 0xFF];
- if ((_3dval >> 24) > 0)
- val1 = ColorBlend5(_3dval, val1);
- else
- val1 = val2;
- }
- else if (compmode == 1)
- {
- // 3D on bottom, blending
-
- u32 _3dval = _3DLine[val3 & 0xFF];
- if ((_3dval >> 24) > 0)
- {
- u32 eva = (val3 >> 8) & 0x1F;
- u32 evb = (val3 >> 16) & 0x1F;
-
- val1 = ColorBlend4(val1, _3dval, eva, evb);
- }
- else
- val1 = val2;
- }
- else if (compmode <= 3)
- {
- // 3D on top, normal/fade
-
- u32 _3dval = _3DLine[val3 & 0xFF];
- if ((_3dval >> 24) > 0)
- {
- u32 evy = (val3 >> 8) & 0x1F;
-
- val1 = _3dval;
- if (compmode == 2) val1 = ColorBrightnessUp(val1, evy);
- else if (compmode == 3) val1 = ColorBrightnessDown(val1, evy);
- }
- else
- val1 = val2;
- }
-
- BGOBJLine[i] = val1;
- }
- }
- }
-
- u16* srcB = NULL;
- u32 srcBaddr = line * 256;
-
- if (CaptureCnt & (1<<25))
- {
- srcB = &DispFIFOBuffer[0];
- srcBaddr = 0;
- }
- else
- {
- u32 srcvram = (DispCnt >> 18) & 0x3;
- if (GPU::VRAMMap_LCDC & (1<> 16) & 0x3) != 2)
- srcBaddr += ((CaptureCnt >> 26) & 0x3) << 14;
- }
-
- dstaddr &= 0xFFFF;
- srcBaddr &= 0xFFFF;
-
- switch ((CaptureCnt >> 29) & 0x3)
- {
- case 0: // source A
- {
- for (u32 i = 0; i < width; i++)
- {
- u32 val = srcA[i];
-
- // TODO: check what happens when alpha=0
-
- u32 r = (val >> 1) & 0x1F;
- u32 g = (val >> 9) & 0x1F;
- u32 b = (val >> 17) & 0x1F;
- u32 a = ((val >> 24) != 0) ? 0x8000 : 0;
-
- dst[dstaddr] = r | (g << 5) | (b << 10) | a;
- dstaddr = (dstaddr + 1) & 0xFFFF;
- }
- }
- break;
-
- case 1: // source B
- {
- if (srcB)
- {
- for (u32 i = 0; i < width; i++)
- {
- dst[dstaddr] = srcB[srcBaddr];
- srcBaddr = (srcBaddr + 1) & 0xFFFF;
- dstaddr = (dstaddr + 1) & 0xFFFF;
- }
- }
- else
- {
- for (u32 i = 0; i < width; i++)
- {
- dst[dstaddr] = 0;
- dstaddr = (dstaddr + 1) & 0xFFFF;
- }
- }
- }
- break;
-
- case 2: // sources A+B
- case 3:
- {
- u32 eva = CaptureCnt & 0x1F;
- u32 evb = (CaptureCnt >> 8) & 0x1F;
-
- // checkme
- if (eva > 16) eva = 16;
- if (evb > 16) evb = 16;
-
- if (srcB)
- {
- for (u32 i = 0; i < width; i++)
- {
- u32 val = srcA[i];
-
- // TODO: check what happens when alpha=0
-
- u32 rA = (val >> 1) & 0x1F;
- u32 gA = (val >> 9) & 0x1F;
- u32 bA = (val >> 17) & 0x1F;
- u32 aA = ((val >> 24) != 0) ? 1 : 0;
-
- val = srcB[srcBaddr];
-
- u32 rB = val & 0x1F;
- u32 gB = (val >> 5) & 0x1F;
- u32 bB = (val >> 10) & 0x1F;
- u32 aB = val >> 15;
-
- u32 rD = ((rA * aA * eva) + (rB * aB * evb)) >> 4;
- u32 gD = ((gA * aA * eva) + (gB * aB * evb)) >> 4;
- u32 bD = ((bA * aA * eva) + (bB * aB * evb)) >> 4;
- u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0);
-
- if (rD > 0x1F) rD = 0x1F;
- if (gD > 0x1F) gD = 0x1F;
- if (bD > 0x1F) bD = 0x1F;
-
- dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15);
- srcBaddr = (srcBaddr + 1) & 0xFFFF;
- dstaddr = (dstaddr + 1) & 0xFFFF;
- }
- }
- else
- {
- for (u32 i = 0; i < width; i++)
- {
- u32 val = srcA[i];
-
- // TODO: check what happens when alpha=0
-
- u32 rA = (val >> 1) & 0x1F;
- u32 gA = (val >> 9) & 0x1F;
- u32 bA = (val >> 17) & 0x1F;
- u32 aA = ((val >> 24) != 0) ? 1 : 0;
-
- u32 rD = (rA * aA * eva) >> 4;
- u32 gD = (gA * aA * eva) >> 4;
- u32 bD = (bA * aA * eva) >> 4;
- u32 aD = (eva>0 ? aA : 0);
-
- dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15);
- dstaddr = (dstaddr + 1) & 0xFFFF;
- }
- }
- }
- break;
- }
}
void GPU2D::SampleFIFO(u32 offset, u32 num)
@@ -1191,88 +630,22 @@ void GPU2D::SampleFIFO(u32 offset, u32 num)
}
}
-
-void GPU2D::BGExtPalDirty(u32 base)
-{
- BGExtPalStatus[base] = 0;
- BGExtPalStatus[base+1] = 0;
-}
-
-void GPU2D::OBJExtPalDirty()
-{
- OBJExtPalStatus = 0;
-}
-
-
u16* GPU2D::GetBGExtPal(u32 slot, u32 pal)
{
- u16* dst = &BGExtPalCache[slot][pal << 8];
-
- if (!(BGExtPalStatus[slot] & (1< 0)) DrawBG_##type(line, num); else DrawBG_##type(line, num); }
-
-#define DoDrawBG_Large(line) \
- { if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_Large(line); else DrawBG_Large(line); }
-
-template
-void GPU2D::DrawScanlineBGMode(u32 line)
+void GPU2D::GetBGVRAM(u8*& data, u32& mask)
{
- for (int i = 3; i >= 0; i--)
+ if (Num == 0)
{
- if ((BGCnt[3] & 0x3) == i)
- {
- if (DispCnt & 0x0800)
- {
- if (bgmode >= 3)
- DoDrawBG(Extended, line, 3)
- else if (bgmode >= 1)
- DoDrawBG(Affine, line, 3)
- else
- DoDrawBG(Text, line, 3)
- }
- }
- if ((BGCnt[2] & 0x3) == i)
- {
- if (DispCnt & 0x0400)
- {
- if (bgmode == 5)
- DoDrawBG(Extended, line, 2)
- else if (bgmode == 4 || bgmode == 2)
- DoDrawBG(Affine, line, 2)
- else
- DoDrawBG(Text, line, 2)
- }
- }
- if ((BGCnt[1] & 0x3) == i)
- {
- if (DispCnt & 0x0200)
- {
- DoDrawBG(Text, line, 1)
- }
- }
- if ((BGCnt[0] & 0x3) == i)
- {
- if (DispCnt & 0x0100)
- {
- if ((!Num) && (DispCnt & 0x8))
- DrawBG_3D();
- else
- DoDrawBG(Text, line, 0)
- }
- }
- if ((DispCnt & 0x1000) && NumSprites)
- InterleaveSprites(0x40000 | (i<<16));
- }
-}
-
-void GPU2D::DrawScanlineBGMode6(u32 line)
-{
- for (int i = 3; i >= 0; i--)
- {
- if ((BGCnt[2] & 0x3) == i)
- {
- if (DispCnt & 0x0400)
- {
- DoDrawBG_Large(line)
- }
- }
- if ((BGCnt[0] & 0x3) == i)
- {
- if (DispCnt & 0x0100)
- {
- if ((!Num) && (DispCnt & 0x8))
- DrawBG_3D();
- }
- }
- if ((DispCnt & 0x1000) && NumSprites)
- InterleaveSprites(0x40000 | (i<<16));
- }
-}
-
-void GPU2D::DrawScanlineBGMode7(u32 line)
-{
- // mode 7 only has text-mode BG0 and BG1
-
- for (int i = 3; i >= 0; i--)
- {
- if ((BGCnt[1] & 0x3) == i)
- {
- if (DispCnt & 0x0200)
- {
- DoDrawBG(Text, line, 1)
- }
- }
- if ((BGCnt[0] & 0x3) == i)
- {
- if (DispCnt & 0x0100)
- {
- if ((!Num) && (DispCnt & 0x8))
- DrawBG_3D();
- else
- DoDrawBG(Text, line, 0)
- }
- }
- if ((DispCnt & 0x1000) && NumSprites)
- InterleaveSprites(0x40000 | (i<<16));
- }
-}
-
-void GPU2D::DrawScanline_BGOBJ(u32 line)
-{
- // forced blank disables BG/OBJ compositing
- if (DispCnt & (1<<7))
- {
- for (int i = 0; i < 256; i++)
- BGOBJLine[i] = 0xFF3F3F3F;
-
- return;
- }
-
- u64 backdrop;
- if (Num) backdrop = *(u16*)&GPU::Palette[0x400];
- else backdrop = *(u16*)&GPU::Palette[0];
-
- {
- u8 r = (backdrop & 0x001F) << 1;
- u8 g = (backdrop & 0x03E0) >> 4;
- u8 b = (backdrop & 0x7C00) >> 9;
-
- backdrop = r | (g << 8) | (b << 16) | 0x20000000;
- backdrop |= (backdrop << 32);
-
- for (int i = 0; i < 256; i+=2)
- *(u64*)&BGOBJLine[i] = backdrop;
- }
-
- if (DispCnt & 0xE000)
- CalculateWindowMask(line);
- else
- memset(WindowMask, 0xFF, 256);
-
- ApplySpriteMosaicX();
-
- switch (DispCnt & 0x7)
- {
- case 0: DrawScanlineBGMode<0>(line); break;
- case 1: DrawScanlineBGMode<1>(line); break;
- case 2: DrawScanlineBGMode<2>(line); break;
- case 3: DrawScanlineBGMode<3>(line); break;
- case 4: DrawScanlineBGMode<4>(line); break;
- case 5: DrawScanlineBGMode<5>(line); break;
- case 6: DrawScanlineBGMode6(line); break;
- case 7: DrawScanlineBGMode7(line); break;
- }
-
- // color special effects
- // can likely be optimized
-
- if (!Accelerated)
- {
- for (int i = 0; i < 256; i++)
- {
- u32 val1 = BGOBJLine[i];
- u32 val2 = BGOBJLine[256+i];
-
- BGOBJLine[i] = ColorComposite(i, val1, val2);
- }
+ data = GPU::VRAMFlat_ABG;
+ mask = 0x7FFFF;
}
else
{
- if (Num == 0)
- {
- for (int i = 0; i < 256; i++)
- {
- u32 val1 = BGOBJLine[i];
- u32 val2 = BGOBJLine[256+i];
- u32 val3 = BGOBJLine[512+i];
-
- u32 flag1 = val1 >> 24;
- u32 flag2 = val2 >> 24;
-
- u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
-
- u32 target1;
- if (flag1 & 0x80) target1 = 0x0010;
- else if (flag1 & 0x40) target1 = 0x0001;
- else target1 = flag1;
-
- u32 target2;
- if (flag2 & 0x80) target2 = 0x1000;
- else if (flag2 & 0x40) target2 = 0x0100;
- else target2 = flag2 << 8;
-
- if (((flag1 & 0xC0) == 0x40) && (BlendCnt & target2))
- {
- // 3D on top, blending
-
- BGOBJLine[i] = val2;
- BGOBJLine[256+i] = ColorComposite(i, val2, val3);
- BGOBJLine[512+i] = 0x04000000 | (val1 & 0xFF);
- }
- else if ((flag1 & 0xC0) == 0x40)
- {
- // 3D on top, normal/fade
-
- if (bldcnteffect == 1) bldcnteffect = 0;
- if (!(BlendCnt & 0x0001)) bldcnteffect = 0;
- if (!(WindowMask[i] & 0x20)) bldcnteffect = 0;
-
- BGOBJLine[i] = val2;
- BGOBJLine[256+i] = ColorComposite(i, val2, val3);
- BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8) | (val1 & 0xFF);
- }
- else if (((flag2 & 0xC0) == 0x40) && ((BlendCnt & 0x01C0) == 0x0140))
- {
- // 3D on bottom, blending
-
- u32 eva, evb;
- if ((flag1 & 0xC0) == 0xC0)
- {
- eva = flag1 & 0x1F;
- evb = 16 - eva;
- }
- else if (((BlendCnt & target1) && (WindowMask[i] & 0x20)) ||
- ((flag1 & 0xC0) == 0x80))
- {
- eva = EVA;
- evb = EVB;
- }
- else
- bldcnteffect = 7;
-
- BGOBJLine[i] = val1;
- BGOBJLine[256+i] = ColorComposite(i, val1, val3);
- BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8) | (val2 & 0xFF);
- }
- else
- {
- // no potential 3D pixel involved
-
- BGOBJLine[i] = ColorComposite(i, val1, val2);
- BGOBJLine[256+i] = 0;
- BGOBJLine[512+i] = 0x07000000;
- }
- }
- }
- else
- {
- for (int i = 0; i < 256; i++)
- {
- u32 val1 = BGOBJLine[i];
- u32 val2 = BGOBJLine[256+i];
-
- BGOBJLine[i] = ColorComposite(i, val1, val2);
- BGOBJLine[256+i] = 0;
- BGOBJLine[512+i] = 0x07000000;
- }
- }
+ data = GPU::VRAMFlat_BBG;
+ mask = 0x1FFFF;
}
-
- if (BGMosaicY >= BGMosaicYMax)
- {
- BGMosaicY = 0;
- BGMosaicYMax = BGMosaicSize[1];
- }
- else
- BGMosaicY++;
-
- /*if (OBJMosaicY >= OBJMosaicYMax)
- {
- OBJMosaicY = 0;
- OBJMosaicYMax = OBJMosaicSize[1];
- }
- else
- OBJMosaicY++;*/
}
-
-void GPU2D::DrawPixel_Normal(u32* dst, u16 color, u32 flag)
+void GPU2D::GetOBJVRAM(u8*& data, u32& mask)
{
- u8 r = (color & 0x001F) << 1;
- u8 g = (color & 0x03E0) >> 4;
- u8 b = (color & 0x7C00) >> 9;
- //g |= ((color & 0x8000) >> 15);
-
- *(dst+256) = *dst;
- *dst = r | (g << 8) | (b << 16) | flag;
-}
-
-void GPU2D::DrawPixel_Accel(u32* dst, u16 color, u32 flag)
-{
- u8 r = (color & 0x001F) << 1;
- u8 g = (color & 0x03E0) >> 4;
- u8 b = (color & 0x7C00) >> 9;
-
- *(dst+512) = *(dst+256);
- *(dst+256) = *dst;
- *dst = r | (g << 8) | (b << 16) | flag;
-}
-
-void GPU2D::DrawBG_3D()
-{
- u16 xoff = BGXPos[0];
- int i = 0;
- int iend = 256;
-
- if (xoff & 0x100)
+ if (Num == 0)
{
- i = (0x100 - (xoff & 0xFF));
- xoff += i;
- }
- if ((xoff - i + iend - 1) & 0x100)
- {
- iend -= (xoff & 0xFF);
- }
-
- if (Accelerated)
- {
- for (; i < iend; i++)
- {
- int pos = xoff++;
-
- if (!(WindowMask[i] & 0x01)) continue;
-
- BGOBJLine[i+512] = BGOBJLine[i+256];
- BGOBJLine[i+256] = BGOBJLine[i];
- BGOBJLine[i] = 0x40000000 | pos; // 3D-layer placeholder
- }
+ data = GPU::VRAMFlat_AOBJ;
+ mask = 0x3FFFF;
}
else
{
- for (; i < iend; i++)
- {
- u32 c = _3DLine[xoff];
- xoff++;
-
- if ((c >> 24) == 0) continue;
- if (!(WindowMask[i] & 0x01)) continue;
-
- BGOBJLine[i+256] = BGOBJLine[i];
- BGOBJLine[i] = c | 0x40000000;
- }
- }
-}
-
-template
-void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
-{
- u16 bgcnt = BGCnt[bgnum];
-
- u32 tilesetaddr, tilemapaddr;
- u16* pal;
- u32 extpal, extpalslot;
-
- u16 xoff = BGXPos[bgnum];
- u16 yoff = BGYPos[bgnum] + line;
-
- if (bgcnt & 0x0040)
- {
- // vertical mosaic
- yoff -= BGMosaicY;
- }
-
- u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0;
-
- extpal = (DispCnt & 0x40000000);
- if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum;
-
- if (Num)
- {
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
-
- pal = (u16*)&GPU::Palette[0x400];
- }
- else
- {
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
-
- pal = (u16*)&GPU::Palette[0];
- }
-
- // adjust Y position in tilemap
- if (bgcnt & 0x8000)
- {
- tilemapaddr += ((yoff & 0x1F8) << 3);
- if (bgcnt & 0x4000)
- tilemapaddr += ((yoff & 0x100) << 3);
- }
- else
- tilemapaddr += ((yoff & 0xF8) << 3);
-
- u16 curtile;
- u16* curpal;
- u32 pixelsaddr;
- u8 color;
- u32 lastxpos;
-
- if (bgcnt & 0x0080)
- {
- // 256-color
-
- // preload shit as needed
- if ((xoff & 0x7) || mosaic)
- {
- curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
-
- if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
- else curpal = pal;
-
- pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6)
- + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3);
- }
-
- if (mosaic) lastxpos = xoff;
-
- for (int i = 0; i < 256; i++)
- {
- u32 xpos;
- if (mosaic) xpos = xoff - CurBGXMosaicTable[i];
- else xpos = xoff;
-
- if ((!mosaic && (!(xpos & 0x7))) ||
- (mosaic && ((xpos >> 3) != (lastxpos >> 3))))
- {
- // load a new tile
- curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
-
- if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
- else curpal = pal;
-
- pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6)
- + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3);
-
- if (mosaic) lastxpos = xpos;
- }
-
- // draw pixel
- if (WindowMask[i] & (1<(pixelsaddr + tilexoff);
-
- if (color)
- DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
- curpal = pal + ((curtile & 0xF000) >> 8);
- pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
- + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
- }
-
- if (mosaic) lastxpos = xoff;
-
- for (int i = 0; i < 256; i++)
- {
- u32 xpos;
- if (mosaic) xpos = xoff - CurBGXMosaicTable[i];
- else xpos = xoff;
-
- if ((!mosaic && (!(xpos & 0x7))) ||
- (mosaic && ((xpos >> 3) != (lastxpos >> 3))))
- {
- // load a new tile
- curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
- curpal = pal + ((curtile & 0xF000) >> 8);
- pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
- + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
-
- if (mosaic) lastxpos = xpos;
- }
-
- // draw pixel
- if (WindowMask[i] & (1<(pixelsaddr + (tilexoff >> 1)) >> 4;
- }
- else
- {
- color = GPU::ReadVRAM_BG(pixelsaddr + (tilexoff >> 1)) & 0x0F;
- }
-
- if (color)
- DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<
-void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
-{
- u16 bgcnt = BGCnt[bgnum];
-
- u32 tilesetaddr, tilemapaddr;
- u16* pal;
-
- u32 coordmask;
- u32 yshift;
- switch (bgcnt & 0xC000)
- {
- case 0x0000: coordmask = 0x07800; yshift = 7; break;
- case 0x4000: coordmask = 0x0F800; yshift = 8; break;
- case 0x8000: coordmask = 0x1F800; yshift = 9; break;
- case 0xC000: coordmask = 0x3F800; yshift = 10; break;
- }
-
- u32 overflowmask;
- if (bgcnt & 0x2000) overflowmask = 0;
- else overflowmask = ~(coordmask | 0x7FF);
-
- s16 rotA = BGRotA[bgnum-2];
- s16 rotB = BGRotB[bgnum-2];
- s16 rotC = BGRotC[bgnum-2];
- s16 rotD = BGRotD[bgnum-2];
-
- s32 rotX = BGXRefInternal[bgnum-2];
- s32 rotY = BGYRefInternal[bgnum-2];
-
- if (bgcnt & 0x0040)
- {
- // vertical mosaic
- rotX -= (BGMosaicY * rotB);
- rotY -= (BGMosaicY * rotD);
- }
-
- if (Num)
- {
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
-
- pal = (u16*)&GPU::Palette[0x400];
- }
- else
- {
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
-
- pal = (u16*)&GPU::Palette[0];
- }
-
- u16 curtile;
- u8 color;
-
- yshift -= 3;
-
- for (int i = 0; i < 256; i++)
- {
- if (WindowMask[i] & (1<(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)));
-
- // draw pixel
- u32 tilexoff = (finalX >> 8) & 0x7;
- u32 tileyoff = (finalY >> 8) & 0x7;
-
- color = GPU::ReadVRAM_BG(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff);
-
- if (color)
- DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<
-void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
-{
- u16 bgcnt = BGCnt[bgnum];
-
- u32 tilesetaddr, tilemapaddr;
- u16* pal;
- u32 extpal;
-
- extpal = (DispCnt & 0x40000000);
-
- s16 rotA = BGRotA[bgnum-2];
- s16 rotB = BGRotB[bgnum-2];
- s16 rotC = BGRotC[bgnum-2];
- s16 rotD = BGRotD[bgnum-2];
-
- s32 rotX = BGXRefInternal[bgnum-2];
- s32 rotY = BGYRefInternal[bgnum-2];
-
- if (bgcnt & 0x0040)
- {
- // vertical mosaic
- rotX -= (BGMosaicY * rotB);
- rotY -= (BGMosaicY * rotD);
- }
-
- if (bgcnt & 0x0080)
- {
- // bitmap modes
-
- u32 xmask, ymask;
- u32 yshift;
- switch (bgcnt & 0xC000)
- {
- case 0x0000: xmask = 0x07FFF; ymask = 0x07FFF; yshift = 7; break;
- case 0x4000: xmask = 0x0FFFF; ymask = 0x0FFFF; yshift = 8; break;
- case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break;
- case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break;
- }
-
- u32 ofxmask, ofymask;
- if (bgcnt & 0x2000)
- {
- ofxmask = 0;
- ofymask = 0;
- }
- else
- {
- ofxmask = ~xmask;
- ofymask = ~ymask;
- }
-
- if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6);
- else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6);
-
- if (bgcnt & 0x0004)
- {
- // direct color bitmap
-
- u16 color;
-
- for (int i = 0; i < 256; i++)
- {
- if (WindowMask[i] & (1<(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1));
-
- if (color & 0x8000)
- DrawPixel(&BGOBJLine[i], color, 0x01000000<(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
-
- if (color)
- DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
-
- pal = (u16*)&GPU::Palette[0];
- }
-
- u16 curtile;
- u16* curpal;
- u8 color;
-
- yshift -= 3;
-
- for (int i = 0; i < 256; i++)
- {
- if (WindowMask[i] & (1<(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1));
-
- if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12);
- else curpal = pal;
-
- // draw pixel
- u32 tilexoff = (finalX >> 8) & 0x7;
- u32 tileyoff = (finalY >> 8) & 0x7;
-
- if (curtile & 0x0400) tilexoff = 7-tilexoff;
- if (curtile & 0x0800) tileyoff = 7-tileyoff;
-
- color = GPU::ReadVRAM_BG(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff);
-
- if (color)
- DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<
-void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
-{
- u16 bgcnt = BGCnt[2];
-
- u32 tilesetaddr, tilemapaddr;
- u16* pal;
-
- // large BG sizes:
- // 0: 512x1024
- // 1: 1024x512
- // 2: 512x256
- // 3: 512x512
- u32 xmask, ymask;
- u32 yshift;
- switch (bgcnt & 0xC000)
- {
- case 0x0000: xmask = 0x1FFFF; ymask = 0x3FFFF; yshift = 9; break;
- case 0x4000: xmask = 0x3FFFF; ymask = 0x1FFFF; yshift = 10; break;
- case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break;
- case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break;
- }
-
- u32 ofxmask, ofymask;
- if (bgcnt & 0x2000)
- {
- ofxmask = 0;
- ofymask = 0;
- }
- else
- {
- ofxmask = ~xmask;
- ofymask = ~ymask;
- }
-
- s16 rotA = BGRotA[0];
- s16 rotB = BGRotB[0];
- s16 rotC = BGRotC[0];
- s16 rotD = BGRotD[0];
-
- s32 rotX = BGXRefInternal[0];
- s32 rotY = BGYRefInternal[0];
-
- if (bgcnt & 0x0040)
- {
- // vertical mosaic
- rotX -= (BGMosaicY * rotB);
- rotY -= (BGMosaicY * rotD);
- }
-
- if (Num) tilemapaddr = 0x06200000;
- else tilemapaddr = 0x06000000;
-
- // 256-color bitmap
-
- if (Num) pal = (u16*)&GPU::Palette[0x400];
- else pal = (u16*)&GPU::Palette[0];
-
- u8 color;
-
- for (int i = 0; i < 256; i++)
- {
- if (WindowMask[i] & (1<<2))
- {
- s32 finalX, finalY;
- if (mosaic)
- {
- int im = CurBGXMosaicTable[i];
- finalX = rotX - (im * rotA);
- finalY = rotY - (im * rotC);
- }
- else
- {
- finalX = rotX;
- finalY = rotY;
- }
-
- if (!(finalX & ofxmask) && !(finalY & ofymask))
- {
- color = GPU::ReadVRAM_BG(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
-
- if (color)
- DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
- }
- }
-
- rotX += rotA;
- rotY += rotC;
- }
-
- BGXRefInternal[0] += rotB;
- BGYRefInternal[0] += rotD;
-}
-
-// OBJ line buffer:
-// * bit0-15: color (bit15=1: direct color, bit15=0: palette index, bit12=0 to indicate extpal)
-// * bit16-17: BG-relative priority
-// * bit18: non-transparent sprite pixel exists here
-// * bit19: X mosaic should be applied here
-// * bit24-31: compositor flags
-
-void GPU2D::ApplySpriteMosaicX()
-{
- // apply X mosaic if needed
- // X mosaic for sprites is applied after all sprites are rendered
-
- if (OBJMosaicSize[0] == 0) return;
-
- u32 lastcolor = OBJLine[0];
-
- for (u32 i = 1; i < 256; i++)
- {
- if (!(OBJLine[i] & 0x100000))
- {
- // not a mosaic'd sprite pixel
- continue;
- }
-
- if ((OBJIndex[i] != OBJIndex[i-1]) || (CurOBJXMosaicTable[i] == 0))
- lastcolor = OBJLine[i];
- else
- OBJLine[i] = lastcolor;
- }
-}
-
-void GPU2D::InterleaveSprites(u32 prio)
-{
- u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
-
- if (DispCnt & 0x80000000)
- {
- u16* extpal = GetOBJExtPal();
-
- for (u32 i = 0; i < 256; i++)
- {
- if ((OBJLine[i] & 0x70000) != prio) continue;
- if (!(WindowMask[i] & 0x10)) continue;
-
- u16 color;
- u32 pixel = OBJLine[i];
-
- if (pixel & 0x8000)
- color = pixel & 0x7FFF;
- else if (pixel & 0x1000)
- color = pal[pixel & 0xFF];
- else
- color = extpal[pixel & 0xFFF];
-
- DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
- }
- }
- else
- {
- // optimized no-extpal version
-
- for (u32 i = 0; i < 256; i++)
- {
- if ((OBJLine[i] & 0x70000) != prio) continue;
- if (!(WindowMask[i] & 0x10)) continue;
-
- u16 color;
- u32 pixel = OBJLine[i];
-
- if (pixel & 0x8000)
- color = pixel & 0x7FFF;
- else
- color = pal[pixel & 0xFF];
-
- DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
- }
- }
-}
-
-#define DoDrawSprite(type, ...) \
- if (iswin) \
- { \
- DrawSprite_##type(__VA_ARGS__); \
- } \
- else \
- { \
- DrawSprite_##type(__VA_ARGS__); \
- }
-
-void GPU2D::DrawSprites(u32 line)
-{
- if (line == 0)
- {
- // reset those counters here
- // TODO: find out when those are supposed to be reset
- // it would make sense to reset them at the end of VBlank
- // however, sprites are rendered one scanline in advance
- // so they need to be reset a bit earlier
-
- OBJMosaicY = 0;
- OBJMosaicYCount = 0;
- }
-
- NumSprites = 0;
- memset(OBJLine, 0, 256*4);
- memset(OBJWindow, 0, 256);
- if (!(DispCnt & 0x1000)) return;
-
- memset(OBJIndex, 0xFF, 256);
-
- u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0];
-
- const s32 spritewidth[16] =
- {
- 8, 16, 8, 8,
- 16, 32, 8, 8,
- 32, 32, 16, 8,
- 64, 64, 32, 8
- };
- const s32 spriteheight[16] =
- {
- 8, 8, 16, 8,
- 16, 8, 32, 8,
- 32, 16, 32, 8,
- 64, 32, 64, 8
- };
-
- for (int bgnum = 0x0C00; bgnum >= 0x0000; bgnum -= 0x0400)
- {
- for (int sprnum = 127; sprnum >= 0; sprnum--)
- {
- u16* attrib = &oam[sprnum*4];
-
- if ((attrib[2] & 0x0C00) != bgnum)
- continue;
-
- bool iswin = (((attrib[0] >> 10) & 0x3) == 2);
-
- u32 sprline;
- if ((attrib[0] & 0x1000) && !iswin)
- {
- // apply Y mosaic
- sprline = OBJMosaicY;
- }
- else
- sprline = line;
-
- if (attrib[0] & 0x0100)
- {
- u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12);
- s32 width = spritewidth[sizeparam];
- s32 height = spriteheight[sizeparam];
- s32 boundwidth = width;
- s32 boundheight = height;
-
- if (attrib[0] & 0x0200)
- {
- boundwidth <<= 1;
- boundheight <<= 1;
- }
-
- u32 ypos = attrib[0] & 0xFF;
- ypos = (sprline - ypos) & 0xFF;
- if (ypos >= (u32)boundheight)
- continue;
-
- s32 xpos = (s32)(attrib[1] << 23) >> 23;
- if (xpos <= -boundwidth)
- continue;
-
- u32 rotparamgroup = (attrib[1] >> 9) & 0x1F;
-
- DoDrawSprite(Rotscale, sprnum, boundwidth, boundheight, width, height, xpos, ypos);
-
- NumSprites++;
- }
- else
- {
- if (attrib[0] & 0x0200)
- continue;
-
- u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12);
- s32 width = spritewidth[sizeparam];
- s32 height = spriteheight[sizeparam];
-
- u32 ypos = attrib[0] & 0xFF;
- ypos = (sprline - ypos) & 0xFF;
- if (ypos >= (u32)height)
- continue;
-
- s32 xpos = (s32)(attrib[1] << 23) >> 23;
- if (xpos <= -width)
- continue;
-
- DoDrawSprite(Normal, sprnum, width, height, xpos, ypos);
-
- NumSprites++;
- }
- }
- }
-}
-
-template
-void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos)
-{
- u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0];
- u16* attrib = &oam[num * 4];
- u16* rotparams = &oam[(((attrib[1] >> 9) & 0x1F) * 16) + 3];
-
- u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000;
- u32 tilenum = attrib[2] & 0x03FF;
- u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3);
-
- u32 ytilefactor;
-
- s32 centerX = boundwidth >> 1;
- s32 centerY = boundheight >> 1;
-
- if ((attrib[0] & 0x1000) && !window)
- {
- // apply Y mosaic
- pixelattr |= 0x100000;
- }
-
- u32 xoff;
- if (xpos >= 0)
- {
- xoff = 0;
- if ((xpos+boundwidth) > 256)
- boundwidth = 256-xpos;
- }
- else
- {
- xoff = -xpos;
- xpos = 0;
- }
-
- s16 rotA = (s16)rotparams[0];
- s16 rotB = (s16)rotparams[4];
- s16 rotC = (s16)rotparams[8];
- s16 rotD = (s16)rotparams[12];
-
- s32 rotX = ((xoff-centerX) * rotA) + ((ypos-centerY) * rotB) + (width << 7);
- s32 rotY = ((xoff-centerX) * rotC) + ((ypos-centerY) * rotD) + (height << 7);
-
- width <<= 8;
- height <<= 8;
-
- u16 color = 0; // transparent in all cases
-
- if (spritemode == 3)
- {
- u32 alpha = attrib[2] >> 12;
- if (!alpha) return;
- alpha++;
-
- pixelattr |= (0xC0000000 | (alpha << 24));
-
- if (DispCnt & 0x40)
- {
- if (DispCnt & 0x20)
- {
- // 'reserved'
- // draws nothing
-
- return;
- }
- else
- {
- tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
- ytilefactor = ((width >> 8) * 2);
- }
- }
- else
- {
- if (DispCnt & 0x20)
- {
- tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
- ytilefactor = (256 * 2);
- }
- else
- {
- tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
- ytilefactor = (128 * 2);
- }
- }
-
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
-
- for (; xoff < boundwidth;)
- {
- if ((u32)rotX < width && (u32)rotY < height)
- {
- color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1));
-
- if (color & 0x8000)
- {
- if (window) OBJWindow[xpos] = 1;
- else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
- }
- else if (!window)
- {
- if (OBJLine[xpos] == 0)
- {
- OBJLine[xpos] = pixelattr & 0x180000;
- OBJIndex[xpos] = num;
- }
- }
- }
-
- rotX += rotA;
- rotY += rotC;
- xoff++;
- xpos++;
- }
- }
- else
- {
- if (DispCnt & 0x10)
- {
- tilenum <<= ((DispCnt >> 20) & 0x3);
- ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0);
- }
- else
- {
- ytilefactor = 0x20;
- }
-
- if (spritemode == 1) pixelattr |= 0x80000000;
- else pixelattr |= 0x10000000;
-
- if (attrib[0] & 0x2000)
- {
- // 256-color
- tilenum <<= 5;
- ytilefactor <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
-
- if (!window)
- {
- if (!(DispCnt & 0x80000000))
- pixelattr |= 0x1000;
- else
- pixelattr |= ((attrib[2] & 0xF000) >> 4);
- }
-
- for (; xoff < boundwidth;)
- {
- if ((u32)rotX < width && (u32)rotY < height)
- {
- color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8));
-
- if (color)
- {
- if (window) OBJWindow[xpos] = 1;
- else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
- }
- else if (!window)
- {
- if (OBJLine[xpos] == 0)
- {
- OBJLine[xpos] = pixelattr & 0x180000;
- OBJIndex[xpos] = num;
- }
- }
- }
-
- rotX += rotA;
- rotY += rotC;
- xoff++;
- xpos++;
- }
- }
- else
- {
- // 16-color
- tilenum <<= 5;
- ytilefactor <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
-
- if (!window)
- {
- pixelattr |= 0x1000;
- pixelattr |= ((attrib[2] & 0xF000) >> 8);
- }
-
- for (; xoff < boundwidth;)
- {
- if ((u32)rotX < width && (u32)rotY < height)
- {
- color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9));
- if (rotX & 0x100)
- color >>= 4;
- else
- color &= 0x0F;
-
- if (color)
- {
- if (window) OBJWindow[xpos] = 1;
- else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
- }
- else if (!window)
- {
- if (OBJLine[xpos] == 0)
- {
- OBJLine[xpos] = pixelattr & 0x180000;
- OBJIndex[xpos] = num;
- }
- }
- }
-
- rotX += rotA;
- rotY += rotC;
- xoff++;
- xpos++;
- }
- }
- }
-}
-
-template
-void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos)
-{
- u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0];
- u16* attrib = &oam[num * 4];
-
- u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000;
- u32 tilenum = attrib[2] & 0x03FF;
- u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3);
-
- u32 wmask = width - 8; // really ((width - 1) & ~0x7)
-
- if ((attrib[0] & 0x1000) && !window)
- {
- // apply Y mosaic
- pixelattr |= 0x100000;
- }
-
- // yflip
- if (attrib[1] & 0x2000)
- ypos = height-1 - ypos;
-
- u32 xoff;
- u32 xend = width;
- if (xpos >= 0)
- {
- xoff = 0;
- if ((xpos+xend) > 256)
- xend = 256-xpos;
- }
- else
- {
- xoff = -xpos;
- xpos = 0;
- }
-
- u16 color = 0; // transparent in all cases
-
- if (spritemode == 3)
- {
- // bitmap sprite
-
- u32 alpha = attrib[2] >> 12;
- if (!alpha) return;
- alpha++;
-
- pixelattr |= (0xC0000000 | (alpha << 24));
-
- if (DispCnt & 0x40)
- {
- if (DispCnt & 0x20)
- {
- // 'reserved'
- // draws nothing
-
- return;
- }
- else
- {
- tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
- tilenum += (ypos * width * 2);
- }
- }
- else
- {
- if (DispCnt & 0x20)
- {
- tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
- tilenum += (ypos * 256 * 2);
- }
- else
- {
- tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
- tilenum += (ypos * 128 * 2);
- }
- }
-
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
- s32 pixelstride;
-
- if (attrib[1] & 0x1000) // xflip
- {
- pixelsaddr += (width-1 << 1);
- pixelsaddr -= (xoff << 1);
- pixelstride = -2;
- }
- else
- {
- pixelsaddr += (xoff << 1);
- pixelstride = 2;
- }
-
- for (; xoff < xend;)
- {
- color = GPU::ReadVRAM_OBJ(pixelsaddr);
-
- pixelsaddr += pixelstride;
-
- if (color & 0x8000)
- {
- if (window) OBJWindow[xpos] = 1;
- else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
- }
- else if (!window)
- {
- if (OBJLine[xpos] == 0)
- {
- OBJLine[xpos] = pixelattr & 0x180000;
- OBJIndex[xpos] = num;
- }
- }
-
- xoff++;
- xpos++;
- }
- }
- else
- {
- if (DispCnt & 0x10)
- {
- tilenum <<= ((DispCnt >> 20) & 0x3);
- tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
- }
- else
- {
- tilenum += ((ypos >> 3) * 0x20);
- }
-
- if (spritemode == 1) pixelattr |= 0x80000000;
- else pixelattr |= 0x10000000;
-
- if (attrib[0] & 0x2000)
- {
- // 256-color
- tilenum <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
- pixelsaddr += ((ypos & 0x7) << 3);
- s32 pixelstride;
-
- if (!window)
- {
- if (!(DispCnt & 0x80000000))
- pixelattr |= 0x1000;
- else
- pixelattr |= ((attrib[2] & 0xF000) >> 4);
- }
-
- if (attrib[1] & 0x1000) // xflip
- {
- pixelsaddr += (((width-1) & wmask) << 3);
- pixelsaddr += ((width-1) & 0x7);
- pixelsaddr -= ((xoff & wmask) << 3);
- pixelsaddr -= (xoff & 0x7);
- pixelstride = -1;
- }
- else
- {
- pixelsaddr += ((xoff & wmask) << 3);
- pixelsaddr += (xoff & 0x7);
- pixelstride = 1;
- }
-
- for (; xoff < xend;)
- {
- color = GPU::ReadVRAM_OBJ(pixelsaddr);
-
- pixelsaddr += pixelstride;
-
- if (color)
- {
- if (window) OBJWindow[xpos] = 1;
- else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
- }
- else if (!window)
- {
- if (OBJLine[xpos] == 0)
- {
- OBJLine[xpos] = pixelattr & 0x180000;
- OBJIndex[xpos] = num;
- }
- }
-
- xoff++;
- xpos++;
- if (!(xoff & 0x7)) pixelsaddr += (56 * pixelstride);
- }
- }
- else
- {
- // 16-color
- tilenum <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
- pixelsaddr += ((ypos & 0x7) << 2);
- s32 pixelstride;
-
- if (!window)
- {
- pixelattr |= 0x1000;
- pixelattr |= ((attrib[2] & 0xF000) >> 8);
- }
-
- // TODO: optimize VRAM access!!
- // TODO: do xflip better? the 'two pixels per byte' thing makes it a bit shitty
-
- if (attrib[1] & 0x1000) // xflip
- {
- pixelsaddr += (((width-1) & wmask) << 2);
- pixelsaddr += (((width-1) & 0x7) >> 1);
- pixelsaddr -= ((xoff & wmask) << 2);
- pixelsaddr -= ((xoff & 0x7) >> 1);
- pixelstride = -1;
- }
- else
- {
- pixelsaddr += ((xoff & wmask) << 2);
- pixelsaddr += ((xoff & 0x7) >> 1);
- pixelstride = 1;
- }
-
- for (; xoff < xend;)
- {
- if (attrib[1] & 0x1000)
- {
- if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ(pixelsaddr) & 0x0F; pixelsaddr--; }
- else color = GPU::ReadVRAM_OBJ(pixelsaddr) >> 4;
- }
- else
- {
- if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ(pixelsaddr) >> 4; pixelsaddr++; }
- else color = GPU::ReadVRAM_OBJ(pixelsaddr) & 0x0F;
- }
-
- if (color)
- {
- if (window) OBJWindow[xpos] = 1;
- else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
- }
- else if (!window)
- {
- if (OBJLine[xpos] == 0)
- {
- OBJLine[xpos] = pixelattr & 0x180000;
- OBJIndex[xpos] = num;
- }
- }
-
- xoff++;
- xpos++;
- if (!(xoff & 0x7)) pixelsaddr += ((attrib[1] & 0x1000) ? -28 : 28);
- }
- }
+ data = GPU::VRAMFlat_BOBJ;
+ mask = 0x1FFFF;
}
}
diff --git a/src/GPU2D.h b/src/GPU2D.h
index 521adf01..0f59ae36 100644
--- a/src/GPU2D.h
+++ b/src/GPU2D.h
@@ -19,11 +19,14 @@
#ifndef GPU2D_H
#define GPU2D_H
+#include "types.h"
+#include "Savestate.h"
+
class GPU2D
{
public:
GPU2D(u32 num);
- ~GPU2D();
+ virtual ~GPU2D() {}
void Reset();
@@ -31,7 +34,7 @@ public:
void SetEnabled(bool enable) { Enabled = enable; }
void SetFramebuffer(u32* buf);
- void SetRenderSettings(bool accel);
+ virtual void SetRenderSettings(bool accel) = 0;
u8 Read8(u32 addr);
u16 Read16(u32 addr);
@@ -52,36 +55,24 @@ public:
void SampleFIFO(u32 offset, u32 num);
- void DrawScanline(u32 line);
- void DrawSprites(u32 line);
+ virtual void DrawScanline(u32 line) = 0;
+ virtual void DrawSprites(u32 line) = 0;
void VBlank();
- void VBlankEnd();
+ virtual void VBlankEnd();
void CheckWindows(u32 line);
- void BGExtPalDirty(u32 base);
- void OBJExtPalDirty();
-
u16* GetBGExtPal(u32 slot, u32 pal);
u16* GetOBJExtPal();
-private:
+ void GetBGVRAM(u8*& data, u32& mask);
+ void GetOBJVRAM(u8*& data, u32& mask);
+
+protected:
u32 Num;
bool Enabled;
u32* Framebuffer;
- bool Accelerated;
-
- u32 BGOBJLine[256*3] __attribute__((aligned (8)));
- u32* _3DLine;
-
- u8 WindowMask[256] __attribute__((aligned (8)));
- u32 OBJLine[256] __attribute__((aligned (8)));
- u8 OBJWindow[256] __attribute__((aligned (8)));
- u8 OBJIndex[256] __attribute__((aligned (8)));
-
- u32 NumSprites;
-
u16 DispFIFO[16];
u32 DispFIFOReadPtr;
u32 DispFIFOWritePtr;
@@ -114,32 +105,61 @@ private:
u8 BGMosaicY, BGMosaicYMax;
u8 OBJMosaicYCount, OBJMosaicY, OBJMosaicYMax;
- u8 MosaicTable[16][256];
- u8* CurBGXMosaicTable;
- u8* CurOBJXMosaicTable;
-
u16 BlendCnt;
u16 BlendAlpha;
u8 EVA, EVB;
u8 EVY;
+ bool CaptureLatch;
u32 CaptureCnt;
u16 MasterBrightness;
- u16 BGExtPalCache[4][16*256];
- u16 OBJExtPalCache[16*256];
- u32 BGExtPalStatus[4];
- u32 OBJExtPalStatus;
+ u8 WindowMask[256] __attribute__((aligned (8)));
+ u8 OBJWindow[256] __attribute__((aligned (8)));
+ void UpdateMosaicCounters(u32 line);
+ void CalculateWindowMask(u32 line);
+
+ virtual void MosaicXSizeChanged() = 0;
+};
+
+class GPU2D_Soft : public GPU2D
+{
+public:
+ GPU2D_Soft(u32 num);
+ ~GPU2D_Soft() override {}
+
+ void SetRenderSettings(bool accel) override;
+
+ void DrawScanline(u32 line) override;
+ void DrawSprites(u32 line) override;
+ void VBlankEnd() override;
+
+protected:
+ void MosaicXSizeChanged() override;
+
+private:
+ bool Accelerated;
+
+ u32 BGOBJLine[256*3] __attribute__((aligned (8)));
+ u32* _3DLine;
+
+ u32 OBJLine[256] __attribute__((aligned (8)));
+ u8 OBJIndex[256] __attribute__((aligned (8)));
+
+ u32 NumSprites;
+
+ u8 MosaicTable[16][256];
+ u8* CurBGXMosaicTable;
+ u8* CurOBJXMosaicTable;
+
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
u32 ColorBlend5(u32 val1, u32 val2);
u32 ColorBrightnessUp(u32 val, u32 factor);
u32 ColorBrightnessDown(u32 val, u32 factor);
u32 ColorComposite(int i, u32 val1, u32 val2);
- void UpdateMosaicCounters(u32 line);
-
template void DrawScanlineBGMode(u32 line);
void DrawScanlineBGMode6(u32 line);
void DrawScanlineBGMode7(u32 line);
@@ -147,22 +167,22 @@ private:
static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
- void (*DrawPixel)(u32* dst, u16 color, u32 flag);
+
+ typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
void DrawBG_3D();
- template void DrawBG_Text(u32 line, u32 bgnum);
- template void DrawBG_Affine(u32 line, u32 bgnum);
- template void DrawBG_Extended(u32 line, u32 bgnum);
- template void DrawBG_Large(u32 line);
+ template void DrawBG_Text(u32 line, u32 bgnum);
+ template void DrawBG_Affine(u32 line, u32 bgnum);
+ template void DrawBG_Extended(u32 line, u32 bgnum);
+ template void DrawBG_Large(u32 line);
void ApplySpriteMosaicX();
+ template
void InterleaveSprites(u32 prio);
template void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
template void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
void DoCapture(u32 line, u32 width);
-
- void CalculateWindowMask(u32 line);
};
#endif
diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp
new file mode 100644
index 00000000..c686bad7
--- /dev/null
+++ b/src/GPU2D_Soft.cpp
@@ -0,0 +1,2215 @@
+#include "GPU2D.h"
+#include "GPU.h"
+
+GPU2D_Soft::GPU2D_Soft(u32 num)
+    : GPU2D(num), Accelerated(false), _3DLine(nullptr), NumSprites(0),
+      CurBGXMosaicTable(MosaicTable[0]), CurOBJXMosaicTable(MosaicTable[0])
+{
+    // Give every scalar/pointer member a defined value, so the drawing code never
+    // reads an indeterminate 'Accelerated' flag or table pointer before the setters run.
+    // MosaicTable[m][x] is x modulo (m+1); row 0 is therefore all zeroes.
+    for (int m = 0; m < 16; m++)
+    {
+        for (int x = 0; x < 256; x++)
+            MosaicTable[m][x] = x % (m+1);
+    }
+}
+
+void GPU2D_Soft::SetRenderSettings(bool accel)
+{
+ Accelerated = accel; // stored flag; the drawing code branches on it to pick the plain or the accelerated line layout
+}
+
+u32 GPU2D_Soft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb)
+{
+    // Weighted sum (weights in 1/16ths) of two colours with 6-bit channels at bits 0-5, 8-13 and 16-21.
+    u32 red   = ((val1 & 0x00003F) * eva + (val2 & 0x00003F) * evb) >> 4;
+    u32 green = (((val1 & 0x003F00) * eva + (val2 & 0x003F00) * evb) >> 4) & 0x007F00;
+    u32 blue  = (((val1 & 0x3F0000) * eva + (val2 & 0x3F0000) * evb) >> 4) & 0x7F0000;
+    // Saturate every channel at its 6-bit maximum; the result always has a top byte of 0xFF.
+    red   = (red   > 0x00003F) ? 0x00003F : red;
+    green = (green > 0x003F00) ? 0x003F00 : green;
+    blue  = (blue  > 0x3F0000) ? 0x3F0000 : blue;
+    return 0xFF000000 | red | green | blue;
+}
+
+u32 GPU2D_Soft::ColorBlend5(u32 val1, u32 val2)
+{
+    // The weight of val1 is taken from the low 5 bits of its top byte, plus one (1..32);
+    // val2 gets the remainder, so that both weights always add up to 32.
+    const u32 weight1 = ((val1 >> 24) & 0x1F) + 1;
+    if (weight1 == 32)
+        return val1; // full weight: val1 is passed through unchanged
+    const u32 weight2 = 32 - weight1;
+    u32 red   = ((val1 & 0x00003F) * weight1 + (val2 & 0x00003F) * weight2) >> 5;
+    u32 green = (((val1 & 0x003F00) * weight1 + (val2 & 0x003F00) * weight2) >> 5) & 0x007F00;
+    u32 blue  = (((val1 & 0x3F0000) * weight1 + (val2 & 0x3F0000) * weight2) >> 5) & 0x7F0000;
+    // weights of 16 or less get one extra unit added to every channel
+    if (weight1 <= 16)
+    {
+        red   += 0x000001;
+        green += 0x000100;
+        blue  += 0x010000;
+    }
+    // saturate every channel at its 6-bit maximum
+    red   = (red   > 0x00003F) ? 0x00003F : red;
+    green = (green > 0x003F00) ? 0x003F00 : green;
+    blue  = (blue  > 0x3F0000) ? 0x3F0000 : blue;
+    return 0xFF000000 | red | green | blue;
+}
+
+u32 GPU2D_Soft::ColorBrightnessUp(u32 val, u32 factor)
+{
+    // Red and blue share one word (bits 0-5 and 16-21); green (bits 8-13) is kept separate.
+    const u32 redBlue = val & 0x3F003F;
+    const u32 green = val & 0x003F00;
+    // Add factor/16 of the distance between each channel and its maximum, masking off spill-over bits.
+    const u32 redBlueGain = (((0x3F003F - redBlue) * factor) >> 4) & 0x3F003F;
+    const u32 greenGain = (((0x003F00 - green) * factor) >> 4) & 0x003F00;
+    return 0xFF000000 | (redBlue + redBlueGain) | (green + greenGain);
+}
+
+u32 GPU2D_Soft::ColorBrightnessDown(u32 val, u32 factor)
+{
+    // Red and blue share one word (bits 0-5 and 16-21); green (bits 8-13) is kept separate.
+    const u32 redBlue = val & 0x3F003F;
+    const u32 green = val & 0x003F00;
+    // Remove factor/16 of each channel's current value, masking off spill-over bits.
+    const u32 redBlueLoss = ((redBlue * factor) >> 4) & 0x3F003F;
+    const u32 greenLoss = ((green * factor) >> 4) & 0x003F00;
+    return 0xFF000000 | (redBlue - redBlueLoss) | (green - greenLoss);
+}
+
+u32 GPU2D_Soft::ColorComposite(int i, u32 val1, u32 val2)
+{
+ u32 coloreffect = 0; // 0: none, 1: ColorBlend4, 2: brightness up, 3: brightness down, 4: ColorBlend5 (see the switch below)
+ u32 eva, evb; // only assigned on the paths that end up selecting effect 1
+ // the top byte of each pixel holds its flags: val1 is the upper pixel, val2 the one beneath it
+ u32 flag1 = val1 >> 24;
+ u32 flag2 = val2 >> 24;
+ // turn the lower pixel's flags into the bit (within bits 8-12) that is tested against BlendCnt
+ u32 target2;
+ if (flag2 & 0x80) target2 = 0x1000;
+ else if (flag2 & 0x40) target2 = 0x0100;
+ else target2 = flag2 << 8;
+
+ if ((flag1 & 0x80) && (BlendCnt & target2))
+ {
+ // sprite blending
+ // (taken regardless of the window mask and of the effect selected in BlendCnt)
+ coloreffect = 1;
+ // flag bit 6 set: the coefficient comes from the pixel's own flag byte instead of EVA/EVB
+ if (flag1 & 0x40)
+ {
+ eva = flag1 & 0x1F;
+ evb = 16 - eva;
+ }
+ else
+ {
+ eva = EVA;
+ evb = EVB;
+ }
+ }
+ else if ((flag1 & 0x40) && (BlendCnt & target2))
+ {
+ // 3D layer blending
+ // (ColorBlend5 derives its weights from val1's top byte)
+ coloreffect = 4;
+ }
+ else
+ {
+ if (flag1 & 0x80) flag1 = 0x10; // flag1 is reused as the bit (within bits 0-4) tested against BlendCnt
+ else if (flag1 & 0x40) flag1 = 0x01;
+ // the effect from BlendCnt bits 6-7 applies only if the upper pixel is selected and window bit 5 is set
+ if ((BlendCnt & flag1) && (WindowMask[i] & 0x20))
+ {
+ coloreffect = (BlendCnt >> 6) & 0x3;
+ // blending additionally needs the lower pixel to be selected; otherwise no effect is applied
+ if (coloreffect == 1)
+ {
+ if (BlendCnt & target2)
+ {
+ eva = EVA;
+ evb = EVB;
+ }
+ else
+ coloreffect = 0;
+ }
+ }
+ }
+
+ switch (coloreffect)
+ {
+ case 0: return val1;
+ case 1: return ColorBlend4(val1, val2, eva, evb);
+ case 2: return ColorBrightnessUp(val1, EVY);
+ case 3: return ColorBrightnessDown(val1, EVY);
+ case 4: return ColorBlend5(val1, val2);
+ }
+ // not reached for the values assigned above; keeps every path returning a value
+ return val1;
+}
+
+void GPU2D_Soft::DrawScanline(u32 line)
+{
+ int stride = Accelerated ? (256*3 + 1) : 256;
+ u32* dst = &Framebuffer[stride * line];
+
+ int n3dline = line;
+ line = GPU::VCount;
+
+ if (Num == 0)
+ {
+ auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG);
+ GPU::MakeVRAMFlat_ABGCoherent(bgDirty);
+ auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal);
+ GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty);
+ auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal);
+ GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty);
+ }
+ else
+ {
+ auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG);
+ GPU::MakeVRAMFlat_BBGCoherent(bgDirty);
+ auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal);
+ GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty);
+ auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal);
+ GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty);
+ }
+
+ bool forceblank = false;
+
+ // scanlines that end up outside of the GPU drawing range
+ // (as a result of writing to VCount) are filled white
+ if (line > 192) forceblank = true;
+
+ // GPU B can be completely disabled by POWCNT1
+ // oddly that's not the case for GPU A
+ if (Num && !Enabled) forceblank = true;
+
+ if (forceblank)
+ {
+ for (int i = 0; i < 256; i++)
+ dst[i] = 0xFFFFFFFF;
+
+ if (Accelerated)
+ {
+ dst[256*3] = 0;
+ }
+ return;
+ }
+
+ u32 dispmode = DispCnt >> 16;
+ dispmode &= (Num ? 0x1 : 0x3);
+
+ if (Num == 0)
+ {
+ if (!Accelerated)
+ _3DLine = GPU3D::GetLine(n3dline);
+ else if ((CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1))
+ {
+ _3DLine = GPU3D::GetLine(n3dline);
+ //GPU3D::GLRenderer::PrepareCaptureFrame();
+ }
+ }
+
+ if (line == 0 && CaptureCnt & (1 << 31))
+ CaptureLatch = true;
+
+ // always render regular graphics
+ DrawScanline_BGOBJ(line);
+ UpdateMosaicCounters(line);
+
+ switch (dispmode)
+ {
+ case 0: // screen off
+ {
+ for (int i = 0; i < 256; i++)
+ dst[i] = 0x003F3F3F;
+ }
+ break;
+
+ case 1: // regular display
+ {
+ int i = 0;
+ for (; i < (stride & ~1); i+=2)
+ *(u64*)&dst[i] = *(u64*)&BGOBJLine[i];
+ }
+ break;
+
+ case 2: // VRAM display
+ {
+ u32 vrambank = (DispCnt >> 18) & 0x3;
+ if (GPU::VRAMMap_LCDC & (1<> 4;
+ u8 b = (color & 0x7C00) >> 9;
+
+ dst[i] = r | (g << 8) | (b << 16);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < 256; i++)
+ {
+ dst[i] = 0;
+ }
+ }
+ }
+ break;
+
+ case 3: // FIFO display
+ {
+ for (int i = 0; i < 256; i++)
+ {
+ u16 color = DispFIFOBuffer[i];
+ u8 r = (color & 0x001F) << 1;
+ u8 g = (color & 0x03E0) >> 4;
+ u8 b = (color & 0x7C00) >> 9;
+
+ dst[i] = r | (g << 8) | (b << 16);
+ }
+ }
+ break;
+ }
+
+ // capture
+ if ((Num == 0) && CaptureLatch)
+ {
+ u32 capwidth, capheight;
+ switch ((CaptureCnt >> 20) & 0x3)
+ {
+ case 0: capwidth = 128; capheight = 128; break;
+ case 1: capwidth = 256; capheight = 64; break;
+ case 2: capwidth = 256; capheight = 128; break;
+ case 3: capwidth = 256; capheight = 192; break;
+ }
+
+ if (line < capheight)
+ DoCapture(line, capwidth);
+ }
+
+ if (Accelerated)
+ {
+ dst[256*3] = MasterBrightness | (DispCnt & 0x30000);
+ return;
+ }
+
+ // master brightness
+ if (dispmode != 0)
+ {
+ if ((MasterBrightness >> 14) == 1)
+ {
+ // up
+ u32 factor = MasterBrightness & 0x1F;
+ if (factor > 16) factor = 16;
+
+ for (int i = 0; i < 256; i++)
+ {
+ dst[i] = ColorBrightnessUp(dst[i], factor);
+ }
+ }
+ else if ((MasterBrightness >> 14) == 2)
+ {
+ // down
+ u32 factor = MasterBrightness & 0x1F;
+ if (factor > 16) factor = 16;
+
+ for (int i = 0; i < 256; i++)
+ {
+ dst[i] = ColorBrightnessDown(dst[i], factor);
+ }
+ }
+ }
+
+ // convert to 32-bit BGRA
+ // note: 32-bit RGBA would be more straightforward, but
+ // BGRA seems to be more compatible (Direct2D soft, cairo...)
+ for (int i = 0; i < 256; i+=2)
+ {
+ u64 c = *(u64*)&dst[i];
+
+ u64 r = (c << 18) & 0xFC000000FC0000;
+ u64 g = (c << 2) & 0xFC000000FC00;
+ u64 b = (c >> 14) & 0xFC000000FC;
+ c = r | g | b;
+
+ *(u64*)&dst[i] = c | ((c & 0x00C0C0C000C0C0C0) >> 6) | 0xFF000000FF000000;
+ }
+}
+
+void GPU2D_Soft::VBlankEnd()
+{
+    GPU2D::VBlankEnd();
+
+#ifdef OGLRENDERER_ENABLED
+    // In accelerated mode, engine 0 asks the GL renderer to prepare a capture frame
+    // when CaptureCnt bit 31 is set and the field in bits 29-30 is anything but 1.
+    // Nothing else happens here beyond the base-class handling above.
+    const bool captureOn = (CaptureCnt & (1<<31)) != 0;
+    const u32 captureSource = (CaptureCnt >> 29) & 0x3;
+    if (Accelerated && (Num == 0) && captureOn && (captureSource != 1))
+        GPU3D::GLRenderer::PrepareCaptureFrame();
+#endif
+}
+
+void GPU2D_Soft::DoCapture(u32 line, u32 width)
+{
+ u32 dstvram = (CaptureCnt >> 16) & 0x3;
+
+ // TODO: confirm this
+ // it should work like VRAM display mode, which requires VRAM to be mapped to LCDC
+ if (!(GPU::VRAMMap_LCDC & (1<> 18) & 0x3) << 14) + (line * width);
+
+ static_assert(GPU::VRAMDirtyGranularity == 512);
+ GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true;
+
+ // TODO: handle 3D in accelerated mode!!
+
+ u32* srcA;
+ if (CaptureCnt & (1<<24))
+ {
+ srcA = _3DLine;
+ }
+ else
+ {
+ srcA = BGOBJLine;
+ if (Accelerated)
+ {
+ // in accelerated mode, compositing is normally done on the GPU
+ // but when doing display capture, we do need the composited output
+ // so we do it here
+
+ for (int i = 0; i < 256; i++)
+ {
+ u32 val1 = BGOBJLine[i];
+ u32 val2 = BGOBJLine[256+i];
+ u32 val3 = BGOBJLine[512+i];
+
+ u32 compmode = (val3 >> 24) & 0xF;
+
+ if (compmode == 4)
+ {
+ // 3D on top, blending
+
+ u32 _3dval = _3DLine[i];
+ if ((_3dval >> 24) > 0)
+ val1 = ColorBlend5(_3dval, val1);
+ else
+ val1 = val2;
+ }
+ else if (compmode == 1)
+ {
+ // 3D on bottom, blending
+
+ u32 _3dval = _3DLine[i];
+ if ((_3dval >> 24) > 0)
+ {
+ u32 eva = (val3 >> 8) & 0x1F;
+ u32 evb = (val3 >> 16) & 0x1F;
+
+ val1 = ColorBlend4(val1, _3dval, eva, evb);
+ }
+ else
+ val1 = val2;
+ }
+ else if (compmode <= 3)
+ {
+ // 3D on top, normal/fade
+
+ u32 _3dval = _3DLine[i];
+ if ((_3dval >> 24) > 0)
+ {
+ u32 evy = (val3 >> 8) & 0x1F;
+
+ val1 = _3dval;
+ if (compmode == 2) val1 = ColorBrightnessUp(val1, evy);
+ else if (compmode == 3) val1 = ColorBrightnessDown(val1, evy);
+ }
+ else
+ val1 = val2;
+ }
+
+ BGOBJLine[i] = val1;
+ }
+ }
+ }
+
+ u16* srcB = NULL;
+ u32 srcBaddr = line * 256;
+
+ if (CaptureCnt & (1<<25))
+ {
+ srcB = &DispFIFOBuffer[0];
+ srcBaddr = 0;
+ }
+ else
+ {
+ u32 srcvram = (DispCnt >> 18) & 0x3;
+ if (GPU::VRAMMap_LCDC & (1<> 16) & 0x3) != 2)
+ srcBaddr += ((CaptureCnt >> 26) & 0x3) << 14;
+ }
+
+ dstaddr &= 0xFFFF;
+ srcBaddr &= 0xFFFF;
+
+ switch ((CaptureCnt >> 29) & 0x3)
+ {
+ case 0: // source A
+ {
+ for (u32 i = 0; i < width; i++)
+ {
+ u32 val = srcA[i];
+
+ // TODO: check what happens when alpha=0
+
+ u32 r = (val >> 1) & 0x1F;
+ u32 g = (val >> 9) & 0x1F;
+ u32 b = (val >> 17) & 0x1F;
+ u32 a = ((val >> 24) != 0) ? 0x8000 : 0;
+
+ dst[dstaddr] = r | (g << 5) | (b << 10) | a;
+ dstaddr = (dstaddr + 1) & 0xFFFF;
+ }
+ }
+ break;
+
+ case 1: // source B
+ {
+ if (srcB)
+ {
+ for (u32 i = 0; i < width; i++)
+ {
+ dst[dstaddr] = srcB[srcBaddr];
+ srcBaddr = (srcBaddr + 1) & 0xFFFF;
+ dstaddr = (dstaddr + 1) & 0xFFFF;
+ }
+ }
+ else
+ {
+ for (u32 i = 0; i < width; i++)
+ {
+ dst[dstaddr] = 0;
+ dstaddr = (dstaddr + 1) & 0xFFFF;
+ }
+ }
+ }
+ break;
+
+ case 2: // sources A+B
+ case 3:
+ {
+ u32 eva = CaptureCnt & 0x1F;
+ u32 evb = (CaptureCnt >> 8) & 0x1F;
+
+ // checkme
+ if (eva > 16) eva = 16;
+ if (evb > 16) evb = 16;
+
+ if (srcB)
+ {
+ for (u32 i = 0; i < width; i++)
+ {
+ u32 val = srcA[i];
+
+ // TODO: check what happens when alpha=0
+
+ u32 rA = (val >> 1) & 0x1F;
+ u32 gA = (val >> 9) & 0x1F;
+ u32 bA = (val >> 17) & 0x1F;
+ u32 aA = ((val >> 24) != 0) ? 1 : 0;
+
+ val = srcB[srcBaddr];
+
+ u32 rB = val & 0x1F;
+ u32 gB = (val >> 5) & 0x1F;
+ u32 bB = (val >> 10) & 0x1F;
+ u32 aB = val >> 15;
+
+ u32 rD = ((rA * aA * eva) + (rB * aB * evb)) >> 4;
+ u32 gD = ((gA * aA * eva) + (gB * aB * evb)) >> 4;
+ u32 bD = ((bA * aA * eva) + (bB * aB * evb)) >> 4;
+ u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0);
+
+ if (rD > 0x1F) rD = 0x1F;
+ if (gD > 0x1F) gD = 0x1F;
+ if (bD > 0x1F) bD = 0x1F;
+
+ dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15);
+ srcBaddr = (srcBaddr + 1) & 0xFFFF;
+ dstaddr = (dstaddr + 1) & 0xFFFF;
+ }
+ }
+ else
+ {
+ for (u32 i = 0; i < width; i++)
+ {
+ u32 val = srcA[i];
+
+ // TODO: check what happens when alpha=0
+
+ u32 rA = (val >> 1) & 0x1F;
+ u32 gA = (val >> 9) & 0x1F;
+ u32 bA = (val >> 17) & 0x1F;
+ u32 aA = ((val >> 24) != 0) ? 1 : 0;
+
+ u32 rD = (rA * aA * eva) >> 4;
+ u32 gD = (gA * aA * eva) >> 4;
+ u32 bD = (bA * aA * eva) >> 4;
+ u32 aD = (eva>0 ? aA : 0);
+
+ dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15);
+ dstaddr = (dstaddr + 1) & 0xFFFF;
+ }
+ }
+ }
+ break;
+ }
+}
+
+#define DoDrawBG(type, line, num) \
+ do \
+ { \
+ if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \
+ { \
+ if (Accelerated) DrawBG_##type(line, num); \
+ else DrawBG_##type(line, num); \
+ } \
+ else \
+ { \
+ if (Accelerated) DrawBG_##type(line, num); \
+ else DrawBG_##type(line, num); \
+ } \
+ } while (false)
+
+#define DoDrawBG_Large(line) \
+ do \
+ { \
+ if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \
+ { \
+ if (Accelerated) DrawBG_Large(line); \
+ else DrawBG_Large(line); \
+ } \
+ else \
+ { \
+ if (Accelerated) DrawBG_Large(line); \
+ else DrawBG_Large(line); \
+ } \
+ } while (false)
+
+#define DoInterleaveSprites(prio) \
+ if (Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio);
+
+template
+void GPU2D_Soft::DrawScanlineBGMode(u32 line)
+{ // draws the enabled backgrounds and the sprites of one scanline for BG modes 0-5 (bgmode is the compile-time mode)
+ for (int i = 3; i >= 0; i--) // one pass per value of the low two BGCnt bits, from 3 down to 0
+ {
+ if ((BGCnt[3] & 0x3) == i) // within one pass the order is BG3, BG2, BG1, BG0, then sprites
+ {
+ if (DispCnt & 0x0800) // DispCnt bit 11 gates BG3
+ {
+ if (bgmode >= 3) // modes 3-5: BG3 is drawn as an extended BG
+ DoDrawBG(Extended, line, 3);
+ else if (bgmode >= 1) // modes 1-2: BG3 is drawn as an affine BG
+ DoDrawBG(Affine, line, 3);
+ else // mode 0: BG3 is drawn as a text BG
+ DoDrawBG(Text, line, 3);
+ }
+ }
+ if ((BGCnt[2] & 0x3) == i)
+ {
+ if (DispCnt & 0x0400) // DispCnt bit 10 gates BG2
+ {
+ if (bgmode == 5) // mode 5: BG2 is drawn as an extended BG
+ DoDrawBG(Extended, line, 2);
+ else if (bgmode == 4 || bgmode == 2) // modes 2 and 4: BG2 is drawn as an affine BG
+ DoDrawBG(Affine, line, 2);
+ else // modes 0, 1 and 3: BG2 is drawn as a text BG
+ DoDrawBG(Text, line, 2);
+ }
+ }
+ if ((BGCnt[1] & 0x3) == i)
+ {
+ if (DispCnt & 0x0200) // DispCnt bit 9 gates BG1, which is always a text BG here
+ {
+ DoDrawBG(Text, line, 1);
+ }
+ }
+ if ((BGCnt[0] & 0x3) == i)
+ {
+ if (DispCnt & 0x0100) // DispCnt bit 8 gates BG0
+ {
+ if ((!Num) && (DispCnt & 0x8)) // on engine 0 with DispCnt bit 3 set, BG0 is the 3D layer
+ DrawBG_3D();
+ else
+ DoDrawBG(Text, line, 0);
+ }
+ }
+ if ((DispCnt & 0x1000) && NumSprites) // DispCnt bit 12 gates sprites; skipped when DrawSprites counted none
+ DoInterleaveSprites(0x40000 | (i<<16)); // selects the OBJLine pixels whose bits 16-18 equal this value
+ }
+}
+
+void GPU2D_Soft::DrawScanlineBGMode6(u32 line)
+{ // BG mode 6: only the large BG2, the 3D layer in the BG0 slot, and the sprites are drawn
+ for (int i = 3; i >= 0; i--) // one pass per value of the low two BGCnt bits, from 3 down to 0
+ {
+ if ((BGCnt[2] & 0x3) == i)
+ {
+ if (DispCnt & 0x0400) // DispCnt bit 10 gates BG2
+ {
+ DoDrawBG_Large(line);
+ }
+ }
+ if ((BGCnt[0] & 0x3) == i)
+ {
+ if (DispCnt & 0x0100) // DispCnt bit 8 gates BG0
+ {
+ if ((!Num) && (DispCnt & 0x8)) // BG0 is drawn only as the 3D layer (engine 0, DispCnt bit 3); there is no text fallback
+ DrawBG_3D();
+ }
+ }
+ if ((DispCnt & 0x1000) && NumSprites) // DispCnt bit 12 gates sprites; skipped when DrawSprites counted none
+ DoInterleaveSprites(0x40000 | (i<<16)) // no ';' needed here: the macro body already ends with one
+ }
+}
+
+void GPU2D_Soft::DrawScanlineBGMode7(u32 line)
+{
+ // mode 7 only has text-mode BG0 and BG1
+ // one pass per value of the low two BGCnt bits, from 3 down to 0; BG1 first, then BG0, then sprites
+ for (int i = 3; i >= 0; i--)
+ {
+ if ((BGCnt[1] & 0x3) == i)
+ {
+ if (DispCnt & 0x0200) // DispCnt bit 9 gates BG1
+ {
+ DoDrawBG(Text, line, 1);
+ }
+ }
+ if ((BGCnt[0] & 0x3) == i)
+ {
+ if (DispCnt & 0x0100) // DispCnt bit 8 gates BG0
+ {
+ if ((!Num) && (DispCnt & 0x8)) // on engine 0 with DispCnt bit 3 set, BG0 is the 3D layer
+ DrawBG_3D();
+ else
+ DoDrawBG(Text, line, 0);
+ }
+ }
+ if ((DispCnt & 0x1000) && NumSprites) // DispCnt bit 12 gates sprites; skipped when DrawSprites counted none
+ DoInterleaveSprites(0x40000 | (i<<16)) // no ';' needed here: the macro body already ends with one
+ }
+}
+
+void GPU2D_Soft::DrawScanline_BGOBJ(u32 line)
+{
+    // Builds the BG/OBJ layers of one scanline in BGOBJLine; forced blank (DispCnt bit 7) skips compositing.
+    if (DispCnt & (1<<7))
+    {
+        for (int i = 0; i < 256; i++)
+            BGOBJLine[i] = 0xFF3F3F3F;
+
+        return;
+    }
+
+    u64 backdrop;
+    if (Num) backdrop = *(u16*)&GPU::Palette[0x400];
+    else     backdrop = *(u16*)&GPU::Palette[0];
+    // expand the backdrop colour, tag it with flag 0x20, and fill the top layer two pixels at a time
+    {
+        u8 r = (backdrop & 0x001F) << 1;
+        u8 g = (backdrop & 0x03E0) >> 4;
+        u8 b = (backdrop & 0x7C00) >> 9;
+
+        backdrop = r | (g << 8) | (b << 16) | 0x20000000;
+        backdrop |= (backdrop << 32);
+
+        for (int i = 0; i < 256; i+=2)
+            *(u64*)&BGOBJLine[i] = backdrop;
+    }
+    // when none of DispCnt bits 13-15 is set, every window bit is set for every pixel
+    if (DispCnt & 0xE000)
+        CalculateWindowMask(line);
+    else
+        memset(WindowMask, 0xFF, 256);
+
+    ApplySpriteMosaicX();
+
+    switch (DispCnt & 0x7)
+    {
+    case 0: DrawScanlineBGMode<0>(line); break;
+    case 1: DrawScanlineBGMode<1>(line); break;
+    case 2: DrawScanlineBGMode<2>(line); break;
+    case 3: DrawScanlineBGMode<3>(line); break;
+    case 4: DrawScanlineBGMode<4>(line); break;
+    case 5: DrawScanlineBGMode<5>(line); break;
+    case 6: DrawScanlineBGMode6(line); break;
+    case 7: DrawScanlineBGMode7(line); break;
+    }
+
+    // color special effects
+    // can likely be optimized
+
+    if (!Accelerated)
+    {
+        for (int i = 0; i < 256; i++)
+        {
+            u32 val1 = BGOBJLine[i];
+            u32 val2 = BGOBJLine[256+i];
+
+            BGOBJLine[i] = ColorComposite(i, val1, val2);
+        }
+    }
+    else
+    {
+        if (Num == 0)
+        {
+            for (int i = 0; i < 256; i++)
+            {
+                u32 val1 = BGOBJLine[i];
+                u32 val2 = BGOBJLine[256+i];
+                u32 val3 = BGOBJLine[512+i];
+
+                u32 flag1 = val1 >> 24;
+                u32 flag2 = val2 >> 24;
+
+                u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
+
+                u32 target1;
+                if      (flag1 & 0x80) target1 = 0x0010;
+                else if (flag1 & 0x40) target1 = 0x0001;
+                else                   target1 = flag1;
+
+                u32 target2;
+                if      (flag2 & 0x80) target2 = 0x1000;
+                else if (flag2 & 0x40) target2 = 0x0100;
+                else                   target2 = flag2 << 8;
+
+                if (((flag1 & 0xC0) == 0x40) && (BlendCnt & target2))
+                {
+                    // 3D on top, blending
+
+                    BGOBJLine[i]     = val2;
+                    BGOBJLine[256+i] = ColorComposite(i, val2, val3);
+                    BGOBJLine[512+i] = 0x04000000;
+                }
+                else if ((flag1 & 0xC0) == 0x40)
+                {
+                    // 3D on top, normal/fade
+
+                    if (bldcnteffect == 1) bldcnteffect = 0;
+                    if (!(BlendCnt & 0x0001)) bldcnteffect = 0;
+                    if (!(WindowMask[i] & 0x20)) bldcnteffect = 0;
+
+                    BGOBJLine[i]     = val2;
+                    BGOBJLine[256+i] = ColorComposite(i, val2, val3);
+                    BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8);
+                }
+                else if (((flag2 & 0xC0) == 0x40) && ((BlendCnt & 0x01C0) == 0x0140))
+                {
+                    // 3D on bottom, blending
+
+                    // start from the global coefficients (EVA/EVB)
+                    u32 eva = EVA, evb = EVB;
+                    if ((flag1 & 0xC0) == 0xC0)
+                    {
+                        // the top pixel carries its own coefficient in its flag byte
+                        eva = flag1 & 0x1F;
+                        evb = 16 - eva;
+                    }
+                    else if (!(((BlendCnt & target1) && (WindowMask[i] & 0x20)) ||
+                               ((flag1 & 0xC0) == 0x80)))
+                    {
+                        // the top pixel does not blend here: use the same mode value (7) as the no-3D case below
+                        bldcnteffect = 7;
+                    }
+                    // store the coefficients chosen above (previously EVA/EVB were stored unconditionally)
+                    BGOBJLine[i]     = val1;
+                    BGOBJLine[256+i] = ColorComposite(i, val1, val3);
+                    BGOBJLine[512+i] = (bldcnteffect << 24) | (evb << 16) | (eva << 8);
+                }
+                else
+                {
+                    // no potential 3D pixel involved
+
+                    BGOBJLine[i]     = ColorComposite(i, val1, val2);
+                    BGOBJLine[256+i] = 0;
+                    BGOBJLine[512+i] = 0x07000000;
+                }
+            }
+        }
+        else
+        {
+            for (int i = 0; i < 256; i++)
+            {
+                u32 val1 = BGOBJLine[i];
+                u32 val2 = BGOBJLine[256+i];
+
+                BGOBJLine[i]     = ColorComposite(i, val1, val2);
+                BGOBJLine[256+i] = 0;
+                BGOBJLine[512+i] = 0x07000000;
+            }
+        }
+    }
+    // advance the BG Y mosaic counter; the vertical size is re-read whenever the counter wraps
+    if (BGMosaicY >= BGMosaicYMax)
+    {
+        BGMosaicY = 0;
+        BGMosaicYMax = BGMosaicSize[1];
+    }
+    else
+        BGMosaicY++;
+
+    /*if (OBJMosaicY >= OBJMosaicYMax)
+    {
+        OBJMosaicY = 0;
+        OBJMosaicYMax = OBJMosaicSize[1];
+    }
+    else
+        OBJMosaicY++;*/
+}
+
+
+void GPU2D_Soft::DrawPixel_Normal(u32* dst, u16 color, u32 flag)
+{
+    // Widen the three 5-bit channels of 'color' to even 6-bit values; bit 15 is ignored.
+    const u32 red = (color & 0x001F) << 1;
+    const u32 green = (color & 0x03E0) >> 4;
+    const u32 blue = (color & 0x7C00) >> 9;
+    // The current top pixel moves to the second layer (256 entries on) before being replaced.
+    dst[256] = dst[0];
+    dst[0] = red | (green << 8) | (blue << 16) | flag;
+}
+
+void GPU2D_Soft::DrawPixel_Accel(u32* dst, u16 color, u32 flag)
+{
+    // Three-layer variant: both existing layers (256 entries apart) move down one slot first.
+    const u32 red = (color & 0x001F) << 1;
+    const u32 green = (color & 0x03E0) >> 4;
+    const u32 blue = (color & 0x7C00) >> 9;
+    dst[512] = dst[256];
+    dst[256] = dst[0];
+    dst[0] = red | (green << 8) | (blue << 16) | flag;
+}
+
+void GPU2D_Soft::DrawBG_3D()
+{
+    // Layers the 3D output onto BGOBJLine wherever bit 0 of the window mask allows it.
+    if (Accelerated)
+    {
+        // The 3D pixels themselves are not read here: a placeholder is pushed on
+        // top of the three layers, and the existing ones shift down by one.
+        for (int x = 0; x < 256; x++)
+        {
+            if (WindowMask[x] & 0x01)
+            {
+                BGOBJLine[x+512] = BGOBJLine[x+256];
+                BGOBJLine[x+256] = BGOBJLine[x];
+                BGOBJLine[x] = 0x40000000; // 3D-layer placeholder
+            }
+        }
+        return;
+    }
+
+    for (int x = 0; x < 256; x++)
+    {
+        const u32 pixel = _3DLine[x];
+        // skip pixels whose top byte is zero and pixels masked out by the window
+        if ((pixel >> 24) == 0 || !(WindowMask[x] & 0x01)) continue;
+
+        BGOBJLine[x+256] = BGOBJLine[x];
+        BGOBJLine[x] = pixel | 0x40000000;
+    }
+}
+
+template
+void GPU2D_Soft::DrawBG_Text(u32 line, u32 bgnum)
+{
+ u16 bgcnt = BGCnt[bgnum];
+
+ u32 tilesetaddr, tilemapaddr;
+ u16* pal;
+ u32 extpal, extpalslot;
+
+ u16 xoff = BGXPos[bgnum];
+ u16 yoff = BGYPos[bgnum] + line;
+
+ if (bgcnt & 0x0040)
+ {
+ // vertical mosaic
+ yoff -= BGMosaicY;
+ }
+
+ u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0;
+
+ extpal = (DispCnt & 0x40000000);
+ if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum;
+
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(bgvram, bgvrammask);
+ if (Num)
+ {
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
+
+ pal = (u16*)&GPU::Palette[0x400];
+ }
+ else
+ {
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+
+ pal = (u16*)&GPU::Palette[0];
+ }
+
+ // adjust Y position in tilemap
+ if (bgcnt & 0x8000)
+ {
+ tilemapaddr += ((yoff & 0x1F8) << 3);
+ if (bgcnt & 0x4000)
+ tilemapaddr += ((yoff & 0x100) << 3);
+ }
+ else
+ tilemapaddr += ((yoff & 0xF8) << 3);
+
+ u16 curtile;
+ u16* curpal;
+ u32 pixelsaddr;
+ u8 color;
+ u32 lastxpos;
+
+ if (bgcnt & 0x0080)
+ {
+ // 256-color
+
+ // preload shit as needed
+ if ((xoff & 0x7) || mosaic)
+ {
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask];
+
+ if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
+ else curpal = pal;
+
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6)
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3);
+ }
+
+ if (mosaic) lastxpos = xoff;
+
+ for (int i = 0; i < 256; i++)
+ {
+ u32 xpos;
+ if (mosaic) xpos = xoff - CurBGXMosaicTable[i];
+ else xpos = xoff;
+
+ if ((!mosaic && (!(xpos & 0x7))) ||
+ (mosaic && ((xpos >> 3) != (lastxpos >> 3))))
+ {
+ // load a new tile
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
+
+ if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
+ else curpal = pal;
+
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6)
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3);
+
+ if (mosaic) lastxpos = xpos;
+ }
+
+ // draw pixel
+ if (WindowMask[i] & (1<> 2) + ((xoff & widexmask) << 3))) & bgvrammask];
+ curpal = pal + ((curtile & 0xF000) >> 8);
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
+ }
+
+ if (mosaic) lastxpos = xoff;
+
+ for (int i = 0; i < 256; i++)
+ {
+ u32 xpos;
+ if (mosaic) xpos = xoff - CurBGXMosaicTable[i];
+ else xpos = xoff;
+
+ if ((!mosaic && (!(xpos & 0x7))) ||
+ (mosaic && ((xpos >> 3) != (lastxpos >> 3))))
+ {
+ // load a new tile
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
+ curpal = pal + ((curtile & 0xF000) >> 8);
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
+
+ if (mosaic) lastxpos = xpos;
+ }
+
+ // draw pixel
+ if (WindowMask[i] & (1<> 1)) & bgvrammask] >> 4;
+ }
+ else
+ {
+ color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F;
+ }
+
+ if (color)
+ drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<
+void GPU2D_Soft::DrawBG_Affine(u32 line, u32 bgnum)
+{
+ u16 bgcnt = BGCnt[bgnum];
+
+ u32 tilesetaddr, tilemapaddr;
+ u16* pal;
+
+ u32 coordmask;
+ u32 yshift;
+ switch (bgcnt & 0xC000)
+ {
+ case 0x0000: coordmask = 0x07800; yshift = 7; break;
+ case 0x4000: coordmask = 0x0F800; yshift = 8; break;
+ case 0x8000: coordmask = 0x1F800; yshift = 9; break;
+ case 0xC000: coordmask = 0x3F800; yshift = 10; break;
+ }
+
+ u32 overflowmask;
+ if (bgcnt & 0x2000) overflowmask = 0;
+ else overflowmask = ~(coordmask | 0x7FF);
+
+ s16 rotA = BGRotA[bgnum-2];
+ s16 rotB = BGRotB[bgnum-2];
+ s16 rotC = BGRotC[bgnum-2];
+ s16 rotD = BGRotD[bgnum-2];
+
+ s32 rotX = BGXRefInternal[bgnum-2];
+ s32 rotY = BGYRefInternal[bgnum-2];
+
+ if (bgcnt & 0x0040)
+ {
+ // vertical mosaic
+ rotX -= (BGMosaicY * rotB);
+ rotY -= (BGMosaicY * rotD);
+ }
+
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(bgvram, bgvrammask);
+
+ if (Num)
+ {
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
+
+ pal = (u16*)&GPU::Palette[0x400];
+ }
+ else
+ {
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+
+ pal = (u16*)&GPU::Palette[0];
+ }
+
+ u16 curtile;
+ u8 color;
+
+ yshift -= 3;
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (WindowMask[i] & (1<> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask];
+
+ // draw pixel
+ u32 tilexoff = (finalX >> 8) & 0x7;
+ u32 tileyoff = (finalY >> 8) & 0x7;
+
+ color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
+
+ if (color)
+ drawPixel(&BGOBJLine[i], pal[color], 0x01000000<
+void GPU2D_Soft::DrawBG_Extended(u32 line, u32 bgnum)
+{
+ u16 bgcnt = BGCnt[bgnum];
+
+ u32 tilesetaddr, tilemapaddr;
+ u16* pal;
+ u32 extpal;
+
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(bgvram, bgvrammask);
+
+ extpal = (DispCnt & 0x40000000);
+
+ s16 rotA = BGRotA[bgnum-2];
+ s16 rotB = BGRotB[bgnum-2];
+ s16 rotC = BGRotC[bgnum-2];
+ s16 rotD = BGRotD[bgnum-2];
+
+ s32 rotX = BGXRefInternal[bgnum-2];
+ s32 rotY = BGYRefInternal[bgnum-2];
+
+ if (bgcnt & 0x0040)
+ {
+ // vertical mosaic
+ rotX -= (BGMosaicY * rotB);
+ rotY -= (BGMosaicY * rotD);
+ }
+
+ if (bgcnt & 0x0080)
+ {
+ // bitmap modes
+
+ u32 xmask, ymask;
+ u32 yshift;
+ switch (bgcnt & 0xC000)
+ {
+ case 0x0000: xmask = 0x07FFF; ymask = 0x07FFF; yshift = 7; break;
+ case 0x4000: xmask = 0x0FFFF; ymask = 0x0FFFF; yshift = 8; break;
+ case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break;
+ case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break;
+ }
+
+ u32 ofxmask, ofymask;
+ if (bgcnt & 0x2000)
+ {
+ ofxmask = 0;
+ ofymask = 0;
+ }
+ else
+ {
+ ofxmask = ~xmask;
+ ofymask = ~ymask;
+ }
+
+ if (Num) tilemapaddr = ((bgcnt & 0x1F00) << 6);
+ else tilemapaddr = ((bgcnt & 0x1F00) << 6);
+
+ if (bgcnt & 0x0004)
+ {
+ // direct color bitmap
+
+ u16 color;
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (WindowMask[i] & (1<> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask];
+
+ if (color & 0x8000)
+ drawPixel(&BGOBJLine[i], color, 0x01000000<> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
+
+ if (color)
+ drawPixel(&BGOBJLine[i], pal[color], 0x01000000<> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+
+ pal = (u16*)&GPU::Palette[0];
+ }
+
+ u16 curtile;
+ u16* curpal;
+ u8 color;
+
+ yshift -= 3;
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (WindowMask[i] & (1<> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask];
+
+ if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12);
+ else curpal = pal;
+
+ // draw pixel
+ u32 tilexoff = (finalX >> 8) & 0x7;
+ u32 tileyoff = (finalY >> 8) & 0x7;
+
+ if (curtile & 0x0400) tilexoff = 7-tilexoff;
+ if (curtile & 0x0800) tileyoff = 7-tileyoff;
+
+ color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
+
+ if (color)
+ drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<
+void GPU2D_Soft::DrawBG_Large(u32 line) // BG is always BG2
+{
+ u16 bgcnt = BGCnt[2];
+
+ u32 tilesetaddr, tilemapaddr;
+ u16* pal;
+
+ // large BG sizes:
+ // 0: 512x1024
+ // 1: 1024x512
+ // 2: 512x256
+ // 3: 512x512
+ u32 xmask, ymask;
+ u32 yshift;
+ switch (bgcnt & 0xC000)
+ {
+ case 0x0000: xmask = 0x1FFFF; ymask = 0x3FFFF; yshift = 9; break;
+ case 0x4000: xmask = 0x3FFFF; ymask = 0x1FFFF; yshift = 10; break;
+ case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break;
+ case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break;
+ }
+
+ u32 ofxmask, ofymask;
+ if (bgcnt & 0x2000)
+ {
+ ofxmask = 0;
+ ofymask = 0;
+ }
+ else
+ {
+ ofxmask = ~xmask;
+ ofymask = ~ymask;
+ }
+
+ s16 rotA = BGRotA[0];
+ s16 rotB = BGRotB[0];
+ s16 rotC = BGRotC[0];
+ s16 rotD = BGRotD[0];
+
+ s32 rotX = BGXRefInternal[0];
+ s32 rotY = BGYRefInternal[0];
+
+ if (bgcnt & 0x0040)
+ {
+ // vertical mosaic
+ rotX -= (BGMosaicY * rotB);
+ rotY -= (BGMosaicY * rotD);
+ }
+
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(bgvram, bgvrammask);
+
+ // 256-color bitmap
+
+ if (Num) pal = (u16*)&GPU::Palette[0x400];
+ else pal = (u16*)&GPU::Palette[0];
+
+ u8 color;
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (WindowMask[i] & (1<<2))
+ {
+ s32 finalX, finalY;
+ if (mosaic)
+ {
+ int im = CurBGXMosaicTable[i];
+ finalX = rotX - (im * rotA);
+ finalY = rotY - (im * rotC);
+ }
+ else
+ {
+ finalX = rotX;
+ finalY = rotY;
+ }
+
+ if (!(finalX & ofxmask) && !(finalY & ofymask))
+ {
+ color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
+
+ if (color)
+ drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
+ }
+ }
+
+ rotX += rotA;
+ rotY += rotC;
+ }
+
+ BGXRefInternal[0] += rotB;
+ BGYRefInternal[0] += rotD;
+}
+
+// OBJ line buffer:
+// * bit0-15: color (bit15=1: direct color, bit15=0: palette index, bit12=0 to indicate extpal)
+// * bit16-17: BG-relative priority
+// * bit18: non-transparent sprite pixel exists here
+// * bit20: X mosaic should be applied here (the code tests and sets 0x100000)
+// * bit24-31: compositor flags
+
+void GPU2D_Soft::ApplySpriteMosaicX()
+{
+    // Horizontal mosaic for sprites, applied as a post-pass over the finished OBJLine.
+    // A horizontal OBJ mosaic size of zero means there is nothing to do.
+    if (OBJMosaicSize[0] == 0) return;
+
+    u32 held = OBJLine[0];
+
+    for (u32 x = 1; x < 256; x++)
+    {
+        // only pixels carrying the 0x100000 flag take part in the mosaic
+        if (OBJLine[x] & 0x100000)
+        {
+            // a fresh value is latched when OBJIndex changes from the previous pixel
+            // or the mosaic table offset is 0; otherwise the latched value is repeated
+            const bool latch = (OBJIndex[x] != OBJIndex[x-1]) || (CurOBJXMosaicTable[x] == 0);
+            if (latch)
+                held = OBJLine[x];
+            else
+                OBJLine[x] = held;
+        }
+    }
+}
+
+template
+void GPU2D_Soft::InterleaveSprites(u32 prio)
+{
+    // Draws onto BGOBJLine every OBJLine pixel whose bits 16-18 equal 'prio' and
+    // whose position has window bit 4 set, using the drawPixel routine.
+    // The pixel's top byte is handed to drawPixel as the flag value.
+    u16* const basePal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+    const bool useExtPal = (DispCnt & 0x80000000) != 0;
+
+    if (!useExtPal)
+    {
+        // Fast path without extended palettes: a pixel is either a direct
+        // colour (bit 15 set) or an index into the standard palette.
+        for (u32 x = 0; x < 256; x++)
+        {
+            const u32 px = OBJLine[x];
+            if ((px & 0x70000) != prio || !(WindowMask[x] & 0x10))
+                continue;
+
+            const u16 colour = (px & 0x8000) ? (px & 0x7FFF) : basePal[px & 0xFF];
+            drawPixel(&BGOBJLine[x], colour, px & 0xFF000000);
+        }
+        return;
+    }
+
+    // Extended palettes enabled (DispCnt bit 31): for paletted pixels, bit 12
+    // selects the standard palette; otherwise the low 12 bits index the
+    // extended palette returned by GetOBJExtPal().
+    u16* const extPal = GetOBJExtPal();
+
+    for (u32 x = 0; x < 256; x++)
+    {
+        const u32 px = OBJLine[x];
+        if ((px & 0x70000) != prio || !(WindowMask[x] & 0x10))
+            continue;
+
+        u16 colour;
+        if (px & 0x8000)
+            colour = px & 0x7FFF;
+        else if (px & 0x1000)
+            colour = basePal[px & 0xFF];
+        else
+            colour = extPal[px & 0xFFF];
+
+        drawPixel(&BGOBJLine[x], colour, px & 0xFF000000);
+    }
+}
+
+
+#define DoDrawSprite(type, ...) \
+ if (iswin) \
+ { \
+ DrawSprite_##type(__VA_ARGS__); \
+ } \
+ else \
+ { \
+ DrawSprite_##type(__VA_ARGS__); \
+ }
+
+void GPU2D_Soft::DrawSprites(u32 line)
+{
+    // Rebuilds OBJLine/OBJWindow/OBJIndex for 'line': OAM entries are visited by attribute 2
+    // bits 10-11 (3 down to 0), then by sprite number (127 down to 0), and drawn if they hit the line.
+    if (line == 0)
+    {
+        // reset the Y mosaic counters here
+        // TODO: find out when those are supposed to be reset
+        // it would make sense to reset them at the end of VBlank; however, sprites are
+        // rendered one scanline in advance, so they need to be reset a bit earlier
+        OBJMosaicY = 0;
+        OBJMosaicYCount = 0;
+    }
+
+    if (Num == 0)
+    {
+        auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ);
+        GPU::MakeVRAMFlat_AOBJCoherent(objDirty);
+    }
+    else
+    {
+        auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ);
+        GPU::MakeVRAMFlat_BOBJCoherent(objDirty);
+    }
+
+    NumSprites = 0;
+    memset(OBJLine, 0, 256*4);
+    memset(OBJWindow, 0, 256);
+    if (!(DispCnt & 0x1000)) return;
+
+    memset(OBJIndex, 0xFF, 256);
+
+    u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0];
+    // sprite dimensions, indexed by (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); static so they are built once
+    static const s32 spritewidth[16] =
+    {
+        8, 16, 8, 8,
+        16, 32, 8, 8,
+        32, 32, 16, 8,
+        64, 64, 32, 8
+    };
+    static const s32 spriteheight[16] =
+    {
+        8, 8, 16, 8,
+        16, 8, 32, 8,
+        32, 16, 32, 8,
+        64, 32, 64, 8
+    };
+
+    for (int bgnum = 0x0C00; bgnum >= 0x0000; bgnum -= 0x0400)
+    {
+        for (int sprnum = 127; sprnum >= 0; sprnum--)
+        {
+            u16* attrib = &oam[sprnum*4];
+
+            if ((attrib[2] & 0x0C00) != bgnum)
+                continue;
+            // mode 2 in attribute 0 bits 10-11 selects the window variant of the sprite drawing routines
+            bool iswin = (((attrib[0] >> 10) & 0x3) == 2);
+
+            u32 sprline;
+            if ((attrib[0] & 0x1000) && !iswin)
+            {
+                // apply Y mosaic
+                sprline = OBJMosaicY;
+            }
+            else
+                sprline = line;
+
+            if (attrib[0] & 0x0100)
+            {
+                u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12);
+                s32 width = spritewidth[sizeparam];
+                s32 height = spriteheight[sizeparam];
+                s32 boundwidth = width;
+                s32 boundheight = height;
+                // attribute 0 bit 9 doubles the bounding box of a rotscale sprite
+                if (attrib[0] & 0x0200)
+                {
+                    boundwidth <<= 1;
+                    boundheight <<= 1;
+                }
+
+                u32 ypos = attrib[0] & 0xFF;
+                ypos = (sprline - ypos) & 0xFF;
+                if (ypos >= (u32)boundheight)
+                    continue;
+                // sign-extend the 9-bit X coordinate
+                s32 xpos = (s32)(attrib[1] << 23) >> 23;
+                if (xpos <= -boundwidth)
+                    continue;
+
+                // the rotation/scaling parameter group is looked up by DrawSprite_Rotscale itself
+
+                DoDrawSprite(Rotscale, sprnum, boundwidth, boundheight, width, height, xpos, ypos);
+
+                NumSprites++;
+            }
+            else
+            {
+                if (attrib[0] & 0x0200)
+                    continue;
+
+                u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12);
+                s32 width = spritewidth[sizeparam];
+                s32 height = spriteheight[sizeparam];
+
+                u32 ypos = attrib[0] & 0xFF;
+                ypos = (sprline - ypos) & 0xFF;
+                if (ypos >= (u32)height)
+                    continue;
+                // sign-extend the 9-bit X coordinate
+                s32 xpos = (s32)(attrib[1] << 23) >> 23;
+                if (xpos <= -width)
+                    continue;
+
+                DoDrawSprite(Normal, sprnum, width, height, xpos, ypos);
+
+                NumSprites++;
+            }
+        }
+    }
+}
+
+// Draws the part of a rotated/scaled sprite that falls on the current scanline.
+//
+// Template parameter:
+//   window - true when the sprite is drawn as an OBJ-window sprite: opaque
+//            pixels then only set OBJWindow[], and OBJLine[]/OBJIndex[] are
+//            left untouched.
+//
+// Parameters:
+//   num                     - sprite index; its attributes are oam[num*4 ..]
+//   boundwidth, boundheight - size of the on-screen bounding box, in pixels
+//   width, height           - size of the sprite graphics, in pixels
+//   xpos                    - screen X of the bounding box's left edge
+//                             (may be negative when partly off-screen)
+//   ypos                    - row inside the bounding box to draw
+//
+// Output goes to OBJLine[]/OBJIndex[] (or OBJWindow[]) at screen columns
+// xpos .. xpos+boundwidth-1, clipped to 0..255.
+template<bool window>
+void GPU2D_Soft::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos)
+{
+    u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0];
+    u16* attrib = &oam[num * 4];
+    // parameter group index is attrib[1] bits 9-13; the four parameters of a
+    // group live in the 4th halfword of four consecutive OAM entries
+    u16* rotparams = &oam[(((attrib[1] >> 9) & 0x1F) * 16) + 3];
+
+    u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000;
+    u32 tilenum = attrib[2] & 0x03FF;
+    // window sprites always take the tiled path below
+    u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3);
+
+    u32 ytilefactor;
+
+    u8* objvram;
+    u32 objvrammask;
+    GetOBJVRAM(objvram, objvrammask);
+
+    s32 centerX = boundwidth >> 1;
+    s32 centerY = boundheight >> 1;
+
+    if ((attrib[0] & 0x1000) && !window)
+    {
+        // apply Y mosaic
+        pixelattr |= 0x100000;
+    }
+
+    // clip the bounding box against the 256-pixel wide line:
+    // xoff is the first bounding-box column that is actually drawn
+    u32 xoff;
+    if (xpos >= 0)
+    {
+        xoff = 0;
+        if ((xpos+boundwidth) > 256)
+            boundwidth = 256-xpos;
+    }
+    else
+    {
+        xoff = -xpos;
+        xpos = 0;
+    }
+
+    s16 rotA = (s16)rotparams[0];
+    s16 rotB = (s16)rotparams[4];
+    s16 rotC = (s16)rotparams[8];
+    s16 rotD = (s16)rotparams[12];
+
+    // source coordinates with 8 fractional bits, taken relative to the
+    // bounding box center and re-centered on the sprite (width<<7 == width/2 << 8)
+    s32 rotX = ((xoff-centerX) * rotA) + ((ypos-centerY) * rotB) + (width << 7);
+    s32 rotY = ((xoff-centerX) * rotC) + ((ypos-centerY) * rotD) + (height << 7);
+
+    // from here on width/height are in the same 8-bit fractional format, so
+    // the unsigned compares below reject negative and too-large coordinates
+    width <<= 8;
+    height <<= 8;
+
+    u16 color = 0; // transparent in all cases
+
+    if (spritemode == 3)
+    {
+        // pixels are read as 16-bit values; bit 15 marks an opaque pixel
+        u32 alpha = attrib[2] >> 12;
+        if (!alpha) return;
+        alpha++;
+
+        pixelattr |= (0xC0000000 | (alpha << 24));
+
+        // NOTE(review): DispCnt bits 5/6 appear to select the bitmap OBJ
+        // mapping mode - confirm against the register documentation
+        u32 pixelsaddr;
+        if (DispCnt & 0x40)
+        {
+            if (DispCnt & 0x20)
+            {
+                // 'reserved'
+                // draws nothing
+
+                return;
+            }
+            else
+            {
+                pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1));
+                ytilefactor = ((width >> 8) * 2);
+            }
+        }
+        else
+        {
+            if (DispCnt & 0x20)
+            {
+                pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
+                ytilefactor = (256 * 2);
+            }
+            else
+            {
+                pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
+                ytilefactor = (128 * 2);
+            }
+        }
+
+        for (; xoff < boundwidth;)
+        {
+            if ((u32)rotX < width && (u32)rotY < height)
+            {
+                color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask];
+
+                if (color & 0x8000)
+                {
+                    if (window) OBJWindow[xpos] = 1;
+                    else      { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
+                }
+                else if (!window)
+                {
+                    // transparent pixel over an empty slot: keep only the
+                    // 0x180000 flag bits (includes the mosaic flag set above)
+                    if (OBJLine[xpos] == 0)
+                    {
+                        OBJLine[xpos] = pixelattr & 0x180000;
+                        OBJIndex[xpos] = num;
+                    }
+                }
+            }
+
+            rotX += rotA;
+            rotY += rotC;
+            xoff++;
+            xpos++;
+        }
+    }
+    else
+    {
+        // tiled sprite; ytilefactor is the row pitch counted in tiles until
+        // it is converted to bytes by the <<5 below
+        u32 pixelsaddr = tilenum;
+        if (DispCnt & 0x10)
+        {
+            pixelsaddr <<= ((DispCnt >> 20) & 0x3);
+            ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0);
+        }
+        else
+        {
+            ytilefactor = 0x20;
+        }
+
+        if (spritemode == 1) pixelattr |= 0x80000000;
+        else                 pixelattr |= 0x10000000;
+
+        ytilefactor <<= 5;
+        pixelsaddr <<= 5;
+
+        if (attrib[0] & 0x2000)
+        {
+            // 256-color
+
+            if (!window)
+            {
+                if (!(DispCnt & 0x80000000))
+                    pixelattr |= 0x1000;
+                else
+                    pixelattr |= ((attrib[2] & 0xF000) >> 4);
+            }
+
+            for (; xoff < boundwidth;)
+            {
+                if ((u32)rotX < width && (u32)rotY < height)
+                {
+                    // 8x8 tiles, 64 bytes each, one byte per pixel
+                    color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask];
+
+                    if (color)
+                    {
+                        if (window) OBJWindow[xpos] = 1;
+                        else      { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
+                    }
+                    else if (!window)
+                    {
+                        if (OBJLine[xpos] == 0)
+                        {
+                            OBJLine[xpos] = pixelattr & 0x180000;
+                            OBJIndex[xpos] = num;
+                        }
+                    }
+                }
+
+                rotX += rotA;
+                rotY += rotC;
+                xoff++;
+                xpos++;
+            }
+        }
+        else
+        {
+            // 16-color
+            if (!window)
+            {
+                pixelattr |= 0x1000;
+                pixelattr |= ((attrib[2] & 0xF000) >> 8);
+            }
+
+            for (; xoff < boundwidth;)
+            {
+                if ((u32)rotX < width && (u32)rotY < height)
+                {
+                    // 8x8 tiles, 32 bytes each, two pixels per byte:
+                    // odd source columns use the high nibble
+                    color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask];
+                    if (rotX & 0x100)
+                        color >>= 4;
+                    else
+                        color &= 0x0F;
+
+                    if (color)
+                    {
+                        if (window) OBJWindow[xpos] = 1;
+                        else      { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
+                    }
+                    else if (!window)
+                    {
+                        if (OBJLine[xpos] == 0)
+                        {
+                            OBJLine[xpos] = pixelattr & 0x180000;
+                            OBJIndex[xpos] = num;
+                        }
+                    }
+                }
+
+                rotX += rotA;
+                rotY += rotC;
+                xoff++;
+                xpos++;
+            }
+        }
+    }
+}
+
+// Draws the part of a regular (non rotated/scaled) sprite that falls on the
+// current scanline.
+//
+// Template parameter:
+//   window - true when the sprite is drawn as an OBJ-window sprite: opaque
+//            pixels then only set OBJWindow[], and OBJLine[]/OBJIndex[] are
+//            left untouched.
+//
+// Parameters:
+//   num           - sprite index; its attributes are oam[num*4 ..]
+//   width, height - sprite size in pixels
+//   xpos          - screen X of the sprite's left edge (may be negative when
+//                   partly off-screen)
+//   ypos          - sprite row to draw, before vertical flipping
+//
+// Output goes to OBJLine[]/OBJIndex[] (or OBJWindow[]) at screen columns
+// xpos .. xpos+width-1, clipped to 0..255. Every OBJ VRAM access is wrapped
+// with objvrammask.
+template<bool window>
+void GPU2D_Soft::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos)
+{
+    u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0];
+    u16* attrib = &oam[num * 4];
+
+    u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000;
+    u32 tilenum = attrib[2] & 0x03FF;
+    // window sprites always take the tiled path below
+    u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3);
+
+    u32 wmask = width - 8; // really ((width - 1) & ~0x7)
+
+    if ((attrib[0] & 0x1000) && !window)
+    {
+        // apply Y mosaic
+        pixelattr |= 0x100000;
+    }
+
+    u8* objvram;
+    u32 objvrammask;
+    GetOBJVRAM(objvram, objvrammask);
+
+    // yflip
+    if (attrib[1] & 0x2000)
+        ypos = height-1 - ypos;
+
+    // clip against the 256-pixel wide line: columns xoff..xend-1 of the
+    // sprite are drawn
+    u32 xoff;
+    u32 xend = width;
+    if (xpos >= 0)
+    {
+        xoff = 0;
+        if ((xpos+xend) > 256)
+            xend = 256-xpos;
+    }
+    else
+    {
+        xoff = -xpos;
+        xpos = 0;
+    }
+
+    u16 color = 0; // transparent in all cases
+
+    if (spritemode == 3)
+    {
+        // bitmap sprite
+
+        u32 alpha = attrib[2] >> 12;
+        if (!alpha) return;
+        alpha++;
+
+        pixelattr |= (0xC0000000 | (alpha << 24));
+
+        // NOTE(review): DispCnt bits 5/6 appear to select the bitmap OBJ
+        // mapping mode - confirm against the register documentation
+        u32 pixelsaddr = tilenum;
+        if (DispCnt & 0x40)
+        {
+            if (DispCnt & 0x20)
+            {
+                // 'reserved'
+                // draws nothing
+
+                return;
+            }
+            else
+            {
+                pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1));
+                pixelsaddr += (ypos * width * 2);
+            }
+        }
+        else
+        {
+            if (DispCnt & 0x20)
+            {
+                pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
+                pixelsaddr += (ypos * 256 * 2);
+            }
+            else
+            {
+                pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
+                pixelsaddr += (ypos * 128 * 2);
+            }
+        }
+
+        s32 pixelstride;
+
+        if (attrib[1] & 0x1000) // xflip
+        {
+            // start at the row's last pixel and walk backwards
+            pixelsaddr += ((width-1) << 1);
+            pixelsaddr -= (xoff << 1);
+            pixelstride = -2;
+        }
+        else
+        {
+            pixelsaddr += (xoff << 1);
+            pixelstride = 2;
+        }
+
+        for (; xoff < xend;)
+        {
+            color = *(u16*)&objvram[pixelsaddr & objvrammask];
+
+            pixelsaddr += pixelstride;
+
+            if (color & 0x8000)
+            {
+                if (window) OBJWindow[xpos] = 1;
+                else      { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
+            }
+            else if (!window)
+            {
+                // transparent pixel over an empty slot: keep only the
+                // 0x180000 flag bits (includes the mosaic flag set above)
+                if (OBJLine[xpos] == 0)
+                {
+                    OBJLine[xpos] = pixelattr & 0x180000;
+                    OBJIndex[xpos] = num;
+                }
+            }
+
+            xoff++;
+            xpos++;
+        }
+    }
+    else
+    {
+        // tiled sprite; pixelsaddr is counted in tiles until the <<5 below
+        u32 pixelsaddr = tilenum;
+        if (DispCnt & 0x10)
+        {
+            pixelsaddr <<= ((DispCnt >> 20) & 0x3);
+            pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
+        }
+        else
+        {
+            pixelsaddr += ((ypos >> 3) * 0x20);
+        }
+
+        if (spritemode == 1) pixelattr |= 0x80000000;
+        else                 pixelattr |= 0x10000000;
+
+        if (attrib[0] & 0x2000)
+        {
+            // 256-color
+            pixelsaddr <<= 5;
+            pixelsaddr += ((ypos & 0x7) << 3);
+            s32 pixelstride;
+
+            if (!window)
+            {
+                if (!(DispCnt & 0x80000000))
+                    pixelattr |= 0x1000;
+                else
+                    pixelattr |= ((attrib[2] & 0xF000) >> 4);
+            }
+
+            if (attrib[1] & 0x1000) // xflip
+            {
+                pixelsaddr += (((width-1) & wmask) << 3);
+                pixelsaddr += ((width-1) & 0x7);
+                pixelsaddr -= ((xoff & wmask) << 3);
+                pixelsaddr -= (xoff & 0x7);
+                pixelstride = -1;
+            }
+            else
+            {
+                pixelsaddr += ((xoff & wmask) << 3);
+                pixelsaddr += (xoff & 0x7);
+                pixelstride = 1;
+            }
+
+            for (; xoff < xend;)
+            {
+                // wrap the address like every other OBJ VRAM access here;
+                // an unmasked index can run past the end of the buffer
+                color = objvram[pixelsaddr & objvrammask];
+
+                pixelsaddr += pixelstride;
+
+                if (color)
+                {
+                    if (window) OBJWindow[xpos] = 1;
+                    else      { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
+                }
+                else if (!window)
+                {
+                    if (OBJLine[xpos] == 0)
+                    {
+                        OBJLine[xpos] = pixelattr & 0x180000;
+                        OBJIndex[xpos] = num;
+                    }
+                }
+
+                xoff++;
+                xpos++;
+                // after 8 pixels, skip the other 7 rows (56 bytes) of the
+                // 64-byte tile to reach the same row of the next tile
+                if (!(xoff & 0x7)) pixelsaddr += (56 * pixelstride);
+            }
+        }
+        else
+        {
+            // 16-color
+            pixelsaddr <<= 5;
+            pixelsaddr += ((ypos & 0x7) << 2);
+            s32 pixelstride;
+
+            if (!window)
+            {
+                pixelattr |= 0x1000;
+                pixelattr |= ((attrib[2] & 0xF000) >> 8);
+            }
+
+            // TODO: optimize VRAM access!!
+            // TODO: do xflip better? the 'two pixels per byte' thing makes it a bit shitty
+
+            if (attrib[1] & 0x1000) // xflip
+            {
+                pixelsaddr += (((width-1) & wmask) << 2);
+                pixelsaddr += (((width-1) & 0x7) >> 1);
+                pixelsaddr -= ((xoff & wmask) << 2);
+                pixelsaddr -= ((xoff & 0x7) >> 1);
+                pixelstride = -1;
+            }
+            else
+            {
+                pixelsaddr += ((xoff & wmask) << 2);
+                pixelsaddr += ((xoff & 0x7) >> 1);
+                pixelstride = 1;
+            }
+
+            for (; xoff < xend;)
+            {
+                // two pixels per byte: the address only moves after the
+                // second nibble of a byte has been consumed
+                if (attrib[1] & 0x1000)
+                {
+                    if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; }
+                    else              color = objvram[pixelsaddr & objvrammask] >> 4;
+                }
+                else
+                {
+                    if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; }
+                    else              color = objvram[pixelsaddr & objvrammask] & 0x0F;
+                }
+
+                if (color)
+                {
+                    if (window) OBJWindow[xpos] = 1;
+                    else      { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; }
+                }
+                else if (!window)
+                {
+                    if (OBJLine[xpos] == 0)
+                    {
+                        OBJLine[xpos] = pixelattr & 0x180000;
+                        OBJIndex[xpos] = num;
+                    }
+                }
+
+                xoff++;
+                xpos++;
+                // after 8 pixels, skip the other 7 rows (28 bytes) of the
+                // 32-byte tile to reach the same row of the next tile
+                if (!(xoff & 0x7)) pixelsaddr += ((attrib[1] & 0x1000) ? -28 : 28);
+            }
+        }
+    }
+}
+
+// Re-selects the horizontal mosaic lookup tables for backgrounds and sprites
+// from the current BGMosaicSize / OBJMosaicSize values.
+void GPU2D_Soft::MosaicXSizeChanged()
+{
+    // Both X tables must be keyed by the horizontal size. The BG table uses
+    // element 0, so the OBJ table uses element 0 as well.
+    // NOTE(review): presumably element 0 is the horizontal and element 1 the
+    // vertical size - confirm against the mosaic register write handler.
+    CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
+    CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
+}
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 74debfe0..7b304268 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -179,6 +179,10 @@ u8 RenderFogDensityTable[34];
u32 RenderClearAttr1, RenderClearAttr2;
+bool RenderFrameIdentical;
+
+u16 RenderXPos;
+
u32 ZeroDotWLimit;
u32 GXStat;
@@ -383,6 +387,8 @@ void Reset()
FlushAttributes = 0;
ResetRenderingState();
+
+ RenderXPos = 0;
}
void DoSavestate(Savestate* file)
@@ -428,6 +434,8 @@ void DoSavestate(Savestate* file)
file->Var32(&RenderClearAttr1);
file->Var32(&RenderClearAttr2);
+ file->Var16(&RenderXPos);
+
file->Var32(&ZeroDotWLimit);
file->Var32(&GXStat);
@@ -585,8 +593,6 @@ void DoSavestate(Savestate* file)
}
}
- // probably not worth storing the vblank-latched Renderxxxxxx variables
-
CmdStallQueue->DoSavestate(file);
file->Var32((u32*)&VertexPipeline);
file->Var32((u32*)&NormalPipeline);
@@ -606,6 +612,22 @@ void DoSavestate(Savestate* file)
// might cause a blank frame but atleast it won't shit itself
RenderNumPolygons = 0;
}
+
+ file->VarArray(CurVertex, sizeof(s16)*3);
+ file->VarArray(VertexColor, sizeof(u8)*3);
+ file->VarArray(TexCoords, sizeof(s16)*2);
+ file->VarArray(RawTexCoords, sizeof(s16)*2);
+ file->VarArray(Normal, sizeof(s16)*3);
+
+ file->VarArray(LightDirection, sizeof(s16)*4*3);
+ file->VarArray(LightColor, sizeof(u8)*4*3);
+ file->VarArray(MatDiffuse, sizeof(u8)*3);
+ file->VarArray(MatAmbient, sizeof(u8)*3);
+ file->VarArray(MatSpecular, sizeof(u8)*3);
+ file->VarArray(MatEmission, sizeof(u8)*3);
+
+ file->Bool32(&UseShininessTable);
+ file->VarArray(ShininessTable, 128*sizeof(u8));
}
@@ -2491,6 +2513,19 @@ void VBlank()
}
RenderNumPolygons = NumPolygons;
+ RenderFrameIdentical = false;
+ }
+ else
+ {
+ RenderFrameIdentical = RenderDispCnt == DispCnt
+ && RenderAlphaRef == AlphaRef
+ && RenderClearAttr1 == ClearAttr1
+ && RenderClearAttr2 == ClearAttr2
+ && RenderFogColor == FogColor
+ && RenderFogOffset == FogOffset * 0x200
+ && memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0
+ && memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0
+ && memcmp(RenderToonTable, ToonTable, 32*2) == 0;
}
RenderDispCnt = DispCnt;
@@ -2533,14 +2568,46 @@ void VCount215()
#endif
}
+// Stores a new value in RenderXPos, keeping only the low 9 bits of xpos.
+// Has no effect while RenderingEnabled is false.
+void SetRenderXPos(u16 xpos)
+{
+    if (RenderingEnabled)
+    {
+        RenderXPos = xpos & 0x01FF;
+    }
+}
+
+u32 ScrolledLine[256];
+
u32* GetLine(int line)
{
- if (GPU::Renderer == 0) return SoftRenderer::GetLine(line);
+ u32* rawline = NULL;
+
+ if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line);
#ifdef OGLRENDERER_ENABLED
- else return GLRenderer::GetLine(line);
-#else
- return NULL;
+ else rawline = GLRenderer::GetLine(line);
#endif
+
+ if (RenderXPos == 0) return rawline;
+
+ // apply X scroll
+
+ if (RenderXPos & 0x100)
+ {
+ int i = 0, j = RenderXPos;
+ for (; j < 512; i++, j++)
+ ScrolledLine[i] = 0;
+ for (j = 0; i < 256; i++, j++)
+ ScrolledLine[i] = rawline[j];
+ }
+ else
+ {
+ int i = 0, j = RenderXPos;
+ for (; j < 256; i++, j++)
+ ScrolledLine[i] = rawline[j];
+ for (; i < 256; i++)
+ ScrolledLine[i] = 0;
+ }
+
+ return ScrolledLine;
}
diff --git a/src/GPU3D.h b/src/GPU3D.h
index c69adde2..69b67fa7 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -87,6 +87,10 @@ extern u8 RenderFogDensityTable[34];
extern u32 RenderClearAttr1, RenderClearAttr2;
+extern bool RenderFrameIdentical;
+
+extern u16 RenderXPos;
+
extern std::array RenderPolygonRAM;
extern u32 RenderNumPolygons;
@@ -112,6 +116,8 @@ void CheckFIFODMA();
void VCount144();
void VBlank();
void VCount215();
+
+void SetRenderXPos(u16 xpos);
u32* GetLine(int line);
void WriteToGXFIFO(u32 val);
diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp
index 658b2613..88ae77a9 100644
--- a/src/GPU3D_OpenGL.cpp
+++ b/src/GPU3D_OpenGL.cpp
@@ -53,17 +53,18 @@ GLuint CurShaderID = -1;
GLuint FinalPassEdgeShader[3];
GLuint FinalPassFogShader[3];
+// std140 compliant structure
struct
{
- float uScreenSize[2];
- u32 uDispCnt;
+ float uScreenSize[2]; // vec2 0 / 2
+ u32 uDispCnt; // int 2 / 1
u32 __pad0;
- float uToonColors[32][4];
- float uEdgeColors[8][4];
- float uFogColor[4];
- float uFogDensity[34][4];
- u32 uFogOffset;
- u32 uFogShift;
+ float uToonColors[32][4]; // vec4[32] 4 / 128
+ float uEdgeColors[8][4]; // vec4[8] 132 / 32
+ float uFogColor[4]; // vec4 164 / 4
+ float uFogDensity[34][4]; // float[34] 168 / 136
+ u32 uFogOffset; // int 304 / 1
+ u32 uFogShift; // int 305 / 1
} ShaderConfig;
@@ -74,11 +75,11 @@ typedef struct
Polygon* PolyData;
u32 NumIndices;
- u16* Indices;
+ u32 IndicesOffset;
GLuint PrimType;
u32 NumEdgeIndices;
- u16* EdgeIndices;
+ u32 EdgeIndicesOffset;
u32 RenderKey;
@@ -107,7 +108,11 @@ u32 VertexBuffer[10240 * 7];
u32 NumVertices;
GLuint VertexArrayID;
+GLuint IndexBufferID;
u16 IndexBuffer[2048 * 40];
+u32 NumIndices, NumEdgeIndices;
+
+const u32 EdgeIndicesOffset = 2048 * 30;
GLuint TexMemID;
GLuint TexPalMemID;
@@ -280,7 +285,7 @@ bool Init()
glGenBuffers(1, &ShaderConfigUBO);
glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO);
- glBufferData(GL_UNIFORM_BUFFER, sizeof(ShaderConfig), &ShaderConfig, GL_STATIC_DRAW);
+ glBufferData(GL_UNIFORM_BUFFER, (sizeof(ShaderConfig) + 15) & ~15, &ShaderConfig, GL_STATIC_DRAW);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, ShaderConfigUBO);
@@ -320,6 +325,9 @@ bool Init()
glEnableVertexAttribArray(3); // attrib
glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4));
+ glGenBuffers(1, &IndexBufferID);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, IndexBufferID);
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), NULL, GL_DYNAMIC_DRAW);
glGenFramebuffers(4, &FramebufferID[0]);
glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
@@ -563,15 +571,15 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
u32* vptr = &VertexBuffer[0];
u32 vidx = 0;
- u16* iptr = &IndexBuffer[0];
- u16* eiptr = &IndexBuffer[2048*30];
+ u32 iidx = 0;
+ u32 eidx = EdgeIndicesOffset;
for (int i = 0; i < npolys; i++)
{
RendererPolygon* rp = &polygons[i];
Polygon* poly = rp->PolyData;
- rp->Indices = iptr;
+ rp->IndicesOffset = iidx;
rp->NumIndices = 0;
u32 vidx_first = vidx;
@@ -606,7 +614,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
vptr = SetupVertex(poly, j, vtx, vtxattr, vptr);
- *iptr++ = vidx;
+ IndexBuffer[iidx++] = vidx;
rp->NumIndices++;
vidx++;
@@ -627,9 +635,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
}
// build a triangle
- *iptr++ = vidx_first;
- *iptr++ = vidx - 2;
- *iptr++ = vidx - 1;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 2;
+ IndexBuffer[iidx++] = vidx - 1;
rp->NumIndices += 3;
}
else // quad, pentagon, etc
@@ -649,9 +657,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
if (j >= 2)
{
// build a triangle
- *iptr++ = vidx_first;
- *iptr++ = vidx - 1;
- *iptr++ = vidx;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 1;
+ IndexBuffer[iidx++] = vidx;
rp->NumIndices += 3;
}
@@ -743,46 +751,50 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
if (j >= 1)
{
// build a triangle
- *iptr++ = vidx_first;
- *iptr++ = vidx - 1;
- *iptr++ = vidx;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 1;
+ IndexBuffer[iidx++] = vidx;
rp->NumIndices += 3;
}
vidx++;
}
- *iptr++ = vidx_first;
- *iptr++ = vidx - 1;
- *iptr++ = vidx_first + 1;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 1;
+ IndexBuffer[iidx++] = vidx_first + 1;
rp->NumIndices += 3;
}
}
- rp->EdgeIndices = eiptr;
+ rp->EdgeIndicesOffset = eidx;
rp->NumEdgeIndices = 0;
u32 vidx_cur = vidx_first;
for (int j = 1; j < poly->NumVertices; j++)
{
- *eiptr++ = vidx_cur;
- *eiptr++ = vidx_cur + 1;
+ IndexBuffer[eidx++] = vidx_cur;
+ IndexBuffer[eidx++] = vidx_cur + 1;
vidx_cur++;
rp->NumEdgeIndices += 2;
}
- *eiptr++ = vidx_cur;
- *eiptr++ = vidx_first;
+ IndexBuffer[eidx++] = vidx_cur;
+ IndexBuffer[eidx++] = vidx_first;
rp->NumEdgeIndices += 2;
}
NumVertices = vidx;
+ NumIndices = iidx;
+ NumEdgeIndices = eidx - EdgeIndicesOffset;
}
-void RenderSinglePolygon(int i)
+int RenderSinglePolygon(int i)
{
RendererPolygon* rp = &PolygonList[i];
- glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, rp->Indices);
+ glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2));
+
+ return 1;
}
int RenderPolygonBatch(int i)
@@ -803,7 +815,7 @@ int RenderPolygonBatch(int i)
numindices += cur_rp->NumIndices;
}
- glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, rp->Indices);
+ glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2));
return numpolys;
}
@@ -823,7 +835,7 @@ int RenderPolygonEdgeBatch(int i)
numindices += cur_rp->NumEdgeIndices;
}
- glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, rp->EdgeIndices);
+ glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->EdgeIndicesOffset * 2));
return numpolys;
}
@@ -857,6 +869,7 @@ void RenderSceneChunk(int y, int h)
RendererPolygon* rp = &PolygonList[i];
if (rp->PolyData->IsShadowMask) { i++; continue; }
+ if (rp->PolyData->Translucent) { i++; continue; }
if (rp->PolyData->Attr & (1<<14))
glDepthFunc(GL_LEQUAL);
@@ -874,7 +887,8 @@ void RenderSceneChunk(int y, int h)
}
// if edge marking is enabled, mark all opaque edges
- if (RenderDispCnt & (1<<5))
+ // TODO BETTER EDGE MARKING!!! THIS SUCKS
+ /*if (RenderDispCnt & (1<<5))
{
UseRenderShader(flags | RenderFlag_Edge);
glLineWidth(1.5);
@@ -899,7 +913,7 @@ void RenderSceneChunk(int y, int h)
}
glDepthMask(GL_TRUE);
- }
+ }*/
glEnable(GL_BLEND);
glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX);
@@ -944,15 +958,32 @@ void RenderSceneChunk(int y, int h)
}
else if (rp->PolyData->Translucent)
{
- UseRenderShader(flags | RenderFlag_Trans);
+ bool needopaque = ((rp->PolyData->Attr & 0x001F0000) == 0x001F0000);
- if (rp->PolyData->Attr & (1<<14))
+ u32 polyattr = rp->PolyData->Attr;
+ u32 polyid = (polyattr >> 24) & 0x3F;
+
+ if (polyattr & (1<<14))
glDepthFunc(GL_LEQUAL);
else
glDepthFunc(GL_LESS);
- u32 polyattr = rp->PolyData->Attr;
- u32 polyid = (polyattr >> 24) & 0x3F;
+ if (needopaque)
+ {
+ UseRenderShader(flags);
+
+ glDisable(GL_BLEND);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glColorMaski(1, GL_TRUE, GL_TRUE, fogenable, GL_FALSE);
+
+ glStencilFunc(GL_ALWAYS, polyid, 0xFF);
+ glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
+ glStencilMask(0xFF);
+
+ RenderSinglePolygon(i);
+ }
+
+ UseRenderShader(flags | RenderFlag_Trans);
GLboolean transfog;
if (!(polyattr & (1<<15))) transfog = fogenable;
@@ -975,7 +1006,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE);
- i += RenderPolygonBatch(i);
+ i += needopaque ? RenderSinglePolygon(i) : RenderPolygonBatch(i);
}
else
{
@@ -989,7 +1020,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE);
- i += RenderPolygonBatch(i);
+ i += needopaque ? RenderSinglePolygon(i) : RenderPolygonBatch(i);
}
}
else
@@ -1030,20 +1061,37 @@ void RenderSceneChunk(int y, int h)
}
else if (rp->PolyData->Translucent)
{
- UseRenderShader(flags | RenderFlag_Trans);
+ bool needopaque = ((rp->PolyData->Attr & 0x001F0000) == 0x001F0000);
u32 polyattr = rp->PolyData->Attr;
u32 polyid = (polyattr >> 24) & 0x3F;
- GLboolean transfog;
- if (!(polyattr & (1<<15))) transfog = fogenable;
- else transfog = GL_FALSE;
-
- if (rp->PolyData->Attr & (1<<14))
+ if (polyattr & (1<<14))
glDepthFunc(GL_LEQUAL);
else
glDepthFunc(GL_LESS);
+ if (needopaque)
+ {
+ UseRenderShader(flags);
+
+ glDisable(GL_BLEND);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glColorMaski(1, GL_TRUE, GL_TRUE, fogenable, GL_FALSE);
+
+ glStencilFunc(GL_ALWAYS, polyid, 0xFF);
+ glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
+ glStencilMask(0xFF);
+
+ RenderSinglePolygon(i);
+ }
+
+ UseRenderShader(flags | RenderFlag_Trans);
+
+ GLboolean transfog;
+ if (!(polyattr & (1<<15))) transfog = fogenable;
+ else transfog = GL_FALSE;
+
if (rp->PolyData->IsShadow)
{
glDisable(GL_BLEND);
@@ -1067,8 +1115,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE);
- RenderSinglePolygon(i);
- i++;
+ i += RenderSinglePolygon(i);
}
else
{
@@ -1083,7 +1130,7 @@ void RenderSceneChunk(int y, int h)
if (polyattr & (1<<11)) glDepthMask(GL_TRUE);
else glDepthMask(GL_FALSE);
- i += RenderPolygonBatch(i);
+ i += needopaque ? RenderSinglePolygon(i) : RenderPolygonBatch(i);
}
}
else
@@ -1320,6 +1367,11 @@ void RenderFrame()
glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID);
glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer);
+ // bind to access the index buffer
+ glBindVertexArray(VertexArrayID);
+ glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer);
+ glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset);
+
RenderSceneChunk(0, 192);
}
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index e9d8e75f..d66eb76e 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -58,15 +58,17 @@ bool PrevIsShadowMask;
bool Enabled;
+bool FrameIdentical;
+
// threading
bool Threaded;
-void* RenderThread;
+Platform::Thread* RenderThread;
bool RenderThreadRunning;
bool RenderThreadRendering;
-void* Sema_RenderStart;
-void* Sema_RenderDone;
-void* Sema_ScanlineCount;
+Platform::Semaphore* Sema_RenderStart;
+Platform::Semaphore* Sema_RenderDone;
+Platform::Semaphore* Sema_ScanlineCount;
void RenderThreadFunc();
@@ -550,6 +552,16 @@ typedef struct
RendererPolygon PolygonList[2048];
+// Reads a value of type T from GPU::VRAMFlat_Texture at byte offset addr.
+// addr is wrapped with a 0x7FFFF mask before indexing.
+// NOTE(review): addr is not aligned to sizeof(T) here - callers presumably
+// pass suitably aligned addresses; confirm.
+template<typename T>
+inline T ReadVRAM_Texture(u32 addr)
+{
+    return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF];
+}
+// Reads a value of type T from GPU::VRAMFlat_TexPal at byte offset addr.
+// addr is wrapped with a 0x1FFFF mask before indexing.
+// NOTE(review): addr is not aligned to sizeof(T) here - callers presumably
+// pass suitably aligned addresses; confirm.
+template<typename T>
+inline T ReadVRAM_TexPal(u32 addr)
+{
+    return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF];
+}
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{
@@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 1: // A3I5
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture(vramaddr);
+ u8 pixel = ReadVRAM_Texture(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1));
+ *color = ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1));
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
}
break;
@@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 2: // 4-color
{
vramaddr += (((t * width) + s) >> 2);
- u8 pixel = GPU::ReadVRAM_Texture(vramaddr);
+ u8 pixel = ReadVRAM_Texture(vramaddr);
pixel >>= ((s & 0x3) << 1);
pixel &= 0x3;
texpal <<= 3;
- *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 3: // 16-color
{
vramaddr += (((t * width) + s) >> 1);
- u8 pixel = GPU::ReadVRAM_Texture(vramaddr);
+ u8 pixel = ReadVRAM_Texture(vramaddr);
if (s & 0x1) pixel >>= 4;
else pixel &= 0xF;
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 4: // 256-color
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture(vramaddr);
+ u8 pixel = ReadVRAM_Texture(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
if (vramaddr >= 0x40000)
slot1addr += 0x10000;
- u8 val = GPU::ReadVRAM_Texture(vramaddr);
+ u8 val = ReadVRAM_Texture(vramaddr);
val >>= (2 * (s & 0x3));
- u16 palinfo = GPU::ReadVRAM_Texture(slot1addr);
+ u16 palinfo = ReadVRAM_Texture(slot1addr);
u32 paloffset = (palinfo & 0x3FFF) << 2;
texpal <<= 4;
switch (val & 0x3)
{
case 0:
- *color = GPU::ReadVRAM_TexPal(texpal + paloffset);
+ *color = ReadVRAM_TexPal(texpal + paloffset);
*alpha = 31;
break;
case 1:
- *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 2);
+ *color = ReadVRAM_TexPal(texpal + paloffset + 2);
*alpha = 31;
break;
case 2:
if ((palinfo >> 14) == 1)
{
- u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
}
else if ((palinfo >> 14) == 3)
{
- u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
*color = r | g | b;
}
else
- *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 4);
+ *color = ReadVRAM_TexPal(texpal + paloffset + 4);
*alpha = 31;
break;
case 3:
if ((palinfo >> 14) == 2)
{
- *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 6);
+ *color = ReadVRAM_TexPal(texpal + paloffset + 6);
*alpha = 31;
}
else if ((palinfo >> 14) == 3)
{
- u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 6: // A5I3
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture(vramaddr);
+ u8 pixel = ReadVRAM_Texture(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1));
+ *color = ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1));
*alpha = (pixel >> 3);
}
break;
@@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 7: // direct color
{
vramaddr += (((t * width) + s) << 1);
- *color = GPU::ReadVRAM_Texture(vramaddr);
+ *color = ReadVRAM_Texture(vramaddr);
*alpha = (*color & 0x8000) ? 31 : 0;
}
break;
@@ -2007,8 +2019,8 @@ void ClearBuffers()
{
for (int x = 0; x < 256; x++)
{
- u16 val2 = GPU::ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1));
- u16 val3 = GPU::ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1));
+ u16 val2 = ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1));
+ u16 val3 = ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1));
// TODO: confirm color conversion
u32 r = (val2 << 1) & 0x3E; if (r) r++;
@@ -2088,11 +2100,19 @@ void VCount144()
void RenderFrame()
{
+ auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
+ auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
+
+ bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty);
+ bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty);
+
+ FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical;
+
if (RenderThreadRunning)
{
Platform::Semaphore_Post(Sema_RenderStart);
}
- else
+ else if (!FrameIdentical)
{
ClearBuffers();
RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons);
@@ -2107,8 +2127,15 @@ void RenderThreadFunc()
if (!RenderThreadRunning) return;
RenderThreadRendering = true;
- ClearBuffers();
- RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
+ if (FrameIdentical)
+ {
+ Platform::Semaphore_Post(Sema_ScanlineCount, 192);
+ }
+ else
+ {
+ ClearBuffers();
+ RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
+ }
Platform::Semaphore_Post(Sema_RenderDone);
RenderThreadRendering = false;
diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp
index 359e9cd4..0c6cf004 100644
--- a/src/GPU_OpenGL.cpp
+++ b/src/GPU_OpenGL.cpp
@@ -36,6 +36,7 @@ int ScreenH, ScreenW;
GLuint CompShader[1][3];
GLuint CompScaleLoc[1];
+GLuint Comp3DXPosLoc[1];
GLuint CompVertexBufferID;
GLuint CompVertexArrayID;
@@ -64,6 +65,7 @@ bool Init()
return false;
CompScaleLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DScale");
+ Comp3DXPosLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DXPos");
glUseProgram(CompShader[i][2]);
uni_id = glGetUniformLocation(CompShader[i][2], "ScreenTex");
@@ -180,6 +182,9 @@ void RenderFrame()
OpenGL::UseShaderProgram(CompShader[0]);
glUniform1ui(CompScaleLoc[0], Scale);
+ // TODO: support setting this midframe, if ever needed
+ glUniform1i(Comp3DXPosLoc[0], ((int)GPU3D::RenderXPos << 23) >> 23);
+
int frontbuf = GPU::FrontBuffer;
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, CompScreenInputTex);
diff --git a/src/GPU_OpenGL_shaders.h b/src/GPU_OpenGL_shaders.h
index 20ac7673..03ddb7af 100644
--- a/src/GPU_OpenGL_shaders.h
+++ b/src/GPU_OpenGL_shaders.h
@@ -40,6 +40,7 @@ void main()
const char* kCompositorFS_Nearest = R"(#version 140
uniform uint u3DScale;
+uniform int u3DXPos;
uniform usampler2D ScreenTex;
uniform sampler2D _3DTex;
@@ -52,6 +53,8 @@ void main()
{
ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0));
+ float _3dxpos = float(u3DXPos);
+
ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0));
int dispmode = mbright.b & 0x3;
@@ -68,7 +71,7 @@ void main()
{
// 3D on top, blending
- float xpos = val3.r + fract(fTexcoord.x);
+ float xpos = fTexcoord.x + _3dxpos;
float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31));
@@ -89,7 +92,7 @@ void main()
{
// 3D on bottom, blending
- float xpos = val3.r + fract(fTexcoord.x);
+ float xpos = fTexcoord.x + _3dxpos;
float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31));
@@ -109,7 +112,7 @@ void main()
{
// 3D on top, normal/fade
- float xpos = val3.r + fract(fTexcoord.x);
+ float xpos = fTexcoord.x + _3dxpos;
float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31));
diff --git a/src/NDS.cpp b/src/NDS.cpp
index 90149add..8b49328f 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -210,13 +210,13 @@ bool Init()
void DeInit()
{
- delete ARM9;
- delete ARM7;
-
#ifdef JIT_ENABLED
ARMJIT::DeInit();
#endif
+ delete ARM9;
+ delete ARM7;
+
for (int i = 0; i < 8; i++)
delete DMAs[i];
@@ -908,7 +908,7 @@ void RunSystem(u64 timestamp)
}
}
-template
+template
u32 RunFrame()
{
FrameStartTimestamp = SysTimestamp;
@@ -934,10 +934,10 @@ u32 RunFrame()
}
else if (CPUStop & 0x0FFF)
{
- DMAs[0]->Run();
- if (!(CPUStop & 0x80000000)) DMAs[1]->Run();
- if (!(CPUStop & 0x80000000)) DMAs[2]->Run();
- if (!(CPUStop & 0x80000000)) DMAs[3]->Run();
+ DMAs[0]->Run();
+ if (!(CPUStop & 0x80000000)) DMAs[1]->Run();
+ if (!(CPUStop & 0x80000000)) DMAs[2]->Run();
+ if (!(CPUStop & 0x80000000)) DMAs[3]->Run();
if (ConsoleType == 1) DSi::RunNDMAs(0);
}
else
@@ -962,10 +962,10 @@ u32 RunFrame()
if (CPUStop & 0x0FFF0000)
{
- DMAs[4]->Run();
- DMAs[5]->Run();
- DMAs[6]->Run();
- DMAs[7]->Run();
+ DMAs[4]->Run();
+ DMAs[5]->Run();
+ DMAs[6]->Run();
+ DMAs[7]->Run();
if (ConsoleType == 1) DSi::RunNDMAs(1);
}
else
@@ -999,6 +999,9 @@ u32 RunFrame()
ARM7Timestamp-SysTimestamp,
GPU3D::Timestamp-SysTimestamp);
#endif
+ SPU::TransferOutput();
+
+ NDSCart::FlushSRAMFile();
NumFrames++;
@@ -1009,10 +1012,14 @@ u32 RunFrame()
{
#ifdef JIT_ENABLED
if (Config::JIT_Enable)
- return RunFrame();
+ return NDS::ConsoleType == 1
+ ? RunFrame()
+ : RunFrame();
else
#endif
- return RunFrame();
+ return NDS::ConsoleType == 1
+ ? RunFrame()
+ : RunFrame();
}
void Reschedule(u64 target)
@@ -1470,7 +1477,7 @@ void HandleTimerOverflow(u32 tid)
{
Timer* timer = &Timers[tid];
- timer->Counter += timer->Reload << 16;
+ timer->Counter += (timer->Reload << 10);
if (timer->Cnt & (1<<6))
SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
@@ -1486,11 +1493,11 @@ void HandleTimerOverflow(u32 tid)
if ((timer->Cnt & 0x84) != 0x84)
break;
- timer->Counter += 0x10000;
- if (timer->Counter >> 16)
+ timer->Counter += (1 << 10);
+ if (!(timer->Counter >> 26))
break;
- timer->Counter = timer->Reload << 16;
+ timer->Counter = timer->Reload << 10;
if (timer->Cnt & (1<<6))
SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
@@ -1505,8 +1512,13 @@ void RunTimer(u32 tid, s32 cycles)
u32 oldcount = timer->Counter;
timer->Counter += (cycles << timer->CycleShift);
- if (timer->Counter < oldcount)
+ //if (timer->Counter < oldcount)
+ // HandleTimerOverflow(tid);
+ while (timer->Counter >> 26)
+ {
+ timer->Counter -= (1 << 26);
HandleTimerOverflow(tid);
+ }
}
void RunTimers(u32 cpu)
@@ -1623,7 +1635,7 @@ u16 TimerGetCounter(u32 timer)
RunTimers(timer>>2);
u32 ret = Timers[timer].Counter;
- return ret >> 16;
+ return ret >> 10;
}
void TimerStart(u32 id, u16 cnt)
@@ -1633,11 +1645,11 @@ void TimerStart(u32 id, u16 cnt)
u16 newstart = cnt & (1<<7);
timer->Cnt = cnt;
- timer->CycleShift = 16 - TimerPrescaler[cnt & 0x03];
+ timer->CycleShift = 10 - TimerPrescaler[cnt & 0x03];
if ((!curstart) && newstart)
{
- timer->Counter = timer->Reload << 16;
+ timer->Counter = timer->Reload << 10;
/*if ((cnt & 0x84) == 0x80)
{
@@ -1824,14 +1836,14 @@ void debug(u32 param)
fclose(shit);*/
FILE*
- shit = fopen("debug/picto9.bin", "wb");
+ shit = fopen("debug/power9.bin", "wb");
for (u32 i = 0x02000000; i < 0x04000000; i+=4)
{
u32 val = DSi::ARM9Read32(i);
fwrite(&val, 4, 1, shit);
}
fclose(shit);
- shit = fopen("debug/picto7.bin", "wb");
+ shit = fopen("debug/power7.bin", "wb");
for (u32 i = 0x02000000; i < 0x04000000; i+=4)
{
u32 val = DSi::ARM7Read32(i);
@@ -3001,6 +3013,7 @@ u32 ARM9IORead32(u32 addr)
case 0x04000130: return (KeyInput & 0xFFFF) | (KeyCnt << 16);
case 0x04000180: return IPCSync9;
+ case 0x04000184: return ARM9IORead16(addr);
case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
case 0x040001A4: return NDSCart::ROMCnt;
@@ -3115,6 +3128,10 @@ void ARM9IOWrite8(u32 addr, u8 val)
NDSCart::WriteSPIData(val);
return;
+ case 0x04000188:
+ ARM9IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24));
+ return;
+
case 0x040001A8: NDSCart::ROMCommand[0] = val; return;
case 0x040001A9: NDSCart::ROMCommand[1] = val; return;
case 0x040001AA: NDSCart::ROMCommand[2] = val; return;
@@ -3228,7 +3245,11 @@ void ARM9IOWrite16(u32 addr, u16 val)
SetIRQ(0, IRQ_IPCRecv);
if (val & 0x4000)
IPCFIFOCnt9 &= ~0x4000;
- IPCFIFOCnt9 = val & 0x8404;
+ IPCFIFOCnt9 = (val & 0x8404) | (IPCFIFOCnt9 & 0x4000);
+ return;
+
+ case 0x04000188:
+ ARM9IOWrite32(addr, val | (val << 16));
return;
case 0x040001A0:
@@ -3378,10 +3399,11 @@ void ARM9IOWrite32(u32 addr, u32 val)
case 0x04000130:
KeyCnt = val >> 16;
return;
+
case 0x04000180:
+ case 0x04000184:
ARM9IOWrite16(addr, val);
return;
-
case 0x04000188:
if (IPCFIFOCnt9 & 0x8000)
{
@@ -3640,6 +3662,7 @@ u32 ARM7IORead32(u32 addr)
case 0x04000138: return RTC::Read();
case 0x04000180: return IPCSync7;
+ case 0x04000184: return ARM7IORead16(addr);
case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
case 0x040001A4: return NDSCart::ROMCnt;
@@ -3716,6 +3739,10 @@ void ARM7IOWrite8(u32 addr, u8 val)
case 0x04000138: RTC::Write(val, true); return;
+ case 0x04000188:
+ ARM7IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24));
+ return;
+
case 0x040001A0:
if (ExMemCnt[0] & (1<<11))
{
@@ -3821,7 +3848,11 @@ void ARM7IOWrite16(u32 addr, u16 val)
SetIRQ(1, IRQ_IPCRecv);
if (val & 0x4000)
IPCFIFOCnt7 &= ~0x4000;
- IPCFIFOCnt7 = val & 0x8404;
+ IPCFIFOCnt7 = (val & 0x8404) | (IPCFIFOCnt7 & 0x4000);
+ return;
+
+ case 0x04000188:
+ ARM7IOWrite32(addr, val | (val << 16));
return;
case 0x040001A0:
@@ -3940,6 +3971,7 @@ void ARM7IOWrite32(u32 addr, u32 val)
case 0x04000138: RTC::Write(val & 0xFFFF, false); return;
case 0x04000180:
+ case 0x04000184:
ARM7IOWrite16(addr, val);
return;
case 0x04000188:
@@ -3984,6 +4016,11 @@ void ARM7IOWrite32(u32 addr, u32 val)
case 0x040001B0: *(u32*)&ROMSeed0[8] = val; return;
case 0x040001B4: *(u32*)&ROMSeed1[8] = val; return;
+ case 0x040001C0:
+ SPI::WriteCnt(val & 0xFFFF);
+ SPI::WriteData((val >> 16) & 0xFF);
+ return;
+
case 0x04000208: IME[1] = val & 0x1; UpdateIRQ(1); return;
case 0x04000210: IE[1] = val; UpdateIRQ(1); return;
case 0x04000214: IF[1] &= ~val; UpdateIRQ(1); return;
diff --git a/src/NDS.h b/src/NDS.h
index 046d84b6..98a0f7d6 100644
--- a/src/NDS.h
+++ b/src/NDS.h
@@ -46,6 +46,8 @@ enum
Event_DSi_SDMMCTransfer,
Event_DSi_SDIOTransfer,
Event_DSi_NWifi,
+ Event_DSi_CamIRQ,
+ Event_DSi_CamTransfer,
Event_DSi_RAMSizeChange,
@@ -82,7 +84,7 @@ enum
IRQ_IPCSendDone,
IRQ_IPCRecv,
IRQ_CartSendDone, // TODO: less misleading name
- IRQ_CartIREQMC, // IRQ triggered by game cart (example: Pok�mon Typing Adventure, BT controller)
+ IRQ_CartIREQMC, // IRQ triggered by game cart (example: Pokémon Typing Adventure, BT controller)
IRQ_GXFIFO,
IRQ_LidOpen,
IRQ_SPI,
diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp
index 077bf48c..2d8396ad 100644
--- a/src/NDSCart.cpp
+++ b/src/NDSCart.cpp
@@ -37,6 +37,7 @@ u8* SRAM;
u32 SRAMLength;
char SRAMPath[1024];
+bool SRAMFileDirty;
void (*WriteFunc)(u8 val, bool islast);
@@ -445,14 +446,21 @@ void Write(u8 val, u32 hold)
break;
}
- if (islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0))
+ SRAMFileDirty |= islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0);
+}
+
+void FlushSRAMFile()
+{
+ if (!SRAMFileDirty)
+ return;
+ // A save write marked the data dirty: write the whole SRAM buffer back to SRAMPath.
+ SRAMFileDirty = false;
+ // NOTE: the flag is already cleared here, so a failed open is not retried until it is set again.
+ FILE* f = Platform::OpenFile(SRAMPath, "wb");
+ if (f)
{
- FILE* f = Platform::OpenFile(SRAMPath, "wb");
- if (f)
- {
- fwrite(SRAM, SRAMLength, 1, f);
- fclose(f);
- }
+ fwrite(SRAM, SRAMLength, 1, f);
+ fclose(f);
}
}
@@ -1034,6 +1042,11 @@ void RelocateSave(const char* path, bool write)
NDSCart_SRAM::RelocateSave(path, write);
}
+void FlushSRAMFile()
+{
+ NDSCart_SRAM::FlushSRAMFile(); // forwards to the SRAM backend, which returns early unless its dirty flag is set
+}
+
int ImportSRAM(const u8* data, u32 length)
{
memcpy(NDSCart_SRAM::SRAM, data, std::min(length, NDSCart_SRAM::SRAMLength));
diff --git a/src/NDSCart.h b/src/NDSCart.h
index 9fe916db..7d3f4a15 100644
--- a/src/NDSCart.h
+++ b/src/NDSCart.h
@@ -46,6 +46,9 @@ void DoSavestate(Savestate* file);
void DecryptSecureArea(u8* out);
bool LoadROM(const char* path, const char* sram, bool direct);
+
+void FlushSRAMFile();
+
void RelocateSave(const char* path, bool write);
int ImportSRAM(const u8* data, u32 length);
diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h
new file mode 100644
index 00000000..124ba76f
--- /dev/null
+++ b/src/NonStupidBitfield.h
@@ -0,0 +1,149 @@
+#ifndef NONSTUPIDBITFIELD_H
+#define NONSTUPIDBITFIELD_H
+
+#include "types.h"
+
+#include
+
+#include
+#include
+
+// like std::bitset but less stupid and optimised for
+// our use case (keeping track of memory invalidations)
+
+template <u32 Size>
+struct NonStupidBitField
+{
+    static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");
+    static const u32 DataLength = Size / 8;
+    u8 Data[DataLength]; // bit i is stored in Data[i >> 3], at position (i & 7)
+
+    // Proxy for a single bit, handed out by operator[].
+    struct Ref
+    {
+        NonStupidBitField& BitField;
+        u32 Idx;
+
+        operator bool() { return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7)); }
+
+        Ref& operator=(bool set)
+        {
+            BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
+            BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
+            return *this;
+        }
+    };
+
+    // Walks the indices of the set bits in ascending order.
+    struct Iterator
+    {
+        NonStupidBitField& BitField;
+        u32 DataIdx;       // byte offset of the word being scanned
+        u32 BitIdx;        // position of the current bit inside that word
+        u64 RemainingBits; // set bits of the current word not visited yet
+
+        u32 operator*() { return DataIdx * 8 + BitIdx; }
+
+        // NOTE: only DataIdx is compared, which is sufficient for "!= End()" loops
+        bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; }
+        bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; }
+
+        // Moves to the next set bit, reading the storage in words of type T (little-endian layout).
+        template <typename T>
+        void Next()
+        {
+            while (RemainingBits == 0 && DataIdx < DataLength)
+            {
+                DataIdx += sizeof(T);
+                // stop at the end of the storage instead of loading the word past it
+                RemainingBits = (DataIdx < DataLength) ? *(T*)&BitField.Data[DataIdx] : 0;
+            }
+
+            // nothing left: this iterator now equals End(), and ctz(0) would be undefined
+            if (RemainingBits == 0)
+                return;
+
+            BitIdx = __builtin_ctzll(RemainingBits);
+            RemainingBits &= ~(1ULL << BitIdx);
+        }
+
+        Iterator operator++(int)
+        {
+            Iterator prev(*this);
+            ++*this;
+            return prev;
+        }
+
+        Iterator& operator++()
+        {
+            // scan with the widest word that evenly divides the storage (same choice as Begin())
+            if ((DataLength % 8) == 0)
+                Next<u64>();
+            else if ((DataLength % 4) == 0)
+                Next<u32>();
+            else if ((DataLength % 2) == 0)
+                Next<u16>();
+            else
+                Next<u8>();
+
+            return *this;
+        }
+    };
+
+    // Sets the bits [start, start + size) and clears all others; the range is not bounds-checked.
+    NonStupidBitField(u32 start, u32 size)
+    {
+        memset(Data, 0, sizeof(Data));
+
+        if (size == 0)
+            return;
+
+        u32 lastBit = start + size - 1;
+        u32 firstByte = start >> 3, lastByte = lastBit >> 3;
+        u8 headMask = (u8)(0xFF << (start & 0x7));         // bits at or above start in the first byte
+        u8 tailMask = (u8)(0xFF >> (7 - (lastBit & 0x7))); // bits at or below lastBit in the last byte
+
+        if (firstByte == lastByte)
+        {
+            Data[firstByte] = headMask & tailMask;
+            return;
+        }
+
+        Data[firstByte] = headMask;
+        memset(Data + firstByte + 1, 0xFF, lastByte - firstByte - 1);
+        Data[lastByte] = tailMask;
+    }
+
+    NonStupidBitField() { memset(Data, 0, sizeof(Data)); }
+
+    Iterator End() { return Iterator{*this, DataLength, 0, 0}; }
+    Iterator Begin()
+    {
+        if ((DataLength % 8) == 0)
+            return ++Iterator{*this, 0, 0, *(u64*)Data};
+        else if ((DataLength % 4) == 0)
+            return ++Iterator{*this, 0, 0, *(u32*)Data};
+        else if ((DataLength % 2) == 0)
+            return ++Iterator{*this, 0, 0, *(u16*)Data};
+        else
+            return ++Iterator{*this, 0, 0, *Data};
+    }
+
+    Ref operator[](u32 idx) { return Ref{*this, idx}; }
+
+    NonStupidBitField& operator|=(const NonStupidBitField& other)
+    {
+        for (u32 i = 0; i < DataLength; i++)
+            Data[i] |= other.Data[i];
+        return *this;
+    }
+    NonStupidBitField& operator&=(const NonStupidBitField& other)
+    {
+        for (u32 i = 0; i < DataLength; i++)
+            Data[i] &= other.Data[i];
+        return *this;
+    }
+};
+
+
+#endif
\ No newline at end of file
diff --git a/src/OpenGLSupport.h b/src/OpenGLSupport.h
index 925c0ad0..44c511f5 100644
--- a/src/OpenGLSupport.h
+++ b/src/OpenGLSupport.h
@@ -23,8 +23,13 @@
#include
// TODO: different includes for each platform
-#include
-#include
+#ifdef __APPLE__
+ #include