diff --git a/README.md b/README.md index 1e10e56..2481ee5 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ quickerNES ----------- -quickerNES is an attempt to modernizing and improving the performance of [quickNES](https://github.com/kode54/QuickNES). The goals for this project are, in order of importance: +quickerNES is an attempt to modernize [quickNES](https://github.com/kode54/QuickNES). The goals for this project are, in order of importance: -- Improve overall emulation performance for modern (x86) CPUs +- Improve overall emulation performance for modern (x86) CPUs (portability to other systems not guaranteed) - Modernize the code base with best programming practices, including CI tests, benchmarks, and coverage analysis - Add support for more mappers, controllers, and features supported by other emulators - Improve accuracy, if possible @@ -17,6 +17,7 @@ Changes + Forced alignment at the start of a page to prevent crossing cache line boundaries + Simplifying the 6502 CPU instruction fetching and decoding - Minimize compiled code size to reduce pressure on L1i cache +- Assuming little endianness to reduce unnecessary conversion operations (not portable to big endian systems) - Reduce heap allocations - General code reorganization (make it header only to help compiler optimizations) diff --git a/source/quickerNES/Nes_Emu.h b/source/quickerNES/Nes_Emu.h index abf3f9f..9b7ca7e 100644 --- a/source/quickerNES/Nes_Emu.h +++ b/source/quickerNES/Nes_Emu.h @@ -252,7 +252,6 @@ inline void Nes_Emu::set_pixels( void* p, long n ) emu.ppu.host_row_bytes = n; } - inline uint8_t const* Nes_Emu::chr_mem() { return cart()->chr_size() ? 
(uint8_t*) cart()->chr() : emu.ppu.impl->chr_ram; diff --git a/source/quickerNES/blargg_endian.h b/source/quickerNES/blargg_endian.h index 8785239..066d206 100644 --- a/source/quickerNES/blargg_endian.h +++ b/source/quickerNES/blargg_endian.h @@ -1,54 +1,22 @@ #pragma once // CPU Byte Order Utilities - // Nes_Emu 0.7.0 -// BLARGG_CPU_CISC: Defined if CPU has very few general-purpose registers (< 16) -#if defined (_M_IX86) || defined (_M_IA64) || defined (__i486__) || \ - defined (__x86_64__) || defined (__ia64__) - #define BLARGG_CPU_X86 1 - #define BLARGG_CPU_CISC 1 -#endif - -#if defined (__powerpc__) || defined (__ppc__) || defined (__POWERPC__) || defined (__powerc) - #define BLARGG_CPU_POWERPC 1 -#endif - -inline unsigned get_le16( void const* p ) { - return ((unsigned char*) p) [1] * 0x100u + - ((unsigned char*) p) [0]; -} inline unsigned get_be16( void const* p ) { return ((unsigned char*) p) [0] * 0x100u + ((unsigned char*) p) [1]; } -inline unsigned long get_le32( void const* p ) { - return ((unsigned char*) p) [3] * 0x01000000ul + - ((unsigned char*) p) [2] * 0x00010000ul + - ((unsigned char*) p) [1] * 0x00000100ul + - ((unsigned char*) p) [0]; -} inline unsigned long get_be32( void const* p ) { return ((unsigned char*) p) [0] * 0x01000000ul + ((unsigned char*) p) [1] * 0x00010000ul + ((unsigned char*) p) [2] * 0x00000100ul + ((unsigned char*) p) [3]; } -inline void set_le16( void* p, unsigned n ) { - ((unsigned char*) p) [1] = (unsigned char) (n >> 8); - ((unsigned char*) p) [0] = (unsigned char) n; -} inline void set_be16( void* p, unsigned n ) { ((unsigned char*) p) [0] = (unsigned char) (n >> 8); ((unsigned char*) p) [1] = (unsigned char) n; } -inline void set_le32( void* p, unsigned long n ) { - ((unsigned char*) p) [3] = (unsigned char) (n >> 24); - ((unsigned char*) p) [2] = (unsigned char) (n >> 16); - ((unsigned char*) p) [1] = (unsigned char) (n >> 8); - ((unsigned char*) p) [0] = (unsigned char) n; -} inline void set_be32( void* p, unsigned 
long n ) { ((unsigned char*) p) [0] = (unsigned char) (n >> 24); ((unsigned char*) p) [1] = (unsigned char) (n >> 16); @@ -56,34 +24,9 @@ inline void set_be32( void* p, unsigned long n ) { ((unsigned char*) p) [3] = (unsigned char) n; } -#if BLARGG_NONPORTABLE - // Optimized implementation if byte order is known -#ifdef MSB_FIRST - #define GET_BE16( addr ) (*(uint16_t*) (addr)) - #define GET_BE32( addr ) (*(uint32_t*) (addr)) - #define SET_BE16( addr, data ) (void) (*(uint16_t*) (addr) = (data)) - #define SET_BE32( addr, data ) (void) (*(uint32_t*) (addr) = (data)) -#else - #define GET_LE16( addr ) (*(uint16_t*) (addr)) - #define SET_LE16( addr, data ) (void) (*(uint16_t*) (addr) = (data)) - #define SET_LE32( addr, data ) (void) (*(uint32_t*) (addr) = (data)) -#endif - - #if BLARGG_CPU_POWERPC && defined (__MWERKS__) - // PowerPC has special byte-reversed instructions - // to do: assumes that PowerPC is running in big-endian mode - // to do: implement for other compilers which don't support these macros - #define GET_LE16( addr ) (__lhbrx( (addr), 0 )) - #define SET_LE16( addr, data ) (__sthbrx( (data), (addr), 0 )) - #define SET_LE32( addr, data ) (__stwbrx( (data), (addr), 0 )) - #endif -#endif - -#ifndef GET_LE16 - #define GET_LE16( addr ) get_le16( addr ) - #define SET_LE16( addr, data ) set_le16( addr, data ) - #define SET_LE32( addr, data ) set_le32( addr, data ) -#endif +#define GET_LE16( addr ) (*(uint16_t*) (addr)) +#define SET_LE16( addr, data ) (void) (*(uint16_t*) (addr) = (data)) +#define SET_LE32( addr, data ) (void) (*(uint32_t*) (addr) = (data)) #ifndef GET_BE16 #define GET_BE16( addr ) get_be16( addr ) diff --git a/source/quickerNES/mappers/mapper019.hpp b/source/quickerNES/mappers/mapper019.hpp index 8dd33b6..40a1a14 100644 --- a/source/quickerNES/mappers/mapper019.hpp +++ b/source/quickerNES/mappers/mapper019.hpp @@ -28,13 +28,6 @@ struct namco106_state_t uint8_t irq_pending; uint8_t unused1 [1]; namco_state_t sound_state; - - void swap() - { - 
set_le16( &irq_ctr, irq_ctr ); - for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ ) - set_le16( &sound_state.delays [i], sound_state.delays [i] ); - } }; static_assert( sizeof (namco106_state_t) == 20 + sizeof (namco_state_t) ); @@ -187,25 +180,15 @@ public: } } - void swap() - { - set_le16( &irq_ctr, irq_ctr ); - for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ ) - set_le16( &sound_state.delays [i], sound_state.delays [i] ); - } - void save_state( mapper_state_t& out ) { sound.save_state( &sound_state ); - namco106_state_t::swap(); Nes_Mapper::save_state( out ); - namco106_state_t::swap(); } void read_state( mapper_state_t const& in ) { Nes_Mapper::read_state( in ); - namco106_state_t::swap(); sound.load_state( sound_state ); } diff --git a/source/quickerNES/mappers/mapper024.hpp b/source/quickerNES/mappers/mapper024.hpp index cf5451b..7f7ec39 100644 --- a/source/quickerNES/mappers/mapper024.hpp +++ b/source/quickerNES/mappers/mapper024.hpp @@ -39,13 +39,6 @@ struct vrc6_state_t uint8_t unused; vrc6_apu_state_t sound_state; - - void swap() - { - set_le16( &next_time, next_time ); - for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ ) - set_le16( &sound_state.delays [i], sound_state.delays [i] ); - } }; static_assert( sizeof (vrc6_state_t) == 26 + sizeof (vrc6_apu_state_t) ); @@ -74,9 +67,7 @@ public: virtual void save_state( mapper_state_t& out ) { sound.save_state( &sound_state ); - vrc6_state_t::swap(); Nes_Mapper::save_state( out ); - vrc6_state_t::swap(); // to do: kind of hacky to swap in place } virtual void apply_mapping() @@ -153,7 +144,6 @@ public: void read_state( mapper_state_t const& in ) { Nes_Mapper::read_state( in ); - vrc6_state_t::swap(); // to do: eliminate when format is updated // old-style registers diff --git a/source/quickerNES/mappers/mapper069.hpp b/source/quickerNES/mappers/mapper069.hpp index 65d3d24..3931b05 100644 --- 
a/source/quickerNES/mappers/mapper069.hpp +++ b/source/quickerNES/mappers/mapper069.hpp @@ -30,13 +30,6 @@ struct fme7_state_t uint8_t command; uint8_t irq_pending; fme7_apu_state_t sound_state; // only used when saving/restoring state - - void swap() - { - set_le16( &irq_count, irq_count ); - for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ ) - set_le16( &sound_state.delays [i], sound_state.delays [i] ); - } }; static_assert( sizeof (fme7_state_t) == 18 + sizeof (fme7_apu_state_t) ); @@ -66,15 +59,12 @@ public: virtual void save_state( mapper_state_t& out ) { sound.save_state( &sound_state ); - fme7_state_t::swap(); Nes_Mapper::save_state( out ); - fme7_state_t::swap(); // to do: kind of hacky to swap in place } virtual void read_state( mapper_state_t const& in ) { Nes_Mapper::read_state( in ); - fme7_state_t::swap(); sound.load_state( sound_state ); } diff --git a/source/quickerNES/mappers/mapper085.hpp b/source/quickerNES/mappers/mapper085.hpp index 24cd609..3ea48c0 100644 --- a/source/quickerNES/mappers/mapper085.hpp +++ b/source/quickerNES/mappers/mapper085.hpp @@ -55,18 +55,12 @@ public: virtual void save_state( mapper_state_t & out ) { sound.save_snapshot( &sound_state ); - - set_le16( &next_time, next_time ); - Nes_Mapper::save_state( out ); } virtual void load_state( mapper_state_t const& in ) { Nes_Mapper::load_state( in ); - - next_time = get_le16( &next_time ); - sound.load_snapshot( sound_state, in.size ); }