More optimizations and simplifications

This commit is contained in:
Sergio Martin 2024-01-19 19:36:33 +01:00
parent 8230715934
commit d7f6144209
7 changed files with 6 additions and 106 deletions

View File

@ -1,9 +1,9 @@
quickerNES
-----------
quickerNES is an attempt at modernizing and improving the performance of [quickNES](https://github.com/kode54/QuickNES). The goals for this project are, in order of importance:
quickerNES is an attempt at modernizing [quickNES](https://github.com/kode54/QuickNES). The goals for this project are, in order of importance:
- Improve overall emulation performance for modern (x86) CPUs
- Improve overall emulation performance for modern (x86) CPUs (portability to other systems not guaranteed)
- Modernize the code base with best programming practices, including CI tests, benchmarks, and coverage analysis
- Add support for more mappers, controllers, and features supported by other emulators
- Improve accuracy, if possible
@ -17,6 +17,7 @@ Changes
+ Forced alignment at the start of a page to prevent crossing cache line boundaries
+ Simplifying the 6502 CPU instruction fetching and decoding
- Minimize compiled code size to reduce pressure on L1i cache
- Assuming little endianness to reduce unnecessary conversion operations (not portable to big-endian systems)
- Reduce heap allocations
- General code reorganization (make it header only to help compiler optimizations)

View File

@ -252,7 +252,6 @@ inline void Nes_Emu::set_pixels( void* p, long n )
emu.ppu.host_row_bytes = n;
}
inline uint8_t const* Nes_Emu::chr_mem()
{
return cart()->chr_size() ? (uint8_t*) cart()->chr() : emu.ppu.impl->chr_ram;

View File

@ -1,54 +1,22 @@
#pragma once
// CPU Byte Order Utilities
// Nes_Emu 0.7.0
// Compile-time CPU family detection based on predefined compiler macros.
// BLARGG_CPU_CISC: Defined if CPU has very few general-purpose registers (< 16)
// BLARGG_CPU_X86: Defined (together with BLARGG_CPU_CISC) when any of the
// compiler macros tested below is set. Note that the list also contains the
// IA-64 macros (_M_IA64, __ia64__), not only 32/64-bit x86 ones.
#if defined (_M_IX86) || defined (_M_IA64) || defined (__i486__) || \
defined (__x86_64__) || defined (__ia64__)
#define BLARGG_CPU_X86 1
#define BLARGG_CPU_CISC 1
#endif
// BLARGG_CPU_POWERPC: Defined when one of the PowerPC compiler macros is set.
#if defined (__powerpc__) || defined (__ppc__) || defined (__POWERPC__) || defined (__powerc)
#define BLARGG_CPU_POWERPC 1
#endif
// Reads a 16-bit unsigned value stored least-significant byte first at p.
// The value is assembled byte by byte, so p needs no particular alignment and
// the result does not depend on the host byte order.
// p must point to at least 2 readable bytes.
inline unsigned get_le16( void const* p ) {
    // Named cast keeps the pointee const: the buffer is only ever read.
    unsigned char const* const bytes = static_cast<unsigned char const*>( p );
    return bytes [1] * 0x100u + bytes [0];
}
// Reads a 16-bit unsigned value stored most-significant byte first at p.
// The value is assembled byte by byte, so p needs no particular alignment and
// the result does not depend on the host byte order.
// p must point to at least 2 readable bytes.
inline unsigned get_be16( void const* p ) {
    // Named cast keeps the pointee const: the buffer is only ever read.
    unsigned char const* const bytes = static_cast<unsigned char const*>( p );
    return bytes [0] * 0x100u + bytes [1];
}
// Reads a 32-bit unsigned value stored least-significant byte first at p.
// The value is assembled byte by byte, so p needs no particular alignment and
// the result does not depend on the host byte order.
// p must point to at least 4 readable bytes.
inline unsigned long get_le32( void const* p ) {
    // Named cast keeps the pointee const: the buffer is only ever read.
    unsigned char const* const bytes = static_cast<unsigned char const*>( p );
    return bytes [3] * 0x01000000ul +
           bytes [2] * 0x00010000ul +
           bytes [1] * 0x00000100ul +
           bytes [0];
}
// Reads a 32-bit unsigned value stored most-significant byte first at p.
// The value is assembled byte by byte, so p needs no particular alignment and
// the result does not depend on the host byte order.
// p must point to at least 4 readable bytes.
inline unsigned long get_be32( void const* p ) {
    // Named cast keeps the pointee const: the buffer is only ever read.
    unsigned char const* const bytes = static_cast<unsigned char const*>( p );
    return bytes [0] * 0x01000000ul +
           bytes [1] * 0x00010000ul +
           bytes [2] * 0x00000100ul +
           bytes [3];
}
// Stores the low 16 bits of n at p, least-significant byte first.
// Exactly two bytes are written, one at a time, so p needs no particular
// alignment; any bits of n above bit 15 are discarded.
inline void set_le16( void* p, unsigned n ) {
    unsigned char* const out = static_cast<unsigned char*>( p );
    out [0] = static_cast<unsigned char>( n );
    out [1] = static_cast<unsigned char>( n >> 8 );
}
// Stores the low 16 bits of n at p, most-significant byte first.
// Exactly two bytes are written, one at a time, so p needs no particular
// alignment; any bits of n above bit 15 are discarded.
inline void set_be16( void* p, unsigned n ) {
    unsigned char* const out = static_cast<unsigned char*>( p );
    out [1] = static_cast<unsigned char>( n );
    out [0] = static_cast<unsigned char>( n >> 8 );
}
// Stores the low 32 bits of n at p, least-significant byte first.
// Exactly four bytes are written, one at a time, so p needs no particular
// alignment.
inline void set_le32( void* p, unsigned long n ) {
    unsigned char* const out = static_cast<unsigned char*>( p );
    out [0] = static_cast<unsigned char>( n );
    out [1] = static_cast<unsigned char>( n >> 8 );
    out [2] = static_cast<unsigned char>( n >> 16 );
    out [3] = static_cast<unsigned char>( n >> 24 );
}
inline void set_be32( void* p, unsigned long n ) {
((unsigned char*) p) [0] = (unsigned char) (n >> 24);
((unsigned char*) p) [1] = (unsigned char) (n >> 16);
@ -56,34 +24,9 @@ inline void set_be32( void* p, unsigned long n ) {
((unsigned char*) p) [3] = (unsigned char) n;
}
// Opt-in fast paths: this whole section is compiled only when
// BLARGG_NONPORTABLE evaluates to non-zero.
#if BLARGG_NONPORTABLE
// Optimized implementation if byte order is known
// With MSB_FIRST defined, the big-endian macros become direct 16/32-bit
// accesses through a cast pointer; otherwise the little-endian macros do.
// No GET_LE32 is defined in either branch.
// NOTE(review): the direct accesses dereference addr as uint16_t/uint32_t, so
// they presumably rely on the target tolerating such accesses at any address -- confirm.
#ifdef MSB_FIRST
#define GET_BE16( addr ) (*(uint16_t*) (addr))
#define GET_BE32( addr ) (*(uint32_t*) (addr))
#define SET_BE16( addr, data ) (void) (*(uint16_t*) (addr) = (data))
#define SET_BE32( addr, data ) (void) (*(uint32_t*) (addr) = (data))
#else
#define GET_LE16( addr ) (*(uint16_t*) (addr))
#define SET_LE16( addr, data ) (void) (*(uint16_t*) (addr) = (data))
#define SET_LE32( addr, data ) (void) (*(uint32_t*) (addr) = (data))
#endif
// Little-endian accessors for PowerPC when built with a compiler that defines
// __MWERKS__, using the __lhbrx/__sthbrx/__stwbrx intrinsics.
#if BLARGG_CPU_POWERPC && defined (__MWERKS__)
// PowerPC has special byte-reversed instructions
// to do: assumes that PowerPC is running in big-endian mode
// to do: implement for other compilers which don't support these macros
#define GET_LE16( addr ) (__lhbrx( (addr), 0 ))
#define SET_LE16( addr, data ) (__sthbrx( (data), (addr), 0 ))
#define SET_LE32( addr, data ) (__stwbrx( (data), (addr), 0 ))
#endif
#endif
// Portable fallback: when GET_LE16 has not been defined yet, route the
// little-endian macros to the byte-wise helper functions.
// Only GET_LE16 is tested, so SET_LE16 and SET_LE32 are expected to be
// defined (or left undefined) together with it.
#ifndef GET_LE16
#define GET_LE16( addr ) get_le16( addr )
#define SET_LE16( addr, data ) set_le16( addr, data )
#define SET_LE32( addr, data ) set_le32( addr, data )
#endif
// Little-endian accessors implemented as plain 16/32-bit loads and stores
// through a cast pointer. No byte swapping is performed, so the bytes in
// memory are little-endian only when the host itself is little-endian.
// NOTE(review): the casts assume addr can legally be accessed as
// uint16_t/uint32_t (alignment and aliasing) -- confirm for supported targets.
#define GET_LE16( addr ) (*(uint16_t*) (addr))
#define SET_LE16( addr, data ) (void) (*(uint16_t*) (addr) = (data))
#define SET_LE32( addr, data ) (void) (*(uint32_t*) (addr) = (data))
#ifndef GET_BE16
#define GET_BE16( addr ) get_be16( addr )

View File

@ -28,13 +28,6 @@ struct namco106_state_t
uint8_t irq_pending;
uint8_t unused1 [1];
namco_state_t sound_state;
// Passes irq_ctr and every entry of sound_state.delays through set_le16,
// rewriting each 16-bit field in place.
void swap()
{
    set_le16( &irq_ctr, irq_ctr );
    for ( auto& delay : sound_state.delays )
        set_le16( &delay, delay );
}
};
static_assert( sizeof (namco106_state_t) == 20 + sizeof (namco_state_t) );
@ -187,25 +180,15 @@ public:
}
}
// Rewrites irq_ctr and each sound_state.delays entry in place via set_le16.
void swap()
{
    auto const delay_count = sizeof sound_state.delays / sizeof sound_state.delays [0];

    set_le16( &irq_ctr, irq_ctr );

    unsigned index = 0;
    while ( index < delay_count )
    {
        set_le16( &sound_state.delays [index], sound_state.delays [index] );
        index++;
    }
}
// Saves this mapper's state into `out`.
// The sound object's state is first captured into sound_state, then the base
// class Nes_Mapper::save_state is asked to fill `out`.
void save_state( mapper_state_t& out )
{
sound.save_state( &sound_state );
// Rewrite the 16-bit fields through set_le16 before the base class saves them.
namco106_state_t::swap();
Nes_Mapper::save_state( out );
// Second swap after saving -- presumably puts the in-memory fields back into
// host byte order so they stay usable; confirm.
namco106_state_t::swap();
}
// Restores this mapper's state from `in`.
// The base class Nes_Mapper::read_state runs first, then the restored
// sound_state is handed to the sound object.
void read_state( mapper_state_t const& in )
{
Nes_Mapper::read_state( in );
// Run the freshly loaded 16-bit fields through swap() before they are used.
namco106_state_t::swap();
sound.load_state( sound_state );
}

View File

@ -39,13 +39,6 @@ struct vrc6_state_t
uint8_t unused;
vrc6_apu_state_t sound_state;
// Passes next_time and every entry of sound_state.delays through set_le16,
// rewriting each 16-bit field in place.
void swap()
{
    set_le16( &next_time, next_time );
    for ( auto& delay : sound_state.delays )
        set_le16( &delay, delay );
}
};
static_assert( sizeof (vrc6_state_t) == 26 + sizeof (vrc6_apu_state_t) );
@ -74,9 +67,7 @@ public:
// Saves this mapper's state into `out`.
// The sound object's state is first captured into sound_state, then the base
// class Nes_Mapper::save_state is asked to fill `out`.
virtual void save_state( mapper_state_t& out )
{
sound.save_state( &sound_state );
// Rewrite the 16-bit fields through set_le16 before the base class saves them.
vrc6_state_t::swap();
Nes_Mapper::save_state( out );
// Second swap after saving -- presumably restores the in-memory fields; confirm.
vrc6_state_t::swap(); // to do: kind of hacky to swap in place
}
virtual void apply_mapping()
@ -153,7 +144,6 @@ public:
void read_state( mapper_state_t const& in )
{
Nes_Mapper::read_state( in );
vrc6_state_t::swap();
// to do: eliminate when format is updated
// old-style registers

View File

@ -30,13 +30,6 @@ struct fme7_state_t
uint8_t command;
uint8_t irq_pending;
fme7_apu_state_t sound_state; // only used when saving/restoring state
// Rewrites irq_count and each sound_state.delays entry in place via set_le16.
void swap()
{
    auto const delay_count = sizeof sound_state.delays / sizeof sound_state.delays [0];

    set_le16( &irq_count, irq_count );
    for ( unsigned slot = 0; slot < delay_count; ++slot )
    {
        set_le16( &sound_state.delays [slot], sound_state.delays [slot] );
    }
}
};
static_assert( sizeof (fme7_state_t) == 18 + sizeof (fme7_apu_state_t) );
@ -66,15 +59,12 @@ public:
// Saves this mapper's state into `out`.
// The sound object's state is first captured into sound_state, then the base
// class Nes_Mapper::save_state is asked to fill `out`.
virtual void save_state( mapper_state_t& out )
{
sound.save_state( &sound_state );
// Rewrite the 16-bit fields through set_le16 before the base class saves them.
fme7_state_t::swap();
Nes_Mapper::save_state( out );
// Second swap after saving -- presumably restores the in-memory fields; confirm.
fme7_state_t::swap(); // to do: kind of hacky to swap in place
}
// Restores this mapper's state from `in`.
// The base class Nes_Mapper::read_state runs first, then the restored
// sound_state is handed to the sound object.
virtual void read_state( mapper_state_t const& in )
{
Nes_Mapper::read_state( in );
// Run the freshly loaded 16-bit fields through swap() before they are used.
fme7_state_t::swap();
sound.load_state( sound_state );
}

View File

@ -55,18 +55,12 @@ public:
// Saves this mapper's state into `out`.
// The sound object's snapshot is first captured into sound_state, then the
// base class Nes_Mapper::save_state is asked to fill `out`.
virtual void save_state( mapper_state_t & out )
{
sound.save_snapshot( &sound_state );
// Rewrite next_time through set_le16 before the base class saves it.
// NOTE(review): next_time is not converted back after saving, so on a host
// where set_le16 changes the stored bytes the live value stays altered -- confirm
// whether that is intended.
set_le16( &next_time, next_time );
Nes_Mapper::save_state( out );
}
// Restores this mapper's state from `in`.
// The base class Nes_Mapper::load_state runs first; next_time is then
// re-read via get_le16 and the sound snapshot is restored from sound_state.
// NOTE(review): the other mappers in this change override read_state rather
// than load_state -- confirm Nes_Mapper declares load_state so this is called.
virtual void load_state( mapper_state_t const& in )
{
Nes_Mapper::load_state( in );
// Decode next_time from its saved little-endian byte layout.
next_time = get_le16( &next_time );
// in.size is forwarded to the sound object -- presumably the size of the saved data; confirm.
sound.load_snapshot( sound_state, in.size );
}