More optimizations and simplifications
This commit is contained in:
parent
8230715934
commit
d7f6144209
|
@ -1,9 +1,9 @@
|
|||
quickerNES
|
||||
-----------
|
||||
|
||||
quickerNES is an attempt to modernize and improve the performance of [quickNES](https://github.com/kode54/QuickNES). The goals for this project are, in order of importance:
|
||||
quickerNES is an attempt to modernize [quickNES](https://github.com/kode54/QuickNES). The goals for this project are, in order of importance:
|
||||
|
||||
- Improve overall emulation performance for modern (x86) CPUs
|
||||
- Improve overall emulation performance for modern (x86) CPUs (portability to other systems not guaranteed)
|
||||
- Modernize the code base with best programming practices, including CI tests, benchmarks, and coverage analysis
|
||||
- Add support for more mappers, controllers, and features supported by other emulators
|
||||
- Improve accuracy, if possible
|
||||
|
@ -17,6 +17,7 @@ Changes
|
|||
+ Forced alignment at the start of a page to prevent crossing cache line boundaries
|
||||
+ Simplifying the 6502 CPU instruction fetching and decoding
|
||||
- Minimize compiled code size to reduce pressure on L1i cache
|
||||
- Assuming little endianness to reduce unnecessary conversion operations (not portable to big endian systems)
|
||||
- Reduce heap allocations
|
||||
- General code reorganization (make it header only to help compiler optimizations)
|
||||
|
||||
|
|
|
@ -252,7 +252,6 @@ inline void Nes_Emu::set_pixels( void* p, long n )
|
|||
emu.ppu.host_row_bytes = n;
|
||||
}
|
||||
|
||||
|
||||
inline uint8_t const* Nes_Emu::chr_mem()
|
||||
{
|
||||
return cart()->chr_size() ? (uint8_t*) cart()->chr() : emu.ppu.impl->chr_ram;
|
||||
|
|
|
@ -1,54 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
// CPU Byte Order Utilities
|
||||
|
||||
// Nes_Emu 0.7.0
|
||||
|
||||
// BLARGG_CPU_CISC: Defined if CPU has very few general-purpose registers (< 16)
|
||||
#if defined (_M_IX86) || defined (_M_IA64) || defined (__i486__) || \
|
||||
defined (__x86_64__) || defined (__ia64__)
|
||||
#define BLARGG_CPU_X86 1
|
||||
#define BLARGG_CPU_CISC 1
|
||||
#endif
|
||||
|
||||
#if defined (__powerpc__) || defined (__ppc__) || defined (__POWERPC__) || defined (__powerc)
|
||||
#define BLARGG_CPU_POWERPC 1
|
||||
#endif
|
||||
|
||||
// Reads the two bytes at p and combines them as a little-endian 16-bit value:
// p[0] is the low byte and p[1] is the high byte.
// The bytes are read individually through an unsigned char pointer, so the
// result does not depend on the byte order of the host.
inline unsigned get_le16( void const* p ) {
|
||||
return ((unsigned char*) p) [1] * 0x100u +
|
||||
((unsigned char*) p) [0];
|
||||
}
|
||||
// Reads the two bytes at p and combines them as a big-endian 16-bit value:
// p[0] is the high byte and p[1] is the low byte.
// The bytes are read individually through an unsigned char pointer, so the
// result does not depend on the byte order of the host.
inline unsigned get_be16( void const* p ) {
|
||||
return ((unsigned char*) p) [0] * 0x100u +
|
||||
((unsigned char*) p) [1];
|
||||
}
|
||||
// Reads the four bytes at p and combines them as a little-endian 32-bit value:
// p[0] is the least significant byte and p[3] the most significant.
// Each byte is scaled by an unsigned long constant, so the sum is computed
// in unsigned long and returned as such.
inline unsigned long get_le32( void const* p ) {
|
||||
return ((unsigned char*) p) [3] * 0x01000000ul +
|
||||
((unsigned char*) p) [2] * 0x00010000ul +
|
||||
((unsigned char*) p) [1] * 0x00000100ul +
|
||||
((unsigned char*) p) [0];
|
||||
}
|
||||
// Reads the four bytes at p and combines them as a big-endian 32-bit value:
// p[0] is the most significant byte and p[3] the least significant.
// Each byte is scaled by an unsigned long constant, so the sum is computed
// in unsigned long and returned as such.
inline unsigned long get_be32( void const* p ) {
|
||||
return ((unsigned char*) p) [0] * 0x01000000ul +
|
||||
((unsigned char*) p) [1] * 0x00010000ul +
|
||||
((unsigned char*) p) [2] * 0x00000100ul +
|
||||
((unsigned char*) p) [3];
|
||||
}
|
||||
// Stores the low 16 bits of n at p in little-endian order:
// p[0] receives bits 0-7 and p[1] receives bits 8-15.
// Any bits of n above bit 15 are discarded by the unsigned char casts.
inline void set_le16( void* p, unsigned n ) {
|
||||
((unsigned char*) p) [1] = (unsigned char) (n >> 8);
|
||||
((unsigned char*) p) [0] = (unsigned char) n;
|
||||
}
|
||||
// Stores the low 16 bits of n at p in big-endian order:
// p[0] receives bits 8-15 and p[1] receives bits 0-7.
// Any bits of n above bit 15 are discarded by the unsigned char casts.
inline void set_be16( void* p, unsigned n ) {
|
||||
((unsigned char*) p) [0] = (unsigned char) (n >> 8);
|
||||
((unsigned char*) p) [1] = (unsigned char) n;
|
||||
}
|
||||
// Stores the low 32 bits of n at p in little-endian order:
// p[0] receives bits 0-7, p[1] bits 8-15, p[2] bits 16-23, p[3] bits 24-31.
// Each byte is written separately through an unsigned char pointer.
inline void set_le32( void* p, unsigned long n ) {
|
||||
((unsigned char*) p) [3] = (unsigned char) (n >> 24);
|
||||
((unsigned char*) p) [2] = (unsigned char) (n >> 16);
|
||||
((unsigned char*) p) [1] = (unsigned char) (n >> 8);
|
||||
((unsigned char*) p) [0] = (unsigned char) n;
|
||||
}
|
||||
inline void set_be32( void* p, unsigned long n ) {
|
||||
((unsigned char*) p) [0] = (unsigned char) (n >> 24);
|
||||
((unsigned char*) p) [1] = (unsigned char) (n >> 16);
|
||||
|
@ -56,34 +24,9 @@ inline void set_be32( void* p, unsigned long n ) {
|
|||
((unsigned char*) p) [3] = (unsigned char) n;
|
||||
}
|
||||
|
||||
#if BLARGG_NONPORTABLE
|
||||
// Optimized implementation if byte order is known
|
||||
#ifdef MSB_FIRST
|
||||
#define GET_BE16( addr ) (*(uint16_t*) (addr))
|
||||
#define GET_BE32( addr ) (*(uint32_t*) (addr))
|
||||
#define SET_BE16( addr, data ) (void) (*(uint16_t*) (addr) = (data))
|
||||
#define SET_BE32( addr, data ) (void) (*(uint32_t*) (addr) = (data))
|
||||
#else
|
||||
#define GET_LE16( addr ) (*(uint16_t*) (addr))
|
||||
#define SET_LE16( addr, data ) (void) (*(uint16_t*) (addr) = (data))
|
||||
#define SET_LE32( addr, data ) (void) (*(uint32_t*) (addr) = (data))
|
||||
#endif
|
||||
|
||||
#if BLARGG_CPU_POWERPC && defined (__MWERKS__)
|
||||
// PowerPC has special byte-reversed instructions
|
||||
// to do: assumes that PowerPC is running in big-endian mode
|
||||
// to do: implement for other compilers which don't support these macros
|
||||
#define GET_LE16( addr ) (__lhbrx( (addr), 0 ))
|
||||
#define SET_LE16( addr, data ) (__sthbrx( (data), (addr), 0 ))
|
||||
#define SET_LE32( addr, data ) (__stwbrx( (data), (addr), 0 ))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GET_LE16
|
||||
#define GET_LE16( addr ) get_le16( addr )
|
||||
#define SET_LE16( addr, data ) set_le16( addr, data )
|
||||
#define SET_LE32( addr, data ) set_le32( addr, data )
|
||||
#endif
|
||||
#define GET_LE16( addr ) (*(uint16_t*) (addr))
|
||||
#define SET_LE16( addr, data ) (void) (*(uint16_t*) (addr) = (data))
|
||||
#define SET_LE32( addr, data ) (void) (*(uint32_t*) (addr) = (data))
|
||||
|
||||
#ifndef GET_BE16
|
||||
#define GET_BE16( addr ) get_be16( addr )
|
||||
|
|
|
@ -28,13 +28,6 @@ struct namco106_state_t
|
|||
uint8_t irq_pending;
|
||||
uint8_t unused1 [1];
|
||||
namco_state_t sound_state;
|
||||
|
||||
// Passes irq_ctr and every element of sound_state.delays through set_le16,
// writing each field's current value back into its own storage.
// NOTE(review): presumably a byte-order fix-up so the saved state has a
// little-endian layout regardless of host — confirm against set_le16.
void swap()
|
||||
{
|
||||
set_le16( &irq_ctr, irq_ctr );
|
||||
// Element count is derived from the array itself via sizeof.
for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ )
|
||||
set_le16( &sound_state.delays [i], sound_state.delays [i] );
|
||||
}
|
||||
};
|
||||
static_assert( sizeof (namco106_state_t) == 20 + sizeof (namco_state_t) );
|
||||
|
||||
|
@ -187,25 +180,15 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// Passes irq_ctr and every element of sound_state.delays through set_le16,
// writing each field's current value back into its own storage.
// NOTE(review): presumably a byte-order fix-up so the saved state has a
// little-endian layout regardless of host — confirm against set_le16.
void swap()
|
||||
{
|
||||
set_le16( &irq_ctr, irq_ctr );
|
||||
// Element count is derived from the array itself via sizeof.
for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ )
|
||||
set_le16( &sound_state.delays [i], sound_state.delays [i] );
|
||||
}
|
||||
|
||||
// Saves this mapper's state into out.
// First asks the sound object to write its state into sound_state, then
// delegates to Nes_Mapper::save_state. The base-class save is bracketed by two
// namco106_state_t::swap() calls, one before and one after it.
// NOTE(review): the second swap presumably restores the in-memory fields to
// host order after the save — confirm.
void save_state( mapper_state_t& out )
|
||||
{
|
||||
sound.save_state( &sound_state );
|
||||
namco106_state_t::swap();
|
||||
Nes_Mapper::save_state( out );
|
||||
namco106_state_t::swap();
|
||||
}
|
||||
|
||||
// Restores this mapper's state from in.
// Delegates to Nes_Mapper::read_state first, then calls
// namco106_state_t::swap(), and finally hands sound_state to the sound object.
// NOTE(review): the swap presumably converts the just-loaded fields from the
// saved byte order — confirm.
void read_state( mapper_state_t const& in )
|
||||
{
|
||||
Nes_Mapper::read_state( in );
|
||||
namco106_state_t::swap();
|
||||
sound.load_state( sound_state );
|
||||
}
|
||||
|
||||
|
|
|
@ -39,13 +39,6 @@ struct vrc6_state_t
|
|||
uint8_t unused;
|
||||
|
||||
vrc6_apu_state_t sound_state;
|
||||
|
||||
// Passes next_time and every element of sound_state.delays through set_le16,
// writing each field's current value back into its own storage.
// NOTE(review): presumably a byte-order fix-up so the saved state has a
// little-endian layout regardless of host — confirm against set_le16.
void swap()
|
||||
{
|
||||
set_le16( &next_time, next_time );
|
||||
// Element count is derived from the array itself via sizeof.
for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ )
|
||||
set_le16( &sound_state.delays [i], sound_state.delays [i] );
|
||||
}
|
||||
};
|
||||
static_assert( sizeof (vrc6_state_t) == 26 + sizeof (vrc6_apu_state_t) );
|
||||
|
||||
|
@ -74,9 +67,7 @@ public:
|
|||
// Saves this mapper's state into out.
// First asks the sound object to write its state into sound_state, then
// delegates to Nes_Mapper::save_state. The base-class save is bracketed by two
// vrc6_state_t::swap() calls, one before and one after it, so the fields are
// swapped in place (see the existing "to do" note below).
virtual void save_state( mapper_state_t& out )
|
||||
{
|
||||
sound.save_state( &sound_state );
|
||||
vrc6_state_t::swap();
|
||||
Nes_Mapper::save_state( out );
|
||||
vrc6_state_t::swap(); // to do: kind of hacky to swap in place
|
||||
}
|
||||
|
||||
virtual void apply_mapping()
|
||||
|
@ -153,7 +144,6 @@ public:
|
|||
void read_state( mapper_state_t const& in )
|
||||
{
|
||||
Nes_Mapper::read_state( in );
|
||||
vrc6_state_t::swap();
|
||||
|
||||
// to do: eliminate when format is updated
|
||||
// old-style registers
|
||||
|
|
|
@ -30,13 +30,6 @@ struct fme7_state_t
|
|||
uint8_t command;
|
||||
uint8_t irq_pending;
|
||||
fme7_apu_state_t sound_state; // only used when saving/restoring state
|
||||
|
||||
// Passes irq_count and every element of sound_state.delays through set_le16,
// writing each field's current value back into its own storage.
// NOTE(review): presumably a byte-order fix-up so the saved state has a
// little-endian layout regardless of host — confirm against set_le16.
void swap()
|
||||
{
|
||||
set_le16( &irq_count, irq_count );
|
||||
// Element count is derived from the array itself via sizeof.
for ( unsigned i = 0; i < sizeof sound_state.delays / sizeof sound_state.delays [0]; i++ )
|
||||
set_le16( &sound_state.delays [i], sound_state.delays [i] );
|
||||
}
|
||||
};
|
||||
static_assert( sizeof (fme7_state_t) == 18 + sizeof (fme7_apu_state_t) );
|
||||
|
||||
|
@ -66,15 +59,12 @@ public:
|
|||
// Saves this mapper's state into out.
// First asks the sound object to write its state into sound_state, then
// delegates to Nes_Mapper::save_state. The base-class save is bracketed by two
// fme7_state_t::swap() calls, one before and one after it, so the fields are
// swapped in place (see the existing "to do" note below).
virtual void save_state( mapper_state_t& out )
|
||||
{
|
||||
sound.save_state( &sound_state );
|
||||
fme7_state_t::swap();
|
||||
Nes_Mapper::save_state( out );
|
||||
fme7_state_t::swap(); // to do: kind of hacky to swap in place
|
||||
}
|
||||
|
||||
// Restores this mapper's state from in.
// Delegates to Nes_Mapper::read_state first, then calls fme7_state_t::swap(),
// and finally hands sound_state to the sound object.
// NOTE(review): the swap presumably converts the just-loaded fields from the
// saved byte order — confirm.
virtual void read_state( mapper_state_t const& in )
|
||||
{
|
||||
Nes_Mapper::read_state( in );
|
||||
fme7_state_t::swap();
|
||||
sound.load_state( sound_state );
|
||||
}
|
||||
|
||||
|
|
|
@ -55,18 +55,12 @@ public:
|
|||
// Saves this mapper's state into out.
// Asks the sound object to write a snapshot into sound_state, passes
// next_time through set_le16 in place, then delegates to
// Nes_Mapper::save_state.
// NOTE(review): unlike a swap-before/swap-after pattern, nothing here converts
// next_time back after the save — confirm this is intended.
virtual void save_state( mapper_state_t & out )
|
||||
{
|
||||
sound.save_snapshot( &sound_state );
|
||||
|
||||
set_le16( &next_time, next_time );
|
||||
|
||||
Nes_Mapper::save_state( out );
|
||||
}
|
||||
|
||||
// Restores this mapper's state from in.
// Delegates to Nes_Mapper::load_state first, then re-reads next_time from its
// own storage through get_le16, and finally passes sound_state together with
// in.size to the sound object's load_snapshot.
// NOTE(review): presumably next_time is stored little-endian in the saved
// state — confirm against the matching save path.
virtual void load_state( mapper_state_t const& in )
|
||||
{
|
||||
Nes_Mapper::load_state( in );
|
||||
|
||||
next_time = get_le16( &next_time );
|
||||
|
||||
sound.load_snapshot( sound_state, in.size );
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue