Changes to the CPU

This commit is contained in:
Sergio Martin 2024-01-20 07:09:31 +01:00
parent 094ab435db
commit 74e6ab9286
2 changed files with 78 additions and 80 deletions

View File

@ -16,6 +16,7 @@ Changes
- Optimizations made in the CPU emulation core, including: - Optimizations made in the CPU emulation core, including:
+ Forced alignment at the start of a page to prevent crossing cache line boundaries + Forced alignment at the start of a page to prevent crossing cache line boundaries
+ Simplifying the 6502 CPU instruction fetching and decoding + Simplifying the 6502 CPU instruction fetching and decoding
+ Multiple branch optimizations
- Minimize compiled code size to reduce pressure on L1i cache - Minimize compiled code size to reduce pressure on L1i cache
- Assuming little endianness to reduce unnecessary conversion operations (not portable to big-endian systems) - Assuming little endianness to reduce unnecessary conversion operations (not portable to big-endian systems)
- Reduce heap allocations - Reduce heap allocations

View File

@ -44,7 +44,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#define INC_DEC_XY( reg, n ) reg = uint8_t (nz = reg + n); goto loop; #define INC_DEC_XY( reg, n ) reg = uint8_t (nz = reg + n); goto loop;
#define IND_Y(r,c) { \ #define IND_Y(r,c) { \
int temp = READ_LOW( data ) + y; \ int32_t temp = READ_LOW( data ) + y; \
data = temp + 0x100 * READ_LOW( uint8_t (data + 1) ); \ data = temp + 0x100 * READ_LOW( uint8_t (data + 1) ); \
if (c) HANDLE_PAGE_CROSSING( temp ); \ if (c) HANDLE_PAGE_CROSSING( temp ); \
if (!(r) || (temp & 0x100)) \ if (!(r) || (temp & 0x100)) \
@ -52,7 +52,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
} }
#define IND_X { \ #define IND_X { \
int temp = data + x; \ int32_t temp = data + x; \
data = 0x100 * READ_LOW( uint8_t (temp + 1) ) + READ_LOW( uint8_t (temp) ); \ data = 0x100 * READ_LOW( uint8_t (temp + 1) ) + READ_LOW( uint8_t (temp) ); \
} }
@ -118,11 +118,10 @@ imm##op: \
#define BRANCH( cond ) \ #define BRANCH( cond ) \
{ \ { \
pc++; \ pc++; \
int offset = (int8_t) data; \ if ( (cond) == false ) [[likely]] {clock_count--; goto loop; } \
int extra_clock = (pc & 0xFF) + offset; \ int32_t offset = (int8_t) data; \
if ( !(cond) ) [[likely]] {clock_count--; goto loop; } \ int32_t extra_clock = (pc & 0xFF) + offset; \
pc += offset; \ pc += offset; \
pc = uint16_t( pc ); \
clock_count += (extra_clock >> 8) & 1; \ clock_count += (extra_clock >> 8) & 1; \
goto loop; \ goto loop; \
} }
@ -147,7 +146,7 @@ void Nes_Cpu::reset( void const* unmapped_page )
code_map [1] = low_mem - 1 * page_size; code_map [1] = low_mem - 1 * page_size;
code_map [2] = low_mem - 2 * page_size; code_map [2] = low_mem - 2 * page_size;
code_map [3] = low_mem - 3 * page_size; code_map [3] = low_mem - 3 * page_size;
for ( int i = 4; i < page_count + 1; i++ ) for ( int32_t i = 4; i < page_count + 1; i++ )
code_map [i] = (uint8_t*) unmapped_page; code_map [i] = (uint8_t*) unmapped_page;
isCorrectExecution = true; isCorrectExecution = true;
@ -156,13 +155,13 @@ void Nes_Cpu::reset( void const* unmapped_page )
// Note: 'addr' is evaluated more than once in the following macros, so it // Note: 'addr' is evaluated more than once in the following macros, so it
// must not contain side-effects. // must not contain side-effects.
//static void log_read( int opcode ) { LOG_FREQ( "read", 256, opcode ); } //static void log_read( int32_t opcode ) { LOG_FREQ( "read", 256, opcode ); }
#define READ_LIKELY_PPU( addr ) (NES_CPU_READ_PPU( this, (addr), (clock_count) )) #define READ_LIKELY_PPU( addr ) (NES_CPU_READ_PPU( this, (addr), (clock_count) ))
#define READ( addr ) (NES_CPU_READ( this, (addr), (clock_count) )) #define READ( addr ) (NES_CPU_READ( this, (addr), (clock_count) ))
#define WRITE( addr, data ) {NES_CPU_WRITE( this, (addr), (data), (clock_count) );} #define WRITE( addr, data ) {NES_CPU_WRITE( this, (addr), (data), (clock_count) );}
#define READ_LOW( addr ) (low_mem [int (addr)]) #define READ_LOW( addr ) (low_mem [int32_t (addr)])
#define WRITE_LOW( addr, data ) (void) (READ_LOW( addr ) = (data)) #define WRITE_LOW( addr, data ) (void) (READ_LOW( addr ) = (data))
#define READ_PROG( addr ) (code_map [(addr) >> page_bits] [addr]) #define READ_PROG( addr ) (code_map [(addr) >> page_bits] [addr])
@ -188,7 +187,7 @@ void Nes_Cpu::reset( void const* unmapped_page )
nz |= ~in & st_z; \ nz |= ~in & st_z; \
} while ( 0 ) } while ( 0 )
inline int Nes_Cpu::read( nes_addr_t addr ) inline int32_t Nes_Cpu::read( nes_addr_t addr )
{ {
return READ( addr ); return READ( addr );
} }
@ -211,18 +210,18 @@ __attribute__((optimize("align-functions=" _PAGE_SIZE)))
volatile result_t result = result_cycles; volatile result_t result = result_cycles;
// registers // registers
unsigned pc = r.pc; uint32_t pc = r.pc;
int sp; int32_t sp;
SET_SP( r.sp ); SET_SP( r.sp );
int a = r.a; int32_t a = r.a;
int x = r.x; int32_t x = r.x;
int y = r.y; int32_t y = r.y;
int status; int32_t status;
int c; // carry set if (c & 0x100) != 0 int32_t c; // carry set if (c & 0x100) != 0
int nz; // Z set if (nz & 0xFF) == 0, N set if (nz & 0x880) != 0 int32_t nz; // Z set if (nz & 0xFF) == 0, N set if (nz & 0x880) != 0
{ {
int temp = r.status; int32_t temp = r.status;
SET_STATUS( temp ); SET_STATUS( temp );
} }
@ -256,7 +255,7 @@ loop:
BRANCH( (uint8_t) nz ); BRANCH( (uint8_t) nz );
case 0x20: { // JSR case 0x20: { // JSR
int temp = pc + 1; int32_t temp = pc + 1;
pc = GET_OPERAND16( pc ); pc = GET_OPERAND16( pc );
WRITE_LOW( 0x100 | (sp - 1), temp >> 8 ); WRITE_LOW( 0x100 | (sp - 1), temp >> 8 );
sp = (sp - 2) | 0x100; sp = (sp - 2) | 0x100;
@ -301,12 +300,11 @@ loop:
a = nz = y; a = nz = y;
goto loop; goto loop;
case 0xAD:{// LDA abs case 0xAD: // LDA abs
unsigned addr = GET_ADDR(); data = GET_ADDR();
pc += 2; pc += 2;
a = nz = READ_LIKELY_PPU( addr ); a = nz = READ_LIKELY_PPU( data );
goto loop; goto loop;
}
case 0x60: // RTS case 0x60: // RTS
pc = 1 + READ_LOW( sp ); pc = 1 + READ_LOW( sp );
@ -320,12 +318,11 @@ loop:
case 0x9D: // STA abs,X case 0x9D: // STA abs,X
data += x; data += x;
sta_ind_common: { sta_ind_common:
int temp = data;
ADD_PAGE ADD_PAGE
READ( data - ( temp & 0x100 ) ); READ( data - ( data & 0x100 ) );
goto sta_ptr; goto sta_ptr;
}
case 0x8D: // STA abs case 0x8D: // STA abs
ADD_PAGE ADD_PAGE
sta_ptr: sta_ptr:
@ -344,15 +341,15 @@ loop:
data -= x; data -= x;
case 0xBD:{// LDA abs,X case 0xBD:{// LDA abs,X
pc++; pc++;
unsigned msb = GET_OPERAND( pc ); uint32_t msb = GET_OPERAND( pc );
data += x; data += x;
// indexed common // indexed common
pc++; pc++;
HANDLE_PAGE_CROSSING( data ); HANDLE_PAGE_CROSSING( data );
int temp = data; int32_t temp = data;
data += msb * 0x100; data += msb * 0x100;
a = nz = READ_PROG( uint16_t( data ) ); a = nz = READ_PROG( uint16_t( data ) );
if ( (unsigned) (data - 0x2000) >= 0x6000 ) if ( (uint32_t) (data - 0x2000) >= 0x6000 )
goto loop; goto loop;
if ( temp & 0x100 ) if ( temp & 0x100 )
READ( data - 0x100 ); READ( data - 0x100 );
@ -361,15 +358,15 @@ loop:
} }
case 0xB1:{// LDA (ind),Y case 0xB1:{// LDA (ind),Y
unsigned msb = READ_LOW( (uint8_t) (data + 1) ); uint32_t msb = READ_LOW( (uint8_t) (data + 1) );
data = READ_LOW( data ) + y; data = READ_LOW( data ) + y;
// indexed common // indexed common
pc++; pc++;
HANDLE_PAGE_CROSSING( data ); HANDLE_PAGE_CROSSING( data );
int temp = data; int32_t temp = data;
data += msb * 0x100; data += msb * 0x100;
a = nz = READ_PROG( uint16_t( data ) ); a = nz = READ_PROG( uint16_t( data ) );
if ( (unsigned) (data - 0x2000) >= 0x6000 ) if ( (uint32_t) (data - 0x2000) >= 0x6000 )
goto loop; goto loop;
if ( temp & 0x100 ) if ( temp & 0x100 )
READ( data - 0x100 ); READ( data - 0x100 );
@ -446,7 +443,7 @@ loop:
HANDLE_PAGE_CROSSING( data ); HANDLE_PAGE_CROSSING( data );
case 0xAC:{// LDY abs case 0xAC:{// LDY abs
pc++; pc++;
unsigned addr = data + 0x100 * GET_OPERAND( pc ); uint32_t addr = data + 0x100 * GET_OPERAND( pc );
if ( data & 0x100 ) if ( data & 0x100 )
READ( addr - 0x100 ); READ( addr - 0x100 );
pc++; pc++;
@ -459,7 +456,7 @@ loop:
HANDLE_PAGE_CROSSING( data ); HANDLE_PAGE_CROSSING( data );
case 0xAE:{// LDX abs case 0xAE:{// LDX abs
pc++; pc++;
unsigned addr = data + 0x100 * GET_OPERAND( pc ); uint32_t addr = data + 0x100 * GET_OPERAND( pc );
pc++; pc++;
if ( data & 0x100 ) if ( data & 0x100 )
READ( addr - 0x100 ); READ( addr - 0x100 );
@ -468,7 +465,7 @@ loop:
} }
{ {
int temp; int32_t temp;
case 0x8C: // STY abs case 0x8C: // STY abs
temp = y; temp = y;
goto store_abs; goto store_abs;
@ -476,7 +473,7 @@ loop:
case 0x8E: // STX abs case 0x8E: // STX abs
temp = x; temp = x;
store_abs: store_abs:
unsigned addr = GET_ADDR(); uint32_t addr = GET_ADDR();
WRITE( addr, temp ); WRITE( addr, temp );
pc += 2; pc += 2;
goto loop; goto loop;
@ -485,7 +482,7 @@ loop:
// Compare // Compare
case 0xEC:{// CPX abs case 0xEC:{// CPX abs
unsigned addr = GET_ADDR(); uint32_t addr = GET_ADDR();
pc++; pc++;
data = READ( addr ); data = READ( addr );
goto cpx_data; goto cpx_data;
@ -502,7 +499,7 @@ loop:
goto loop; goto loop;
case 0xCC:{// CPY abs case 0xCC:{// CPY abs
unsigned addr = GET_ADDR(); uint32_t addr = GET_ADDR();
pc++; pc++;
data = READ( addr ); data = READ( addr );
goto cpy_data; goto cpy_data;
@ -536,7 +533,7 @@ loop:
goto loop; goto loop;
case 0x2C:{// BIT abs case 0x2C:{// BIT abs
unsigned addr = GET_ADDR(); uint32_t addr = GET_ADDR();
pc += 2; pc += 2;
status &= ~st_v; status &= ~st_v;
nz = READ_LIKELY_PPU( addr ); nz = READ_LIKELY_PPU( addr );
@ -568,8 +565,8 @@ loop:
ARITH_ADDR_MODES( 0x65 ) // ADC ARITH_ADDR_MODES( 0x65 ) // ADC
adc_imm: { adc_imm: {
int carry = (c >> 8) & 1; int32_t carry = (c >> 8) & 1;
int ov = (a ^ 0x80) + carry + (int8_t) data; // sign-extend int32_t ov = (a ^ 0x80) + carry + (int8_t) data; // sign-extend
status &= ~st_v; status &= ~st_v;
status |= (ov >> 2) & 0x40; status |= (ov >> 2) & 0x40;
c = nz = a + data + carry; c = nz = a + data + carry;
@ -598,7 +595,7 @@ loop:
case 0x2A: { // ROL A case 0x2A: { // ROL A
nz = a << 1; nz = a << 1;
int temp = (c >> 8) & 1; int32_t temp = (c >> 8) & 1;
c = nz; c = nz;
nz |= temp; nz |= temp;
a = (uint8_t) nz; a = (uint8_t) nz;
@ -615,7 +612,7 @@ loop:
c = 0; c = 0;
case 0x2E: // ROL abs case 0x2E: // ROL abs
rol_abs: { rol_abs: {
int temp = data; int32_t temp = data;
ADD_PAGE ADD_PAGE
if ( opcode == 0x1E || opcode == 0x3E ) READ( data - ( temp & 0x100 ) ); if ( opcode == 0x1E || opcode == 0x3E ) READ( data - ( temp & 0x100 ) );
WRITE( data, temp = READ( data ) ); WRITE( data, temp = READ( data ) );
@ -637,7 +634,7 @@ loop:
c = 0; c = 0;
case 0x6E: // ROR abs case 0x6E: // ROR abs
ror_abs: { ror_abs: {
int temp = data; int32_t temp = data;
ADD_PAGE ADD_PAGE
if ( opcode == 0x5E || opcode == 0x7E ) READ( data - ( temp & 0x100 ) ); if ( opcode == 0x5E || opcode == 0x7E ) READ( data - ( temp & 0x100 ) );
WRITE( data, temp = READ( data ) ); WRITE( data, temp = READ( data ) );
@ -656,7 +653,7 @@ loop:
c = 0; c = 0;
case 0x66: // ROR zp case 0x66: // ROR zp
ror_zp: { ror_zp: {
int temp = READ_LOW( data ); int32_t temp = READ_LOW( data );
nz = ((c >> 1) & 0x80) | (temp >> 1); nz = ((c >> 1) & 0x80) | (temp >> 1);
c = temp << 8; c = temp << 8;
goto write_nz_zp; goto write_nz_zp;
@ -700,7 +697,7 @@ loop:
goto loop; goto loop;
case 0xFE: { // INC abs,x case 0xFE: { // INC abs,x
int temp = data + x; int32_t temp = data + x;
data = x + GET_ADDR(); data = x + GET_ADDR();
READ( data - ( temp & 0x100 ) ); READ( data - ( temp & 0x100 ) );
goto inc_ptr; goto inc_ptr;
@ -713,7 +710,7 @@ loop:
goto inc_common; goto inc_common;
case 0xDE: { // DEC abs,x case 0xDE: { // DEC abs,x
int temp = data + x; int32_t temp = data + x;
data = x + GET_ADDR(); data = x + GET_ADDR();
READ( data - ( temp & 0x100 ) ); READ( data - ( temp & 0x100 ) );
goto dec_ptr; goto dec_ptr;
@ -724,7 +721,7 @@ loop:
dec_ptr: dec_ptr:
nz = -1; nz = -1;
inc_common: { inc_common: {
int temp; int32_t temp;
WRITE( data, temp = READ( data ) ); WRITE( data, temp = READ( data ) );
nz += temp; nz += temp;
pc += 2; pc += 2;
@ -761,7 +758,7 @@ loop:
case 0x40: // RTI case 0x40: // RTI
{ {
int temp = READ_LOW( sp ); int32_t temp = READ_LOW( sp );
pc = READ_LOW( 0x100 | (sp - 0xFF) ); pc = READ_LOW( 0x100 | (sp - 0xFF) );
pc |= READ_LOW( 0x100 | (sp - 0xFE) ) * 0x100; pc |= READ_LOW( 0x100 | (sp - 0xFE) ) * 0x100;
sp = (sp - 0xFD) | 0x100; sp = (sp - 0xFD) | 0x100;
@ -783,7 +780,7 @@ loop:
goto loop; goto loop;
case 0x28:{// PLP case 0x28:{// PLP
int temp = READ_LOW( sp ); int32_t temp = READ_LOW( sp );
sp = (sp - 0xFF) | 0x100; sp = (sp - 0xFF) | 0x100;
data = status; data = status;
SET_STATUS( temp ); SET_STATUS( temp );
@ -795,7 +792,7 @@ loop:
} }
case 0x08: { // PHP case 0x08: { // PHP
int temp; int32_t temp;
CALC_STATUS( temp ); CALC_STATUS( temp );
PUSH( temp | st_b | st_r ); PUSH( temp | st_b | st_r );
goto loop; goto loop;
@ -811,7 +808,7 @@ loop:
pc++; pc++;
WRITE_LOW( 0x100 | (sp - 1), pc >> 8 ); WRITE_LOW( 0x100 | (sp - 1), pc >> 8 );
WRITE_LOW( 0x100 | (sp - 2), pc ); WRITE_LOW( 0x100 | (sp - 2), pc );
int temp; int32_t temp;
CALC_STATUS( temp ); CALC_STATUS( temp );
sp = (sp - 3) | 0x100; sp = (sp - 3) | 0x100;
WRITE_LOW( sp, temp | st_b | st_r ); WRITE_LOW( sp, temp | st_b | st_r );
@ -879,7 +876,7 @@ loop:
case 0x1C: case 0x3C: case 0x5C: case 0x7C: case 0xDC: case 0xFC: { // SKW case 0x1C: case 0x3C: case 0x5C: case 0x7C: case 0xDC: case 0xFC: { // SKW
data += x; data += x;
HANDLE_PAGE_CROSSING( data ); HANDLE_PAGE_CROSSING( data );
int addr = GET_ADDR() + x; int32_t addr = GET_ADDR() + x;
if ( data & 0x100 ) if ( data & 0x100 )
READ( addr - 0x100 ); READ( addr - 0x100 );
READ( addr ); READ( addr );
@ -911,7 +908,7 @@ loop:
ARITH_ADDR_MODES_PTR( 0x27 ) { // RLA ARITH_ADDR_MODES_PTR( 0x27 ) { // RLA
WRITE( data, nz = READ( data ) ); WRITE( data, nz = READ( data ) );
int temp = c; int32_t temp = c;
c = nz << 1; c = nz << 1;
nz = uint8_t( c ) | ( ( temp >> 8 ) & 0x01 ); nz = uint8_t( c ) | ( ( temp >> 8 ) & 0x01 );
WRITE( data, nz ); WRITE( data, nz );
@ -921,7 +918,7 @@ loop:
} }
ARITH_ADDR_MODES_PTR( 0x67 ) { // RRA ARITH_ADDR_MODES_PTR( 0x67 ) { // RRA
int temp; int32_t temp;
WRITE( data, temp = READ( data ) ); WRITE( data, temp = READ( data ) );
nz = ((c >> 1) & 0x80) | (temp >> 1); nz = ((c >> 1) & 0x80) | (temp >> 1);
WRITE( data, nz ); WRITE( data, nz );
@ -992,7 +989,7 @@ loop:
case 0xBF: { case 0xBF: {
data += y; data += y;
HANDLE_PAGE_CROSSING( data ); HANDLE_PAGE_CROSSING( data );
int temp = data; int32_t temp = data;
ADD_PAGE; ADD_PAGE;
if ( temp & 0x100 ) if ( temp & 0x100 )
READ( data - 0x100 ); READ( data - 0x100 );
@ -1041,7 +1038,7 @@ loop:
case 0x9F: { // SHA abs,Y case 0x9F: { // SHA abs,Y
data += y; data += y;
int temp = data; int32_t temp = data;
ADD_PAGE ADD_PAGE
READ( data - ( temp & 0x100 ) ); READ( data - ( temp & 0x100 ) );
pc++; pc++;
@ -1051,7 +1048,7 @@ loop:
case 0x9E: { // SHX abs,Y case 0x9E: { // SHX abs,Y
data += y; data += y;
int temp = data; int32_t temp = data;
ADD_PAGE ADD_PAGE
READ( data - ( temp & 0x100 ) ); READ( data - ( temp & 0x100 ) );
pc++; pc++;
@ -1062,7 +1059,7 @@ loop:
case 0x9C: { // SHY abs,X case 0x9C: { // SHY abs,X
data += x; data += x;
int temp = data; int32_t temp = data;
ADD_PAGE ADD_PAGE
READ( data - ( temp & 0x100 ) ); READ( data - ( temp & 0x100 ) );
pc++; pc++;
@ -1073,7 +1070,7 @@ loop:
case 0x9B: { // SHS abs,Y case 0x9B: { // SHS abs,Y
data += y; data += y;
int temp = data; int32_t temp = data;
ADD_PAGE ADD_PAGE
READ( data - ( temp & 0x100 ) ); READ( data - ( temp & 0x100 ) );
pc++; pc++;
@ -1085,7 +1082,7 @@ loop:
case 0xBB: { // LAS abs,Y case 0xBB: { // LAS abs,Y
data += y; data += y;
HANDLE_PAGE_CROSSING( data ); HANDLE_PAGE_CROSSING( data );
int temp = data; int32_t temp = data;
ADD_PAGE ADD_PAGE
if ( temp & 0x100 ) if ( temp & 0x100 )
READ( data - 0x100 ); READ( data - 0x100 );
@ -1115,7 +1112,7 @@ loop:
// fall through // fall through
// default: // default:
// // skip over proper number of bytes // // skip over proper number of bytes
// static unsigned char const row [8] = { 0x95, 0x95, 0x95, 0xd5, 0x95, 0x95, 0xd5, 0xf5 }; // static uint8_t const row [8] = { 0x95, 0x95, 0x95, 0xd5, 0x95, 0x95, 0xd5, 0xf5 };
// int len = row [opcode >> 2 & 7] >> (opcode << 1 & 6) & 3; // int len = row [opcode >> 2 & 7] >> (opcode << 1 & 6) & 3;
// if ( opcode == 0x9C ) // if ( opcode == 0x9C )
// len = 3; // len = 3;