Update to 20100811 release.

byuu says:

12-15% faster than v067.10, and my Atom never goes below 58fps for
normal lo-res games at this point. Just a little more and I can leave
Async on. That's pretty much it, though, for the low-hanging fruit.
Everything else will be a lot of work for a little gain. Speedups are
from range testing across scanline boundaries and from using blargg's
fast DSP core.

Snes9X is now only 1.93x faster than bsnes, and bsnes is now faster than
Super Sleuth.

I also fixed the Circuit USA menus (HDMA timing adjustment), Wild Guns
flickering (IRQ lock) and Jumpin' Derby (external IRQ triggering.)
There are definitely a lot of troublesome games, mostly the same ones we
had in the past (Koushien 2, Robocop vs The Terminator, etc.) I'm
definitely going to debug Starfox, but I may not bother with some of the
more obscure ones.
This commit is contained in:
Tim Allen 2010-08-12 10:39:41 +10:00
parent 9000bb4084
commit fa0f1c1e98
7 changed files with 634 additions and 903 deletions

View File

@ -112,6 +112,7 @@ void CPU::reset() {
status.irq_transition = false;
status.irq_pending = false;
status.irq_lock = false;
status.hdma_pending = false;
status.wram_addr = 0x000000;

View File

@ -34,11 +34,16 @@ private:
void op_irq(uint16 vector);
//timing
//identifiers for timed CPU events; these are the ids accepted by queue_event()
//(presumably scheduled through the priority queue declared just below -- confirm)
struct QueueEvent {
  enum : unsigned {
    DramRefresh = 0,  //handled in queue_event() by consuming 40 clocks
    HdmaRun     = 1,  //handler not visible in this view
  };
};
nall::priority_queue<unsigned> queue;
void queue_event(unsigned id);
void last_cycle();
void add_clocks(unsigned clocks);
void add_time(unsigned clocks);
void scanline();
void run_auto_joypad_poll();
@ -56,6 +61,7 @@ private:
unsigned hdma_addr(unsigned i);
unsigned hdma_iaddr(unsigned i);
void dma_run();
bool hdma_active_after(unsigned i);
void hdma_update(unsigned i);
void hdma_run();
void hdma_init();
@ -104,6 +110,7 @@ private:
bool irq_transition;
bool irq_pending;
bool irq_lock;
bool hdma_pending;
unsigned wram_addr;

View File

@ -83,6 +83,15 @@ void CPU::dma_run() {
dma_transfer(channel[i].direction, dma_bbus(i, index++), dma_addr(i));
} while(channel[i].dma_enabled && --channel[i].transfer_size);
}
status.irq_lock = true;
}
//returns true if any channel numbered above i (i + 1 through 7) is still
//HDMA-active, meaning hdma_enabled is set and hdma_completed is not.
//channel i itself is deliberately excluded: the caller wants to know whether
//more HDMA work follows the channel it is currently updating.
//
//fix: the loop previously declared n = i + 1 but tested, incremented and
//indexed with i, so it began at channel i instead of i + 1 and never used n.
bool CPU::hdma_active_after(unsigned i) {
  for(unsigned n = i + 1; n < 8; n++) {
    if(channel[n].hdma_enabled && !channel[n].hdma_completed) return true;
  }
  return false;
}
void CPU::hdma_update(unsigned i) {
@ -93,10 +102,15 @@ void CPU::hdma_update(unsigned i) {
add_clocks(8);
if(channel[i].indirect) {
channel[i].indirect_addr = dma_read(hdma_addr(i)) << 0;
add_clocks(8);
channel[i].indirect_addr |= dma_read(hdma_addr(i)) << 8;
channel[i].indirect_addr = dma_read(hdma_addr(i)) << 8;
add_clocks(8);
//emulating this glitch causes a slight slowdown; only enable if needed
//if(!channel[i].hdma_completed || hdma_active_after(i)) {
channel[i].indirect_addr >>= 8;
channel[i].indirect_addr |= dma_read(hdma_addr(i)) << 8;
add_clocks(8);
//}
}
}
}
@ -108,7 +122,7 @@ void CPU::hdma_run() {
}
if(channels == 0) return;
add_clocks(16);
add_clocks(24);
for(unsigned i = 0; i < 8; i++) {
if(channel[i].hdma_enabled == false || channel[i].hdma_completed == true) continue;
channel[i].dma_enabled = false;
@ -130,6 +144,8 @@ void CPU::hdma_run() {
channel[i].hdma_do_transfer = channel[i].line_counter & 0x80;
hdma_update(i);
}
status.irq_lock = true;
}
void CPU::hdma_init() {
@ -150,6 +166,8 @@ void CPU::hdma_init() {
channel[i].line_counter = 0;
hdma_update(i);
}
status.irq_lock = true;
}
void CPU::dma_reset() {

View File

@ -1,12 +1,5 @@
#ifdef CPU_CPP
struct QueueEvent {
enum : unsigned {
DramRefresh,
HdmaRun,
};
};
void CPU::queue_event(unsigned id) {
switch(id) {
case QueueEvent::DramRefresh: return add_clocks(40);
@ -15,13 +8,18 @@ void CPU::queue_event(unsigned id) {
}
void CPU::last_cycle() {
if(status.irq_lock) {
status.irq_lock = false;
return;
}
if(status.nmi_transition) {
regs.wai = false;
status.nmi_transition = false;
status.nmi_pending = true;
}
if(status.irq_transition) {
if(status.irq_transition || regs.irq) {
regs.wai = false;
status.irq_transition = false;
status.irq_pending = !regs.p.i;
@ -29,40 +27,32 @@ void CPU::last_cycle() {
}
void CPU::add_clocks(unsigned clocks) {
step(clocks);
queue.tick(clocks);
unsigned clocksleft = lineclocks() - hcounter();
if(clocks > clocksleft) {
add_time(clocksleft);
add_time(clocks - clocksleft);
} else {
add_time(clocks);
}
}
void CPU::add_time(unsigned clocks) {
if(status.irq_line && (status.virq_enabled || status.hirq_enabled)) {
status.irq_transition = true;
}
if(status.virq_enabled && !status.hirq_enabled) {
if(status.hirq_enabled) {
if(status.virq_enabled) {
unsigned cpu_time = vcounter() * 1364 + hcounter();
unsigned irq_time = status.vtime * 1364 + status.htime * 4;
if(cpu_time > irq_time) irq_time += 262 * 1364;
bool irq_valid = status.irq_valid;
status.irq_valid = cpu_time <= irq_time && cpu_time + clocks > irq_time;
if(!irq_valid && status.irq_valid) status.irq_line = true;
} else {
unsigned irq_time = status.htime * 4;
if(hcounter() > irq_time) irq_time += 1364;
bool irq_valid = status.irq_valid;
status.irq_valid = hcounter() <= irq_time && hcounter() + clocks > irq_time;
if(!irq_valid && status.irq_valid) status.irq_line = true;
}
if(status.irq_line) status.irq_transition = true;
} else if(status.virq_enabled) {
bool irq_valid = status.irq_valid;
status.irq_valid = vcounter() == status.vtime;
if(!irq_valid && status.irq_valid) {
status.irq_line = true;
status.irq_transition = true;
}
} else if(status.hirq_enabled) {
bool irq_valid = status.irq_valid;
status.irq_valid = hcounter() <= status.htime * 4 && hcounter() + clocks > status.htime * 4;
if(status.virq_enabled && vcounter() != status.vtime) status.irq_valid = false;
if(!irq_valid && status.irq_valid) {
status.irq_line = true;
status.irq_transition = true;
}
if(!irq_valid && status.irq_valid) status.irq_line = true;
if(status.irq_line) status.irq_transition = true;
}
tick(clocks);
queue.tick(clocks);
step(clocks);
}
void CPU::scanline() {

View File

@ -1,7 +1,7 @@
namespace SNES {
namespace Info {
static const char Name[] = "bsnes";
static const char Version[] = "067.10";
static const char Version[] = "067.11";
static const unsigned SerializerVersion = 12;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
// Highly accurate SNES SPC-700 DSP emulator
// Fast SNES SPC-700 DSP emulator (about 3x speed of accurate one)
// snes_spc 0.9.5
#ifndef BLARGG_SPC_DSP_H
@ -8,16 +8,18 @@
BLARGG_NAMESPACE_BEGIN
extern "C" { typedef void (*dsp_copy_func_t)( unsigned char** io, void* state, size_t ); }
struct Spc_Dsp {
public:
typedef BOOST::uint8_t byte;
// Setup
// Initializes DSP and has it use the 64K RAM provided
void init( void* ram_64k );
// Sets function that is called when output buffer is filled, or NULL for none
blargg_callback<void (*)( void* user_data )> set_output_callback;
//void set_output_callback( void (*func)( void* user_data ), void* user_data );
// Sets destination for output samples. If begin is NULL, doesn't generate any.
typedef short sample_t;
@ -26,52 +28,42 @@ public:
// Current position in output buffer, or NULL if no buffer set
sample_t* output_ptr() const;
// Sets function that is called when output buffer is filled, or NULL for none
blargg_callback<void (*)( void* user_data )> set_output_callback;
//void set_output_callback( void (*func)( void* user_data ), void* user_data );
// Emulation
// Number of samples written to output buffer since last set, or 0 if no buffer set.
int sample_count() const;
// Emulation
// Resets DSP to power-on state
void reset();
// Emulates pressing reset switch on SNES
void soft_reset();
// Reads/writes DSP registers. For accuracy, you must first call run()
// Reads/writes DSP registers. For accuracy, you must first call spc_run_dsp()
// to catch the DSP up to present.
int read ( int addr ) const;
void write( int addr, int data );
// Runs DSP for specified number of clocks (~1024000 per second). Every 32 clocks
// a pair of samples is be generated.
// a pair of samples is generated.
void run( int clock_count );
// Sound control
// Using these reduces emulation accuracy.
// Mutes voices corresponding to non-zero bits in mask (issues repeated KOFF events).
// Mutes voices corresponding to non-zero bits in mask (overrides VxVOL with 0).
// Reduces emulation accuracy.
enum { voice_count = 8 };
void mute_voices( int mask ) { mute_mask = mask; }
void mute_voices( int mask ) { mute_mask = mask; }
// If true, prevents channels and global volumes from being phase-negated
void disable_surround( bool disable = true );
// State
// Resets DSP and uses supplied values to initialize registers
enum { register_count = 128 };
void load( byte const regs [register_count] );
// Saves/loads exact emulator state
enum { state_size = 640 }; // maximum space needed when saving
typedef dsp_copy_func_t copy_func_t;
void copy_state( unsigned char** io, copy_func_t );
// Returns non-zero if new key-on events occurred since last call
bool check_kon();
// DSP register addresses
// Global registers
@ -109,21 +101,17 @@ public:
struct voice_t
{
int buf [brr_buf_size*2];// decoded samples (twice the size to simplify wrap handling)
int buf_pos; // place in buffer where next samples will be decoded
int* buf_pos; // place in buffer where next samples will be decoded
int interp_pos; // relative fractional position in sample (0x1000 = 1.0)
int brr_addr; // address of current BRR block
int brr_offset; // current decoding offset in BRR block
byte* regs; // pointer to voice's DSP registers
int vbit; // bitmask for voice: 0x01 for voice 0, 0x02 for voice 1, etc.
int kon_delay; // KON delay/current setup phase
env_mode_t env_mode;
int env; // current envelope level
int hidden_env; // used by GAIN mode 7, very obscure quirk
byte t_envx_out;
int volume [2]; // copy of volume from DSP registers, with surround disabled
};
private:
enum { brr_block_size = 9 };
// non-emulation state
byte* ram; // 64K shared RAM between DSP and SMP
int mute_mask;
@ -133,145 +121,81 @@ private:
sample_t* output_end;
sample_t* user_output_end;
sample_t dummy_buf [2];
bool kon_check; // set when a new KON occurs
struct state_t
{
int every_other_sample; // toggles every sample
int kon; // KON value when last checked
int noise;
int counter;
int echo_offset; // offset from ESA in echo buffer
int echo_length; // number of bytes that echo_offset will stop at
int phase; // next clock cycle to run (0-31)
unsigned counters [4];
// Hidden registers also written to when main register is written to
int new_kon;
byte endx_buf;
byte envx_buf;
byte outx_buf;
// Temporary state between clocks
// read once per sample
int t_pmon;
int t_non;
int t_eon;
int t_dir;
int new_kon;
int t_koff;
// read a few clocks ahead then used
int t_brr_next_addr;
int t_adsr0;
int t_brr_header;
int t_brr_byte;
int t_srcn;
int t_esa;
int t_echo_enabled;
// internal state that is recalculated every sample
int t_dir_addr;
int t_pitch;
int t_output;
int t_looped;
int t_echo_ptr;
// left/right sums
int t_main_out [2];
int t_echo_out [2];
int t_echo_in [2];
voice_t voices [voice_count];
// Echo history keeps most recent 8 samples (twice the size to simplify wrap handling)
int (*echo_hist_pos) [2]; // &echo_hist [0 to 7]
int echo_hist [echo_hist_size * 2] [2];
unsigned* counter_select [32];
voice_t voices [voice_count];
};
state_t m;
byte regs [register_count];
void init_counter();
void run_counters();
unsigned read_counter( int rate );
int interpolate( voice_t const* v );
void run_envelope( voice_t* const v );
void decode_brr( voice_t* v );
void misc_27();
void misc_28();
void misc_29();
void misc_30();
void voice_output( voice_t const* v, int ch );
void voice_V1( voice_t* const );
void voice_V2( voice_t* const );
void voice_V3( voice_t* const );
void voice_V3a( voice_t* const );
void voice_V3b( voice_t* const );
void voice_V3c( voice_t* const );
void voice_V4( voice_t* const );
void voice_V5( voice_t* const );
void voice_V6( voice_t* const );
void voice_V7( voice_t* const );
void voice_V8( voice_t* const );
void voice_V9( voice_t* const );
void voice_V7_V4_V1( voice_t* const );
void voice_V8_V5_V2( voice_t* const );
void voice_V9_V6_V3( voice_t* const );
void echo_read( int ch );
int echo_output( int ch );
void echo_write( int ch );
void echo_22();
void echo_23();
void echo_24();
void echo_25();
void echo_26();
void echo_27();
void echo_28();
void echo_29();
void echo_30();
void run_counter( int );
void update_voice_vol( int addr );
void set_null_output();
void write_sample( int l, int r );
void apply_output_enables();
};
#include <assert.h>
inline int Spc_Dsp::read( int addr ) const
{
assert( (unsigned) addr < register_count );
return regs [addr];
}
inline void Spc_Dsp::update_voice_vol( int addr )
{
int l = (int8_t) regs [addr + v_voll];
int r = (int8_t) regs [addr + v_volr];
if ( l * r < surround_threshold )
{
// signs differ, so negate those that are negative
l ^= l >> 7;
r ^= r >> 7;
}
int index = addr >> 4;
voice_t& v = m.voices [index];
int enabled = ~mute_mask >> index & 1;
v.volume [0] = l * enabled;
v.volume [1] = r * enabled;
}
inline void Spc_Dsp::write( int addr, int data )
{
assert( (unsigned) addr < register_count );
regs [addr] = (byte) data;
switch ( addr & 0x0F )
int low = addr & 0x0F;
if ( low < 0x2 ) // voice volumes
{
update_voice_vol( low ^ addr /* addr & 0xF0 */ );
}
else if ( low == 0xC )
{
case v_envx:
m.envx_buf = (byte) data;
break;
case v_outx:
m.outx_buf = (byte) data;
break;
case 0x0C:
if ( addr == r_kon )
m.new_kon = (byte) data;
if ( addr == r_endx ) // always cleared, regardless of data written
{
m.endx_buf = 0;
regs [r_endx] = 0;
}
break;
}
}
@ -280,40 +204,22 @@ inline void Spc_Dsp::disable_surround( bool disable )
surround_threshold = disable ? 0 : -0x4000;
}
inline bool Spc_Dsp::check_kon()
{
bool old = kon_check;
kon_check = 0;
return old;
}
// Current position in the output buffer. While output is being directed to the
// internal dummy_buf, user_output_end is reported instead so that a pointer
// into dummy_buf never escapes to the caller.
inline Spc_Dsp::sample_t* Spc_Dsp::output_ptr() const
{
	if ( output_ptr_ == dummy_buf )
		return user_output_end;

	return output_ptr_;
}
class SPC_State_Copier {
Spc_Dsp::copy_func_t func;
unsigned char** buf;
public:
SPC_State_Copier( unsigned char** p, Spc_Dsp::copy_func_t f ) { func = f; buf = p; }
void copy( void* state, size_t size );
int copy_int( int state, int size );
void skip( int count );
// Reads uint8_t and then skips that many bytes. If writing, writes
// uint8_t of 0. This allows future expansion at this point, by writing
// non-zero and additional data.
void extra();
};
#define SPC_COPY( type, state )\
{\
state = (BOOST::type) copier.copy_int( state, sizeof (BOOST::type) );\
check( (BOOST::type) state == state );\
inline int Spc_Dsp::sample_count() const
{
sample_t* p = output_ptr();
return (p ? p - output_begin : 0);
}
#define SPC_NO_COPY_STATE_FUNCS 1
#define SPC_LESS_ACCURATE 1
BLARGG_NAMESPACE_END
#endif