diff --git a/output/dll/bizswan.dll b/output/dll/bizswan.dll
index 7c0542124a..e9c4fda63f 100644
Binary files a/output/dll/bizswan.dll and b/output/dll/bizswan.dll differ
diff --git a/wonderswan/blip/Blip_Buffer.h b/wonderswan/blip/Blip_Buffer.h
index 95a75a4311..a8e90ee053 100644
--- a/wonderswan/blip/Blip_Buffer.h
+++ b/wonderswan/blip/Blip_Buffer.h
@@ -1,13 +1,27 @@
+// Band-limited sound synthesis buffer
+// Various changes and hacks for use in Mednafen.
 
-// Band-limited sound synthesis and buffering
+#ifdef __GNUC__
+ #define blip_inline inline __attribute__((always_inline))
+#else
+ #define blip_inline inline
+#endif
 
-// Blip_Buffer 0.4.0
+#include <limits.h>
+#include <inttypes.h>
 
+// Blip_Buffer 0.4.1
 #ifndef BLIP_BUFFER_H
 #define BLIP_BUFFER_H
 
+// Internal
+typedef int32_t blip_long;
+typedef uint32_t blip_ulong;
+typedef int64_t blip_s64;
+typedef uint64_t blip_u64;
+
 // Time unit at source clock rate
-typedef long blip_time_t;
+typedef blip_long blip_time_t;
 
 // Output samples are 16-bit signed, with a range of -32768 to 32767
 typedef short blip_sample_t;
@@ -65,19 +79,21 @@ public:
 	
 // Experimental features
 	
+	// Count number of clocks needed until 'count' samples will be available.
+	// If buffer can't even hold 'count' samples, returns number of clocks until
+	// buffer becomes full.
+	blip_time_t count_clocks( long count ) const;
+	
 	// Number of raw samples that can be mixed within frame of specified duration.
 	long count_samples( blip_time_t duration ) const;
 	
 	// Mix 'count' samples from 'buf' into buffer.
 	void mix_samples( blip_sample_t const* buf, long count );
 	
-	// Count number of clocks needed until 'count' samples will be available.
-	// If buffer can't even hold 'count' samples, returns number of clocks until
-	// buffer becomes full.
-	blip_time_t count_clocks( long count ) const;
-	
 	// not documented yet
-	typedef unsigned long blip_resampled_time_t;
+	void set_modified() { modified_ = 1; }
+	int clear_modified() { int b = modified_; modified_ = 0; return b; }
+	typedef blip_u64 blip_resampled_time_t;
 	void remove_silence( long count );
 	blip_resampled_time_t resampled_duration( int t ) const     { return t * factor_; }
 	blip_resampled_time_t resampled_time( blip_time_t t ) const { return t * factor_ + offset_; }
@@ -95,18 +111,19 @@ private:
 	Blip_Buffer( const Blip_Buffer& );
 	Blip_Buffer& operator = ( const Blip_Buffer& );
 public:
-	typedef long buf_t_;
-	unsigned long factor_;
+	typedef blip_time_t buf_t_;
+	blip_u64 factor_;
 	blip_resampled_time_t offset_;
 	buf_t_* buffer_;
-	long buffer_size_;
+	blip_long buffer_size_;
+	blip_long reader_accum_;
+	int bass_shift_;
 private:
-	long reader_accum;
-	int bass_shift;
 	long sample_rate_;
 	long clock_rate_;
 	int bass_freq_;
 	int length_;
+	int modified_;
 	friend class Blip_Reader;
 };
 
@@ -114,40 +131,60 @@ private:
 	#include "config.h"
 #endif
 
+#define BLIP_BUFFER_ACCURACY 32
+#define BLIP_PHASE_BITS 8
+
 // Number of bits in resample ratio fraction. Higher values give a more accurate ratio
 // but reduce maximum buffer size.
-#ifndef BLIP_BUFFER_ACCURACY
-	#define BLIP_BUFFER_ACCURACY 16
-#endif
+//#ifndef BLIP_BUFFER_ACCURACY
+//	#define BLIP_BUFFER_ACCURACY 16
+//#endif
 
 // Number bits in phase offset. Fewer than 6 bits (64 phase offsets) results in
 // noticeable broadband noise when synthesizing high frequency square waves.
 // Affects size of Blip_Synth objects since they store the waveform directly.
-#ifndef BLIP_PHASE_BITS
-	#define BLIP_PHASE_BITS 6
-#endif
+//#ifndef BLIP_PHASE_BITS
+//	#if BLIP_BUFFER_FAST
+//		#define BLIP_PHASE_BITS 8
+//	#else
+//		#define BLIP_PHASE_BITS 6
+//	#endif
+//#endif
 
 	// Internal
-	typedef unsigned long blip_resampled_time_t;
+	typedef blip_u64 blip_resampled_time_t;
 	int const blip_widest_impulse_ = 16;
+	int const blip_buffer_extra_ = blip_widest_impulse_ + 2;
 	int const blip_res = 1 << BLIP_PHASE_BITS;
 	class blip_eq_t;
 	
-	class Blip_Synth_ {
-		double volume_unit_;
-		short* const impulses;
-		int const width;
-		long kernel_unit;
-		int impulses_size() const { return blip_res / 2 * width + 1; }
-		void adjust_impulse();
+	class Blip_Synth_Fast_ {
 	public:
 		Blip_Buffer* buf;
 		int last_amp;
 		int delta_factor;
 		
+		void volume_unit( double );
+		Blip_Synth_Fast_();
+		void treble_eq( blip_eq_t const& ) { }
+	};
+	
+	class Blip_Synth_ {
+	public:
+		Blip_Buffer* buf;
+		int last_amp;
+		int delta_factor;
+		
+		void volume_unit( double );
 		Blip_Synth_( short* impulses, int width );
 		void treble_eq( blip_eq_t const& );
-		void volume_unit( double );
+	private:
+		double volume_unit_;
+		short* const impulses;
+		int const width;
+		blip_long kernel_unit;
+		int impulses_size() const { return blip_res / 2 * width + 1; }
+		void adjust_impulse();
 	};
 
 // Quality level. Start with blip_good_quality.
@@ -164,7 +201,7 @@ public:
 	// Set overall volume of waveform
 	void volume( double v ) { impl.volume_unit( v * (1.0 / (range < 0 ? -range : range)) ); }
 	
-	// Configure low-pass filter (see notes.txt)
+	// Configure low-pass filter (see blip_buffer.txt)
 	void treble_eq( blip_eq_t const& eq )       { impl.treble_eq( eq ); }
 	
 	// Get/set Blip_Buffer used for output
@@ -183,7 +220,7 @@ public:
 	void offset( blip_time_t, int delta, Blip_Buffer* ) const;
 	void offset( blip_time_t t, int delta ) const { offset( t, delta, impl.buf ); }
 	
-	// Works directly in terms of fractional output samples. Contact author for more.
+	// Works directly in terms of fractional output samples. Contact author for more info.
 	void offset_resampled( blip_resampled_time_t, int delta, Blip_Buffer* ) const;
 	
 	// Same as offset(), except code is inlined for higher performance
@@ -194,12 +231,16 @@ public:
 		offset_resampled( t * impl.buf->factor_ + impl.buf->offset_, delta, impl.buf );
 	}
 	
-public:
-	Blip_Synth() : impl( impulses, quality ) { }
 private:
+#if BLIP_BUFFER_FAST
+	Blip_Synth_Fast_ impl;
+#else
+	Blip_Synth_ impl;
 	typedef short imp_t;
 	imp_t impulses [blip_res * (quality / 2) + 1];
-	Blip_Synth_ impl;
+public:
+	Blip_Synth() : impl( impulses, quality ) { }
+#endif
 };
 
 // Low-pass equalization parameters
@@ -209,7 +250,7 @@ public:
 	// treble, small positive values (0 to 5.0) increase treble.
 	blip_eq_t( double treble_db = 0 );
 	
-	// See notes.txt
+	// See blip_buffer.txt
 	blip_eq_t( double treble, long rolloff_freq, long sample_rate, long cutoff_freq = 0 );
 	
 private:
@@ -223,104 +264,208 @@ private:
 
 int const blip_sample_bits = 30;
 
-// Optimized inline sample reader for custom sample formats and mixing of Blip_Buffer samples
-class Blip_Reader {
+// Dummy Blip_Buffer to direct sound output to, for easy muting without
+// having to stop sound code.
+class Silent_Blip_Buffer : public Blip_Buffer {
+	buf_t_ buf [blip_buffer_extra_ + 1];
 public:
-	// Begin reading samples from buffer. Returns value to pass to next() (can
-	// be ignored if default bass_freq is acceptable).
-	int begin( Blip_Buffer& );
+	// The following cannot be used (an assertion will fail if attempted):
+	blargg_err_t set_sample_rate( long samples_per_sec, int msec_length );
+	blip_time_t count_clocks( long count ) const;
+	void mix_samples( blip_sample_t const* buf, long count );
 	
-	// Current sample
-	long read() const               { return accum >> (blip_sample_bits - 16); }
-	
-	// Current raw sample in full internal resolution
-	long read_raw() const           { return accum; }
-	
-	// Advance to next sample
-	void next( int bass_shift = 9 )         { accum += *buf++ - (accum >> bass_shift); }
-	
-	// End reading samples from buffer. The number of samples read must now be removed
-	// using Blip_Buffer::remove_samples().
-	void end( Blip_Buffer& b )              { b.reader_accum = accum; }
-	
-private:
-	const Blip_Buffer::buf_t_* buf;
-	long accum;
+	Silent_Blip_Buffer();
 };
 
+	#if defined (__GNUC__) || _MSC_VER >= 1100
+		#define BLIP_RESTRICT __restrict
+	#else
+		#define BLIP_RESTRICT
+	#endif
 
-// End of public interface
+// Optimized reading from Blip_Buffer, for use in custom sample output
 
+// Begin reading from buffer. Name should be unique to the current block.
+#define BLIP_READER_BEGIN( name, blip_buffer ) \
+	const Blip_Buffer::buf_t_* BLIP_RESTRICT name##_reader_buf = (blip_buffer).buffer_;\
+	blip_long name##_reader_accum = (blip_buffer).reader_accum_
+
+// Get value to pass to BLIP_READER_NEXT()
+#define BLIP_READER_BASS( blip_buffer ) ((blip_buffer).bass_shift_)
+
+// Constant value to use instead of BLIP_READER_BASS(), for slightly more optimal
+// code at the cost of having no bass control
+int const blip_reader_default_bass = 9;
+
+// Current sample
+#define BLIP_READER_READ( name )        (name##_reader_accum >> (blip_sample_bits - 16))
+
+// Current raw sample in full internal resolution
+#define BLIP_READER_READ_RAW( name )    (name##_reader_accum)
+
+// Advance to next sample
+#define BLIP_READER_NEXT( name, bass ) \
+	(void) (name##_reader_accum += *name##_reader_buf++ - (name##_reader_accum >> (bass)))
+
+// End reading samples from buffer. The number of samples read must now be removed
+// using Blip_Buffer::remove_samples().
+#define BLIP_READER_END( name, blip_buffer ) \
+	(void) ((blip_buffer).reader_accum_ = name##_reader_accum)
 
-#include <assert.h>
 
 // Compatibility with older version
 const long blip_unscaled = 65535;
 const int blip_low_quality  = blip_med_quality;
 const int blip_best_quality = blip_high_quality;
 
-#define BLIP_FWD( i ) {                     \
-	long t0 = i0 * delta + buf [fwd + i];   \
-	long t1 = imp [blip_res * (i + 1)] * delta + buf [fwd + 1 + i]; \
-	i0 = imp [blip_res * (i + 2)];          \
-	buf [fwd + i] = t0;                     \
-	buf [fwd + 1 + i] = t1; }
+// Deprecated; use BLIP_READER macros as follows:
+// Blip_Reader r; r.begin( buf ); -> BLIP_READER_BEGIN( r, buf );
+// int bass = r.begin( buf )      -> BLIP_READER_BEGIN( r, buf ); int bass = BLIP_READER_BASS( buf );
+// r.read()                       -> BLIP_READER_READ( r )
+// r.read_raw()                   -> BLIP_READER_READ_RAW( r )
+// r.next( bass )                 -> BLIP_READER_NEXT( r, bass )
+// r.next()                       -> BLIP_READER_NEXT( r, blip_reader_default_bass )
+// r.end( buf )                   -> BLIP_READER_END( r, buf )
+class Blip_Reader {
+public:
+	int begin( Blip_Buffer& );
+	blip_long read() const          { return accum >> (blip_sample_bits - 16); }
+	blip_long read_raw() const      { return accum; }
+	void next( int bass_shift = 9 )         { accum += *buf++ - (accum >> bass_shift); }
+	void end( Blip_Buffer& b )              { b.reader_accum_ = accum; }
+	
+private:
+	const Blip_Buffer::buf_t_* buf;
+	blip_long accum;
+};
 
-#define BLIP_REV( r ) {                     \
-	long t0 = i0 * delta + buf [rev - r];   \
-	long t1 = imp [blip_res * r] * delta + buf [rev + 1 - r];   \
-	i0 = imp [blip_res * (r - 1)];          \
-	buf [rev - r] = t0;                     \
-	buf [rev + 1 - r] = t1; }
+// End of public interface
+
+#include <assert.h>
 
 template<int quality,int range>
-inline void Blip_Synth<quality,range>::offset_resampled( blip_resampled_time_t time,
+blip_inline void Blip_Synth<quality,range>::offset_resampled( blip_resampled_time_t time,
 		int delta, Blip_Buffer* blip_buf ) const
 {
 	// Fails if time is beyond end of Blip_Buffer, due to a bug in caller code or the
 	// need for a longer buffer as set by set_sample_rate().
-	assert( (long) (time >> BLIP_BUFFER_ACCURACY) < blip_buf->buffer_size_ );
+	assert( (blip_long) (time >> BLIP_BUFFER_ACCURACY) < blip_buf->buffer_size_ );
 	delta *= impl.delta_factor;
+	blip_long* BLIP_RESTRICT buf = blip_buf->buffer_ + (time >> BLIP_BUFFER_ACCURACY);
 	int phase = (int) (time >> (BLIP_BUFFER_ACCURACY - BLIP_PHASE_BITS) & (blip_res - 1));
-	imp_t const* imp = impulses + blip_res - phase;
-	long* buf = blip_buf->buffer_ + (time >> BLIP_BUFFER_ACCURACY);
-	long i0 = *imp;
+
+#if BLIP_BUFFER_FAST
+	blip_long left = buf [0] + delta;
 	
+	// Kind of crappy, but doing shift after multiply results in overflow.
+	// Alternate way of delaying multiply by delta_factor results in worse
+	// sub-sample resolution.
+	blip_long right = (delta >> BLIP_PHASE_BITS) * phase;
+	left  -= right;
+	right += buf [1];
+	
+	buf [0] = left;
+	buf [1] = right;
+#else
+
 	int const fwd = (blip_widest_impulse_ - quality) / 2;
 	int const rev = fwd + quality - 2;
+	int const mid = quality / 2 - 1;
 	
-	BLIP_FWD( 0 )
-	if ( quality > 8  ) BLIP_FWD( 2 )
-	if ( quality > 12 ) BLIP_FWD( 4 )
-	{
-		int const mid = quality / 2 - 1;
-		long t0 = i0 * delta + buf [fwd + mid - 1];
-		long t1 = imp [blip_res * mid] * delta + buf [fwd + mid];
-		imp = impulses + phase;
-		i0 = imp [blip_res * mid];
-		buf [fwd + mid - 1] = t0;
-		buf [fwd + mid] = t1;
+	imp_t const* BLIP_RESTRICT imp = impulses + blip_res - phase;
+	
+	#if defined (_M_IX86) || defined (_M_IA64) || defined (__i486__) || \
+			defined (__x86_64__) || defined (__ia64__) || defined (__i386__)
+	
+	// straightforward implementation resulted in better code on GCC for x86
+	
+	#define ADD_IMP( out, in ) \
+		buf [out] += (blip_long) imp [blip_res * (in)] * delta
+	
+	#define BLIP_FWD( i ) {\
+		ADD_IMP( fwd     + i, i     );\
+		ADD_IMP( fwd + 1 + i, i + 1 );\
 	}
-	if ( quality > 12 ) BLIP_REV( 6 )
-	if ( quality > 8  ) BLIP_REV( 4 )
-	BLIP_REV( 2 )
+	#define BLIP_REV( r ) {\
+		ADD_IMP( rev     - r, r + 1 );\
+		ADD_IMP( rev + 1 - r, r     );\
+	}
+
+		BLIP_FWD( 0 )
+		if ( quality > 8  ) BLIP_FWD( 2 )
+		if ( quality > 12 ) BLIP_FWD( 4 )
+		{
+			ADD_IMP( fwd + mid - 1, mid - 1 );
+			ADD_IMP( fwd + mid    , mid     );
+			imp = impulses + phase;
+		}
+		if ( quality > 12 ) BLIP_REV( 6 )
+		if ( quality > 8  ) BLIP_REV( 4 )
+		BLIP_REV( 2 )
+		
+		ADD_IMP( rev    , 1 );
+		ADD_IMP( rev + 1, 0 );
+		
+	#else
 	
-	long t0 = i0 * delta + buf [rev];
-	long t1 = *imp * delta + buf [rev + 1];
-	buf [rev] = t0;
-	buf [rev + 1] = t1;
+	// for RISC processors, help compiler by reading ahead of writes
+	
+	#define BLIP_FWD( i ) {\
+		blip_long t0 =                       i0 * delta + buf [fwd     + i];\
+		blip_long t1 = imp [blip_res * (i + 1)] * delta + buf [fwd + 1 + i];\
+		i0 =           imp [blip_res * (i + 2)];\
+		buf [fwd     + i] = t0;\
+		buf [fwd + 1 + i] = t1;\
+	}
+	#define BLIP_REV( r ) {\
+		blip_long t0 =                 i0 * delta + buf [rev     - r];\
+		blip_long t1 = imp [blip_res * r] * delta + buf [rev + 1 - r];\
+		i0 =           imp [blip_res * (r - 1)];\
+		buf [rev     - r] = t0;\
+		buf [rev + 1 - r] = t1;\
+	}
+		
+		blip_long i0 = *imp;
+		BLIP_FWD( 0 )
+		if ( quality > 8  ) BLIP_FWD( 2 )
+		if ( quality > 12 ) BLIP_FWD( 4 )
+		{
+			blip_long t0 =                   i0 * delta + buf [fwd + mid - 1];
+			blip_long t1 = imp [blip_res * mid] * delta + buf [fwd + mid    ];
+			imp = impulses + phase;
+			i0 = imp [blip_res * mid];
+			buf [fwd + mid - 1] = t0;
+			buf [fwd + mid    ] = t1;
+		}
+		if ( quality > 12 ) BLIP_REV( 6 )
+		if ( quality > 8  ) BLIP_REV( 4 )
+		BLIP_REV( 2 )
+		
+		blip_long t0 =   i0 * delta + buf [rev    ];
+		blip_long t1 = *imp * delta + buf [rev + 1];
+		buf [rev    ] = t0;
+		buf [rev + 1] = t1;
+	#endif
+	
+#endif
 }
 
 #undef BLIP_FWD
 #undef BLIP_REV
 
 template<int quality,int range>
+#if BLIP_BUFFER_FAST
+	blip_inline
+#endif
 void Blip_Synth<quality,range>::offset( blip_time_t t, int delta, Blip_Buffer* buf ) const
 {
 	offset_resampled( t * buf->factor_ + buf->offset_, delta, buf );
 }
 
 template<int quality,int range>
+#if BLIP_BUFFER_FAST
+	blip_inline
+#endif
 void Blip_Synth<quality,range>::update( blip_time_t t, int amp )
 {
 	int delta = amp - impl.last_amp;
@@ -328,28 +473,26 @@ void Blip_Synth<quality,range>::update( blip_time_t t, int amp )
 	offset_resampled( t * impl.buf->factor_ + impl.buf->offset_, delta, impl.buf );
 }
 
-inline blip_eq_t::blip_eq_t( double t ) :
+blip_inline blip_eq_t::blip_eq_t( double t ) :
 		treble( t ), rolloff_freq( 0 ), sample_rate( 44100 ), cutoff_freq( 0 ) { }
-inline blip_eq_t::blip_eq_t( double t, long rf, long sr, long cf ) :
+blip_inline blip_eq_t::blip_eq_t( double t, long rf, long sr, long cf ) :
 		treble( t ), rolloff_freq( rf ), sample_rate( sr ), cutoff_freq( cf ) { }
 
-inline int  Blip_Buffer::length() const         { return length_; }
-inline long Blip_Buffer::samples_avail() const  { return (long) (offset_ >> BLIP_BUFFER_ACCURACY); }
-inline long Blip_Buffer::sample_rate() const    { return sample_rate_; }
-inline int  Blip_Buffer::output_latency() const { return blip_widest_impulse_ / 2; }
-inline long Blip_Buffer::clock_rate() const     { return clock_rate_; }
-inline void Blip_Buffer::clock_rate( long cps ) { factor_ = clock_rate_factor( clock_rate_ = cps ); }
+blip_inline int  Blip_Buffer::length() const         { return length_; }
+blip_inline long Blip_Buffer::samples_avail() const  { return (long) (offset_ >> BLIP_BUFFER_ACCURACY); }
+blip_inline long Blip_Buffer::sample_rate() const    { return sample_rate_; }
+blip_inline int  Blip_Buffer::output_latency() const { return blip_widest_impulse_ / 2; }
+blip_inline long Blip_Buffer::clock_rate() const     { return clock_rate_; }
+blip_inline void Blip_Buffer::clock_rate( long cps ) { factor_ = clock_rate_factor( clock_rate_ = cps ); }
 
-inline int Blip_Reader::begin( Blip_Buffer& blip_buf )
+blip_inline int Blip_Reader::begin( Blip_Buffer& blip_buf )
 {
 	buf = blip_buf.buffer_;
-	accum = blip_buf.reader_accum;
-	return blip_buf.bass_shift;
+	accum = blip_buf.reader_accum_;
+	return blip_buf.bass_shift_;
 }
 
 int const blip_max_length = 0;
 int const blip_default_length = 250;
 
 #endif
-
-
diff --git a/wonderswan/gfx.cpp b/wonderswan/gfx.cpp
index baaaafce1d..7a1fdc9443 100644
--- a/wonderswan/gfx.cpp
+++ b/wonderswan/gfx.cpp
@@ -20,6 +20,7 @@
 
 #include "system.h"
 #include <cstring>
+#include <algorithm>
 
 namespace MDFN_IEN_WSWAN
 {
@@ -72,16 +73,18 @@ namespace MDFN_IEN_WSWAN
 
 		case 0x14: LCDControl = V; break; //    if((!(wsIO[0x14]&1))&&(data&1)) { wsLine=0; }break; /* LCD off ??*/
 		case 0x15: LCDIcons = V; break;
+		case 0x16: LCDVtotal = V; break;
 
 		case 0x60: VideoMode = V; 
 			SetVideo(V>>5, false); 
 			//printf("VideoMode: %02x, %02x\n", V, V >> 5);
 			break;
 
-		case 0xa2: if((V & 0x01) && !(BTimerControl & 0x01))
-					   HBCounter = HBTimerPeriod;
-			if((V & 0x04) && !(BTimerControl & 0x04))
-				VBCounter = VBTimerPeriod;
+		case 0xa2:
+			// if((V & 0x01) && !(BTimerControl & 0x01))
+			// 	HBCounter = HBTimerPeriod;
+			// if((V & 0x04) && !(BTimerControl & 0x04))
+			// 	VBCounter = VBTimerPeriod;
 			BTimerControl = V; 
 			//printf("%04x:%02x\n", A, V);
 			break;
@@ -134,6 +137,7 @@ namespace MDFN_IEN_WSWAN
 		case 0x13: return(FGYScroll);
 		case 0x14: return(LCDControl);
 		case 0x15: return(LCDIcons);
+		case 0x16: return(LCDVtotal);
 		case 0x60: return(VideoMode);
 		case 0xa0: return(wsc ? 0x87 : 0x86);
 		case 0xa2: return(BTimerControl);
@@ -168,21 +172,30 @@ namespace MDFN_IEN_WSWAN
 		sys->memory.CheckSoundDMA();
 
 		// Update sprite data table
+		// Note: the sprite table is read at line 142, but the copy only takes effect at line 144 (i.e. for the next frame)
 		if(wsLine == 142)
 		{
-			SpriteCountCache = SpriteCount;
-
-			if(SpriteCountCache > 0x80)
-				SpriteCountCache = 0x80;
-
-			memcpy(SpriteTable, &sys->memory.wsRAM[(SPRBase << 9) + (SpriteStart << 2)], SpriteCountCache << 2);
+			NextSpriteCountCache = std::min<uint8>(0x80, SpriteCount);
+			memcpy(NextSpriteTable, &sys->memory.wsRAM[(SPRBase << 9) + (SpriteStart << 2)], NextSpriteCountCache << 2);
 		}
 
 		if(wsLine == 144)
 		{
+			SpriteCountCache = NextSpriteCountCache;
+			memcpy(SpriteTable, NextSpriteTable, SpriteCountCache << 2);
 			ret = true;
 			sys->interrupt.DoInterrupt(WSINT_VBLANK);
 			//printf("VBlank: %d\n", wsLine);
+			if(VBCounter && (BTimerControl & 0x04))
+			{
+				VBCounter--;
+				if(!VBCounter)
+				{
+					if(BTimerControl & 0x08) // loop
+						VBCounter = VBTimerPeriod;
+					sys->interrupt.DoInterrupt(WSINT_VBLANK_TIMER);
+				}
+			}
 		}
 
 
@@ -199,10 +212,12 @@ namespace MDFN_IEN_WSWAN
 		}
 
 		// CPU ==========================
-		sys->cpu.execute(224);
+		sys->cpu.execute(128);
+		sys->memory.CheckSoundDMA();
+		sys->cpu.execute(96);
 		// CPU ==========================
 
-		wsLine = (wsLine + 1) % 159;
+		wsLine = (wsLine + 1) % (std::max<uint8>(144, LCDVtotal) + 1);
 		if(wsLine == LineCompare)
 		{
 			sys->interrupt.DoInterrupt(WSINT_LINE_HIT);
@@ -215,21 +230,21 @@ namespace MDFN_IEN_WSWAN
 
 		sys->rtc.Clock(256);
 
-		if(!wsLine)
-		{
-			if(VBCounter && (BTimerControl & 0x04))
-			{
-				VBCounter--;
-				if(!VBCounter)
-				{
-					if(BTimerControl & 0x08) // Loop mode?
-						VBCounter = VBTimerPeriod;
+		// if(!wsLine)
+		// {
+		// 	if(VBCounter && (BTimerControl & 0x04))
+		// 	{
+		// 		VBCounter--;
+		// 		if(!VBCounter)
+		// 		{
+		// 			if(BTimerControl & 0x08) // Loop mode?
+		// 				VBCounter = VBTimerPeriod;
 
-					sys->interrupt.DoInterrupt(WSINT_VBLANK_TIMER);
-				}
-			}
-			wsLine = 0;
-		}
+		// 			sys->interrupt.DoInterrupt(WSINT_VBLANK_TIMER);
+		// 		}
+		// 	}
+		// 	wsLine = 0;
+		// }
 
 		return ret;
 	}
@@ -239,14 +254,14 @@ namespace MDFN_IEN_WSWAN
 		LayerEnabled = mask;
 	}
 
-	void GFX::SetBWPalette(const uint32 *colors)
-	{
-		std::memcpy(ColorMapG, colors, sizeof(ColorMapG));
-	}
-	void GFX::SetColorPalette(const uint32 *colors)
-	{
-		std::memcpy(ColorMap, colors, sizeof(ColorMap));
-	}
+	void GFX::SetBWPalette(const uint32 *colors)
+	{
+		std::memcpy(ColorMapG, colors, sizeof(ColorMapG));
+	}
+	void GFX::SetColorPalette(const uint32 *colors)
+	{
+		std::memcpy(ColorMap, colors, sizeof(ColorMap));
+	}
 
 	/*
 	void GFX::SetPixelFormat()
@@ -355,7 +370,7 @@ namespace MDFN_IEN_WSWAN
 
 				if(windowtype == 0x20) // Display FG only inside window
 				{
-					if((wsLine >= FGy0) && (wsLine < FGy1))
+					if((wsLine >= FGy0) && (wsLine <= FGy1))
 						for(j = FGx0; j <= FGx1 && j < 224; j++)
 							in_window[7 + j] = 1;
 				}
@@ -363,7 +378,7 @@ namespace MDFN_IEN_WSWAN
 				{
 					for(j = 0; j < 224; j++)
 					{
-						if(!(j >= FGx0 && j < FGx1) || !((wsLine >= FGy0) && (wsLine < FGy1)))
+						if(!(j >= FGx0 && j <= FGx1) || !((wsLine >= FGy0) && (wsLine <= FGy1)))
 							in_window[7 + j] = 1;
 					}
 				}
@@ -431,8 +446,8 @@ namespace MDFN_IEN_WSWAN
 			if(DispControl & 0x08)
 			{
 				memset(in_window, 0, sizeof(in_window));
-				if((wsLine >= SPRy0) && (wsLine < SPRy1))
-					for(j = SPRx0; j < SPRx1 && j < 256; j++)
+				if((wsLine >= SPRy0) && (wsLine <= SPRy1))
+					for(j = SPRx0; j <= SPRx1 && j < 256; j++)
 						in_window[7 + j] = 1;
 			}
 			else
@@ -565,9 +580,9 @@ namespace MDFN_IEN_WSWAN
 		}
 	}
 
-	void GFX::Init(bool color)
-	{
-		wsc = color;
+	void GFX::Init(bool color)
+	{
+		wsc = color;
 	}
 
 	void GFX::Reset()
@@ -577,6 +592,8 @@ namespace MDFN_IEN_WSWAN
 
 		std::memset(SpriteTable, 0, sizeof(SpriteTable));
 		SpriteCountCache = 0;
+		std::memset(NextSpriteTable, 0, sizeof(NextSpriteTable));
+		NextSpriteCountCache = 0;
 		DispControl = 0;
 		BGColor = 0;
 		LineCompare = 0xBB;
@@ -599,6 +616,7 @@ namespace MDFN_IEN_WSWAN
 		FGXScroll = FGYScroll = 0;
 		LCDControl = 0;
 		LCDIcons = 0;
+		LCDVtotal = 158;
 
 		BTimerControl = 0;
 		HBTimerPeriod = 0;
@@ -629,9 +647,9 @@ namespace MDFN_IEN_WSWAN
 		NSS(wsTCacheUpdate2);		  
 		NSS(wsTileRow);
 		*/
-
-		NSS(wsVMode);
-
+
+		NSS(wsVMode);
+
 		NSS(wsMonoPal);
 		NSS(wsColors);
 		NSS(wsCols);
@@ -645,6 +663,8 @@ namespace MDFN_IEN_WSWAN
 
 		NSS(SpriteTable);
 		NSS(SpriteCountCache);
+		NSS(NextSpriteTable);
+		NSS(NextSpriteCountCache);
 		NSS(DispControl);
 		NSS(BGColor);
 		NSS(LineCompare);
@@ -667,6 +687,7 @@ namespace MDFN_IEN_WSWAN
 		NSS(FGYScroll);
 		NSS(LCDControl);
 		NSS(LCDIcons);
+		NSS(LCDVtotal);
 
 		NSS(BTimerControl);
 		NSS(HBTimerPeriod);
@@ -674,8 +695,8 @@ namespace MDFN_IEN_WSWAN
 
 		NSS(HBCounter);
 		NSS(VBCounter);
-		NSS(VideoMode);
-
+		NSS(VideoMode);
+
 		NSS(wsc); // mono / color
 	}
 }
diff --git a/wonderswan/gfx.h b/wonderswan/gfx.h
index b1f7bafb32..80bd53c613 100644
--- a/wonderswan/gfx.h
+++ b/wonderswan/gfx.h
@@ -14,8 +14,8 @@ public:
 	// TCACHE ====================================
 	void InvalidByAddr(uint32);
 	void SetVideo(int, bool);
-	void MakeTiles();
-	void GetTile(uint32 number,uint32 line,int flipv,int fliph,int bank);
+	void MakeTiles();
+	void GetTile(uint32 number,uint32 line,int flipv,int fliph,int bank);
 	// TCACHE/====================================
 	void Scanline(uint32 *target);
 	void SetPixelFormat();
@@ -34,47 +34,50 @@ public:
 
 private:
 	// TCACHE ====================================
-	uint8	tiles[256][256][2][8];
-	uint8	wsTCache[512*64];			
-	uint8	wsTCache2[512*64];			
-	uint8	wsTCacheFlipped[512*64];
-	uint8	wsTCacheFlipped2[512*64];
-	uint8	wsTCacheUpdate[512];		
-	uint8	wsTCacheUpdate2[512];		  
-	uint8	wsTileRow[8];
+	uint8	tiles[256][256][2][8];
+	uint8	wsTCache[512*64];			
+	uint8	wsTCache2[512*64];			
+	uint8	wsTCacheFlipped[512*64];
+	uint8	wsTCacheFlipped2[512*64];
+	uint8	wsTCacheUpdate[512];		
+	uint8	wsTCacheUpdate2[512];		  
+	uint8	wsTileRow[8];
 	// TCACHE/====================================
 	int		wsVMode;
 
-	uint32 wsMonoPal[16][4];
-	uint32 wsColors[8];
-	uint32 wsCols[16][16];
-
-	uint32 ColorMapG[16];
-	uint32 ColorMap[16*16*16];
-	uint32 LayerEnabled;
-
-	uint8 wsLine;                 /*current scanline*/
-
-	uint8 SpriteTable[0x80][4];
-	uint32 SpriteCountCache;
-	uint8 DispControl;
-	uint8 BGColor;
-	uint8 LineCompare;
-	uint8 SPRBase;
-	uint8 SpriteStart, SpriteCount;
-	uint8 FGBGLoc;
-	uint8 FGx0, FGy0, FGx1, FGy1;
-	uint8 SPRx0, SPRy0, SPRx1, SPRy1;
-
-	uint8 BGXScroll, BGYScroll;
-	uint8 FGXScroll, FGYScroll;
-	uint8 LCDControl, LCDIcons;
-
-	uint8 BTimerControl;
-	uint16 HBTimerPeriod;
-	uint16 VBTimerPeriod;
-
-	uint16 HBCounter, VBCounter;
+	uint32 wsMonoPal[16][4];
+	uint32 wsColors[8];
+	uint32 wsCols[16][16];
+
+	uint32 ColorMapG[16];
+	uint32 ColorMap[16*16*16];
+	uint32 LayerEnabled;
+
+	uint8 wsLine;                 /*current scanline*/
+
+	uint8 SpriteTable[0x80][4];
+	uint32 SpriteCountCache;
+	uint8 NextSpriteTable[0x80][4];
+	uint32 NextSpriteCountCache;
+	uint8 DispControl;
+	uint8 BGColor;
+	uint8 LineCompare;
+	uint8 SPRBase;
+	uint8 SpriteStart, SpriteCount;
+	uint8 FGBGLoc;
+	uint8 FGx0, FGy0, FGx1, FGy1;
+	uint8 SPRx0, SPRy0, SPRx1, SPRy1;
+
+	uint8 BGXScroll, BGYScroll;
+	uint8 FGXScroll, FGYScroll;
+	uint8 LCDControl, LCDIcons;
+	uint8 LCDVtotal;
+
+	uint8 BTimerControl;
+	uint16 HBTimerPeriod;
+	uint16 VBTimerPeriod;
+
+	uint16 HBCounter, VBCounter;
 	uint8 VideoMode;
 
 	bool wsc; // mono / color
diff --git a/wonderswan/mingw/Makefile b/wonderswan/mingw/Makefile
index c31b85f9ea..04336364c0 100644
--- a/wonderswan/mingw/Makefile
+++ b/wonderswan/mingw/Makefile
@@ -11,9 +11,13 @@ else
 	$(error Unknown arch)
 endif
 
-CXXFLAGS = -Wall -DLSB_FIRST -I.. -Wno-multichar -O3 -Wzero-as-null-pointer-constant -std=gnu++11 -fomit-frame-pointer -fno-exceptions -flto -fPIC
+CXXFLAGS = -Wall -DLSB_FIRST -I.. -Wno-multichar -O3 -Wzero-as-null-pointer-constant -std=gnu++11 -fomit-frame-pointer -fno-exceptions -flto
 TARGET = bizswan.dll
 
+ifeq (,$(findstring MINGW,$(shell uname)))
+	CXXFLAGS += -fPIC
+endif
+
 LDFLAGS_32 = -static -static-libgcc -static-libstdc++
 LDFLAGS_64 =
 LDFLAGS = -shared $(LDFLAGS_$(ARCH)) $(CXXFLAGS)