From 82a17ac0f5b460d92407068e1a92cf1fc7fd7345 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Sat, 24 Sep 2011 19:51:08 +1000 Subject: [PATCH] Update to v082r21 release. byuu says: 2-6% speed hit in SNES core for outputting 19-bit (rounded to 32-bit ... sigh) video, so that luma non-linearity can eventually be emulated properly. Now using sinc audio resampler, massive speed hit of course to NES+GB only, but it's required to get rid of aliasing (buzzing) present in many, many games otherwise. Fixed fast forward and none/blur select. Finally fixed texture clearing for changing pixel shaders and video filters. Some realllly basic NES MMC3, extremely broken so don't bug me about it. Other stuff, probably. --- bsnes/nall/dsp.hpp | 5 + bsnes/nall/dsp/core.hpp | 3 + bsnes/nall/dsp/resample/lib/sinc.hpp | 600 ++++++++++++++++++ bsnes/nall/dsp/resample/sinc.hpp | 54 ++ bsnes/nall/dsp/settings.hpp | 1 + bsnes/nes/cartridge/cartridge.cpp | 1 + bsnes/nes/mapper/mapper.cpp | 9 + bsnes/nes/mapper/mapper.hpp | 3 + bsnes/nes/mapper/mmc3/mmc3.cpp | 200 ++++++ bsnes/nes/mapper/mmc3/mmc3.hpp | 42 ++ bsnes/ruby/video/opengl.hpp | 2 - bsnes/snes/alt/ppu-compatibility/ppu.cpp | 18 +- bsnes/snes/alt/ppu-compatibility/ppu.hpp | 5 +- .../alt/ppu-compatibility/render/line.cpp | 11 +- bsnes/snes/alt/ppu-performance/ppu.cpp | 4 +- bsnes/snes/alt/ppu-performance/ppu.hpp | 4 +- .../alt/ppu-performance/screen/screen.cpp | 30 +- .../alt/ppu-performance/screen/screen.hpp | 1 - bsnes/snes/audio/audio.cpp | 2 +- bsnes/snes/interface/interface.cpp | 2 +- bsnes/snes/interface/interface.hpp | 2 +- bsnes/snes/ppu/ppu.cpp | 4 +- bsnes/snes/ppu/ppu.hpp | 4 +- bsnes/snes/ppu/screen/screen.cpp | 22 +- bsnes/snes/ppu/screen/screen.hpp | 5 +- bsnes/snes/video/video.cpp | 10 +- bsnes/snes/video/video.hpp | 2 +- bsnes/ui/general/main-window.cpp | 2 +- bsnes/ui/input/user-interface.cpp | 7 +- bsnes/ui/interface/snes.cpp | 42 +- bsnes/ui/interface/snes.hpp | 7 +- bsnes/ui/main.cpp | 4 +- 
bsnes/ui/utility/utility.cpp | 4 +- 33 files changed, 996 insertions(+), 116 deletions(-) create mode 100755 bsnes/nall/dsp/resample/lib/sinc.hpp create mode 100755 bsnes/nall/dsp/resample/sinc.hpp create mode 100755 bsnes/nes/mapper/mmc3/mmc3.cpp create mode 100755 bsnes/nes/mapper/mmc3/mmc3.hpp diff --git a/bsnes/nall/dsp.hpp b/bsnes/nall/dsp.hpp index 009c8b6c..a2400ec7 100755 --- a/bsnes/nall/dsp.hpp +++ b/bsnes/nall/dsp.hpp @@ -1,6 +1,11 @@ #ifndef NALL_DSP_HPP #define NALL_DSP_HPP +#include <algorithm> +#ifdef __SSE__ + #include <xmmintrin.h> +#endif + #define NALL_DSP_INTERNAL_HPP #include <nall/dsp/core.hpp> #undef NALL_DSP_INTERNAL_HPP diff --git a/bsnes/nall/dsp/core.hpp b/bsnes/nall/dsp/core.hpp index ed7ea878..a5b967b1 100755 --- a/bsnes/nall/dsp/core.hpp +++ b/bsnes/nall/dsp/core.hpp @@ -28,6 +28,7 @@ struct DSP { Cubic, Hermite, Average, + Sinc, }; inline void setChannels(unsigned channels); @@ -54,6 +55,7 @@ protected: friend class ResampleCubic; friend class ResampleAverage; friend class ResampleHermite; + friend class ResampleSinc; struct Settings { unsigned channels; @@ -85,6 +87,7 @@ protected: #include "resample/cubic.hpp" #include "resample/hermite.hpp" #include "resample/average.hpp" +#include "resample/sinc.hpp" #include "settings.hpp" void DSP::sample(signed channel[]) { diff --git a/bsnes/nall/dsp/resample/lib/sinc.hpp b/bsnes/nall/dsp/resample/lib/sinc.hpp new file mode 100755 index 00000000..3e953679 --- /dev/null +++ b/bsnes/nall/dsp/resample/lib/sinc.hpp @@ -0,0 +1,600 @@ +// If these types are changed to anything other than "float", you should comment out the SSE detection directives below +// so that the SSE code is not used. + +typedef float resample_coeff_t; // note: sizeof(resample_coeff_t) must be == to a power of 2, and not larger than 16 +typedef float resample_samp_t; + + +// ...but don't comment this single RESAMPLE_SSEREGPARM define out when disabling SSE. 
+#define RESAMPLE_SSEREGPARM + +#if defined(__SSE__) + #define SINCRESAMPLE_USE_SSE 1 + #ifndef __x86_64__ + #undef RESAMPLE_SSEREGPARM + #define RESAMPLE_SSEREGPARM __attribute__((sseregparm)) + #endif +#else + // TODO: altivec here +#endif + +namespace ResampleUtility +{ + inline void kaiser_window(double* io, int count, double beta); + inline void gen_sinc(double* out, int size, double cutoff, double kaiser); + inline void gen_sinc_os(double* out, int size, double cutoff, double kaiser); + inline void normalize(double* io, int size, double gain = 1.0); + + inline void* make_aligned(void* ptr, unsigned boundary); // boundary must be a power of 2 +} + +class SincResampleHR +{ + private: + + inline void Init(unsigned ratio_arg, double desired_bandwidth, double beta, double d); + + inline void write(resample_samp_t sample) RESAMPLE_SSEREGPARM; + inline resample_samp_t read(void) RESAMPLE_SSEREGPARM; + inline bool output_avail(void); + + private: + + inline resample_samp_t mac(const resample_samp_t *wave, const resample_coeff_t *coeff, unsigned count); + + unsigned ratio; + unsigned num_convolutions; + + resample_coeff_t *coeffs; + std::vector coeffs_mem; + + // second half of ringbuffer should be copy of first half. 
+ resample_samp_t *rb; + std::vector rb_mem; + + signed rb_readpos; + signed rb_writepos; + signed rb_in; + signed rb_eff_size; + + friend class SincResample; +}; + +class SincResample +{ + public: + + enum + { + QUALITY_LOW = 0, + QUALITY_MEDIUM = 2, + QUALITY_HIGH = 4 + }; + + inline SincResample(double input_rate, double output_rate, double desired_bandwidth, unsigned quality = QUALITY_HIGH); + + inline void write(resample_samp_t sample) RESAMPLE_SSEREGPARM; + inline resample_samp_t read(void) RESAMPLE_SSEREGPARM; + inline bool output_avail(void); + + private: + + inline void Init(double input_rate, double output_rate, double desired_bandwidth, double beta, double d, unsigned pn_nume, unsigned phases_min); + + inline resample_samp_t mac(const resample_samp_t *wave, const resample_coeff_t *coeffs_a, const resample_coeff_t *coeffs_b, const double ffract, unsigned count) RESAMPLE_SSEREGPARM; + + unsigned num_convolutions; + unsigned num_phases; + + unsigned step_int; + double step_fract; + + double input_pos_fract; + + + std::vector coeffs; // Pointers into coeff_mem. + std::vector coeff_mem; + + + std::vector rb; // second half should be copy of first half. 
+ signed rb_readpos; + signed rb_writepos; + signed rb_in; + + bool hr_used; + SincResampleHR hr; +}; + + +// +// Code: +// +//#include "resample.hpp" + +#if 0 +namespace bit +{ + inline unsigned round(unsigned x) { + if((x & (x - 1)) == 0) return x; + while(x & (x - 1)) x &= x - 1; + return x << 1; + } +} +#endif + +void SincResampleHR::Init(unsigned ratio_arg, double desired_bandwidth, double beta, double d) +{ + const unsigned align_boundary = 16; + std::vector coeffs_tmp; + double cutoff; // 1.0 = f/2 + + ratio = ratio_arg; + + //num_convolutions = ((unsigned)ceil(d / ((1.0 - desired_bandwidth) / ratio)) + 1) &~ 1; // round up to be even + num_convolutions = ((unsigned)ceil(d / ((1.0 - desired_bandwidth) / ratio)) | 1); + + cutoff = (1.0 / ratio) - (d / num_convolutions); + +//printf("%d %d %.20f\n", ratio, num_convolutions, cutoff); + assert(num_convolutions > ratio); + + + // Generate windowed sinc of POWER + coeffs_tmp.resize(num_convolutions); + //ResampleUtility::gen_sinc(&coeffs_tmp[0], num_convolutions, cutoff, beta); + ResampleUtility::gen_sinc_os(&coeffs_tmp[0], num_convolutions, cutoff, beta); + ResampleUtility::normalize(&coeffs_tmp[0], num_convolutions); + + // Copy from coeffs_tmp to coeffs~ + // We multiply many coefficients at a time in the mac loop, so make sure the last few that don't really + // exist are allocated, zero'd mem. 
+ + coeffs_mem.resize(((num_convolutions + 7) &~ 7) * sizeof(resample_coeff_t) + (align_boundary - 1)); + coeffs = (resample_coeff_t *)ResampleUtility::make_aligned(&coeffs_mem[0], align_boundary); + + + for(unsigned i = 0; i < num_convolutions; i++) + coeffs[i] = coeffs_tmp[i]; + + rb_eff_size = nall::bit::round(num_convolutions * 2) >> 1; + rb_readpos = 0; + rb_writepos = 0; + rb_in = 0; + + rb_mem.resize(rb_eff_size * 2 * sizeof(resample_samp_t) + (align_boundary - 1)); + rb = (resample_samp_t *)ResampleUtility::make_aligned(&rb_mem[0], align_boundary); +} + + +inline bool SincResampleHR::output_avail(void) +{ + return(rb_in >= (signed)num_convolutions); +} + +inline void SincResampleHR::write(resample_samp_t sample) +{ + assert(!output_avail()); + + rb[rb_writepos] = sample; + rb[rb_writepos + rb_eff_size] = sample; + rb_writepos = (rb_writepos + 1) & (rb_eff_size - 1); + rb_in++; +} + +resample_samp_t SincResampleHR::mac(const resample_samp_t *wave, const resample_coeff_t *coeff, unsigned count) +{ +#if SINCRESAMPLE_USE_SSE + __m128 accum_veca[2] = { _mm_set1_ps(0), _mm_set1_ps(0) }; + + resample_samp_t accum; + + for(unsigned c = 0; c < count; c += 8) + { + for(unsigned i = 0; i < 2; i++) + { + __m128 co[2]; + __m128 w[2]; + + co[i] = _mm_load_ps(&coeff[c + i * 4]); + w[i] = _mm_load_ps(&wave[c + i * 4]); + + w[i] = _mm_mul_ps(w[i], co[i]); + + accum_veca[i] = _mm_add_ps(w[i], accum_veca[i]); + } + } + + __m128 accum_vec = _mm_add_ps(accum_veca[0], accum_veca[1]); //_mm_add_ps(_mm_add_ps(accum_veca[0], accum_veca[1]), _mm_add_ps(accum_veca[2], accum_veca[3])); + + accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6))); + accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (1 << 0) | (0 << 2) | (1 << 4) | (0 << 6))); + + _mm_store_ss(&accum, accum_vec); + + return accum; +#else + resample_samp_t accum[4] = { 0, 0, 0, 0 }; + + for(unsigned c = 0; c < count; c+= 4) + { + accum[0] += 
wave[c + 0] * coeff[c + 0]; + accum[1] += wave[c + 1] * coeff[c + 1]; + accum[2] += wave[c + 2] * coeff[c + 2]; + accum[3] += wave[c + 3] * coeff[c + 3]; + } + + return (accum[0] + accum[1]) + (accum[2] + accum[3]); // don't mess with parentheses(assuming compiler doesn't already, which it may... + +#endif +} + + +resample_samp_t SincResampleHR::read(void) +{ + assert(output_avail()); + resample_samp_t ret; + + ret = mac(&rb[rb_readpos], &coeffs[0], num_convolutions); + + rb_readpos = (rb_readpos + ratio) & (rb_eff_size - 1); + rb_in -= ratio; + + return ret; +} + + +SincResample::SincResample(double input_rate, double output_rate, double desired_bandwidth, unsigned quality) +{ + const struct + { + double beta; + double d; + unsigned pn_nume; + unsigned phases_min; + } qtab[5] = + { + { 5.658, 3.62, 4096, 4 }, + { 6.764, 4.32, 8192, 4 }, + { 7.865, 5.0, 16384, 8 }, + { 8.960, 5.7, 32768, 16 }, + { 10.056, 6.4, 65536, 32 } + }; + + // Sanity checks + assert(ceil(input_rate) > 0); + assert(ceil(output_rate) > 0); + assert(ceil(input_rate / output_rate) <= 1024); + assert(ceil(output_rate / input_rate) <= 1024); + + // The simplistic number-of-phases calculation code doesn't work well enough for when desired_bandwidth is close to 1.0 and when + // upsampling. + assert(desired_bandwidth >= 0.25 && desired_bandwidth < 0.96); + assert(quality >= 0 && quality <= 4); + + hr_used = false; + +#if 1 + // Round down to the nearest multiple of 4(so wave buffer remains aligned) + // It also adjusts the effective intermediate sampling rate up slightly, so that the upper frequencies below f/2 + // aren't overly attenuated so much. In the future, we might want to do an FFT or something to choose the intermediate rate more accurately + // to virtually eliminate over-attenuation. 
+ unsigned ioratio_rd = (unsigned)floor(input_rate / (output_rate * (1.0 + (1.0 - desired_bandwidth) / 2) )) & ~3; + + if(ioratio_rd >= 8) + { + hr.Init(ioratio_rd, desired_bandwidth, qtab[quality].beta, qtab[quality].d); //10.056, 6.4); + hr_used = true; + + input_rate /= ioratio_rd; + } +#endif + + Init(input_rate, output_rate, desired_bandwidth, qtab[quality].beta, qtab[quality].d, qtab[quality].pn_nume, qtab[quality].phases_min); +} + +void SincResample::Init(double input_rate, double output_rate, double desired_bandwidth, double beta, double d, unsigned pn_nume, unsigned phases_min) +{ + const unsigned max_mult_atatime = 8; // multiply "granularity". must be power of 2. + const unsigned max_mult_minus1 = (max_mult_atatime - 1); + const unsigned conv_alignment_bytes = 16; // must be power of 2 + const double input_to_output_ratio = input_rate / output_rate; + const double output_to_input_ratio = output_rate / input_rate; + double cutoff; // 1.0 = input_rate / 2 + std::vector coeff_init_buffer; + + // Round up num_convolutions to be even. + if(output_rate > input_rate) + num_convolutions = ((unsigned)ceil(d / (1.0 - desired_bandwidth)) + 1) & ~1; + else + num_convolutions = ((unsigned)ceil(d / (output_to_input_ratio * (1.0 - desired_bandwidth))) + 1) & ~1; + + if(output_rate > input_rate) // Upsampling + cutoff = desired_bandwidth; + else // Downsampling + cutoff = output_to_input_ratio * desired_bandwidth; + + // Round up to be even. + num_phases = (std::max(pn_nume / num_convolutions, phases_min) + 1) &~1; + + // Adjust cutoff to account for the multiple phases. 
+ cutoff = cutoff / num_phases; + + assert((num_convolutions & 1) == 0); + assert((num_phases & 1) == 0); + +// fprintf(stderr, "num_convolutions=%u, num_phases=%u, total expected coeff byte size=%lu\n", num_convolutions, num_phases, +// (long)((num_phases + 2) * ((num_convolutions + max_mult_minus1) & ~max_mult_minus1) * sizeof(float) + conv_alignment_bytes)); + + coeff_init_buffer.resize(num_phases * num_convolutions); + + coeffs.resize(num_phases + 1 + 1); + + coeff_mem.resize((num_phases + 1 + 1) * ((num_convolutions + max_mult_minus1) &~ max_mult_minus1) * sizeof(resample_coeff_t) + conv_alignment_bytes); + + // Assign aligned pointers into coeff_mem + { + resample_coeff_t *base_ptr = (resample_coeff_t *)ResampleUtility::make_aligned(&coeff_mem[0], conv_alignment_bytes); + + for(unsigned phase = 0; phase < (num_phases + 1 + 1); phase++) + { + coeffs[phase] = base_ptr + (((num_convolutions + max_mult_minus1) & ~max_mult_minus1) * phase); + } + } + + ResampleUtility::gen_sinc(&coeff_init_buffer[0], num_phases * num_convolutions, cutoff, beta); + ResampleUtility::normalize(&coeff_init_buffer[0], num_phases * num_convolutions, num_phases); + + // Reorder coefficients to allow for more efficient convolution. + for(int phase = -1; phase < ((int)num_phases + 1); phase++) + { + for(int conv = 0; conv < (int)num_convolutions; conv++) + { + double coeff; + + if(phase == -1 && conv == 0) + coeff = 0; + else if(phase == (int)num_phases && conv == ((int)num_convolutions - 1)) + coeff = 0; + else + coeff = coeff_init_buffer[conv * num_phases + phase]; + + coeffs[phase + 1][conv] = coeff; + } + } + + // Free a bit of mem + coeff_init_buffer.resize(0); + + step_int = floor(input_to_output_ratio); + step_fract = input_to_output_ratio - step_int; + + input_pos_fract = 0; + + // Do NOT use rb.size() later in the code, since it'll include the padding. 
+ // We should only need one "max_mult_minus1" here, not two, since it won't matter if it over-reads(due to doing "max_mult_atatime" multiplications at a time + // rather than just 1, in which case this over-read wouldn't happen), from the first half into the duplicated half, + // since those corresponding coefficients will be zero anyway; this is just to handle the case of reading off the end of the duplicated half to + // prevent illegal memory accesses. + rb.resize(num_convolutions * 2 + max_mult_minus1); + + rb_readpos = 0; + rb_writepos = 0; + rb_in = 0; +} + +resample_samp_t SincResample::mac(const resample_samp_t *wave, const resample_coeff_t *coeffs_a, const resample_coeff_t *coeffs_b, const double ffract, unsigned count) +{ + resample_samp_t accum = 0; +#if SINCRESAMPLE_USE_SSE + __m128 accum_vec_a[2] = { _mm_set1_ps(0), _mm_set1_ps(0) }; + __m128 accum_vec_b[2] = { _mm_set1_ps(0), _mm_set1_ps(0) }; + + for(unsigned c = 0; c < count; c += 8) //8) //4) + { + __m128 coeff_a[2]; + __m128 coeff_b[2]; + __m128 w[2]; + __m128 result_a[2], result_b[2]; + + for(unsigned i = 0; i < 2; i++) + { + coeff_a[i] = _mm_load_ps(&coeffs_a[c + (i * 4)]); + coeff_b[i] = _mm_load_ps(&coeffs_b[c + (i * 4)]); + w[i] = _mm_loadu_ps(&wave[c + (i * 4)]); + + result_a[i] = _mm_mul_ps(coeff_a[i], w[i]); + result_b[i] = _mm_mul_ps(coeff_b[i], w[i]); + + accum_vec_a[i] = _mm_add_ps(result_a[i], accum_vec_a[i]); + accum_vec_b[i] = _mm_add_ps(result_b[i], accum_vec_b[i]); + } + } + + __m128 accum_vec, av_a, av_b; + __m128 mult_a_vec = _mm_set1_ps(1.0 - ffract); + __m128 mult_b_vec = _mm_set1_ps(ffract); + + av_a = _mm_mul_ps(mult_a_vec, /*accum_vec_a[0]);*/ _mm_add_ps(accum_vec_a[0], accum_vec_a[1])); + av_b = _mm_mul_ps(mult_b_vec, /*accum_vec_b[0]);*/ _mm_add_ps(accum_vec_b[0], accum_vec_b[1])); + + accum_vec = _mm_add_ps(av_a, av_b); + + accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6))); + accum_vec = 
_mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (1 << 0) | (0 << 2) | (1 << 4) | (0 << 6))); + + _mm_store_ss(&accum, accum_vec); +#else + resample_coeff_t mult_a = 1.0 - ffract; + resample_coeff_t mult_b = ffract; + + for(unsigned c = 0; c < count; c += 4) + { + accum += wave[c + 0] * (coeffs_a[c + 0] * mult_a + coeffs_b[c + 0] * mult_b); + accum += wave[c + 1] * (coeffs_a[c + 1] * mult_a + coeffs_b[c + 1] * mult_b); + accum += wave[c + 2] * (coeffs_a[c + 2] * mult_a + coeffs_b[c + 2] * mult_b); + accum += wave[c + 3] * (coeffs_a[c + 3] * mult_a + coeffs_b[c + 3] * mult_b); + } +#endif + + return accum; +} + +inline bool SincResample::output_avail(void) +{ + return(rb_in >= (int)num_convolutions); +} + +resample_samp_t SincResample::read(void) +{ + assert(output_avail()); + double phase = input_pos_fract * num_phases - 0.5; + signed phase_int = (signed)floor(phase); + double phase_fract = phase - phase_int; + unsigned phase_a = num_phases - 1 - phase_int; + unsigned phase_b = phase_a - 1; + resample_samp_t ret; + + ret = mac(&rb[rb_readpos], &coeffs[phase_a + 1][0], &coeffs[phase_b + 1][0], phase_fract, num_convolutions); + + unsigned int_increment = step_int; + + input_pos_fract += step_fract; + int_increment += floor(input_pos_fract); + input_pos_fract -= floor(input_pos_fract); + + rb_readpos = (rb_readpos + int_increment) % num_convolutions; + rb_in -= int_increment; + + return ret; +} + +inline void SincResample::write(resample_samp_t sample) +{ + assert(!output_avail()); + + if(hr_used) + { + hr.write(sample); + + if(hr.output_avail()) + { + sample = hr.read(); + } + else + { + return; + } + } + + rb[rb_writepos + 0 * num_convolutions] = sample; + rb[rb_writepos + 1 * num_convolutions] = sample; + rb_writepos = (rb_writepos + 1) % num_convolutions; + rb_in++; +} + +void ResampleUtility::kaiser_window( double* io, int count, double beta) +{ + int const accuracy = 24; //16; //12; + + double* end = io + count; + + double beta2 = beta * beta * 
(double) -0.25; + double to_fract = beta2 / ((double) count * count); + double i = 0; + double rescale = 0; // Doesn't need an initializer, to shut up gcc + + for ( ; io < end; ++io, i += 1 ) + { + double x = i * i * to_fract - beta2; + double u = x; + double k = x + 1; + + double n = 2; + do + { + u *= x / (n * n); + n += 1; + k += u; + } + while ( k <= u * (1 << accuracy) ); + + if ( !i ) + rescale = 1 / k; // otherwise values get large + + *io *= k * rescale; + } +} + +void ResampleUtility::gen_sinc(double* out, int size, double cutoff, double kaiser) +{ + assert( size % 2 == 0 ); // size must be even + + int const half_size = size / 2; + double* const mid = &out [half_size]; + + // Generate right half of sinc + for ( int i = 0; i < half_size; i++ ) + { + double angle = (i * 2 + 1) * (M_PI / 2); + mid [i] = sin( angle * cutoff ) / angle; + } + + kaiser_window( mid, half_size, kaiser ); + + // Mirror for left half + for ( int i = 0; i < half_size; i++ ) + out [i] = mid [half_size - 1 - i]; +} + +void ResampleUtility::gen_sinc_os(double* out, int size, double cutoff, double kaiser) +{ + assert( size % 2 == 1); // size must be odd + + for(int i = 0; i < size; i++) + { + if(i == (size / 2)) + out[i] = 2 * M_PI * (cutoff / 2); //0.078478; //1.0; //sin(2 * M_PI * (cutoff / 2) * (i - size / 2)) / (i - (size / 2)); + else + out[i] = sin(2 * M_PI * (cutoff / 2) * (i - size / 2)) / (i - (size / 2)); + +// out[i] *= 0.3635819 - 0.4891775 * cos(2 * M_PI * i / (size - 1)) + 0.1365995 * cos(4 * M_PI * i / (size - 1)) - 0.0106411 * cos(6 * M_PI * i / (size - 1)); +//0.42 - 0.5 * cos(2 * M_PI * i / (size - 1)) + 0.08 * cos(4 * M_PI * i / (size - 1)); + +// printf("%d %f\n", i, out[i]); + } + + kaiser_window(&out[size / 2], size / 2 + 1, kaiser); + + // Mirror for left half + for ( int i = 0; i < size / 2; i++ ) + out [i] = out [size - 1 - i]; + +} + +void ResampleUtility::normalize(double* io, int size, double gain) +{ + double sum = 0; + for ( int i = 0; i < size; i++ ) + sum 
+= io [i]; + + double scale = gain / sum; + for ( int i = 0; i < size; i++ ) + io [i] *= scale; +} + +void* ResampleUtility::make_aligned(void* ptr, unsigned boundary) +{ + unsigned char* null_ptr = (unsigned char *)NULL; + unsigned char* uc_ptr = (unsigned char *)ptr; + + uc_ptr += (boundary - ((uc_ptr - null_ptr) & (boundary - 1))) & (boundary - 1); + + //while((uc_ptr - null_ptr) & (boundary - 1)) + // uc_ptr++; + + //printf("%16llx %16llx\n", (unsigned long long)ptr, (unsigned long long)uc_ptr); + + assert((uc_ptr - (unsigned char *)ptr) < boundary && (uc_ptr >= (unsigned char *)ptr)); + + return uc_ptr; +} diff --git a/bsnes/nall/dsp/resample/sinc.hpp b/bsnes/nall/dsp/resample/sinc.hpp new file mode 100755 index 00000000..a77a1eeb --- /dev/null +++ b/bsnes/nall/dsp/resample/sinc.hpp @@ -0,0 +1,54 @@ +#ifdef NALL_DSP_INTERNAL_HPP + +#include "lib/sinc.hpp" + +struct ResampleSinc : Resampler { + inline void setFrequency(); + inline void clear(); + inline void sample(); + inline ResampleSinc(DSP &dsp); + +private: + inline void remakeSinc(); + SincResample *sinc_resampler[8]; +}; + +void ResampleSinc::setFrequency() { + remakeSinc(); +} + +void ResampleSinc::clear() { + remakeSinc(); +} + +void ResampleSinc::sample() { + for(unsigned c = 0; c < dsp.settings.channels; c++) { + sinc_resampler[c]->write(dsp.buffer.read(c)); + } + + if(sinc_resampler[0]->output_avail()) { + do { + for(unsigned c = 0; c < dsp.settings.channels; c++) { + dsp.output.write(c) = sinc_resampler[c]->read(); + } + dsp.output.wroffset++; + } while(sinc_resampler[0]->output_avail()); + } + + dsp.buffer.rdoffset++; +} + +ResampleSinc::ResampleSinc(DSP &dsp) : Resampler(dsp) { + for(unsigned n = 0; n < 8; n++) sinc_resampler[n] = 0; +} + +void ResampleSinc::remakeSinc() { + assert(dsp.settings.channels < 8); + + for(unsigned c = 0; c < dsp.settings.channels; c++) { + if(sinc_resampler[c]) delete sinc_resampler[c]; + sinc_resampler[c] = new SincResample(dsp.settings.frequency, frequency, 0.85, 
SincResample::QUALITY_HIGH); + } +} + +#endif diff --git a/bsnes/nall/dsp/settings.hpp b/bsnes/nall/dsp/settings.hpp index 9ce817e8..3a8f24c6 100755 --- a/bsnes/nall/dsp/settings.hpp +++ b/bsnes/nall/dsp/settings.hpp @@ -36,6 +36,7 @@ void DSP::setResampler(ResampleEngine engine) { case ResampleEngine::Cubic: resampler = new ResampleCubic (*this); return; case ResampleEngine::Hermite: resampler = new ResampleHermite(*this); return; case ResampleEngine::Average: resampler = new ResampleAverage(*this); return; + case ResampleEngine::Sinc: resampler = new ResampleSinc (*this); return; } throw; diff --git a/bsnes/nes/cartridge/cartridge.cpp b/bsnes/nes/cartridge/cartridge.cpp index 08d78879..7a98a538 100755 --- a/bsnes/nes/cartridge/cartridge.cpp +++ b/bsnes/nes/cartridge/cartridge.cpp @@ -32,6 +32,7 @@ void Cartridge::load(const string &xml, const uint8_t *data, unsigned size) { default : mapper = &Mapper::none; break; case 1: mapper = &Mapper::mmc1; break; case 3: mapper = &Mapper::cnrom; break; + case 4: mapper = &Mapper::mmc3; break; case 7: mapper = &Mapper::aorom; break; case 16: mapper = &Mapper::bandaiFCG; break; } diff --git a/bsnes/nes/mapper/mapper.cpp b/bsnes/nes/mapper/mapper.cpp index 40eaf9fd..7ee3837a 100755 --- a/bsnes/nes/mapper/mapper.cpp +++ b/bsnes/nes/mapper/mapper.cpp @@ -21,6 +21,14 @@ namespace Mapper { return base; } + uint8& Mapper::prg_data(unsigned addr) { + return cartridge.prg_data[mirror(addr, cartridge.prg_size)]; + } + + uint8& Mapper::chr_data(unsigned addr) { + return cartridge.chr_data[mirror(addr, cartridge.chr_size)]; + } + unsigned Mapper::ram_size() { return 0u; } @@ -34,6 +42,7 @@ namespace Mapper { #include "bandai-fcg/bandai-fcg.cpp" #include "cnrom/cnrom.cpp" #include "mmc1/mmc1.cpp" + #include "mmc3/mmc3.cpp" } } diff --git a/bsnes/nes/mapper/mapper.hpp b/bsnes/nes/mapper/mapper.hpp index 2a09c492..0b74a1e5 100755 --- a/bsnes/nes/mapper/mapper.hpp +++ b/bsnes/nes/mapper/mapper.hpp @@ -1,6 +1,8 @@ namespace Mapper { struct 
Mapper { unsigned mirror(unsigned addr, unsigned size) const; + uint8& prg_data(unsigned addr); + uint8& chr_data(unsigned addr); virtual uint8 prg_read(uint16 addr) = 0; virtual void prg_write(uint16 addr, uint8 data) = 0; @@ -25,4 +27,5 @@ namespace Mapper { #include "bandai-fcg/bandai-fcg.hpp" #include "cnrom/cnrom.hpp" #include "mmc1/mmc1.hpp" + #include "mmc3/mmc3.hpp" } diff --git a/bsnes/nes/mapper/mmc3/mmc3.cpp b/bsnes/nes/mapper/mmc3/mmc3.cpp new file mode 100755 index 00000000..d1db70a5 --- /dev/null +++ b/bsnes/nes/mapper/mmc3/mmc3.cpp @@ -0,0 +1,200 @@ +MMC3 mmc3; + +void MMC3::clock_irq_test(uint16 addr) { + if(!(last_chr_addr & 0x1000) && (addr & 0x1000)) { + if(irq_delay == 0) { + if(irq_counter == 0) { + irq_counter = irq_latch; + irq_line = irq_enable; + cpu.set_irq_line(irq_line); + } + irq_counter--; + } + irq_delay = 5; + } + + last_chr_addr = addr; +} + +unsigned MMC3::prg_addr(uint16 addr) { + if((addr & 0xe000) == 0x8000) { + if((bank_select & 0x40) == 1) return (0x3e << 13) | (addr & 0x1fff); + return (prg_bank[0] << 13) | (addr & 0x1fff); + } + + if((addr & 0xe000) == 0xa000) { + return (prg_bank[1] << 13) | (addr & 0x1fff); + } + + if((addr & 0xe000) == 0xc000) { + if((bank_select & 0x40) == 0) return (0x3e << 13) | (addr & 0x1fff); + return (prg_bank[0] << 13) | (addr & 0x1fff); + } + + if((addr & 0xe000) == 0xe000) { + return (0x3f << 13) | (addr & 0x1fff); + } + + throw; +} + +uint8 MMC3::prg_read(uint16 addr) { + if(irq_delay) irq_delay--; + + if((addr & 0xe000) == 0x6000) { //$6000-7fff + if(prg_ram_enable) { + return prg_ram[addr & 0x1fff]; + } + } + + if(addr & 0x8000) { //$8000-ffff + return prg_data(prg_addr(addr)); + } + + return cpu.mdr(); +} + +void MMC3::prg_write(uint16 addr, uint8 data) { + if(irq_delay) irq_delay--; + + if((addr & 0xe000) == 0x6000) { //$6000-7fff + if(prg_ram_enable && prg_ram_write_protect == false) { + prg_ram[addr & 0x1fff] = data; + } + } + + switch(addr & 0xe001) { + case 0x8000: + bank_select = data 
& 0xc7; + break; + + case 0x8001: + switch(bank_select & 7) { + case 0: chr_bank[0] = data & ~1; break; + case 1: chr_bank[1] = data & ~1; break; + case 2: chr_bank[2] = data; break; + case 3: chr_bank[3] = data; break; + case 4: chr_bank[4] = data; break; + case 5: chr_bank[5] = data; break; + case 6: prg_bank[0] = data & 0x3f; break; + case 7: prg_bank[1] = data & 0x3f; break; + } + break; + + case 0xa000: + mirror_select = data & 0x01; + break; + + case 0xa001: + prg_ram_enable = data & 0x80; + prg_ram_write_protect = data & 0x40; + break; + + case 0xc000: + irq_latch = data; + break; + + case 0xc001: + irq_counter = 0; + break; + + case 0xe000: + irq_enable = false; + irq_line = 0; + cpu.set_irq_line(irq_line); + break; + + case 0xe001: + irq_enable = true; + break; + } +} + +unsigned MMC3::chr_addr(uint16 addr) { + if((bank_select & 0x80) == 0) { + if(addr <= 0x07ff) return (chr_bank[0] << 10) | (addr & 0x07ff); + if(addr <= 0x0fff) return (chr_bank[1] << 10) | (addr & 0x07ff); + if(addr <= 0x13ff) return (chr_bank[2] << 10) | (addr & 0x03ff); + if(addr <= 0x17ff) return (chr_bank[3] << 10) | (addr & 0x03ff); + if(addr <= 0x1bff) return (chr_bank[4] << 10) | (addr & 0x03ff); + if(addr <= 0x1fff) return (chr_bank[5] << 10) | (addr & 0x03ff); + } + + if((bank_select & 0x80) != 0) { + if(addr <= 0x03ff) return (chr_bank[2] << 10) | (addr & 0x03ff); + if(addr <= 0x07ff) return (chr_bank[3] << 10) | (addr & 0x03ff); + if(addr <= 0x0bff) return (chr_bank[4] << 10) | (addr & 0x03ff); + if(addr <= 0x0fff) return (chr_bank[5] << 10) | (addr & 0x03ff); + if(addr <= 0x17ff) return (chr_bank[0] << 10) | (addr & 0x07ff); + if(addr <= 0x1fff) return (chr_bank[1] << 10) | (addr & 0x07ff); + } + + throw; +} + +uint8 MMC3::chr_read(uint16 addr) { + clock_irq_test(addr); + return chr_data(chr_addr(addr)); +} + +void MMC3::chr_write(uint16 addr, uint8 data) { + clock_irq_test(addr); + last_chr_addr = addr; + if(cartridge.chr_ram == false) return; + chr_data(chr_addr(addr)) = 
data; +} + +unsigned MMC3::ciram_addr(uint13 addr) { + clock_irq_test(0x2000 | addr); + if(mirror_select == 0) return ((addr & 0x0400) >> 0) | (addr & 0x03ff); + if(mirror_select == 1) return ((addr & 0x0800) >> 1) | (addr & 0x03ff); + throw; +} + +uint8 MMC3::ciram_read(uint13 addr) { + clock_irq_test(0x2000 | addr); + return ppu.ciram_read(ciram_addr(addr)); +} + +void MMC3::ciram_write(uint13 addr, uint8 data) { + return ppu.ciram_write(ciram_addr(addr), data); +} + +unsigned MMC3::ram_size() { + return 8192u; +} + +uint8* MMC3::ram_data() { + return prg_ram; +} + +void MMC3::power() { + reset(); +} + +void MMC3::reset() { + bank_select = 0; + + prg_bank[0] = 0; + prg_bank[1] = 0; + + chr_bank[0] = 0; + chr_bank[1] = 0; + chr_bank[2] = 0; + chr_bank[3] = 0; + chr_bank[4] = 0; + chr_bank[5] = 0; + + mirror_select = 0; + prg_ram_enable = 1; + prg_ram_write_protect = 0; + + irq_latch = 0x00; + irq_counter = 0x00; + irq_enable = false; + irq_delay = 0; + irq_line = 0; +} + +void MMC3::serialize(serializer &s) { +} diff --git a/bsnes/nes/mapper/mmc3/mmc3.hpp b/bsnes/nes/mapper/mmc3/mmc3.hpp new file mode 100755 index 00000000..cbf3ca7e --- /dev/null +++ b/bsnes/nes/mapper/mmc3/mmc3.hpp @@ -0,0 +1,42 @@ +struct MMC3 : Mapper { + uint8 prg_read(uint16 addr); + void prg_write(uint16 addr, uint8 data); + + uint8 chr_read(uint16 addr); + void chr_write(uint16 addr, uint8 data); + + uint8 ciram_read(uint13 addr); + void ciram_write(uint13 addr, uint8 data); + + unsigned ram_size(); + uint8* ram_data(); + + void power(); + void reset(); + + void serialize(serializer&); + +private: + uint8 prg_ram[8192]; + + uint8 bank_select; + uint8 prg_bank[2]; + uint8 chr_bank[6]; + bool mirror_select; + bool prg_ram_enable; + bool prg_ram_write_protect; + uint8 irq_latch; + uint8 irq_counter; + bool irq_enable; + unsigned irq_delay; + bool irq_line; + + uint16 last_chr_addr; + + unsigned prg_addr(uint16 addr); + unsigned chr_addr(uint16 addr); + unsigned ciram_addr(uint13 addr); + void 
clock_irq_test(uint16 addr); +}; + +extern MMC3 mmc3; diff --git a/bsnes/ruby/video/opengl.hpp b/bsnes/ruby/video/opengl.hpp index 3022d0da..386a68b5 100755 --- a/bsnes/ruby/video/opengl.hpp +++ b/bsnes/ruby/video/opengl.hpp @@ -37,8 +37,6 @@ public: unsigned iwidth, iheight; void resize(unsigned width, unsigned height) { - if(iwidth >= width && iheight >= height) return; - if(gltexture == 0) glGenTextures(1, &gltexture); iwidth = max(width, iwidth ); iheight = max(height, iheight); diff --git a/bsnes/snes/alt/ppu-compatibility/ppu.cpp b/bsnes/snes/alt/ppu-compatibility/ppu.cpp index 298ed737..2ae46e7d 100755 --- a/bsnes/snes/alt/ppu-compatibility/ppu.cpp +++ b/bsnes/snes/alt/ppu-compatibility/ppu.cpp @@ -351,7 +351,7 @@ void PPU::power() { void PPU::reset() { create(Enter, system.cpu_frequency()); PPUcounter::reset(); - memset(surface, 0, 512 * 512 * sizeof(uint16)); + memset(surface, 0, 512 * 512 * sizeof(uint32)); frame(); @@ -399,7 +399,7 @@ void PPU::set_frameskip(unsigned frameskip_) { } PPU::PPU() { - surface = new uint16[512 * 512]; + surface = new uint32[512 * 512]; output = surface + 16 * 512; alloc_tiledata_cache(); @@ -410,20 +410,6 @@ PPU::PPU() { } } - for(unsigned l = 0; l < 16; l++) { - for(unsigned r = 0; r < 32; r++) { - for(unsigned g = 0; g < 32; g++) { - for(unsigned b = 0; b < 32; b++) { - double luma = (double)l / 15.0; - unsigned ar = (luma * r + 0.5); - unsigned ag = (luma * g + 0.5); - unsigned ab = (luma * b + 0.5); - light_table[l][(r << 10) + (g << 5) + b] = (ab << 10) + (ag << 5) + ar; - } - } - } - } - layer_enabled[BG1][0] = true; layer_enabled[BG1][1] = true; layer_enabled[BG2][0] = true; diff --git a/bsnes/snes/alt/ppu-compatibility/ppu.hpp b/bsnes/snes/alt/ppu-compatibility/ppu.hpp index 70442cdb..cccaabba 100755 --- a/bsnes/snes/alt/ppu-compatibility/ppu.hpp +++ b/bsnes/snes/alt/ppu-compatibility/ppu.hpp @@ -12,8 +12,8 @@ public: #include "mmio/mmio.hpp" #include "render/render.hpp" - uint16 *surface; - uint16 *output; + uint32 
*surface; + uint32 *output; uint8 ppu1_version; uint8 ppu2_version; @@ -50,7 +50,6 @@ public: alwaysinline bool overscan() const { return display.overscan; } alwaysinline bool hires() const { return (regs.pseudo_hires || regs.bg_mode == 5 || regs.bg_mode == 6); } - uint16 light_table[16][32768]; uint16 mosaic_table[16][4096]; void render_line(); diff --git a/bsnes/snes/alt/ppu-compatibility/render/line.cpp b/bsnes/snes/alt/ppu-compatibility/render/line.cpp index d63884a9..22766844 100755 --- a/bsnes/snes/alt/ppu-compatibility/render/line.cpp +++ b/bsnes/snes/alt/ppu-compatibility/render/line.cpp @@ -85,13 +85,12 @@ inline uint16 PPU::get_pixel_swap(uint32 x) { } inline void PPU::render_line_output() { - uint16 *ptr = (uint16*)output + (line * 1024) + ((interlace() && field()) ? 512 : 0); - uint16 *luma = light_table[regs.display_brightness]; - uint16 curr, prev; + uint32 *ptr = (uint32*)output + (line * 1024) + ((interlace() && field()) ? 512 : 0); + uint32 curr, prev; if(!regs.pseudo_hires && regs.bg_mode != 5 && regs.bg_mode != 6) { for(unsigned x = 0; x < 256; x++) { - curr = luma[get_pixel_normal(x)]; + curr = (regs.display_brightness << 15) | get_pixel_normal(x); *ptr++ = curr; } } else { @@ -99,11 +98,11 @@ inline void PPU::render_line_output() { //blending is disabled below, as this should be done via video filtering //blending code is left for reference purposes - curr = luma[get_pixel_swap(x)]; + curr = (regs.display_brightness << 15) | get_pixel_swap(x); *ptr++ = curr; //(prev + curr - ((prev ^ curr) & 0x0421)) >> 1; //prev = curr; - curr = luma[get_pixel_normal(x)]; + curr = (regs.display_brightness << 15) | get_pixel_normal(x); *ptr++ = curr; //(prev + curr - ((prev ^ curr) & 0x0421)) >> 1; //prev = curr; } diff --git a/bsnes/snes/alt/ppu-performance/ppu.cpp b/bsnes/snes/alt/ppu-performance/ppu.cpp index 5db84183..7d564925 100755 --- a/bsnes/snes/alt/ppu-performance/ppu.cpp +++ b/bsnes/snes/alt/ppu-performance/ppu.cpp @@ -104,7 +104,7 @@ void 
PPU::power() { void PPU::reset() { create(Enter, system.cpu_frequency()); PPUcounter::reset(); - memset(surface, 0, 512 * 512 * sizeof(uint16)); + memset(surface, 0, 512 * 512 * sizeof(uint32)); mmio_reset(); display.interlace = false; display.overscan = false; @@ -140,7 +140,7 @@ bg3(*this, Background::ID::BG3), bg4(*this, Background::ID::BG4), sprite(*this), screen(*this) { - surface = new uint16[512 * 512]; + surface = new uint32[512 * 512]; output = surface + 16 * 512; display.width = 256; display.height = 224; diff --git a/bsnes/snes/alt/ppu-performance/ppu.hpp b/bsnes/snes/alt/ppu-performance/ppu.hpp index 08da7ec4..37eb991e 100755 --- a/bsnes/snes/alt/ppu-performance/ppu.hpp +++ b/bsnes/snes/alt/ppu-performance/ppu.hpp @@ -28,8 +28,8 @@ public: ~PPU(); private: - uint16 *surface; - uint16 *output; + uint32 *surface; + uint32 *output; #include "mmio/mmio.hpp" #include "window/window.hpp" diff --git a/bsnes/snes/alt/ppu-performance/screen/screen.cpp b/bsnes/snes/alt/ppu-performance/screen/screen.cpp index f318aed3..7939f243 100755 --- a/bsnes/snes/alt/ppu-performance/screen/screen.cpp +++ b/bsnes/snes/alt/ppu-performance/screen/screen.cpp @@ -55,9 +55,9 @@ void PPU::Screen::scanline() { } void PPU::Screen::render_black() { - uint16 *data = self.output + self.vcounter() * 1024; + uint32 *data = self.output + self.vcounter() * 1024; if(self.interlace() && self.field()) data += 512; - memset(data, 0, self.display.width << 1); + memset(data, 0, self.display.width << 2); } uint16 PPU::Screen::get_pixel_main(unsigned x) { @@ -115,43 +115,25 @@ uint16 PPU::Screen::get_pixel_sub(unsigned x) { } void PPU::Screen::render() { - uint16 *data = self.output + self.vcounter() * 1024; + uint32 *data = self.output + self.vcounter() * 1024; if(self.interlace() && self.field()) data += 512; - uint16 *light = light_table[self.regs.display_brightness]; if(!self.regs.pseudo_hires && self.regs.bgmode != 5 && self.regs.bgmode != 6) { for(unsigned i = 0; i < 256; i++) { - data[i] = 
light[get_pixel_main(i)]; + data[i] = (self.regs.display_brightness << 15) | get_pixel_main(i); } } else { for(unsigned i = 0; i < 256; i++) { - *data++ = light[get_pixel_sub(i)]; - *data++ = light[get_pixel_main(i)]; + *data++ = (self.regs.display_brightness << 15) | get_pixel_sub(i); + *data++ = (self.regs.display_brightness << 15) | get_pixel_main(i); } } } PPU::Screen::Screen(PPU &self) : self(self) { - light_table = new uint16*[16]; - for(unsigned l = 0; l < 16; l++) { - light_table[l] = new uint16[32768]; - for(unsigned r = 0; r < 32; r++) { - for(unsigned g = 0; g < 32; g++) { - for(unsigned b = 0; b < 32; b++) { - double luma = (double)l / 15.0; - unsigned ar = (luma * r + 0.5); - unsigned ag = (luma * g + 0.5); - unsigned ab = (luma * b + 0.5); - light_table[l][(r << 10) + (g << 5) + (b << 0)] = (ab << 10) + (ag << 5) + (ar << 0); - } - } - } - } } PPU::Screen::~Screen() { - for(unsigned l = 0; l < 16; l++) delete[] light_table[l]; - delete[] light_table; } void PPU::Screen::Output::plot_main(unsigned x, unsigned color, unsigned priority, unsigned source) { diff --git a/bsnes/snes/alt/ppu-performance/screen/screen.hpp b/bsnes/snes/alt/ppu-performance/screen/screen.hpp index 3554c3a1..e93d3cd4 100755 --- a/bsnes/snes/alt/ppu-performance/screen/screen.hpp +++ b/bsnes/snes/alt/ppu-performance/screen/screen.hpp @@ -25,7 +25,6 @@ class Screen { } output; ColorWindow window; - uint16 **light_table; alwaysinline unsigned get_palette(unsigned color); unsigned get_direct_color(unsigned palette, unsigned tile); diff --git a/bsnes/snes/audio/audio.cpp b/bsnes/snes/audio/audio.cpp index 092fab06..e20b45f1 100755 --- a/bsnes/snes/audio/audio.cpp +++ b/bsnes/snes/audio/audio.cpp @@ -13,7 +13,7 @@ void Audio::coprocessor_enable(bool state) { void Audio::coprocessor_frequency(double input_frequency) { dspaudio.setFrequency(input_frequency); - dspaudio.setResampler(nall::DSP::ResampleEngine::Average); + dspaudio.setResampler(nall::DSP::ResampleEngine::Sinc); 
dspaudio.setResamplerFrequency(system.apu_frequency() / 768.0); } diff --git a/bsnes/snes/interface/interface.cpp b/bsnes/snes/interface/interface.cpp index 54fa945d..eeea94a5 100755 --- a/bsnes/snes/interface/interface.cpp +++ b/bsnes/snes/interface/interface.cpp @@ -4,7 +4,7 @@ namespace SNES { Interface *interface = 0; -void Interface::videoRefresh(const uint16_t *data, bool hires, bool interlace, bool overscan) { +void Interface::videoRefresh(const uint32_t *data, bool hires, bool interlace, bool overscan) { } void Interface::audioSample(int16_t l_sample, int16_t r_sample) { diff --git a/bsnes/snes/interface/interface.hpp b/bsnes/snes/interface/interface.hpp index 7f7acb45..166bfa5f 100755 --- a/bsnes/snes/interface/interface.hpp +++ b/bsnes/snes/interface/interface.hpp @@ -1,6 +1,6 @@ class Interface { public: - virtual void videoRefresh(const uint16_t *data, bool hires, bool interlace, bool overscan); + virtual void videoRefresh(const uint32_t *data, bool hires, bool interlace, bool overscan); virtual void audioSample(int16_t lsample, int16_t rsample); virtual int16_t inputPoll(bool port, Input::Device device, unsigned index, unsigned id); diff --git a/bsnes/snes/ppu/ppu.cpp b/bsnes/snes/ppu/ppu.cpp index f8a65e15..cf981cb1 100755 --- a/bsnes/snes/ppu/ppu.cpp +++ b/bsnes/snes/ppu/ppu.cpp @@ -105,7 +105,7 @@ void PPU::power() { void PPU::reset() { create(Enter, system.cpu_frequency()); PPUcounter::reset(); - memset(surface, 0, 512 * 512 * sizeof(uint16)); + memset(surface, 0, 512 * 512 * sizeof(uint32)); mmio_reset(); bg1.reset(); @@ -153,7 +153,7 @@ bg4(*this, Background::ID::BG4), sprite(*this), window(*this), screen(*this) { - surface = new uint16[512 * 512]; + surface = new uint32[512 * 512]; output = surface + 16 * 512; } diff --git a/bsnes/snes/ppu/ppu.hpp b/bsnes/snes/ppu/ppu.hpp index d3028d25..7fa7513a 100755 --- a/bsnes/snes/ppu/ppu.hpp +++ b/bsnes/snes/ppu/ppu.hpp @@ -23,8 +23,8 @@ public: ~PPU(); private: - uint16 *surface; - uint16 *output; + 
uint32 *surface; + uint32 *output; uint8 ppu1_version; uint8 ppu2_version; diff --git a/bsnes/snes/ppu/screen/screen.cpp b/bsnes/snes/ppu/screen/screen.cpp index 5de6c33c..64d2464d 100755 --- a/bsnes/snes/ppu/screen/screen.cpp +++ b/bsnes/snes/ppu/screen/screen.cpp @@ -8,7 +8,7 @@ void PPU::Screen::scanline() { void PPU::Screen::run() { if(ppu.vcounter() == 0) return; - uint16 color; + uint32 color; if(self.regs.pseudo_hires == false && self.regs.bgmode != 5 && self.regs.bgmode != 6) { color = get_pixel(0); *output++ = color; @@ -21,7 +21,7 @@ void PPU::Screen::run() { } } -uint16 PPU::Screen::get_pixel(bool swap) { +uint32 PPU::Screen::get_pixel(bool swap) { if(ppu.regs.overscan == false && ppu.vcounter() >= 225) return 0x0000; enum source_t { BG1, BG2, BG3, BG4, OAM, BACK }; @@ -149,9 +149,8 @@ uint16 PPU::Screen::get_pixel(bool swap) { //lighting //======== - output = light_table[self.regs.display_brightness][output]; - if(self.regs.display_disable) output = 0x0000; - return output; + if(self.regs.display_disable) return 0; + return (self.regs.display_brightness << 15) | output; } uint16 PPU::Screen::addsub(unsigned x, unsigned y, bool halve) { @@ -206,19 +205,6 @@ void PPU::Screen::reset() { } PPU::Screen::Screen(PPU &self) : self(self) { - for(unsigned l = 0; l < 16; l++) { - for(unsigned r = 0; r < 32; r++) { - for(unsigned g = 0; g < 32; g++) { - for(unsigned b = 0; b < 32; b++) { - double luma = (double)l / 15.0; - unsigned ar = (luma * r + 0.5); - unsigned ag = (luma * g + 0.5); - unsigned ab = (luma * b + 0.5); - light_table[l][(b << 10) + (g << 5) + r] = (ab << 10) + (ag << 5) + ar; - } - } - } - } } #endif diff --git a/bsnes/snes/ppu/screen/screen.hpp b/bsnes/snes/ppu/screen/screen.hpp index 4c3a2617..15502335 100755 --- a/bsnes/snes/ppu/screen/screen.hpp +++ b/bsnes/snes/ppu/screen/screen.hpp @@ -1,5 +1,5 @@ class Screen { - uint16 *output; + uint32 *output; struct Regs { bool addsub_mode; @@ -23,8 +23,7 @@ class Screen { void run(); void reset(); - 
uint16 light_table[16][32768]; - uint16 get_pixel(bool swap); + uint32 get_pixel(bool swap); uint16 addsub(unsigned x, unsigned y, bool halve); uint16 get_color(unsigned palette); uint16 get_direct_color(unsigned palette, unsigned tile); diff --git a/bsnes/snes/video/video.cpp b/bsnes/snes/video/video.cpp index b2bd1810..0ad22fbb 100755 --- a/bsnes/snes/video/video.cpp +++ b/bsnes/snes/video/video.cpp @@ -21,7 +21,7 @@ const uint8_t Video::cursor[15 * 15] = { }; void Video::draw_cursor(uint16_t color, int x, int y) { - uint16_t *data = (uint16_t*)ppu.output; + uint32_t *data = (uint32_t*)ppu.output; if(ppu.interlace() && ppu.field()) data += 512; for(int cy = 0; cy < 15; cy++) { @@ -34,13 +34,13 @@ void Video::draw_cursor(uint16_t color, int x, int y) { if(vx < 0 || vx >= 256) continue; //do not draw offscreen uint8_t pixel = cursor[cy * 15 + cx]; if(pixel == 0) continue; - uint16_t pixelcolor = (pixel == 1) ? 0 : color; + uint32_t pixelcolor = (15 << 15) | ((pixel == 1) ? 0 : color); if(hires == false) { - *((uint16_t*)data + vy * 1024 + vx) = pixelcolor; + *((uint32_t*)data + vy * 1024 + vx) = pixelcolor; } else { - *((uint16_t*)data + vy * 1024 + vx * 2 + 0) = pixelcolor; - *((uint16_t*)data + vy * 1024 + vx * 2 + 1) = pixelcolor; + *((uint32_t*)data + vy * 1024 + vx * 2 + 0) = pixelcolor; + *((uint32_t*)data + vy * 1024 + vx * 2 + 1) = pixelcolor; } } } diff --git a/bsnes/snes/video/video.hpp b/bsnes/snes/video/video.hpp index 1a37e48d..b044e48f 100755 --- a/bsnes/snes/video/video.hpp +++ b/bsnes/snes/video/video.hpp @@ -1,4 +1,4 @@ -class Video { +struct Video { private: bool hires; unsigned line_width[240]; diff --git a/bsnes/ui/general/main-window.cpp b/bsnes/ui/general/main-window.cpp index daa885ab..9757d996 100755 --- a/bsnes/ui/general/main-window.cpp +++ b/bsnes/ui/general/main-window.cpp @@ -2,7 +2,7 @@ MainWindow *mainWindow = 0; MainWindow::MainWindow() { setTitle(application->title); - setGeometry({ 256, 256, 640, 480 }); + setGeometry({ 256, 256, 
626, 480 }); setBackgroundColor({ 0, 0, 0 }); windowManager->append(this, "MainWindow"); diff --git a/bsnes/ui/input/user-interface.cpp b/bsnes/ui/input/user-interface.cpp index 2363cca3..90ce26d2 100755 --- a/bsnes/ui/input/user-interface.cpp +++ b/bsnes/ui/input/user-interface.cpp @@ -32,15 +32,12 @@ void HotkeyGeneral::inputEvent(int16_t scancode, int16_t value) { } if(scancode == turboMode.scancode) { - static bool Vsync, Async; if(value) { - Vsync = any_cast(video.get(Video::Synchronize)); - Async = any_cast(audio.get(Audio::Synchronize)); video.set(Video::Synchronize, false); audio.set(Audio::Synchronize, false); } else { - video.set(Video::Synchronize, Vsync); - audio.set(Audio::Synchronize, Async); + video.set(Video::Synchronize, config->video.synchronize); + audio.set(Audio::Synchronize, config->audio.synchronize); } } diff --git a/bsnes/ui/interface/snes.cpp b/bsnes/ui/interface/snes.cpp index 5354487f..19a5d3c4 100755 --- a/bsnes/ui/interface/snes.cpp +++ b/bsnes/ui/interface/snes.cpp @@ -28,6 +28,7 @@ bool InterfaceSNES::loadCartridge(const string &basename) { interface->unloadCartridge(); interface->baseName = nall::basename(basename); + interface->slotName = { nall::basename(basename) }; string xml; xml.readfile({ interface->baseName, ".xml" }); @@ -36,8 +37,6 @@ bool InterfaceSNES::loadCartridge(const string &basename) { SNES::Interface::loadCartridge({ xml, data, size }); delete[] data; - interface->slotName = { nall::basename(basename) }; - loadMemory(); interface->loadCartridge(::Interface::Mode::SNES); return true; @@ -52,6 +51,7 @@ bool InterfaceSNES::loadSatellaviewSlottedCartridge(const string &basename, cons interface->unloadCartridge(); interface->baseName = nall::basename(basename); if(data[1]) interface->baseName.append("+", nall::basename(notdir(slotname))); + interface->slotName = { nall::basename(basename), nall::basename(slotname) }; string xml; xml.readfile({ interface->baseName, ".xml" }); @@ -61,8 +61,6 @@ bool 
InterfaceSNES::loadSatellaviewSlottedCartridge(const string &basename, cons delete[] data[0]; if(data[1]) delete[] data[1]; - interface->slotName = { nall::basename(basename), nall::basename(slotname) }; - loadMemory(); interface->loadCartridge(::Interface::Mode::SNES); return true; @@ -77,6 +75,7 @@ bool InterfaceSNES::loadSatellaviewCartridge(const string &basename, const strin interface->unloadCartridge(); interface->baseName = nall::basename(basename); if(data[1]) interface->baseName.append("+", nall::basename(notdir(slotname))); + interface->slotName = { nall::basename(basename), nall::basename(slotname) }; string xml; xml.readfile({ interface->baseName, ".xml" }); @@ -86,8 +85,6 @@ bool InterfaceSNES::loadSatellaviewCartridge(const string &basename, const strin delete[] data[0]; if(data[1]) delete[] data[1]; - interface->slotName = { nall::basename(basename), nall::basename(slotname) }; - loadMemory(); interface->loadCartridge(::Interface::Mode::SNES); return true; @@ -105,6 +102,7 @@ bool InterfaceSNES::loadSufamiTurboCartridge(const string &basename, const strin if(data[1] && data[2]) interface->baseName = { nall::basename(slotAname), "+", nall::basename(notdir(slotBname)) }; else if(data[1]) interface->baseName = nall::basename(slotAname); else if(data[2]) interface->baseName = nall::basename(slotBname); + interface->slotName = { nall::basename(basename), nall::basename(slotAname), nall::basename(slotBname) }; string xml; xml.readfile({ interface->baseName, ".xml" }); @@ -115,8 +113,6 @@ bool InterfaceSNES::loadSufamiTurboCartridge(const string &basename, const strin if(data[1]) delete[] data[1]; if(data[2]) delete[] data[2]; - interface->slotName = { nall::basename(basename), nall::basename(slotAname), nall::basename(slotBname) }; - loadMemory(); interface->loadCartridge(::Interface::Mode::SNES); return true; @@ -131,6 +127,7 @@ bool InterfaceSNES::loadSuperGameBoyCartridge(const string &basename, const stri interface->unloadCartridge(); 
interface->baseName = nall::basename(basename); if(data[1]) interface->baseName = nall::basename(slotname); + interface->slotName = { nall::basename(basename), nall::basename(slotname) }; string xml; xml.readfile({ interface->baseName, ".xml" }); @@ -143,8 +140,6 @@ bool InterfaceSNES::loadSuperGameBoyCartridge(const string &basename, const stri delete[] data[0]; if(data[1]) delete[] data[1]; - interface->slotName = { nall::basename(basename), nall::basename(slotname) }; - loadMemory(); interface->loadCartridge(::Interface::Mode::SNES); return true; @@ -198,7 +193,7 @@ bool InterfaceSNES::loadState(const string &filename) { // -void InterfaceSNES::videoRefresh(const uint16_t *data, bool hires, bool interlace, bool overscan) { +void InterfaceSNES::videoRefresh(const uint32_t *data, bool hires, bool interlace, bool overscan) { static uint16_t output[512 * 478]; unsigned width = 256 << hires; @@ -216,11 +211,10 @@ void InterfaceSNES::videoRefresh(const uint16_t *data, bool hires, bool interlac } for(unsigned y = 0; y < height; y++) { - const uint16_t *sp = data + y * pitch; + const uint32_t *sp = data + y * pitch; uint16_t *dp = output + y * 512; for(unsigned x = 0; x < width; x++) { - uint16_t color = *sp++; - *dp++ = ((color & 0x001f) << 10) | (color & 0x03e0) | ((color & 0x7c00) >> 10); + *dp++ = palette[*sp++]; } } @@ -269,5 +263,23 @@ int16_t InterfaceSNES::inputPoll(bool port, SNES::Input::Device device, unsigned } string InterfaceSNES::path(SNES::Cartridge::Slot slot, const string &hint) { - return dir(interface->baseName); + static unsigned index[] = { 0, 0, 0, 1, 2, 1 }; + return { interface->slotName[index[(unsigned)slot]], hint }; +} + +InterfaceSNES::InterfaceSNES() { + //{llll bbbbb ggggg rrrrr} -> { rrrrr ggggg bbbbb } + for(unsigned l = 0; l < 16; l++) { + for(unsigned r = 0; r < 32; r++) { + for(unsigned g = 0; g < 32; g++) { + for(unsigned b = 0; b < 32; b++) { + double luma = (double)l / 15.0; + unsigned ar = (luma * r + 0.5); + unsigned ag = (luma * 
g + 0.5); + unsigned ab = (luma * b + 0.5); + palette[(l << 15) + (r << 10) + (g << 5) + (b << 0)] = (ab << 10) + (ag << 5) + (ar << 0); + } + } + } + } } diff --git a/bsnes/ui/interface/snes.hpp b/bsnes/ui/interface/snes.hpp index 31e9ab3d..b4685301 100755 --- a/bsnes/ui/interface/snes.hpp +++ b/bsnes/ui/interface/snes.hpp @@ -14,9 +14,14 @@ struct InterfaceSNES : SNES::Interface { bool saveState(const string &filename); bool loadState(const string &filename); - void videoRefresh(const uint16_t *data, bool hires, bool interlace, bool overscan); + void videoRefresh(const uint32_t *data, bool hires, bool interlace, bool overscan); void audioSample(int16_t lsample, int16_t rsample); int16_t inputPoll(bool port, SNES::Input::Device device, unsigned index, unsigned id); string path(SNES::Cartridge::Slot slot, const string &hint); + + InterfaceSNES(); + +private: + unsigned palette[16 * 32768]; }; diff --git a/bsnes/ui/main.cpp b/bsnes/ui/main.cpp index a69d88f4..60788c55 100755 --- a/bsnes/ui/main.cpp +++ b/bsnes/ui/main.cpp @@ -41,7 +41,7 @@ Application::Application(int argc, char **argv) { inputManager = new InputManager; utility = new Utility; - title = "bsnes v082.20"; + title = "bsnes v082.21"; #if defined(PLATFORM_WIN) normalFont = "Tahoma, 8"; @@ -84,7 +84,7 @@ Application::Application(int argc, char **argv) { dspaudio.setPrecision(16); dspaudio.setVolume(config->audio.mute == false ? 
1.0 : 0.0); dspaudio.setBalance(0.0); - dspaudio.setResampler(DSP::ResampleEngine::Average); + dspaudio.setResampler(DSP::ResampleEngine::Sinc); dspaudio.setResamplerFrequency(48000.0); input.driver(config->input.driver); diff --git a/bsnes/ui/utility/utility.cpp b/bsnes/ui/utility/utility.cpp index 5c8acfda..4c0cc05c 100755 --- a/bsnes/ui/utility/utility.cpp +++ b/bsnes/ui/utility/utility.cpp @@ -119,11 +119,11 @@ void Utility::bindVideoFilter() { void Utility::bindVideoShader() { if(config->video.shader == "None") { + video.set(Video::Shader, (const char*)""); video.set(Video::Filter, 0u); - video.set(Video::Shader, (const char*)""); } else if(config->video.shader == "Blur") { - video.set(Video::Filter, 1u); video.set(Video::Shader, (const char*)""); + video.set(Video::Filter, 1u); } else { string data; data.readfile(config->video.shader);