Sameboy blip buf (#3154)

* blip buf experiment

* do this without needing to modify core

* Delete blip_buf.os

* don't bother keeping an sbuf in biz_t

* darn space/tabbing
This commit is contained in:
CasualPokePlayer 2022-02-17 00:30:38 -08:00 committed by GitHub
parent 3139277475
commit 0a9200fa59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 460 additions and 27 deletions

Binary file not shown.

Binary file not shown.

View File

@ -47,12 +47,6 @@ namespace BizHawk.Emulation.Cores.Nintendo.Sameboy
[DllImport("libsameboy", CallingConvention = CallingConvention.Cdecl)]
public static extern void sameboy_destroy(IntPtr core);
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void SampleCallback(IntPtr core, IntPtr sample);
[DllImport("libsameboy", CallingConvention = CallingConvention.Cdecl)]
public static extern void sameboy_setsamplecallback(IntPtr core, SampleCallback callback);
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void InputCallback();
@ -60,7 +54,7 @@ namespace BizHawk.Emulation.Cores.Nintendo.Sameboy
public static extern void sameboy_setinputcallback(IntPtr core, InputCallback callback);
[DllImport("libsameboy", CallingConvention = CallingConvention.Cdecl)]
public static extern void sameboy_frameadvance(IntPtr core, Buttons buttons, ushort x, ushort y, int[] videobuf, bool render, bool border);
public static extern void sameboy_frameadvance(IntPtr core, Buttons buttons, ushort x, ushort y, short[] soundbuf, ref int nsamps, int[] videobuf, bool render, bool border);
[DllImport("libsameboy", CallingConvention = CallingConvention.Cdecl)]
public static extern void sameboy_reset(IntPtr core);

View File

@ -78,7 +78,7 @@ namespace BizHawk.Emulation.Cores.Nintendo.Sameboy
{
var buttons = FrameAdvancePrep(controller);
LibSameboy.sameboy_frameadvance(SameboyState, buttons, GetAccX(controller), GetAccY(), VideoBuffer, render, _settings.ShowBorder);
LibSameboy.sameboy_frameadvance(SameboyState, buttons, GetAccX(controller), GetAccY(), _soundoutbuff, ref _soundoutbuffcontains, VideoBuffer, render, _settings.ShowBorder);
if (!rendersound)
{

View File

@ -38,13 +38,5 @@ namespace BizHawk.Emulation.Cores.Nintendo.Sameboy
private int _soundoutbuffcontains = 0;
private readonly short[] _soundoutbuff = new short[2048];
private unsafe void QueueSample(IntPtr core, IntPtr sample)
{
short* s = (short*)sample;
_soundoutbuff[_soundoutbuffcontains * 2] = s[0];
_soundoutbuff[_soundoutbuffcontains * 2 + 1] = s[1];
_soundoutbuffcontains++;
}
}
}

View File

@ -27,7 +27,6 @@ namespace BizHawk.Emulation.Cores.Nintendo.Sameboy
public bool IsCGBDMGMode() => LibSameboy.sameboy_iscgbdmg(SameboyState);
private readonly LibSameboy.SampleCallback _samplecb;
private readonly LibSameboy.InputCallback _inputcb;
[CoreConstructor(VSystemID.Raw.GB)]
@ -80,8 +79,6 @@ namespace BizHawk.Emulation.Cores.Nintendo.Sameboy
InitMemoryDomains();
InitMemoryCallbacks();
_samplecb = QueueSample;
LibSameboy.sameboy_setsamplecallback(SameboyState, _samplecb);
_inputcb = InputCallback;
LibSameboy.sameboy_setinputcallback(SameboyState, _inputcb);
_tracecb = MakeTrace;

View File

@ -1,5 +1,6 @@
.sconsign.dblite
BizInterface.os
blip_buf.os
config.log
libsameboy.dll.a
.sconf_temp

View File

@ -1,4 +1,5 @@
#include "gb.h"
#include "blip_buf.h"
#include "stdio.h"
#ifdef _WIN32
@ -7,6 +8,8 @@
#define EXPORT __attribute__((visibility("default")))
#endif
typedef int16_t s16;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
@ -29,6 +32,10 @@ typedef void (*scanline_callback_t)(u32);
typedef struct
{
GB_gameboy_t gb;
blip_t* blip_l;
blip_t* blip_r;
GB_sample_t latch;
GB_sample_t sample;
u32 vbuf[256 * 224];
u32 bg_pal[0x20];
u32 obj_pal[0x20];
@ -50,6 +57,13 @@ static u8 PeekIO(biz_t* biz, u8 addr)
return io[addr];
}
/* APU sample callback: stores the stereo sample handed over by the core in
   biz->sample, where the frame loop compares it against biz->latch.
   The cast relies on gb being the first member of biz_t. */
static void sample_cb(GB_gameboy_t *gb, GB_sample_t* sample)
{
	biz_t* const state = (biz_t*)gb;

	state->sample.right = sample->right;
	state->sample.left = sample->left;
}
static u32 rgb_cb(GB_gameboy_t *gb, u8 r, u8 g, u8 b)
{
return (0xFF << 24) | (r << 16) | (g << 8) | b;
@ -106,25 +120,27 @@ EXPORT biz_t* sameboy_create(u8* romdata, u32 romlen, u8* biosdata, u32 bioslen,
GB_init(&biz->gb, model);
GB_load_rom_from_buffer(&biz->gb, romdata, romlen);
GB_load_boot_rom_from_buffer(&biz->gb, biosdata, bioslen);
GB_set_sample_rate(&biz->gb, 44100);
GB_set_sample_rate(&biz->gb, GB_get_clock_rate(&biz->gb) / 2);
GB_apu_set_sample_callback(&biz->gb, sample_cb);
GB_set_rgb_encode_callback(&biz->gb, rgb_cb);
GB_set_vblank_callback(&biz->gb, vblank_cb);
GB_set_rtc_mode(&biz->gb, realtime ? GB_RTC_MODE_SYNC_TO_HOST : GB_RTC_MODE_ACCURATE);
GB_set_allow_illegal_inputs(&biz->gb, true);
biz->blip_l = blip_new(1024);
biz->blip_r = blip_new(1024);
blip_set_rates(biz->blip_l, GB_get_clock_rate(&biz->gb) / 2, 44100);
blip_set_rates(biz->blip_r, GB_get_clock_rate(&biz->gb) / 2, 44100);
return biz;
}
/* Releases everything owned by a biz_t: the emulator core state, both blip
   resampling buffers, and finally the biz_t allocation itself.
   Passing NULL is a no-op, matching blip_delete() and free(). */
EXPORT void sameboy_destroy(biz_t* biz)
{
	if (!biz)
	{
		return;
	}

	GB_free(&biz->gb);
	blip_delete(biz->blip_l);
	blip_delete(biz->blip_r);
	free(biz);
}
EXPORT void sameboy_setsamplecallback(biz_t* biz, GB_sample_callback_t callback)
{
GB_apu_set_sample_callback(&biz->gb, callback);
}
EXPORT void sameboy_setinputcallback(biz_t* biz, input_callback_t callback)
{
biz->input_cb = callback;
@ -135,7 +151,7 @@ static double FromRawToG(u16 raw)
return (raw - 0x81D0) / (0x70 * 1.0);
}
EXPORT void sameboy_frameadvance(biz_t* biz, GB_key_mask_t keys, u16 x, u16 y, u32* vbuf, bool render, bool border)
EXPORT void sameboy_frameadvance(biz_t* biz, GB_key_mask_t keys, u16 x, u16 y, s16* sbuf, u32* nsamp, u32* vbuf, bool render, bool border)
{
GB_set_key_mask(&biz->gb, keys);
if (GB_has_accelerometer(&biz->gb))
@ -165,9 +181,26 @@ EXPORT void sameboy_frameadvance(biz_t* biz, GB_key_mask_t keys, u16 x, u16 y, u
{
biz->input_cb();
}
if (biz->latch.left != biz->sample.left)
{
blip_add_delta(biz->blip_l, cycles, biz->latch.left - biz->sample.left);
biz->latch.left = biz->sample.left;
}
if (biz->latch.right != biz->sample.right)
{
blip_add_delta(biz->blip_r, cycles, biz->latch.right - biz->sample.right);
biz->latch.right = biz->sample.right;
}
}
while (!biz->vblank_occured && cycles < 35112);
blip_end_frame(biz->blip_l, cycles);
blip_end_frame(biz->blip_r, cycles);
u32 samps = blip_samples_avail(biz->blip_l);
blip_read_samples(biz->blip_l, sbuf + 0, samps, 1);
blip_read_samples(biz->blip_r, sbuf + 1, samps, 1);
*nsamp = samps;
if (biz->vblank_occured && render)
{
memcpy(vbuf, biz->vbuf, sizeof biz->vbuf);

View File

@ -31,7 +31,7 @@ conf = env.Configure()
conf.Finish()
shlib = env.SharedLibrary('sameboy', sourceFiles + ['BizInterface.c'],
shlib = env.SharedLibrary('sameboy', sourceFiles + ['BizInterface.c'] + ['blip_buf.c'],
LINKFLAGS = env['LINKFLAGS'] + ' -s -Wno-attributes',
SHLIBPREFIX = "lib")

View File

@ -0,0 +1,344 @@
/* blip_buf 1.1.0. http://www.slack.net/~ant/ */
#include "blip_buf.h"
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
/* Library Copyright (C) 2003-2009 Shay Green. This library is free software;
you can redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version. This
library is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details. You should have received a copy of the GNU Lesser General Public
License along with this module; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
#if defined (BLARGG_TEST) && BLARGG_TEST
#include "blargg_test.h"
#endif
/* Equivalent to ULONG_MAX >= 0xFFFFFFFF00000000.
Avoids constants that don't fit in 32 bits. */
#if ULONG_MAX/0xFFFFFFFF > 0xFFFFFFFF
typedef unsigned long fixed_t;
enum { pre_shift = 32 };
#elif defined(ULLONG_MAX)
typedef unsigned long long fixed_t;
enum { pre_shift = 32 };
#else
typedef unsigned fixed_t;
enum { pre_shift = 0 };
#endif
enum { time_bits = pre_shift + 20 };
static fixed_t const time_unit = (fixed_t) 1 << time_bits;
enum { bass_shift = 9 }; /* affects high-pass filter breakpoint frequency */
enum { end_frame_extra = 2 }; /* allows deltas slightly after frame length */
enum { half_width = 8 };
enum { buf_extra = half_width*2 + end_frame_extra };
enum { phase_bits = 5 };
enum { phase_count = 1 << phase_bits };
enum { delta_bits = 15 };
enum { delta_unit = 1 << delta_bits };
enum { frac_bits = time_bits - pre_shift };
/* We could eliminate avail and encode whole samples in offset, but that would
limit the total buffered samples to blip_max_frame. That could only be
increased by decreasing time_bits, which would reduce resample ratio accuracy.
*/
/** Sample buffer that resamples to output rate and accumulates samples
until they're read out. The sample storage itself is not a member: it follows
the struct in the same allocation and is reached through SAMPLES(). */
struct blip_t
{
/* Output samples per input clock in fixed point (time_unit == 1.0); set by
   blip_new() and blip_set_rates() */
fixed_t factor;
/* Fractional sample time carried from one frame to the next; blip_end_frame()
   keeps only the bits below time_bits here */
fixed_t offset;
/* Number of finished samples waiting to be read */
int avail;
/* Capacity in samples as passed to blip_new(); the allocation holds
   buf_extra entries more than this */
int size;
/* Running sum carried between blip_read_samples() calls */
int integrator;
};
typedef int buf_t;
/* probably not totally portable */
#define SAMPLES( buf ) ((buf_t*) ((buf) + 1))
/* Arithmetic (sign-preserving) right shift */
#define ARITH_SHIFT( n, shift ) \
	((n) >> (shift))

/* Limits of a 16-bit signed output sample */
enum { max_sample = +32767 };
enum { min_sample = -32768 };

/* Saturates the int lvalue n to the 16-bit sample range in place.
   When n does not survive a round trip through short it is replaced by
   ARITH_SHIFT( n, 16 ) ^ max_sample, which is max_sample for 32768..65535 and
   min_sample for -65536..-32769. n is evaluated more than once, so it must be
   free of side effects. Wrapped in do/while so that "CLAMP( x );" is a single
   statement and is safe as the body of an unbraced if/else. */
#define CLAMP( n ) \
	do {\
		if ( (short) (n) != (n) )\
			(n) = ARITH_SHIFT( (n), 16 ) ^ max_sample;\
	} while ( 0 )
/* Verifies the platform and configuration assumptions the rest of this file
   depends on. The width check is done at compile time; the remaining checks
   are asserts and therefore vanish when NDEBUG is defined. Called from
   blip_new(). */
static void check_assumptions( void )
{
int n;
#if INT_MAX < 0x7FFFFFFF || UINT_MAX < 0xFFFFFFFF
#error "int must be at least 32 bits"
#endif
assert( (-3 >> 1) == -2 ); /* right shift must preserve sign */
/* CLAMP must saturate a value that overflows in the positive direction... */
n = max_sample * 2;
CLAMP( n );
assert( n == max_sample );
/* ...and one that overflows in the negative direction */
n = min_sample * 2;
CLAMP( n );
assert( n == min_sample );
/* blip_new() computes time_unit / blip_max_ratio; this keeps that >= 1 */
assert( blip_max_ratio <= time_unit );
/* A frame of blip_max_frame samples must fit in the bits of fixed_t that
   remain above the time_bits fraction */
assert( blip_max_frame <= (fixed_t) -1 >> time_bits );
}
blip_t* blip_new( int size )
{
blip_t* m;
assert( size >= 0 );
m = (blip_t*) malloc( sizeof *m + (size + buf_extra) * sizeof (buf_t) );
if ( m )
{
m->factor = time_unit / blip_max_ratio;
m->size = size;
blip_clear( m );
check_assumptions();
}
return m;
}
/* Frees a buffer created by blip_new(). A NULL argument is ignored. */
void blip_delete( blip_t* m )
{
	if ( m == NULL )
	{
		return;
	}

	/* Zero the header first so that accidental use after free is more likely
	   to be noticed */
	memset( m, 0, sizeof *m );
	free( m );
}
/* Sets the resampling ratio: m->factor becomes
   time_unit * sample_rate / clock_rate, rounded up to an integer. */
void blip_set_rates( blip_t* m, double clock_rate, double sample_rate )
{
	double factor = time_unit * sample_rate / clock_rate;

	/* Truncate first... */
	m->factor = (fixed_t) factor;

	/* Fails if clock_rate exceeds maximum, relative to sample_rate */
	assert( 0 <= factor - m->factor && factor - m->factor < 1 );

	/* ...then bump by one when truncation lost a fraction. This is ceil()
	   without needing math.h. The floating-point product itself may still
	   have been rounded down, so the result is only most likely rounded up. */
	m->factor += ( m->factor < factor ) ? 1 : 0;
}
/* Empties the buffer: zeroes all sample storage (including the buf_extra
   headroom), resets the integrator and the available-sample count, and
   restarts the fractional time offset. */
void blip_clear( blip_t* m )
{
	size_t const bytes = (m->size + buf_extra) * sizeof (buf_t);

	memset( SAMPLES( m ), 0, bytes );
	m->integrator = 0;
	m->avail = 0;

	/* Candidate start offsets are 0 (right if factor was rounded up),
	   factor-1 (right if it was rounded down) and factor/2. The rounding
	   direction is unknown, so factor/2 is used: it works for either at the
	   cost of halving the time before an error would show, which for a 64-bit
	   factor is still years. */
	m->offset = m->factor / 2;
}
/* Returns how many input clocks the current frame must span for `samples`
   more output samples to become available, rounding up. Returns 0 when the
   carried-over offset already covers them. */
int blip_clocks_needed( const blip_t* m, int samples )
{
	fixed_t needed;

	/* Fails if buffer can't hold that many more samples */
	assert( samples >= 0 && m->avail + samples <= m->size );

	needed = (fixed_t) samples * time_unit;
	if ( needed >= m->offset )
	{
		/* Ceiling division of the remaining fixed-point time by factor */
		return (needed - m->offset + m->factor - 1) / m->factor;
	}
	return 0;
}
/* Ends the current time frame at clock t: converts the elapsed clocks to
   fixed-point sample time, adds the whole samples to the available count and
   keeps the fractional remainder as the offset for the next frame. */
void blip_end_frame( blip_t* m, unsigned t )
{
	fixed_t const end_time = t * m->factor + m->offset;

	m->offset = end_time & (time_unit - 1);
	m->avail += end_time >> time_bits;

	/* Fails if buffer size was exceeded */
	assert( m->avail <= m->size );
}
/* Returns the number of buffered samples that can be read right now. */
int blip_samples_avail( const blip_t* m )
{
	int const ready = m->avail;
	return ready;
}
/* Drops the first `count` entries of the sample storage: slides everything
   that remains (unread samples plus the buf_extra headroom) to the front and
   zeroes the entries vacated at the end. */
static void remove_samples( blip_t* m, int count )
{
	buf_t* const samples = SAMPLES( m );
	int const kept = m->avail + buf_extra - count;

	/* Source and destination overlap, hence memmove */
	memmove( samples, samples + count, kept * sizeof *samples );
	memset( samples + kept, 0, count * sizeof *samples );
	m->avail -= count;
}
/* Reads and removes up to `count` samples, writing 16-bit signed values to
   `out`. When `stereo` is non-zero every other element of `out` is written,
   so two buffers can be interleaved into one stereo stream. `count` is
   limited to the number of samples available. Returns the number of samples
   actually read. */
int blip_read_samples( blip_t* m, short out [], int count, int stereo )
{
	assert( count >= 0 );

	if ( count > m->avail )
		count = m->avail;

	if ( count )
	{
		int const step = stereo ? 2 : 1;
		buf_t const* in = SAMPLES( m );
		buf_t const* end = in + count;
		int sum = m->integrator;
		do
		{
			/* Eliminate fraction */
			int s = ARITH_SHIFT( sum, delta_bits );

			/* Integrate the stored deltas */
			sum += *in++;

			CLAMP( s );
			*out = s;
			out += step;

			/* High-pass filter. Multiplication is used instead of a left
			   shift because s may be negative and left-shifting a negative
			   value is undefined. s is within 16 bits after CLAMP, so the
			   product cannot overflow. */
			sum -= s * (1 << (delta_bits - bass_shift));
		}
		while ( in != end );
		m->integrator = sum;

		remove_samples( m, count );
	}

	return count;
}
/* Things that didn't help performance on x86:
__attribute__((aligned(128)))
#define short int
restrict
*/
/* Sinc_Generator( 0.9, 0.55, 4.5 )
Band-limited step kernel used by blip_add_delta(): one row of half_width
coefficients per phase, plus one extra row so that the row after the last
phase exists (blip_add_delta() reads it as in[half_width + i]). The final row
is the first row shifted right by one column with a leading 0. */
static short const bl_step [phase_count + 1] [half_width] =
{
{ 43, -115, 350, -488, 1136, -914, 5861,21022},
{ 44, -118, 348, -473, 1076, -799, 5274,21001},
{ 45, -121, 344, -454, 1011, -677, 4706,20936},
{ 46, -122, 336, -431, 942, -549, 4156,20829},
{ 47, -123, 327, -404, 868, -418, 3629,20679},
{ 47, -122, 316, -375, 792, -285, 3124,20488},
{ 47, -120, 303, -344, 714, -151, 2644,20256},
{ 46, -117, 289, -310, 634, -17, 2188,19985},
{ 46, -114, 273, -275, 553, 117, 1758,19675},
{ 44, -108, 255, -237, 471, 247, 1356,19327},
{ 43, -103, 237, -199, 390, 373, 981,18944},
{ 42, -98, 218, -160, 310, 495, 633,18527},
{ 40, -91, 198, -121, 231, 611, 314,18078},
{ 38, -84, 178, -81, 153, 722, 22,17599},
{ 36, -76, 157, -43, 80, 824, -241,17092},
{ 34, -68, 135, -3, 8, 919, -476,16558},
{ 32, -61, 115, 34, -60, 1006, -683,16001},
{ 29, -52, 94, 70, -123, 1083, -862,15422},
{ 27, -44, 73, 106, -184, 1152,-1015,14824},
{ 25, -36, 53, 139, -239, 1211,-1142,14210},
{ 22, -27, 34, 170, -290, 1261,-1244,13582},
{ 20, -20, 16, 199, -335, 1301,-1322,12942},
{ 18, -12, -3, 226, -375, 1331,-1376,12293},
{ 15, -4, -19, 250, -410, 1351,-1408,11638},
{ 13, 3, -35, 272, -439, 1361,-1419,10979},
{ 11, 9, -49, 292, -464, 1362,-1410,10319},
{ 9, 16, -63, 309, -483, 1354,-1383, 9660},
{ 7, 22, -75, 322, -496, 1337,-1339, 9005},
{ 6, 26, -85, 333, -504, 1312,-1280, 8355},
{ 4, 31, -94, 341, -507, 1278,-1205, 7713},
{ 3, 35, -102, 347, -506, 1238,-1119, 7082},
{ 1, 40, -110, 350, -499, 1190,-1021, 6464},
{ 0, 43, -115, 350, -488, 1136, -914, 5861}
};
/* Shifting by pre_shift allows calculation using unsigned int rather than
possibly-wider fixed_t. On 32-bit platforms, this is likely more efficient.
And by having pre_shift 32, a 32-bit platform can easily do the shift by
simply ignoring the low half. */
/* Adds a band-limited step of height `delta` at clock `time` of the current
frame. The step is spread over 16 consecutive buffer entries using two
adjacent rows of bl_step, linearly interpolated by the sub-phase fraction. */
void blip_add_delta( blip_t* m, unsigned time, int delta )
{
/* Sample time of the delta, with frac_bits of fraction left after dropping
   the low pre_shift bits */
unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);
/* First of the 16 entries touched: the whole-sample part of fixed, placed
   after the samples already made available */
buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits);
int const phase_shift = frac_bits - phase_bits;
/* Top phase_bits of the fraction select the kernel row */
int phase = fixed >> phase_shift & (phase_count - 1);
short const* in = bl_step [phase];
/* Mirrored row, used for the second half of the kernel */
short const* rev = bl_step [phase_count - phase];
/* Next delta_bits of the fraction: weight given to the neighbouring row */
int interp = fixed >> (phase_shift - delta_bits) & (delta_unit - 1);
/* Split delta: delta2 goes to the neighbouring row, the rest to this row */
int delta2 = (delta * interp) >> delta_bits;
delta -= delta2;
/* Fails if buffer size was exceeded */
assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );
/* First half: row `phase`, with in[half_width+i] reaching row phase+1 */
out [0] += in[0]*delta + in[half_width+0]*delta2;
out [1] += in[1]*delta + in[half_width+1]*delta2;
out [2] += in[2]*delta + in[half_width+2]*delta2;
out [3] += in[3]*delta + in[half_width+3]*delta2;
out [4] += in[4]*delta + in[half_width+4]*delta2;
out [5] += in[5]*delta + in[half_width+5]*delta2;
out [6] += in[6]*delta + in[half_width+6]*delta2;
out [7] += in[7]*delta + in[half_width+7]*delta2;
/* Second half: mirrored row read back to front, with in[i-half_width]
   reaching the row before it */
in = rev;
out [ 8] += in[7]*delta + in[7-half_width]*delta2;
out [ 9] += in[6]*delta + in[6-half_width]*delta2;
out [10] += in[5]*delta + in[5-half_width]*delta2;
out [11] += in[4]*delta + in[4-half_width]*delta2;
out [12] += in[3]*delta + in[3-half_width]*delta2;
out [13] += in[2]*delta + in[2-half_width]*delta2;
out [14] += in[1]*delta + in[1-half_width]*delta2;
out [15] += in[0]*delta + in[0-half_width]*delta2;
}
/* Lower-quality variant of blip_add_delta(): instead of applying the
   band-limited kernel, the delta is split linearly between the two buffer
   entries at offsets 7 and 8 according to the fractional sample position. */
void blip_add_delta_fast( blip_t* m, unsigned time, int delta )
{
	/* Sample time of the delta, with frac_bits of fraction */
	unsigned const fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);

	/* Top delta_bits of the fraction: share that goes to the later entry */
	int const interp = (fixed >> (frac_bits - delta_bits)) & (delta_unit - 1);
	int const late = delta * interp;

	buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits);

	/* Fails if buffer size was exceeded */
	assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );

	out [7] += delta * delta_unit - late;
	out [8] += late;
}

View File

@ -0,0 +1,72 @@
/** \file
Sample buffer that resamples from input clock rate to output sample rate */
/* blip_buf 1.1.0 */
#ifndef BLIP_BUF_H
#define BLIP_BUF_H
#ifdef __cplusplus
extern "C" {
#endif
/** First parameter of most functions is blip_t*, or const blip_t* if nothing
is changed. */
typedef struct blip_t blip_t;
/** Creates new buffer that can hold at most sample_count samples. Sets rates
so that there are blip_max_ratio clocks per sample. Returns pointer to new
buffer, or NULL if insufficient memory. */
blip_t* blip_new( int sample_count );
/** Sets approximate input clock rate and output sample rate. For every
clock_rate input clocks, approximately sample_rate samples are generated. */
void blip_set_rates( blip_t*, double clock_rate, double sample_rate );
enum { /** Maximum clock_rate/sample_rate ratio. For a given sample_rate,
clock_rate must not be greater than sample_rate*blip_max_ratio. */
blip_max_ratio = 1 << 20 };
/** Clears entire buffer. Afterwards, blip_samples_avail() == 0. */
void blip_clear( blip_t* );
/** Adds positive/negative delta into buffer at specified clock time. */
void blip_add_delta( blip_t*, unsigned int clock_time, int delta );
/** Same as blip_add_delta(), but uses faster, lower-quality synthesis. */
void blip_add_delta_fast( blip_t*, unsigned int clock_time, int delta );
/** Length of time frame, in clocks, needed to make sample_count additional
samples available. */
int blip_clocks_needed( const blip_t*, int sample_count );
enum { /** Maximum number of samples that can be generated from one time frame. */
blip_max_frame = 4000 };
/** Makes input clocks before clock_duration available for reading as output
samples. Also begins new time frame at clock_duration, so that clock time 0 in
the new time frame specifies the same clock as clock_duration in the old time
frame specified. Deltas can have been added slightly past clock_duration (up to
however many clocks there are in two output samples). */
void blip_end_frame( blip_t*, unsigned int clock_duration );
/** Number of buffered samples available for reading. */
int blip_samples_avail( const blip_t* );
/** Reads and removes at most 'count' samples and writes them to 'out'. If
'stereo' is true, writes output to every other element of 'out', allowing easy
interleaving of two buffers into a stereo sample stream. Outputs 16-bit signed
samples. Returns number of samples actually read. */
int blip_read_samples( blip_t*, short out [], int count, int stereo );
/** Frees buffer. No effect if NULL is passed. */
void blip_delete( blip_t* );
/* Deprecated */
typedef blip_t blip_buffer_t;
#ifdef __cplusplus
}
#endif
#endif