FFmpeg: Support dynamic audio sample rate

2022-08-06 18:20:36 -07:00 · 2022-08-06 18:20:36 -07:00 · 82ce165211
parent 4247fd0be3
commit 82ce165211
7 changed files with 71 additions and 34 deletions
--- a/1
+++ b/1
@ -74,6 +74,7 @@ Misc:
 - Debugger: Save and restore CLI history
 - Debugger: GDB now works while the game is paused
 - Debugger: Add command to load external symbol file (fixes mgba.io/i/2480)
+ - FFmpeg: Support dynamic audio sample rate
 - GB MBC: Filter out MBC errors when cartridge is yanked (fixes mgba.io/i/2488)
 - GB Video: Add default SGB border
 - GBA: Automatically skip BIOS if ROM has invalid logo
--- a/include/mgba/core/interface.h
+++ b/include/mgba/core/interface.h
@ -189,6 +189,7 @@ DECLARE_VECTOR(mCoreCallbacksList, struct mCoreCallbacks);

 struct mAVStream {
 	void (*videoDimensionsChanged)(struct mAVStream*, unsigned width, unsigned height);
+	void (*audioRateChanged)(struct mAVStream*, unsigned rate);
 	void (*postVideoFrame)(struct mAVStream*, const color_t* buffer, size_t stride);
 	void (*postAudioFrame)(struct mAVStream*, int16_t left, int16_t right);
 	void (*postAudioBuffer)(struct mAVStream*, struct blip_t* left, struct blip_t* right);
--- a/src/feature/ffmpeg/ffmpeg-encoder.c
+++ b/src/feature/ffmpeg/ffmpeg-encoder.c
@ -37,12 +37,15 @@
 static void _ffmpegPostVideoFrame(struct mAVStream*, const color_t* pixels, size_t stride);
 static void _ffmpegPostAudioFrame(struct mAVStream*, int16_t left, int16_t right);
 static void _ffmpegSetVideoDimensions(struct mAVStream*, unsigned width, unsigned height);
+static void _ffmpegSetAudioRate(struct mAVStream*, unsigned rate);

 static bool _ffmpegWriteAudioFrame(struct FFmpegEncoder* encoder, struct AVFrame* audioFrame);
 static bool _ffmpegWriteVideoFrame(struct FFmpegEncoder* encoder, struct AVFrame* videoFrame);

+static void _ffmpegOpenResampleContext(struct FFmpegEncoder* encoder);
+
 enum {
-	PREFERRED_SAMPLE_RATE = 0x8000
+	PREFERRED_SAMPLE_RATE = 0x10000 // 65536; FFmpegEncoderInit stores this as the initial encoder->isampleRate
 };

 void FFmpegEncoderInit(struct FFmpegEncoder* encoder) {
@ -51,9 +54,10 @@ void FFmpegEncoderInit(struct FFmpegEncoder* encoder) {
 #endif

 	encoder->d.videoDimensionsChanged = _ffmpegSetVideoDimensions;
+	encoder->d.audioRateChanged = _ffmpegSetAudioRate;
 	encoder->d.postVideoFrame = _ffmpegPostVideoFrame;
 	encoder->d.postAudioFrame = _ffmpegPostAudioFrame;
-	encoder->d.postAudioBuffer = 0;
+	encoder->d.postAudioBuffer = NULL;

 	encoder->audioCodec = NULL;
 	encoder->videoCodec = NULL;
@ -64,6 +68,7 @@ void FFmpegEncoderInit(struct FFmpegEncoder* encoder) {
 	FFmpegEncoderSetDimensions(encoder, GBA_VIDEO_HORIZONTAL_PIXELS, GBA_VIDEO_VERTICAL_PIXELS);
 	encoder->iwidth = GBA_VIDEO_HORIZONTAL_PIXELS;
 	encoder->iheight = GBA_VIDEO_VERTICAL_PIXELS;
+	encoder->isampleRate = PREFERRED_SAMPLE_RATE;
 	encoder->frameskip = 1;
 	encoder->skipResidue = 0;
 	encoder->loop = false;
@ -147,19 +152,24 @@ bool FFmpegEncoderSetAudio(struct FFmpegEncoder* encoder, const char* acodec, un
 	if (encoder->sampleFormat == AV_SAMPLE_FMT_NONE) {
 		return false;
 	}
-	encoder->sampleRate = PREFERRED_SAMPLE_RATE;
+	encoder->sampleRate = encoder->isampleRate;
 	if (codec->supported_samplerates) {
 		for (i = 0; codec->supported_samplerates[i]; ++i) {
-			if (codec->supported_samplerates[i] < PREFERRED_SAMPLE_RATE) {
+			if (codec->supported_samplerates[i] < encoder->isampleRate) {
 				continue;
 			}
-			if (encoder->sampleRate == PREFERRED_SAMPLE_RATE || encoder->sampleRate > codec->supported_samplerates[i]) {
+			if (encoder->sampleRate == encoder->isampleRate || encoder->sampleRate > codec->supported_samplerates[i]) {
 				encoder->sampleRate = codec->supported_samplerates[i];
 			}
 		}
+	} else if (codec->id == AV_CODEC_ID_FLAC) {
+		// HACK: FLAC doesn't support > 65535Hz unless it's divisible by 10
+		if (encoder->sampleRate >= 65535) {
+			encoder->sampleRate -= encoder->isampleRate % 10;
+		}
 	} else if (codec->id == AV_CODEC_ID_AAC) {
 		// HACK: AAC doesn't support 32768Hz (it rounds to 32000), but libfaac doesn't tell us that
-		encoder->sampleRate = 44100;
+		encoder->sampleRate = 48000;
 	}
 	encoder->audioCodec = acodec;
 	encoder->audioBitrate = abr;
@ -321,22 +331,7 @@ bool FFmpegEncoderOpen(struct FFmpegEncoder* encoder, const char* outfile) {
 		encoder->audioFrame->format = encoder->audio->sample_fmt;
 		encoder->audioFrame->pts = 0;
 		encoder->audioFrame->channel_layout = AV_CH_LAYOUT_STEREO;
-#ifdef USE_LIBAVRESAMPLE
-		encoder->resampleContext = avresample_alloc_context();
-		av_opt_set_int(encoder->resampleContext, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
-		av_opt_set_int(encoder->resampleContext, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
-		av_opt_set_int(encoder->resampleContext, "in_sample_rate", PREFERRED_SAMPLE_RATE, 0);
-		av_opt_set_int(encoder->resampleContext, "out_sample_rate", encoder->sampleRate, 0);
-		av_opt_set_int(encoder->resampleContext, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
-		av_opt_set_int(encoder->resampleContext, "out_sample_fmt", encoder->sampleFormat, 0);
-		avresample_open(encoder->resampleContext);
-#else
-		encoder->resampleContext = swr_alloc_set_opts(NULL, AV_CH_LAYOUT_STEREO, encoder->sampleFormat, encoder->sampleRate,
-		                                              AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, PREFERRED_SAMPLE_RATE, 0, NULL);
-		swr_init(encoder->resampleContext);
-#endif
-		encoder->audioBufferSize = (encoder->audioFrame->nb_samples * PREFERRED_SAMPLE_RATE / encoder->sampleRate) * 4;
-		encoder->audioBuffer = av_malloc(encoder->audioBufferSize);
+		_ffmpegOpenResampleContext(encoder);
 		av_frame_get_buffer(encoder->audioFrame, 0);

 		if (encoder->audio->codec->id == AV_CODEC_ID_AAC &&
@ -858,6 +853,11 @@ static void _ffmpegSetVideoDimensions(struct mAVStream* stream, unsigned width,
 	    SWS_POINT, 0, 0, 0);
 }

+static void _ffmpegSetAudioRate(struct mAVStream* stream, unsigned rate) {
+	// mAVStream audioRateChanged hook: the stream pointer is reinterpreted as
+	// the owning FFmpegEncoder and the new rate is forwarded unchanged.
+	// NOTE(review): the cast presumes the mAVStream member `d` sits at the
+	// very start of struct FFmpegEncoder -- confirm against ffmpeg-encoder.h.
+	FFmpegEncoderSetInputSampleRate((struct FFmpegEncoder*) stream, rate);
+}
+
 void FFmpegEncoderSetInputFrameRate(struct FFmpegEncoder* encoder, int numerator, int denominator) {
 	reduceFraction(&numerator, &denominator);
 	encoder->frameCycles = numerator;
@ -866,3 +866,35 @@ void FFmpegEncoderSetInputFrameRate(struct FFmpegEncoder* encoder, int numerator
 		encoder->video->framerate = (AVRational) { denominator, numerator * encoder->frameskip };
 	}
 }
+
+void FFmpegEncoderSetInputSampleRate(struct FFmpegEncoder* encoder, int sampleRate) {
+	// Records the sample rate of the incoming audio. If a resampler is already
+	// open, it and its staging buffer are torn down and rebuilt for the new
+	// rate; otherwise only the stored rate changes.
+	encoder->isampleRate = sampleRate;
+	if (!encoder->resampleContext) {
+		return;
+	}
+	av_freep(&encoder->audioBuffer);
+#ifdef USE_LIBAVRESAMPLE
+	// avresample_close() leaves the context allocated, and
+	// _ffmpegOpenResampleContext() allocates a fresh one over the same
+	// pointer, so the old context must be freed here or it leaks on every
+	// rate change. avresample_free() closes the context before freeing it.
+	avresample_free(&encoder->resampleContext);
+#else
+	swr_free(&encoder->resampleContext);
+#endif
+	_ffmpegOpenResampleContext(encoder);
+}
+
+static void _ffmpegOpenResampleContext(struct FFmpegEncoder* encoder) {
+	// Allocates the input staging buffer and creates the resampler that
+	// converts stereo signed 16-bit audio at encoder->isampleRate into stereo
+	// encoder->sampleFormat audio at encoder->sampleRate.
+	// Reads encoder->audioFrame->nb_samples, so audioFrame must already be set.
+	// Declared static to match the forward declaration earlier in this file.
+	//
+	// Buffer size works out to nb_samples * 4 * isampleRate / sampleRate.
+	// NOTE(review): the factor 4 is presumably the byte size of one stereo S16
+	// input frame; av_rescale_q rounds, so the result is not guaranteed to be a
+	// multiple of 4 -- confirm the consumer of audioBuffer tolerates that.
+	// NOTE(review): the results of av_malloc and of opening the resampler are
+	// not checked here.
+	encoder->audioBufferSize = av_rescale_q(encoder->audioFrame->nb_samples, (AVRational) { 4, encoder->sampleRate }, (AVRational) { 1, encoder->isampleRate });
+	encoder->audioBuffer = av_malloc(encoder->audioBufferSize);
+#ifdef USE_LIBAVRESAMPLE
+	encoder->resampleContext = avresample_alloc_context();
+	av_opt_set_int(encoder->resampleContext, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
+	av_opt_set_int(encoder->resampleContext, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
+	av_opt_set_int(encoder->resampleContext, "in_sample_rate", encoder->isampleRate, 0);
+	av_opt_set_int(encoder->resampleContext, "out_sample_rate", encoder->sampleRate, 0);
+	av_opt_set_int(encoder->resampleContext, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
+	av_opt_set_int(encoder->resampleContext, "out_sample_fmt", encoder->sampleFormat, 0);
+	avresample_open(encoder->resampleContext);
+#else
+	encoder->resampleContext = swr_alloc_set_opts(NULL, AV_CH_LAYOUT_STEREO, encoder->sampleFormat, encoder->sampleRate,
+	                                              AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, encoder->isampleRate, 0, NULL);
+	swr_init(encoder->resampleContext);
+#endif
+}
--- a/src/feature/ffmpeg/ffmpeg-encoder.h
+++ b/src/feature/ffmpeg/ffmpeg-encoder.h
@ -56,6 +56,7 @@ struct FFmpegEncoder {
 	int height;
 	int iwidth;
 	int iheight;
+	int isampleRate;
 	int frameCycles;
 	int cycles;
 	int frameskip;
@ -78,6 +79,7 @@ bool FFmpegEncoderSetVideo(struct FFmpegEncoder*, const char* vcodec, int vbr, i
 bool FFmpegEncoderSetContainer(struct FFmpegEncoder*, const char* container);
 void FFmpegEncoderSetDimensions(struct FFmpegEncoder*, int width, int height);
 void FFmpegEncoderSetInputFrameRate(struct FFmpegEncoder*, int numerator, int denominator);
+void FFmpegEncoderSetInputSampleRate(struct FFmpegEncoder*, int sampleRate);
 void FFmpegEncoderSetLooping(struct FFmpegEncoder*, bool loop);
 bool FFmpegEncoderVerifyContainer(struct FFmpegEncoder*);
 bool FFmpegEncoderOpen(struct FFmpegEncoder*, const char* outfile);
--- a/src/gb/core.c
+++ b/src/gb/core.c
@ -427,6 +427,9 @@ static void _GBCoreSetAVStream(struct mCore* core, struct mAVStream* stream) {
 		core->desiredVideoDimensions(core, &width, &height);
 		stream->videoDimensionsChanged(stream, width, height);
 	}
+	if (stream && stream->audioRateChanged) {
+		stream->audioRateChanged(stream, DMG_SM83_FREQUENCY / gb->audio.sampleInterval);
+	}
 }

 static bool _GBCoreLoadROM(struct mCore* core, struct VFile* vf) {
--- a/src/gba/audio.c
+++ b/src/gba/audio.c
@ -237,7 +237,11 @@ void GBAAudioWriteSOUNDCNT_X(struct GBAAudio* audio, uint16_t value) {

 void GBAAudioWriteSOUNDBIAS(struct GBAAudio* audio, uint16_t value) { // stores the SOUNDBIAS value and re-derives sampleInterval from it
 	audio->soundbias = value;
+	int32_t oldSampleInterval = audio->sampleInterval; // kept so a change in interval can be detected below
 	audio->sampleInterval = 0x200 >> GBARegisterSOUNDBIASGetResolution(value); // interval halves for each step of the resolution field
+	if (oldSampleInterval != audio->sampleInterval && audio->p->stream && audio->p->stream->audioRateChanged) { // notify only on an actual change, and only if a stream providing the hook is attached
+		audio->p->stream->audioRateChanged(audio->p->stream, GBA_ARM7TDMI_FREQUENCY / audio->sampleInterval); // reported rate = GBA_ARM7TDMI_FREQUENCY / sampleInterval
+	}
 }

 void GBAAudioWriteWaveRAM(struct GBAAudio* audio, int address, uint32_t value) {
@ -401,20 +405,15 @@ static void _sample(struct mTiming* timing, void* user, uint32_t cyclesLate) {
 	GBAAudioSample(audio, mTimingCurrentTime(&audio->p->timing) - cyclesLate);

 	int samples = 2 << GBARegisterSOUNDBIASGetResolution(audio->soundbias);
-	int sampleMask = 1 << GBARegisterSOUNDBIASGetResolution(audio->soundbias);
 	memset(audio->chA.samples, audio->chA.samples[samples - 1], sizeof(audio->chA.samples));
 	memset(audio->chB.samples, audio->chB.samples[samples - 1], sizeof(audio->chB.samples));

 	mCoreSyncLockAudio(audio->p->sync);
 	unsigned produced;
-	int32_t sampleSumLeft = 0;
-	int32_t sampleSumRight = 0;
 	int i;
 	for (i = 0; i < samples; ++i) {
 		int16_t sampleLeft = audio->currentSamples[i].left;
 		int16_t sampleRight = audio->currentSamples[i].right;
-		sampleSumLeft += sampleLeft;
-		sampleSumRight += sampleRight;
 		if ((size_t) blip_samples_avail(audio->psg.left) < audio->samples) {
 			blip_add_delta(audio->psg.left, audio->clock, sampleLeft - audio->lastLeft);
 			blip_add_delta(audio->psg.right, audio->clock, sampleRight - audio->lastRight);
@ -427,13 +426,9 @@ static void _sample(struct mTiming* timing, void* user, uint32_t cyclesLate) {
 				audio->clock -= CLOCKS_PER_FRAME;
 			}
 		}
-		// TODO: Post all frames
-		if (audio->p->stream && audio->p->stream->postAudioFrame && (i & (sampleMask - 1)) == sampleMask - 1) {
-			sampleSumLeft /= sampleMask;
-			sampleSumRight /= sampleMask;
-			audio->p->stream->postAudioFrame(audio->p->stream, sampleSumLeft, sampleSumRight);
-			sampleSumLeft = 0;
-			sampleSumRight = 0;
+
+		if (audio->p->stream && audio->p->stream->postAudioFrame) {
+			audio->p->stream->postAudioFrame(audio->p->stream, sampleLeft, sampleRight);
 		}
 	}
 	produced = blip_samples_avail(audio->psg.left);
--- a/src/gba/core.c
+++ b/src/gba/core.c
@ -503,6 +503,9 @@ static void _GBACoreSetAVStream(struct mCore* core, struct mAVStream* stream) {
 		core->desiredVideoDimensions(core, &width, &height);
 		stream->videoDimensionsChanged(stream, width, height);
 	}
+	if (stream && stream->audioRateChanged) {
+		stream->audioRateChanged(stream, GBA_ARM7TDMI_FREQUENCY / gba->audio.sampleInterval);
+	}
 }

 static bool _GBACoreLoadROM(struct mCore* core, struct VFile* vf) {