use the host clock to smooth out frame timings when audio latency is high

This commit is contained in:
Anthony Pesch 2017-06-26 00:46:39 -04:00
parent 089c3016cc
commit f2ceb7f637
9 changed files with 74 additions and 24 deletions

View File

@ -103,7 +103,7 @@ list(APPEND RELIB_LIBS inih)
if(ARCH_A64)
file(GLOB VIXL_SOURCES deps/vixl/src/*.cc deps/vixl/src/aarch64/*.cc)
add_library(vixl STATIC ${VIXL_SOURCES})
target_compile_definitions(vixl PRIVATE -DVIXL_CODE_BUFFER_MALLOC)
target_compile_definitions(vixl PRIVATE VIXL_CODE_BUFFER_STATIC)
list(APPEND RELIB_INCLUDES deps/vixl/src)
list(APPEND RELIB_LIBS vixl)
endif()

View File

@ -7,6 +7,13 @@
#define array_size(arr) (int)(sizeof(arr) / sizeof((arr)[0]))
/* compiler-specific spelling for requesting a minimum alignment of x bytes */
#if !COMPILER_MSVC
#define ALIGNED(x) __attribute__((aligned(x)))
#else
#define ALIGNED(x) __declspec(align(x))
#endif
/* macro for accessing the parent struct of a given pointer */
#if PLATFORM_WINDOWS
static inline void *container_of_(void *ptr, ptrdiff_t offset) {

View File

@ -173,15 +173,11 @@ static int arm7_init(struct device *dev) {
struct arm7 *arm = (struct arm7 *)dev;
struct dreamcast *dc = arm->dc;
/* place code buffer in data segment (as opposed to allocating on the heap) to
keep it within 2 GB of the code segment, enabling the x64 backend to use
RIP-relative offsets when calling functions */
static uint8_t arm7_code[0x800000];
/* initialize jit and its interfaces */
arm->frontend = armv3_frontend_create();
#if ARCH_X64
DEFINE_JIT_CODE_BUFFER(arm7_code);
arm->backend = x64_backend_create(arm7_code, sizeof(arm7_code));
#else
arm->backend = interp_backend_create();

View File

@ -138,14 +138,11 @@ static int sh4_init(struct device *dev) {
struct sh4 *sh4 = (struct sh4 *)dev;
struct dreamcast *dc = sh4->dc;
/* place code buffer in data segment (as opposed to allocating on the heap) to
keep it within 2 GB of the code segment, enabling the x64 backend to use
RIP-relative offsets when calling functions */
static uint8_t sh4_code[0x800000];
/* initialize jit and its interfaces */
sh4->frontend = sh4_frontend_create();
#if ARCH_X64
DEFINE_JIT_CODE_BUFFER(sh4_code);
sh4->backend = x64_backend_create(sh4_code, sizeof(sh4_code));
#else
sh4->backend = interp_backend_create();

View File

@ -5,8 +5,8 @@
#include "core/filesystem.h"
#include "core/log.h"
#include "core/option.h"
#include "core/profiler.h"
#include "core/ringbuf.h"
#include "core/time.h"
#include "emulator.h"
#include "host/host.h"
#include "render/render_backend.h"
@ -23,6 +23,7 @@ DEFINE_OPTION_INT(latency, 50, "Preferred audio latency in ms");
/* conversions between audio frame counts and durations, based on AUDIO_FREQ.
   each argument is parenthesized before being cast so that a compound
   expression (e.g. a + b) is converted as a whole */
#define AUDIO_FRAMES_TO_MS(frames) \
  (int)(((float)(frames) * 1000.0f) / (float)AUDIO_FREQ)
#define MS_TO_AUDIO_FRAMES(ms) (int)(((float)(ms) / 1000.0f) * AUDIO_FREQ)
#define NS_TO_AUDIO_FRAMES(ns) (int)(((float)(ns) / NS_PER_SEC) * AUDIO_FREQ)
/*
* sdl host implementation
@ -38,6 +39,7 @@ struct sdl_host {
SDL_AudioDeviceID audio_dev;
SDL_AudioSpec audio_spec;
struct ringbuf *audio_frames;
volatile int64_t audio_last_callback;
int key_map[K_NUM_KEYS];
SDL_GameController *controllers[INPUT_MAX_CONTROLLERS];
@ -86,8 +88,30 @@ static int audio_buffer_low(struct sdl_host *host) {
return 1;
}
/* SDL's write callback is called very coarsely, seemingly only each time
its buffered data has completely drained

since the main loop is designed to synchronize speed based on the amount
of buffered audio data, with larger buffer sizes (due to a larger latency
setting) this can result in the callback being called only one time for
multiple video frames

this creates a situation where multiple video frames are immediately run
when the callback fires in order to push enough audio data to avoid an
underflow, and then multiple vblanks occur on the host where no new frame
is presented as the main loop again blocks waiting for another write
callback to decrease the amount of buffered audio data

in order to smooth out the video frame timings when the audio latency is
high, the host clock is used to interpolate the amount of available audio
data between callbacks */
int64_t now = time_nanoseconds();
int64_t since_last_callback = now - host->audio_last_callback;
int frames_available = audio_available_frames(host);
frames_available -= NS_TO_AUDIO_FRAMES(since_last_callback);
int low_water_mark = host->audio_spec.samples;
return audio_available_frames(host) <= low_water_mark;
return frames_available <= low_water_mark;
}
static void audio_write_callback(void *userdata, Uint8 *stream, int len) {
@ -109,6 +133,8 @@ static void audio_write_callback(void *userdata, Uint8 *stream, int len) {
/* copy frames to output stream */
memcpy(buf, tmp, n * frame_size);
}
host->audio_last_callback = time_nanoseconds();
}
void audio_push(struct host *base, const int16_t *data, int num_frames) {
@ -152,8 +178,12 @@ static int audio_init(struct sdl_host *host) {
return 0;
}
/* create ringbuffer to store data coming in from AICA */
host->audio_frames = ringbuf_create(host->audio_spec.samples * 4);
/* create ringbuffer to store data coming in from AICA. note, the buffer needs
to be at least two video frames in size, in order to handle the coarse
synchronization used by the main loop, where an entire guest video frame is
run when the available audio data is deemed low */
static const int frame_size = 2 * 2;
host->audio_frames = ringbuf_create(AUDIO_FREQ * frame_size);
/* resume device */
SDL_PauseAudioDevice(host->audio_dev, 0);

View File

@ -14,6 +14,23 @@ struct jit_register {
const void *data;
};
/* macro to help declare a code buffer for the backends to use

   note, the code buffer needs to be placed in the data segment (as opposed to
   allocating on the heap) to keep it within 2 GB of the code segment, enabling
   the x64 backend to use RIP-relative offsets when calling functions

   further, the code buffer needs to be no greater than 1 MB in size so the a64
   backend can use conditional branches to thunks without trampolining

   finally, the code buffer needs to be aligned to a 4 KB page so it's easy to
   mprotect. the alignment specifier is written ahead of the type so the
   declaration is well-formed for both the __attribute__ and __declspec
   spellings of ALIGNED */
#if ARCH_A64
#define DEFINE_JIT_CODE_BUFFER(name) static ALIGNED(4096) uint8_t name[0x100000]
#else
#define DEFINE_JIT_CODE_BUFFER(name) static ALIGNED(4096) uint8_t name[0x800000]
#endif
struct jit_backend {
struct jit *jit;

View File

@ -11,9 +11,6 @@ extern "C" {
#include "jit/jit.h"
}
/* size of codegen buffer reserved for thunks */
#define X64_THUNK_SIZE 1024
/*
* x64 register layout
*/
@ -353,8 +350,8 @@ static void x64_backend_emit_prologue(struct x64_backend *backend,
e.add(e.dword[guestctx + guest->offset_instrs], block->num_instrs);
}
static void *x64_backend_emit(struct x64_backend *backend,
struct jit_block *block, struct ir *ir) {
static void x64_backend_emit(struct x64_backend *backend,
struct jit_block *block, struct ir *ir) {
auto &e = *backend->codegen;
const uint8_t *code = backend->codegen->getCurr();
@ -396,9 +393,8 @@ static void *x64_backend_emit(struct x64_backend *backend,
e.outLocalLabel();
block->host_addr = (void *)code;
block->host_size = (int)(backend->codegen->getCurr() - code);
return (void *)code;
}
static void x64_backend_emit_thunks(struct x64_backend *backend) {
@ -580,7 +576,7 @@ static int x64_backend_assemble_code(struct jit_backend *base,
/* try to generate the x64 code. if the code buffer overflows let the backend
know so it can reset the cache and try again */
try {
block->host_addr = x64_backend_emit(backend, block, ir);
x64_backend_emit(backend, block, ir);
} catch (const Xbyak::Error &e) {
if (e != Xbyak::ERR_CODE_IS_TOO_BIG) {
LOG_FATAL("x64 codegen failure, %s", e.what());
@ -630,7 +626,8 @@ struct jit_backend *x64_backend_create(void *code, int code_size) {
calloc(1, sizeof(struct x64_backend)));
Xbyak::util::Cpu cpu;
CHECK(Xbyak::CodeArray::protect(code, code_size, true));
int r = protect_pages(code, code_size, ACC_READWRITEEXEC);
CHECK(r);
backend->base.init = &x64_backend_init;
backend->base.destroy = &x64_backend_destroy;

View File

@ -50,6 +50,8 @@ struct x64_backend {
/*
* backend functionality used by emitters
*/
/* size of codegen buffer reserved for thunks */
#define X64_THUNK_SIZE 1024
#if PLATFORM_WINDOWS
#define X64_STACK_SHADOW_SPACE 32
#else

View File

@ -327,6 +327,10 @@ void jit_compile_block(struct jit *jit, uint32_t guest_addr) {
int res = jit->backend->assemble_code(jit->backend, block, &ir);
if (res) {
#if 0
jit->backend->dump_code(jit->backend, block);
#endif
jit_finalize_block(jit, block);
} else {
/* if the backend overflowed, completely free the cache and let dispatch