mirror of https://github.com/inolen/redream.git
use the host clock to smooth out frame timings when audio latency is high
This commit is contained in:
parent
089c3016cc
commit
f2ceb7f637
|
@ -103,7 +103,7 @@ list(APPEND RELIB_LIBS inih)
|
|||
if(ARCH_A64)
|
||||
file(GLOB VIXL_SOURCES deps/vixl/src/*.cc deps/vixl/src/aarch64/*.cc)
|
||||
add_library(vixl STATIC ${VIXL_SOURCES})
|
||||
target_compile_definitions(vixl PRIVATE -DVIXL_CODE_BUFFER_MALLOC)
|
||||
target_compile_definitions(vixl PRIVATE VIXL_CODE_BUFFER_STATIC)
|
||||
list(APPEND RELIB_INCLUDES deps/vixl/src)
|
||||
list(APPEND RELIB_LIBS vixl)
|
||||
endif()
|
||||
|
|
|
@ -7,6 +7,13 @@
|
|||
|
||||
/* number of elements in the array arr, cast to int. arr must be an actual
   array, the sizeof division does not give an element count for a pointer */
#define array_size(arr) (int)(sizeof(arr) / sizeof((arr)[0]))
|
||||
|
||||
#if COMPILER_MSVC
|
||||
#define ALIGNED(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
|
||||
/* macro for accessing the parent struct of a given pointer */
|
||||
#if PLATFORM_WINDOWS
|
||||
|
||||
static inline void *container_of_(void *ptr, ptrdiff_t offset) {
|
||||
|
|
|
@ -173,15 +173,11 @@ static int arm7_init(struct device *dev) {
|
|||
struct arm7 *arm = (struct arm7 *)dev;
|
||||
struct dreamcast *dc = arm->dc;
|
||||
|
||||
/* place code buffer in data segment (as opposed to allocating on the heap) to
|
||||
keep it within 2 GB of the code segment, enabling the x64 backend to use
|
||||
RIP-relative offsets when calling functions */
|
||||
static uint8_t arm7_code[0x800000];
|
||||
|
||||
/* initialize jit and its interfaces */
|
||||
arm->frontend = armv3_frontend_create();
|
||||
|
||||
#if ARCH_X64
|
||||
DEFINE_JIT_CODE_BUFFER(arm7_code);
|
||||
arm->backend = x64_backend_create(arm7_code, sizeof(arm7_code));
|
||||
#else
|
||||
arm->backend = interp_backend_create();
|
||||
|
|
|
@ -138,14 +138,11 @@ static int sh4_init(struct device *dev) {
|
|||
struct sh4 *sh4 = (struct sh4 *)dev;
|
||||
struct dreamcast *dc = sh4->dc;
|
||||
|
||||
/* place code buffer in data segment (as opposed to allocating on the heap) to
|
||||
keep it within 2 GB of the code segment, enabling the x64 backend to use
|
||||
RIP-relative offsets when calling functions */
|
||||
static uint8_t sh4_code[0x800000];
|
||||
|
||||
/* initialize jit and its interfaces */
|
||||
sh4->frontend = sh4_frontend_create();
|
||||
|
||||
#if ARCH_X64
|
||||
DEFINE_JIT_CODE_BUFFER(sh4_code);
|
||||
sh4->backend = x64_backend_create(sh4_code, sizeof(sh4_code));
|
||||
#else
|
||||
sh4->backend = interp_backend_create();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
#include "core/filesystem.h"
|
||||
#include "core/log.h"
|
||||
#include "core/option.h"
|
||||
#include "core/profiler.h"
|
||||
#include "core/ringbuf.h"
|
||||
#include "core/time.h"
|
||||
#include "emulator.h"
|
||||
#include "host/host.h"
|
||||
#include "render/render_backend.h"
|
||||
|
@ -23,6 +23,7 @@ DEFINE_OPTION_INT(latency, 50, "Preferred audio latency in ms");
|
|||
/* convert a count of audio frames to a duration in milliseconds, truncating
   the result to int. the argument is parenthesized so that compound
   expressions such as `a + b` are converted as a whole */
#define AUDIO_FRAMES_TO_MS(frames) \
  (int)(((float)(frames) * 1000.0f) / (float)AUDIO_FREQ)
|
||||
/* convert a duration in milliseconds to a count of audio frames at AUDIO_FREQ,
   truncating the result to int */
#define MS_TO_AUDIO_FRAMES(ms) (int)(((float)(ms) / 1000.0f) * AUDIO_FREQ)
|
||||
/* convert a duration in nanoseconds to a count of audio frames at AUDIO_FREQ,
   truncating the result to int. note, ns is converted to float first, so very
   large nanosecond values lose precision before the division */
#define NS_TO_AUDIO_FRAMES(ns) (int)(((float)(ns) / NS_PER_SEC) * AUDIO_FREQ)
|
||||
|
||||
/*
|
||||
* sdl host implementation
|
||||
|
@ -38,6 +39,7 @@ struct sdl_host {
|
|||
SDL_AudioDeviceID audio_dev;
|
||||
SDL_AudioSpec audio_spec;
|
||||
struct ringbuf *audio_frames;
|
||||
volatile int64_t audio_last_callback;
|
||||
|
||||
int key_map[K_NUM_KEYS];
|
||||
SDL_GameController *controllers[INPUT_MAX_CONTROLLERS];
|
||||
|
@ -86,8 +88,30 @@ static int audio_buffer_low(struct sdl_host *host) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* SDL's write callback is called very coarsely, seemingly only each time
|
||||
its buffered data has completely drained
|
||||
|
||||
since the main loop is designed to synchronize speed based on the amount
|
||||
of buffered audio data, with larger buffer sizes (due to a larger latency
|
||||
setting) this can result in the callback being called only one time for
|
||||
multiple video frames
|
||||
|
||||
this creates a situation where multiple video frames are immediately run
|
||||
when the callback fires in order to push enough audio data to avoid an
|
||||
underflow, and then multiple vblanks occur on the host where no new frame
|
||||
is presented as the main loop again blocks waiting for another write
|
||||
callback to decrease the amount of buffered audio data
|
||||
|
||||
in order to smooth out the video frame timings when the audio latency is
|
||||
high, the host clock is used to interpolate the amount of available audio
|
||||
data between callbacks */
|
||||
int64_t now = time_nanoseconds();
|
||||
int64_t since_last_callback = now - host->audio_last_callback;
|
||||
int frames_available = audio_available_frames(host);
|
||||
frames_available -= NS_TO_AUDIO_FRAMES(since_last_callback);
|
||||
|
||||
int low_water_mark = host->audio_spec.samples;
|
||||
return audio_available_frames(host) <= low_water_mark;
|
||||
return frames_available <= low_water_mark;
|
||||
}
|
||||
|
||||
static void audio_write_callback(void *userdata, Uint8 *stream, int len) {
|
||||
|
@ -109,6 +133,8 @@ static void audio_write_callback(void *userdata, Uint8 *stream, int len) {
|
|||
/* copy frames to output stream */
|
||||
memcpy(buf, tmp, n * frame_size);
|
||||
}
|
||||
|
||||
host->audio_last_callback = time_nanoseconds();
|
||||
}
|
||||
|
||||
void audio_push(struct host *base, const int16_t *data, int num_frames) {
|
||||
|
@ -152,8 +178,12 @@ static int audio_init(struct sdl_host *host) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* create ringbuffer to store data coming in from AICA */
|
||||
host->audio_frames = ringbuf_create(host->audio_spec.samples * 4);
|
||||
/* create ringbuffer to store data coming in from AICA. note, the buffer needs
|
||||
to be at least two video frames in size, in order to handle the coarse
|
||||
synchronization used by the main loop, where an entire guest video frame is
|
||||
run when the available audio data is deemed low */
|
||||
static const int frame_size = 2 * 2;
|
||||
host->audio_frames = ringbuf_create(AUDIO_FREQ * frame_size);
|
||||
|
||||
/* resume device */
|
||||
SDL_PauseAudioDevice(host->audio_dev, 0);
|
||||
|
|
|
@ -14,6 +14,23 @@ struct jit_register {
|
|||
const void *data;
|
||||
};
|
||||
|
||||
/* macro to help declare a code buffer for the backends to use
|
||||
|
||||
note, the code buffer needs to be placed in the data segment (as opposed to
|
||||
allocating on the heap) to keep it within 2 GB of the code segment, enabling
|
||||
the x64 backend to use RIP-relative offsets when calling functions
|
||||
|
||||
further, the code buffer needs to be no greater than 1 MB in size so the a64
|
||||
backend can use conditional branches to thunks without trampolining
|
||||
|
||||
finally, the code buffer needs to be aligned to a 4 KB page so it's easy to
|
||||
mprotect */
|
||||
/* declares a static, page-aligned code buffer called `name`; 1 MB when
   targeting a64, 8 MB otherwise. the caller supplies the trailing semicolon.

   the alignment specifier is placed before the type (rather than after the
   array declarator) so that both expansions of ALIGNED are accepted:
   __declspec(align(x)) must precede the declarator, while
   __attribute__((aligned(x))) is valid in either position */
#if ARCH_A64
#define DEFINE_JIT_CODE_BUFFER(name) static ALIGNED(4096) uint8_t name[0x100000]
#else
#define DEFINE_JIT_CODE_BUFFER(name) static ALIGNED(4096) uint8_t name[0x800000]
#endif
|
||||
|
||||
struct jit_backend {
|
||||
struct jit *jit;
|
||||
|
||||
|
|
|
@ -11,9 +11,6 @@ extern "C" {
|
|||
#include "jit/jit.h"
|
||||
}
|
||||
|
||||
/* size of codegen buffer reserved for thunks */
|
||||
#define X64_THUNK_SIZE 1024
|
||||
|
||||
/*
|
||||
* x64 register layout
|
||||
*/
|
||||
|
@ -353,8 +350,8 @@ static void x64_backend_emit_prologue(struct x64_backend *backend,
|
|||
e.add(e.dword[guestctx + guest->offset_instrs], block->num_instrs);
|
||||
}
|
||||
|
||||
static void *x64_backend_emit(struct x64_backend *backend,
|
||||
struct jit_block *block, struct ir *ir) {
|
||||
static void x64_backend_emit(struct x64_backend *backend,
|
||||
struct jit_block *block, struct ir *ir) {
|
||||
auto &e = *backend->codegen;
|
||||
const uint8_t *code = backend->codegen->getCurr();
|
||||
|
||||
|
@ -396,9 +393,8 @@ static void *x64_backend_emit(struct x64_backend *backend,
|
|||
|
||||
e.outLocalLabel();
|
||||
|
||||
block->host_addr = (void *)code;
|
||||
block->host_size = (int)(backend->codegen->getCurr() - code);
|
||||
|
||||
return (void *)code;
|
||||
}
|
||||
|
||||
static void x64_backend_emit_thunks(struct x64_backend *backend) {
|
||||
|
@ -580,7 +576,7 @@ static int x64_backend_assemble_code(struct jit_backend *base,
|
|||
/* try to generate the x64 code. if the code buffer overflows let the backend
|
||||
know so it can reset the cache and try again */
|
||||
try {
|
||||
block->host_addr = x64_backend_emit(backend, block, ir);
|
||||
x64_backend_emit(backend, block, ir);
|
||||
} catch (const Xbyak::Error &e) {
|
||||
if (e != Xbyak::ERR_CODE_IS_TOO_BIG) {
|
||||
LOG_FATAL("x64 codegen failure, %s", e.what());
|
||||
|
@ -630,7 +626,8 @@ struct jit_backend *x64_backend_create(void *code, int code_size) {
|
|||
calloc(1, sizeof(struct x64_backend)));
|
||||
Xbyak::util::Cpu cpu;
|
||||
|
||||
CHECK(Xbyak::CodeArray::protect(code, code_size, true));
|
||||
int r = protect_pages(code, code_size, ACC_READWRITEEXEC);
|
||||
CHECK(r);
|
||||
|
||||
backend->base.init = &x64_backend_init;
|
||||
backend->base.destroy = &x64_backend_destroy;
|
||||
|
|
|
@ -50,6 +50,8 @@ struct x64_backend {
|
|||
/*
|
||||
* backend functionality used by emitters
|
||||
*/
|
||||
/* size of codegen buffer reserved for thunks */
#define X64_THUNK_SIZE 1024
|
||||
|
||||
#if PLATFORM_WINDOWS
|
||||
#define X64_STACK_SHADOW_SPACE 32
|
||||
#else
|
||||
|
|
|
@ -327,6 +327,10 @@ void jit_compile_block(struct jit *jit, uint32_t guest_addr) {
|
|||
int res = jit->backend->assemble_code(jit->backend, block, &ir);
|
||||
|
||||
if (res) {
|
||||
#if 0
|
||||
jit->backend->dump_code(jit->backend, block);
|
||||
#endif
|
||||
|
||||
jit_finalize_block(jit, block);
|
||||
} else {
|
||||
/* if the backend overflowed, completely free the cache and let dispatch
|
||||
|
|
Loading…
Reference in New Issue