From 257c6b729f556e9553e5c4ea79655f32c85325be Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 18 Oct 2022 10:08:02 +0200 Subject: [PATCH 01/34] rzip: allow multiple writes --- core/archive/rzip.cpp | 29 +++++++++++++++++++++-------- core/archive/rzip.h | 1 + 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/core/archive/rzip.cpp b/core/archive/rzip.cpp index 7b2c2e513..a60c1b8d2 100644 --- a/core/archive/rzip.cpp +++ b/core/archive/rzip.cpp @@ -24,6 +24,7 @@ const u8 RZipHeader[8] = { '#', 'R', 'Z', 'I', 'P', 'v', 1, '#' }; bool RZipFile::Open(const std::string& path, bool write) { verify(file == nullptr); + this->write = write; file = nowide::fopen(path.c_str(), write ? "wb" : "rb"); if (file == nullptr) @@ -49,6 +50,17 @@ bool RZipFile::Open(const std::string& path, bool write) chunkIndex = 0; chunkSize = 0; } + else + { + maxChunkSize = 1024 * 1024; + if (std::fwrite(RZipHeader, sizeof(RZipHeader), 1, file) != 1 + || std::fwrite(&maxChunkSize, sizeof(maxChunkSize), 1, file) != 1 + || std::fwrite(&size, sizeof(size), 1, file) != 1) + { + Close(); + return false; + } + } return true; } @@ -57,6 +69,11 @@ void RZipFile::Close() { if (file != nullptr) { + if (write) + { + std::fseek(file, sizeof(RZipHeader) + sizeof(maxChunkSize), SEEK_SET); + std::fwrite(&size, sizeof(size), 1, file); + } std::fclose(file); file = nullptr; if (chunk != nullptr) @@ -70,6 +87,7 @@ void RZipFile::Close() size_t RZipFile::Read(void *data, size_t length) { verify(file != nullptr); + verify(!write); u8 *p = (u8 *)data; size_t rv = 0; @@ -99,7 +117,7 @@ size_t RZipFile::Read(void *data, size_t length) delete [] zipped; chunkSize = (u32)tl; } - u32 l = std::min(chunkSize - chunkIndex, (u32)length); + u32 l = std::min(chunkSize - chunkIndex, (u32)(length - rv)); memcpy(p, chunk + chunkIndex, l); p += l; chunkIndex += l; @@ -112,14 +130,9 @@ size_t RZipFile::Read(void *data, size_t length) size_t RZipFile::Write(const void *data, size_t length) { verify(file != nullptr); - 
verify(std::ftell(file) == 0); + verify(write); - maxChunkSize = 1024 * 1024; - size = length; - if (std::fwrite(RZipHeader, sizeof(RZipHeader), 1, file) != 1 - || std::fwrite(&maxChunkSize, sizeof(maxChunkSize), 1, file) != 1 - || std::fwrite(&size, sizeof(size), 1, file) != 1) - return 0; + size += length; const u8 *p = (const u8 *)data; // compression output buffer must be 0.1% larger + 12 bytes uLongf maxZippedSize = maxChunkSize + maxChunkSize / 1000 + 12; diff --git a/core/archive/rzip.h b/core/archive/rzip.h index 2743535eb..6d2dc287f 100644 --- a/core/archive/rzip.h +++ b/core/archive/rzip.h @@ -41,4 +41,5 @@ private: u8 *chunk = nullptr; u32 chunkSize = 0; u32 chunkIndex = 0; + bool write = false; }; From 77a105d8886e886419bdbe1d345f9e9022ee0162 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 18 Oct 2022 10:12:27 +0200 Subject: [PATCH 02/34] set USE_DX9 in CMakeLists.txt instead of build.h --- CMakeLists.txt | 1 + core/build.h | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e3c2bd476..2a564216a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1171,6 +1171,7 @@ if(WIN32 AND NOT LIBRETRO AND NOT WINDOWS_STORE) endif() target_link_libraries(${PROJECT_NAME} PUBLIC d3d9 d3dx9) + target_compile_definitions(${PROJECT_NAME} PRIVATE USE_DX9) endif() if(WIN32) diff --git a/core/build.h b/core/build.h index a8560f76f..1214440fd 100755 --- a/core/build.h +++ b/core/build.h @@ -222,9 +222,6 @@ #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) #define TARGET_UWP #endif -#if !defined(LIBRETRO) && !defined(TARGET_UWP) -#define USE_DX9 -#endif #endif From f25aeee7563bb057a407471cd69666d4d264bd3d Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 22 Oct 2022 12:08:05 +0200 Subject: [PATCH 03/34] audio: don't crash if audio init fails. Fall back to auto Don't crash if an audio backend fails to initialize. Fall back to auto or run without sound. 
Better audio backend framework --- core/oslib/audiobackend_alsa.cpp | 633 +++++++++--------- core/oslib/audiobackend_coreaudio.cpp | 494 +++++++------- core/oslib/audiobackend_directsound.cpp | 374 ++++++----- core/oslib/audiobackend_libao.cpp | 78 ++- core/oslib/audiobackend_null.cpp | 79 ++- core/oslib/audiobackend_oboe.cpp | 272 ++++---- core/oslib/audiobackend_omx.cpp | 587 ++++++++-------- core/oslib/audiobackend_oss.cpp | 181 +++-- core/oslib/audiobackend_pulseaudio.cpp | 561 ++++++---------- core/oslib/audiobackend_sdl2.cpp | 395 +++++------ core/oslib/audiostream.cpp | 154 ++--- core/oslib/audiostream.h | 90 +-- core/rend/gui.cpp | 34 +- .../reicast/emulator/emu/AudioBackend.java | 16 +- .../flycast/src/main/jni/src/Android.cpp | 91 ++- 15 files changed, 1995 insertions(+), 2044 deletions(-) diff --git a/core/oslib/audiobackend_alsa.cpp b/core/oslib/audiobackend_alsa.cpp index 414d01317..7ffe1fd50 100644 --- a/core/oslib/audiobackend_alsa.cpp +++ b/core/oslib/audiobackend_alsa.cpp @@ -3,348 +3,353 @@ #include #include "cfg/cfg.h" -static snd_pcm_t *handle; -static bool pcm_blocking = true; -static snd_pcm_uframes_t buffer_size; -static snd_pcm_uframes_t period_size; -static snd_pcm_t *handle_record; - -static void alsa_init() +class AlsaAudioBackend : public AudioBackend { - snd_pcm_hw_params_t *params; + snd_pcm_t *handle = nullptr; + bool pcm_blocking = true; + snd_pcm_uframes_t buffer_size = 0; + snd_pcm_uframes_t period_size = 0; + snd_pcm_t *handle_record = nullptr; - std::string device = cfgLoadStr("alsa", "device", ""); +public: + AlsaAudioBackend() + : AudioBackend("alsa", "Advanced Linux Sound Architecture") {} - int rc = -1; - if (device.empty() || device == "auto") + bool init() override { - INFO_LOG(AUDIO, "ALSA: trying to determine audio device"); + snd_pcm_hw_params_t *params; - // trying default device - device = "default"; - rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); + std::string device = cfgLoadStr("alsa", 
"device", ""); - // "default" didn't work, try first device - if (rc < 0) + int rc = -1; + if (device.empty() || device == "auto") { - device = "plughw:0,0,0"; + INFO_LOG(AUDIO, "ALSA: trying to determine audio device"); + + // trying default device + device = "default"; rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); + // "default" didn't work, try first device if (rc < 0) { - device = "plughw:0,0"; + device = "plughw:0,0,0"; + rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); + + if (rc < 0) + { + device = "plughw:0,0"; + rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); + } + } + + // first didn't work, try second + if (rc < 0) + { + device = "plughw:1,0"; rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); } - } - // first didn't work, try second - if (rc < 0) - { - device = "plughw:1,0"; - rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); - } - - // try pulse audio backend - if (rc < 0) - { - device = "pulse"; - rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); - } - - if (rc < 0) - INFO_LOG(AUDIO, "ALSA: unable to automatically determine audio device."); - } - else { - rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); - } - - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: unable to open PCM device %s: %s", device.c_str(), snd_strerror(rc)); - return; - } - - INFO_LOG(AUDIO, "ALSA: Successfully initialized \"%s\"", device.c_str()); - - /* Allocate a hardware parameters object. */ - snd_pcm_hw_params_alloca(¶ms); - - /* Fill it in with default values. */ - rc=snd_pcm_hw_params_any(handle, params); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_any %s", snd_strerror(rc)); - return; - } - - /* Set the desired hardware parameters. 
*/ - - /* Interleaved mode */ - rc=snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_access %s", snd_strerror(rc)); - return; - } - - /* Signed 16-bit little-endian format */ - rc=snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_format %s", snd_strerror(rc)); - return; - } - - /* Two channels (stereo) */ - rc=snd_pcm_hw_params_set_channels(handle, params, 2); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_channels %s", snd_strerror(rc)); - return; - } - - // 44100 samples/second - rc = snd_pcm_hw_params_set_rate(handle, params, 44100, 0); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_rate %s", snd_strerror(rc)); - return; - } - - // Period size (512) - period_size = std::min(SAMPLE_COUNT, (u32)config::AudioBufferSize / 4); - rc = snd_pcm_hw_params_set_period_size_near(handle, params, &period_size, nullptr); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_periods_near %s", snd_strerror(rc)); - return; - } - INFO_LOG(AUDIO, "ALSA: period size set to %zd", (size_t)period_size); - - // Sample buffer size - buffer_size = config::AudioBufferSize; - rc = snd_pcm_hw_params_set_buffer_size_near(handle, params, &buffer_size); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_buffer_size_near %s", snd_strerror(rc)); - return; - } - INFO_LOG(AUDIO, "ALSA: buffer size set to %ld", buffer_size); - - /* Write the parameters to the driver */ - rc = snd_pcm_hw_params(handle, params); - if (rc < 0) - { - WARN_LOG(AUDIO, "ALSA: Unable to set hw parameters: %s", snd_strerror(rc)); - return; - } -} - -static bool alsa_init_record(u32 sampling_freq) -{ - int err; - if ((err = snd_pcm_open(&handle_record, "default", SND_PCM_STREAM_CAPTURE, 0)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot open default audio capture 
device: %s", snd_strerror(err)); - return false; - } - snd_pcm_hw_params_t *hw_params; - if ((err = snd_pcm_hw_params_malloc(&hw_params)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot allocate hardware parameter structure: %s", snd_strerror(err)); - snd_pcm_close(handle_record); - return false; - } - if ((err = snd_pcm_hw_params_any(handle_record, hw_params)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot initialize hardware parameter structure: %s", snd_strerror(err)); - snd_pcm_hw_params_free(hw_params); - snd_pcm_close(handle_record); - return false; - } - if ((err = snd_pcm_hw_params_set_access(handle_record, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot set access type: %s\n", snd_strerror(err)); - snd_pcm_hw_params_free(hw_params); - snd_pcm_close(handle_record); - return false; - } - if ((err = snd_pcm_hw_params_set_format(handle_record, hw_params, SND_PCM_FORMAT_S16_LE)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot set sample format: %s", snd_strerror(err)); - snd_pcm_hw_params_free(hw_params); - snd_pcm_close(handle_record); - return false; - } - if ((err = snd_pcm_hw_params_set_rate(handle_record, hw_params, sampling_freq, 0)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot set sample rate to %d Hz: %s", sampling_freq, snd_strerror(err)); - snd_pcm_hw_params_free(hw_params); - snd_pcm_close(handle_record); - return false; - } - if ((err = snd_pcm_hw_params_set_channels(handle_record, hw_params, 1)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot set channel count: %s", snd_strerror(err)); - snd_pcm_hw_params_free(hw_params); - snd_pcm_close(handle_record); - return false; - } - if ((err = snd_pcm_hw_params(handle_record, hw_params)) < 0) - { - INFO_LOG(AUDIO, "ALSA: Cannot set parameters: %s", snd_strerror(err)); - snd_pcm_hw_params_free(hw_params); - snd_pcm_close(handle_record); - return false; - } - snd_pcm_hw_params_free(hw_params); - snd_pcm_nonblock(handle_record, 1); - if ((err = snd_pcm_prepare(handle_record)) < 0) - { - INFO_LOG(AUDIO, 
"ALSA: Cannot prepare device: %s", snd_strerror(err)); - snd_pcm_close(handle_record); - return false; - } - INFO_LOG(AUDIO, "ALSA: Successfully initialized capture device"); - - return true; -} - -static void alsa_term_record() -{ - snd_pcm_close(handle_record); -} - -static u32 alsa_record(void* frame, u32 samples) -{ - int err = snd_pcm_readi(handle_record, frame, samples); - if (err < (int)samples) - { - if (err < 0) - { - DEBUG_LOG(AUDIO, "ALSA: Recording error: %s", snd_strerror(err)); - err = 0; - err = snd_pcm_prepare(handle_record); - } - u8 *buffer = (u8 *)frame + err; - memset(buffer, 0, (samples - err) * 2); - } - - return err; -} - -static u32 alsa_push(const void* frame, u32 samples, bool wait) -{ - if (wait != pcm_blocking) { - snd_pcm_nonblock(handle, wait ? 0 : 1); - pcm_blocking = wait; - } - - int rc = snd_pcm_writei(handle, frame, samples); - if (rc < 0) - { - snd_pcm_recover(handle, rc, 1); - if (rc == -EPIPE) - { - // EPIPE means underrun - // Write some silence then our samples - const size_t silence_size = buffer_size - samples; - void *silence = alloca(silence_size * 4); - memset(silence, 0, silence_size * 4); - snd_pcm_writei(handle, silence, silence_size); - snd_pcm_writei(handle, frame, samples); - } - } - return 1; -} - -static void alsa_term() -{ - snd_pcm_drop(handle); - snd_pcm_close(handle); -} - -static std::vector alsa_get_devicelist() -{ - std::vector result; - - char **hints; - int err = snd_device_name_hint(-1, "pcm", (void***)&hints); - - // Error initializing ALSA - if (err != 0) - return result; - - // special value to automatically detect on initialization - result.emplace_back("auto"); - - char** n = hints; - while (*n != NULL) - { - // Get the type (NULL/Input/Output) - char *type = snd_device_name_get_hint(*n, "IOID"); - char *name = snd_device_name_get_hint(*n, "NAME"); - - if (name != NULL) - { - // We only want output or special devices (like "default" or "pulse") - // TODO Only those with type == NULL? 
- if (type == NULL || strcmp(type, "Output") == 0) + // try pulse audio backend + if (rc < 0) { - // TODO Check if device works (however we need to hash the resulting list then) - /*snd_pcm_t *handle; - int rc = snd_pcm_open(&handle, name, SND_PCM_STREAM_PLAYBACK, 0); - - if (rc == 0) - { - result.push_back(name); - snd_pcm_close(handle); - } - */ - - result.emplace_back(name); + device = "pulse"; + rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); } + if (rc < 0) + INFO_LOG(AUDIO, "ALSA: unable to automatically determine audio device."); + } + else { + rc = snd_pcm_open(&handle, device.c_str(), SND_PCM_STREAM_PLAYBACK, 0); } - if (type != NULL) - free(type); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: unable to open PCM device %s: %s", device.c_str(), snd_strerror(rc)); + return false; + } - if (name != NULL) - free(name); + INFO_LOG(AUDIO, "ALSA: Successfully initialized \"%s\"", device.c_str()); - n++; + /* Allocate a hardware parameters object. */ + snd_pcm_hw_params_alloca(¶ms); + + /* Fill it in with default values. */ + rc=snd_pcm_hw_params_any(handle, params); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_any %s", snd_strerror(rc)); + term(); + return false; + } + + /* Set the desired hardware parameters. 
*/ + + /* Interleaved mode */ + rc=snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_access %s", snd_strerror(rc)); + term(); + return false; + } + + /* Signed 16-bit little-endian format */ + rc=snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_format %s", snd_strerror(rc)); + term(); + return false; + } + + /* Two channels (stereo) */ + rc=snd_pcm_hw_params_set_channels(handle, params, 2); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_channels %s", snd_strerror(rc)); + term(); + return false; + } + + // 44100 samples/second + rc = snd_pcm_hw_params_set_rate(handle, params, 44100, 0); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_rate %s", snd_strerror(rc)); + term(); + return false; + } + + // Period size (512) + period_size = std::min(SAMPLE_COUNT, (u32)config::AudioBufferSize / 4); + rc = snd_pcm_hw_params_set_period_size_near(handle, params, &period_size, nullptr); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_periods_near %s", snd_strerror(rc)); + term(); + return false; + } + INFO_LOG(AUDIO, "ALSA: period size set to %zd", (size_t)period_size); + + // Sample buffer size + buffer_size = config::AudioBufferSize; + rc = snd_pcm_hw_params_set_buffer_size_near(handle, params, &buffer_size); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Error:snd_pcm_hw_params_set_buffer_size_near %s", snd_strerror(rc)); + term(); + return false; + } + INFO_LOG(AUDIO, "ALSA: buffer size set to %ld", buffer_size); + + /* Write the parameters to the driver */ + rc = snd_pcm_hw_params(handle, params); + if (rc < 0) + { + ERROR_LOG(AUDIO, "ALSA: Unable to set hw parameters: %s", snd_strerror(rc)); + term(); + return false; + } + + return true; } - snd_device_name_free_hint((void**)hints); + bool initRecord(u32 
sampling_freq) override + { + int err; + if ((err = snd_pcm_open(&handle_record, "default", SND_PCM_STREAM_CAPTURE, 0)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot open default audio capture device: %s", snd_strerror(err)); + return false; + } + snd_pcm_hw_params_t *hw_params; + if ((err = snd_pcm_hw_params_malloc(&hw_params)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot allocate hardware parameter structure: %s", snd_strerror(err)); + snd_pcm_close(handle_record); + return false; + } + if ((err = snd_pcm_hw_params_any(handle_record, hw_params)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot initialize hardware parameter structure: %s", snd_strerror(err)); + snd_pcm_hw_params_free(hw_params); + snd_pcm_close(handle_record); + return false; + } + if ((err = snd_pcm_hw_params_set_access(handle_record, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot set access type: %s\n", snd_strerror(err)); + snd_pcm_hw_params_free(hw_params); + snd_pcm_close(handle_record); + return false; + } + if ((err = snd_pcm_hw_params_set_format(handle_record, hw_params, SND_PCM_FORMAT_S16_LE)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot set sample format: %s", snd_strerror(err)); + snd_pcm_hw_params_free(hw_params); + snd_pcm_close(handle_record); + return false; + } + if ((err = snd_pcm_hw_params_set_rate(handle_record, hw_params, sampling_freq, 0)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot set sample rate to %d Hz: %s", sampling_freq, snd_strerror(err)); + snd_pcm_hw_params_free(hw_params); + snd_pcm_close(handle_record); + return false; + } + if ((err = snd_pcm_hw_params_set_channels(handle_record, hw_params, 1)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot set channel count: %s", snd_strerror(err)); + snd_pcm_hw_params_free(hw_params); + snd_pcm_close(handle_record); + return false; + } + if ((err = snd_pcm_hw_params(handle_record, hw_params)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot set parameters: %s", snd_strerror(err)); + snd_pcm_hw_params_free(hw_params); + 
snd_pcm_close(handle_record); + return false; + } + snd_pcm_hw_params_free(hw_params); + snd_pcm_nonblock(handle_record, 1); + if ((err = snd_pcm_prepare(handle_record)) < 0) + { + ERROR_LOG(AUDIO, "ALSA: Cannot prepare device: %s", snd_strerror(err)); + snd_pcm_close(handle_record); + return false; + } + INFO_LOG(AUDIO, "ALSA: Successfully initialized capture device"); - return result; -} + return true; + } -static audio_option_t* alsa_audio_options(int* option_count) -{ - *option_count = 1; - static audio_option_t result[1]; + void termRecord() override + { + snd_pcm_close(handle_record); + } - result[0].cfg_name = "device"; - result[0].caption = "Device"; - result[0].type = list; - result[0].list_callback = alsa_get_devicelist; + u32 record(void* frame, u32 samples) override + { + int err = snd_pcm_readi(handle_record, frame, samples); + if (err < (int)samples) + { + if (err < 0) + { + DEBUG_LOG(AUDIO, "ALSA: Recording error: %s", snd_strerror(err)); + err = 0; + err = snd_pcm_prepare(handle_record); + } + u8 *buffer = (u8 *)frame + err; + memset(buffer, 0, (samples - err) * 2); + } - return result; -} + return err; + } -static audiobackend_t audiobackend_alsa = { - "alsa", // Slug - "Advanced Linux Sound Architecture", // Name - &alsa_init, - &alsa_push, - &alsa_term, - &alsa_audio_options, - &alsa_init_record, - &alsa_record, - &alsa_term_record + u32 push(const void* frame, u32 samples, bool wait) override + { + if (wait != pcm_blocking) { + snd_pcm_nonblock(handle, wait ? 
0 : 1); + pcm_blocking = wait; + } + + int rc = snd_pcm_writei(handle, frame, samples); + if (rc < 0) + { + snd_pcm_recover(handle, rc, 1); + if (rc == -EPIPE) + { + // EPIPE means underrun + // Write some silence then our samples + const size_t silence_size = buffer_size - samples; + void *silence = alloca(silence_size * 4); + memset(silence, 0, silence_size * 4); + snd_pcm_writei(handle, silence, silence_size); + snd_pcm_writei(handle, frame, samples); + } + } + return 1; + } + + void term() override + { + snd_pcm_drop(handle); + snd_pcm_close(handle); + } + + std::vector getDeviceList() + { + std::vector result; + + char **hints; + int err = snd_device_name_hint(-1, "pcm", (void***)&hints); + + // Error initializing ALSA + if (err != 0) + return result; + + // special value to automatically detect on initialization + result.emplace_back("auto"); + + char** n = hints; + while (*n != NULL) + { + // Get the type (NULL/Input/Output) + char *type = snd_device_name_get_hint(*n, "IOID"); + char *name = snd_device_name_get_hint(*n, "NAME"); + + if (name != NULL) + { + // We only want output or special devices (like "default" or "pulse") + // TODO Only those with type == NULL? 
+ if (type == NULL || strcmp(type, "Output") == 0) + { + // TODO Check if device works (however we need to hash the resulting list then) + /*snd_pcm_t *handle; + int rc = snd_pcm_open(&handle, name, SND_PCM_STREAM_PLAYBACK, 0); + + if (rc == 0) + { + result.push_back(name); + snd_pcm_close(handle); + } + */ + + result.emplace_back(name); + } + + } + + if (type != NULL) + free(type); + + if (name != NULL) + free(name); + + n++; + } + + snd_device_name_free_hint((void**)hints); + + return result; + } + + Option* getOptions(int *count) override + { + *count = 1; + static Option result; + + result.name = "device"; + result.caption = "Device"; + result.type = Option::list; + result.values = getDeviceList(); + + return &result; + } }; +static AlsaAudioBackend alsaBackend; -static bool alsa = RegisterAudioBackend(&audiobackend_alsa); #endif diff --git a/core/oslib/audiobackend_coreaudio.cpp b/core/oslib/audiobackend_coreaudio.cpp index d485aaea9..4ff578cf6 100644 --- a/core/oslib/audiobackend_coreaudio.cpp +++ b/core/oslib/audiobackend_coreaudio.cpp @@ -20,260 +20,288 @@ #include #include -static AudioUnit audioUnit; - -static u32 BUFSIZE; -static u8 *samples_temp; - -static std::atomic samples_wptr; -static std::atomic samples_rptr; -static cResetEvent bufferEmpty; - -// input buffer and indexes -static u8 samples_input[2400]; -constexpr size_t InputBufSize = sizeof(samples_input); -static std::atomic input_wptr; -static std::atomic input_rptr; -AudioQueueRef recordQueue; - -static OSStatus coreaudio_callback(void* ctx, AudioUnitRenderActionFlags* flags, const AudioTimeStamp* ts, - UInt32 bus, UInt32 frames, AudioBufferList* abl) +class CoreAudioBackend : public AudioBackend { - for (int i = 0; i < abl->mNumberBuffers; i++) - { - int size = abl->mBuffers[i].mDataByteSize; - u8 *outBuffer = (u8 *)abl->mBuffers[i].mData; + AudioUnit audioUnit; + + u32 BUFSIZE = 0; + u8 *samples_temp = nullptr; + + std::atomic samples_wptr; + std::atomic samples_rptr; + cResetEvent 
bufferEmpty; + + // input buffer and indexes + u8 samples_input[2400]; + static constexpr size_t InputBufSize = sizeof(samples_input); + std::atomic input_wptr; + std::atomic input_rptr; + AudioQueueRef recordQueue; + + static OSStatus renderCallback(void* ctx, AudioUnitRenderActionFlags* flags, const AudioTimeStamp* ts, + UInt32 bus, UInt32 frames, AudioBufferList* abl) + { + CoreAudioBackend *backend = (CoreAudioBackend *)ctx; + for (int i = 0; i < abl->mNumberBuffers; i++) + { + int size = abl->mBuffers[i].mDataByteSize; + u8 *outBuffer = (u8 *)abl->mBuffers[i].mData; + while (size != 0) + { + int avail = (backend->samples_wptr - backend->samples_rptr + backend->BUFSIZE) % backend->BUFSIZE; + if (avail == 0) + { + //printf("Core Audio: buffer underrun %d bytes (%d)", size, abl->mBuffers[i].mDataByteSize); + memset(outBuffer, '\0', size); + return noErr; + } + avail = std::min(avail, size); + avail = std::min(avail, (int)backend->BUFSIZE - backend->samples_rptr); + memcpy(outBuffer, backend->samples_temp + backend->samples_rptr, avail); + backend->samples_rptr = (backend->samples_rptr + avail) % backend->BUFSIZE; + size -= avail; + outBuffer += avail; + // Set the mutex to allow writing + backend->bufferEmpty.Set(); + } + } + + return noErr; + } + +public: + CoreAudioBackend() + : AudioBackend("coreaudio", "Core Audio") {} + + bool init() override + { + OSStatus err; + AURenderCallbackStruct callback_struct; + AudioStreamBasicDescription format; + AudioComponentDescription desc; + AudioComponent component; + + desc.componentType = kAudioUnitType_Output; +#if !defined(TARGET_IPHONE) + desc.componentSubType = kAudioUnitSubType_DefaultOutput; +#else + desc.componentSubType = kAudioUnitSubType_RemoteIO; +#endif + desc.componentFlags = 0; + desc.componentFlagsMask = 0; + desc.componentManufacturer = kAudioUnitManufacturer_Apple; + component = AudioComponentFindNext(nullptr, &desc); + + if (component == nullptr) { + ERROR_LOG(AUDIO, "coreaudio: AudioComponentFindNext 
failed"); + return false; + } + + err = AudioComponentInstanceNew(component, &audioUnit); + if (err != noErr) { + ERROR_LOG(AUDIO, "coreaudio: AudioComponentInstanceNew failed"); + return false; + } + + FillOutASBDForLPCM(format, 44100, + 2, 16, 16, false, false, false); + err = AudioUnitSetProperty(audioUnit, + kAudioUnitProperty_StreamFormat, + kAudioUnitScope_Input, 0, &format, + sizeof(AudioStreamBasicDescription)); + if (err != noErr) + { + ERROR_LOG(AUDIO, "coreaudio: AudioUnitSetProperty(kAudioUnitProperty_StreamFormat) failed"); + AudioComponentInstanceDispose(audioUnit); + return false; + } + + callback_struct.inputProc = renderCallback; + callback_struct.inputProcRefCon = this; + err = AudioUnitSetProperty(audioUnit, + kAudioUnitProperty_SetRenderCallback, + kAudioUnitScope_Input, 0, &callback_struct, + sizeof callback_struct); + if (err != noErr) + { + ERROR_LOG(AUDIO, "coreaudio: AudioUnitSetProperty(kAudioUnitProperty_SetRenderCallback) failed"); + AudioComponentInstanceDispose(audioUnit); + return false; + } + + /* + err = AudioUnitSetParameter(audioUnit, + kHALOutputParam_Volume, + kAudioUnitParameterFlag_Output, 0, + 1, 0); + verify(err == noErr); + */ + + err = AudioUnitInitialize(audioUnit); + if (err != noErr) + { + ERROR_LOG(AUDIO, "coreaudio: AudioUnitInitialize failed"); + AudioComponentInstanceDispose(audioUnit); + return false; + } + + BUFSIZE = config::AudioBufferSize * 4; + samples_temp = new u8[BUFSIZE](); + samples_rptr = 0; + samples_wptr = 0; + + err = AudioOutputUnitStart(audioUnit); + if (err != noErr) + { + ERROR_LOG(AUDIO, "coreaudio: AudioOutputUnitStart failed"); + AudioUnitUninitialize(audioUnit); + AudioComponentInstanceDispose(audioUnit); + return false; + } + + bufferEmpty.Set(); + + return true; + } + + u32 push(const void* frame, u32 samples, bool wait) override + { + int size = samples * 4; while (size != 0) { - int avail = (samples_wptr - samples_rptr + BUFSIZE) % BUFSIZE; + int avail = (samples_rptr - samples_wptr - 4 + 
BUFSIZE) % BUFSIZE; if (avail == 0) { - //printf("Core Audio: buffer underrun %d bytes (%d)", size, abl->mBuffers[i].mDataByteSize); - memset(outBuffer, '\0', size); - return noErr; + if (!wait) + break; + bufferEmpty.Wait(); + continue; } avail = std::min(avail, size); - avail = std::min(avail, (int)BUFSIZE - samples_rptr); - memcpy(outBuffer, samples_temp + samples_rptr, avail); - samples_rptr = (samples_rptr + avail) % BUFSIZE; + avail = std::min(avail, (int)BUFSIZE - samples_wptr); + memcpy(&samples_temp[samples_wptr], frame, avail); + samples_wptr = (samples_wptr + avail) % BUFSIZE; + frame = (u8 *)frame + avail; size -= avail; - outBuffer += avail; - // Set the mutex to allow writing - bufferEmpty.Set(); } - } - - return noErr; -} - -static void coreaudio_init() -{ - OSStatus err; - AURenderCallbackStruct callback_struct; - AudioStreamBasicDescription format; - AudioComponentDescription desc; - AudioComponent component; - - desc.componentType = kAudioUnitType_Output; -#if !defined(TARGET_IPHONE) - desc.componentSubType = kAudioUnitSubType_DefaultOutput; -#else - desc.componentSubType = kAudioUnitSubType_RemoteIO; -#endif - desc.componentFlags = 0; - desc.componentFlagsMask = 0; - desc.componentManufacturer = kAudioUnitManufacturer_Apple; - component = AudioComponentFindNext(nullptr, &desc); - - verify(component != nullptr); - - err = AudioComponentInstanceNew(component, &audioUnit); - verify(err == noErr); - - FillOutASBDForLPCM(format, 44100, - 2, 16, 16, false, false, false); - err = AudioUnitSetProperty(audioUnit, - kAudioUnitProperty_StreamFormat, - kAudioUnitScope_Input, 0, &format, - sizeof(AudioStreamBasicDescription)); - verify(err == noErr); - - callback_struct.inputProc = coreaudio_callback; - callback_struct.inputProcRefCon = 0; - err = AudioUnitSetProperty(audioUnit, - kAudioUnitProperty_SetRenderCallback, - kAudioUnitScope_Input, 0, &callback_struct, - sizeof callback_struct); - verify(err == noErr); - - /* - err = 
AudioUnitSetParameter(audioUnit, - kHALOutputParam_Volume, - kAudioUnitParameterFlag_Output, 0, - 1, 0); - verify(err == noErr); - */ - - err = AudioUnitInitialize(audioUnit); - verify(err == noErr); - - BUFSIZE = config::AudioBufferSize * 4; - samples_temp = new u8[BUFSIZE](); - samples_rptr = 0; - samples_wptr = 0; - - err = AudioOutputUnitStart(audioUnit); - verify(err == noErr); - bufferEmpty.Set(); -} - -static u32 coreaudio_push(const void* frame, u32 samples, bool wait) -{ - int size = samples * 4; - while (size != 0) - { - int avail = (samples_rptr - samples_wptr - 4 + BUFSIZE) % BUFSIZE; - if (avail == 0) - { - if (!wait) - break; - bufferEmpty.Wait(); - continue; - } - avail = std::min(avail, size); - avail = std::min(avail, (int)BUFSIZE - samples_wptr); - memcpy(&samples_temp[samples_wptr], frame, avail); - samples_wptr = (samples_wptr + avail) % BUFSIZE; - frame = (u8 *)frame + avail; - size -= avail; - } - - return 1; -} - -static void coreaudio_term() -{ - AudioOutputUnitStop(audioUnit); - AudioUnitUninitialize(audioUnit); - AudioComponentInstanceDispose(audioUnit); - bufferEmpty.Set(); - delete [] samples_temp; -} - -static void coreaudio_record_callback(void *inUserData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer, const AudioTimeStamp *inStartTime, UInt32 frameSize, const AudioStreamPacketDescription *dataFormat) -{ - //DEBUG_LOG(AUDIO, "AudioQueue callback: wptr %d rptr %d bytes %d", (int)input_wptr, (int)input_rptr, inBuffer->mAudioDataByteSize); - UInt32 size = inBuffer->mAudioDataByteSize; - UInt32 freeSpace = (input_rptr - input_wptr - 2 + InputBufSize) % InputBufSize; - if (size > freeSpace) - { - DEBUG_LOG(AUDIO, "coreaudio: record overrun %d bytes", size - freeSpace); - size = freeSpace; + return 1; } - while (size != 0) + + void term() override { - UInt32 chunk = std::min(size, (UInt32)(InputBufSize - input_wptr)); - memcpy(samples_input + input_wptr, inBuffer->mAudioData, chunk); - input_wptr = (input_wptr + chunk) % InputBufSize; - 
size -= chunk; + AudioOutputUnitStop(audioUnit); + AudioUnitUninitialize(audioUnit); + AudioComponentInstanceDispose(audioUnit); + bufferEmpty.Set(); + delete [] samples_temp; + samples_temp = nullptr; } - AudioQueueEnqueueBuffer(recordQueue, inBuffer, 0, nullptr); -} -static void coreaudio_term_record() -{ - if (recordQueue != nullptr) + static void recordCallback(void *inUserData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer, + const AudioTimeStamp *inStartTime, UInt32 frameSize, const AudioStreamPacketDescription *dataFormat) { - AudioQueueStop(recordQueue, true); - AudioQueueDispose(recordQueue, true); - recordQueue = nullptr; - } -} - -static bool coreaudio_init_record(u32 sampling_freq) -{ - AudioStreamBasicDescription desc{}; - desc.mFormatID = kAudioFormatLinearPCM; - desc.mSampleRate = (double)sampling_freq; - desc.mChannelsPerFrame = 1; - desc.mBitsPerChannel = 16; - desc.mBytesPerPacket = desc.mBytesPerFrame = 2; - desc.mFramesPerPacket = 1; - desc.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; - desc.mReserved = 0; - - OSStatus err = AudioQueueNewInput(&desc, - coreaudio_record_callback, - nullptr, - nullptr, - kCFRunLoopCommonModes, - 0, - &recordQueue); - if (err != noErr) - { - ERROR_LOG(AUDIO, "AudioQueueNewInput failed: %d", err); - return false; - } - - AudioQueueBufferRef buffers[2]; - for (UInt32 i = 0; i < ARRAY_SIZE(buffers) && err == noErr; i++) - { - err = AudioQueueAllocateBuffer(recordQueue, 480, &buffers[i]); - if (err == noErr) - err = AudioQueueEnqueueBuffer(recordQueue, buffers[i], 0, nullptr); - } - input_wptr = 0; - input_rptr = 0; - if (err == noErr) - err = AudioQueueStart(recordQueue, nullptr); - if (err != noErr) - { - ERROR_LOG(AUDIO, "AudioQueue init failed: %d", err); - coreaudio_term_record(); - return false; - } - INFO_LOG(AUDIO, "AudioQueue initialized - sample rate %f", desc.mSampleRate); - - return true; -} - -static u32 coreaudio_record(void* frame, u32 samples) -{ -// 
DEBUG_LOG(AUDIO, "coreaudio_record: wptr %d rptr %d", (int)input_wptr, (int)input_rptr); - u32 size = samples * 2; - while (size != 0) - { - u32 avail = (input_wptr - input_rptr + InputBufSize) % InputBufSize; - if (avail == 0) + //DEBUG_LOG(AUDIO, "AudioQueue callback: wptr %d rptr %d bytes %d", (int)input_wptr, (int)input_rptr, inBuffer->mAudioDataByteSize); + CoreAudioBackend *backend = (CoreAudioBackend *)inUserData; + UInt32 size = inBuffer->mAudioDataByteSize; + UInt32 freeSpace = (backend->input_rptr - backend->input_wptr - 2 + InputBufSize) % InputBufSize; + if (size > freeSpace) { - DEBUG_LOG(AUDIO, "coreaudio: record underrun %d bytes", size); - break; + DEBUG_LOG(AUDIO, "coreaudio: record overrun %d bytes", size - freeSpace); + size = freeSpace; } - avail = std::min(avail, size); - avail = std::min(avail, (u32)(InputBufSize - input_rptr)); + while (size != 0) + { + UInt32 chunk = std::min(size, (UInt32)(InputBufSize - backend->input_wptr)); + memcpy(backend->samples_input + backend->input_wptr, inBuffer->mAudioData, chunk); + backend->input_wptr = (backend->input_wptr + chunk) % InputBufSize; + size -= chunk; + } + AudioQueueEnqueueBuffer(backend->recordQueue, inBuffer, 0, nullptr); + } - memcpy(frame, &samples_input[input_rptr], avail); - frame = (u8 *)frame + avail; - input_rptr = (input_rptr + avail) % InputBufSize; - size -= avail; - } + void termRecord() override + { + if (recordQueue != nullptr) + { + AudioQueueStop(recordQueue, true); + AudioQueueDispose(recordQueue, true); + recordQueue = nullptr; + } + } - return samples - size / 2; -} + bool initRecord(u32 sampling_freq) override + { + AudioStreamBasicDescription desc{}; + desc.mFormatID = kAudioFormatLinearPCM; + desc.mSampleRate = (double)sampling_freq; + desc.mChannelsPerFrame = 1; + desc.mBitsPerChannel = 16; + desc.mBytesPerPacket = desc.mBytesPerFrame = 2; + desc.mFramesPerPacket = 1; + desc.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; + 
desc.mReserved = 0; + + OSStatus err = AudioQueueNewInput(&desc, + recordCallback, + this, + nullptr, + kCFRunLoopCommonModes, + 0, + &recordQueue); + if (err != noErr) + { + ERROR_LOG(AUDIO, "AudioQueueNewInput failed: %d", err); + return false; + } -static audiobackend_t audiobackend_coreaudio = { - "coreaudio", // Slug - "Core Audio", // Name - &coreaudio_init, - &coreaudio_push, - &coreaudio_term, - nullptr, - &coreaudio_init_record, - &coreaudio_record, - &coreaudio_term_record + AudioQueueBufferRef buffers[2]; + for (UInt32 i = 0; i < ARRAY_SIZE(buffers) && err == noErr; i++) + { + err = AudioQueueAllocateBuffer(recordQueue, 480, &buffers[i]); + if (err == noErr) + err = AudioQueueEnqueueBuffer(recordQueue, buffers[i], 0, nullptr); + } + input_wptr = 0; + input_rptr = 0; + if (err == noErr) + err = AudioQueueStart(recordQueue, nullptr); + if (err != noErr) + { + ERROR_LOG(AUDIO, "AudioQueue init failed: %d", err); + termRecord(); + return false; + } + INFO_LOG(AUDIO, "AudioQueue initialized - sample rate %f", desc.mSampleRate); + + return true; + } + + u32 record(void* frame, u32 samples) override + { + // DEBUG_LOG(AUDIO, "coreaudio_record: wptr %d rptr %d", (int)input_wptr, (int)input_rptr); + u32 size = samples * 2; + while (size != 0) + { + u32 avail = (input_wptr - input_rptr + InputBufSize) % InputBufSize; + if (avail == 0) + { + DEBUG_LOG(AUDIO, "coreaudio: record underrun %d bytes", size); + break; + } + avail = std::min(avail, size); + avail = std::min(avail, (u32)(InputBufSize - input_rptr)); + + memcpy(frame, &samples_input[input_rptr], avail); + frame = (u8 *)frame + avail; + input_rptr = (input_rptr + avail) % InputBufSize; + size -= avail; + } + + return samples - size / 2; + } }; +static CoreAudioBackend coreAudioBackend; -static bool core = RegisterAudioBackend(&audiobackend_coreaudio); #endif diff --git a/core/oslib/audiobackend_directsound.cpp b/core/oslib/audiobackend_directsound.cpp index 530695789..b4c220ca8 100644 --- 
a/core/oslib/audiobackend_directsound.cpp +++ b/core/oslib/audiobackend_directsound.cpp @@ -22,204 +22,218 @@ struct __declspec(uuid("{B0210783-89cd-11d0-AF08-00A0C925CD16}")) IDirectSoundNo struct __declspec(uuid("{00990DF4-0DBB-4872-833E-6D303E80AEB6}")) IDirectSoundCaptureBuffer8; #endif -static ComPtr dsound; -static ComPtr buffer; -static std::vector notificationEvents; - -static ComPtr dcapture; -static ComPtr capture_buffer; - -static std::atomic_bool audioThreadRunning; -static std::thread audioThread; -static cResetEvent pushWait; - -constexpr u32 SAMPLE_BYTES = SAMPLE_COUNT * 4; - -static RingBuffer ringBuffer; - -static u32 notificationOffset(int index) { - return index * SAMPLE_BYTES; -} - -static void audioThreadMain() +class DirectSoundBackend : public AudioBackend { - audioThreadRunning = true; - while (true) + ComPtr dsound; + ComPtr buffer; + std::vector notificationEvents; + + ComPtr dcapture; + ComPtr capture_buffer; + + std::atomic_bool audioThreadRunning; + std::thread audioThread; + cResetEvent pushWait; + + static constexpr u32 SAMPLE_BYTES = SAMPLE_COUNT * 4; + + RingBuffer ringBuffer; + + u32 notificationOffset(int index) { + return index * SAMPLE_BYTES; + } + + void audioThreadMain() { - u32 rv = WaitForMultipleObjects(notificationEvents.size(), ¬ificationEvents[0], false, 100); - - if (!audioThreadRunning) - break; - if (rv == WAIT_TIMEOUT || rv == WAIT_FAILED) - continue; - rv -= WAIT_OBJECT_0; - - void *p1, *p2; - DWORD sz1, sz2; - - if (SUCCEEDED(buffer->Lock(notificationOffset(rv), SAMPLE_BYTES, &p1, &sz1, &p2, &sz2, 0))) + audioThreadRunning = true; + while (true) { - if (!ringBuffer.read((u8*)p1, sz1)) - memset(p1, 0, sz1); - if (sz2 != 0) + u32 rv = WaitForMultipleObjects(notificationEvents.size(), ¬ificationEvents[0], false, 100); + + if (!audioThreadRunning) + break; + if (rv == WAIT_TIMEOUT || rv == WAIT_FAILED) + continue; + rv -= WAIT_OBJECT_0; + + void *p1, *p2; + DWORD sz1, sz2; + + if 
(SUCCEEDED(buffer->Lock(notificationOffset(rv), SAMPLE_BYTES, &p1, &sz1, &p2, &sz2, 0))) { - if (!ringBuffer.read((u8*)p2, sz2)) - memset(p2, 0, sz2); + if (!ringBuffer.read((u8*)p1, sz1)) + memset(p1, 0, sz1); + if (sz2 != 0) + { + if (!ringBuffer.read((u8*)p2, sz2)) + memset(p2, 0, sz2); + } + buffer->Unlock(p1, sz1, p2, sz2); + pushWait.Set(); } - buffer->Unlock(p1, sz1, p2, sz2); - pushWait.Set(); } } -} -static void directsound_init() -{ - verifyc(DirectSoundCreate8(NULL, &dsound.get(), NULL)); - verifyc(dsound->SetCooperativeLevel(getNativeHwnd(), DSSCL_PRIORITY)); +public: + DirectSoundBackend() + : AudioBackend("directsound", "Microsoft DirectSound") {} - // Set up WAV format structure. - WAVEFORMATEX wfx; - memset(&wfx, 0, sizeof(WAVEFORMATEX)); - wfx.wFormatTag = WAVE_FORMAT_PCM; - wfx.nChannels = 2; - wfx.nSamplesPerSec = 44100; - wfx.nBlockAlign = 4; - wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign; - wfx.wBitsPerSample = 16; - - // Set up DSBUFFERDESC structure. 
- DSBUFFERDESC desc; - memset(&desc, 0, sizeof(DSBUFFERDESC)); - desc.dwSize = sizeof(DSBUFFERDESC); - desc.dwFlags = DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_CTRLPOSITIONNOTIFY | DSBCAPS_GLOBALFOCUS; - desc.dwBufferBytes = SAMPLE_BYTES * 2; - desc.lpwfxFormat = &wfx; - - // Create the buffer - ComPtr buffer_; - verifyc(dsound->CreateSoundBuffer(&desc, &buffer_.get(), 0)); - verifyc(buffer_.as(buffer)); - - // Set up notifications - ComPtr bufferNotify; - verifyc(buffer.as(bufferNotify)); - notificationEvents.clear(); - std::vector posNotify; - for (int i = 0; notificationOffset(i) < desc.dwBufferBytes; i++) + bool init() override { - notificationEvents.push_back(CreateEvent(nullptr, false, false, nullptr)); - posNotify.push_back({ notificationOffset(i), notificationEvents.back() }); + if (FAILED(DirectSoundCreate8(NULL, &dsound.get(), NULL))) { + ERROR_LOG(AUDIO, "DirectSound8 initialization failed"); + return false; + } + if (FAILED(dsound->SetCooperativeLevel(getNativeHwnd(), DSSCL_PRIORITY))) { + ERROR_LOG(AUDIO, "DirectSound8 SetCooperativeLevel failed"); + dsound.reset(); + return false; + } + + // Set up WAV format structure. + WAVEFORMATEX wfx; + memset(&wfx, 0, sizeof(WAVEFORMATEX)); + wfx.wFormatTag = WAVE_FORMAT_PCM; + wfx.nChannels = 2; + wfx.nSamplesPerSec = 44100; + wfx.nBlockAlign = 4; + wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign; + wfx.wBitsPerSample = 16; + + // Set up DSBUFFERDESC structure. 
+ DSBUFFERDESC desc; + memset(&desc, 0, sizeof(DSBUFFERDESC)); + desc.dwSize = sizeof(DSBUFFERDESC); + desc.dwFlags = DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_CTRLPOSITIONNOTIFY | DSBCAPS_GLOBALFOCUS; + desc.dwBufferBytes = SAMPLE_BYTES * 2; + desc.lpwfxFormat = &wfx; + + // Create the buffer + ComPtr buffer_; + if (FAILED(dsound->CreateSoundBuffer(&desc, &buffer_.get(), 0)) + || FAILED(buffer_.as(buffer))) + { + ERROR_LOG(AUDIO, "DirectSound8 CreateSoundBuffer failed"); + dsound.reset(); + return false; + } + + // Set up notifications + ComPtr bufferNotify; + verifyc(buffer.as(bufferNotify)); + notificationEvents.clear(); + std::vector posNotify; + for (int i = 0; notificationOffset(i) < desc.dwBufferBytes; i++) + { + notificationEvents.push_back(CreateEvent(nullptr, false, false, nullptr)); + posNotify.push_back({ notificationOffset(i), notificationEvents.back() }); + } + bufferNotify->SetNotificationPositions(posNotify.size(), &posNotify[0]); + + // Clear the buffers + void *p1, *p2; + DWORD sz1, sz2; + verifyc(buffer->Lock(0, desc.dwBufferBytes, &p1, &sz1, &p2, &sz2, 0)); + verify(p2 == nullptr); + memset(p1, 0, sz1); + verifyc(buffer->Unlock(p1, sz1, p2, sz2)); + ringBuffer.setCapacity(config::AudioBufferSize * 4); + + // Start the thread + audioThread = std::thread(&audioThreadMain, this); + + // Play the buffer ! + if (FAILED(buffer->Play(0, 0, DSBPLAY_LOOPING))) + { + ERROR_LOG(AUDIO, "DirectSound8 Play failed"); + term(); + return false; + } + INFO_LOG(AUDIO, "DirectSound playback started"); + + return true; } - bufferNotify->SetNotificationPositions(posNotify.size(), &posNotify[0]); - // Clear the buffers - void *p1, *p2; - DWORD sz1, sz2; - verifyc(buffer->Lock(0, desc.dwBufferBytes, &p1, &sz1, &p2, &sz2, 0)); - verify(p2 == nullptr); - memset(p1, 0, sz1); - verifyc(buffer->Unlock(p1, sz1, p2, sz2)); - ringBuffer.setCapacity(config::AudioBufferSize * 4); - - // Start the thread - audioThread = std::thread(audioThreadMain); - - // Play the buffer ! 
- verifyc(buffer->Play(0, 0, DSBPLAY_LOOPING)); - INFO_LOG(AUDIO, "DirectSound playback started"); -} - -static u32 directsound_push(const void* frame, u32 samples, bool wait) -{ - while (!ringBuffer.write((const u8 *)frame, samples * 4) && wait) - pushWait.Wait(); - - return 1; -} - -static void directsound_term() -{ - audioThreadRunning = false; - audioThread.join(); - buffer->Stop(); - - for (HANDLE event : notificationEvents) - CloseHandle(event); - buffer.reset(); - dsound.reset(); - INFO_LOG(AUDIO, "DirectSound playback stopped"); -} - -static bool directsound_init_record(u32 sampling_freq) -{ - if (FAILED(DirectSoundCaptureCreate8(&DSDEVID_DefaultVoiceCapture, &dcapture.get(), NULL))) + u32 push(const void* frame, u32 samples, bool wait) override { - INFO_LOG(AUDIO, "DirectSound capture device creation failed"); - return false; + while (!ringBuffer.write((const u8 *)frame, samples * 4) && wait) + pushWait.Wait(); + + return 1; } - HRESULT hr; - WAVEFORMATEX wfx = - { WAVE_FORMAT_PCM, 1, sampling_freq, sampling_freq * 2, 2, 16, 0 }; - // wFormatTag, nChannels, nSamplesPerSec, nAvgBytesPerSec, - // nBlockAlign, wBitsPerSample, cbSize - DSCBUFFERDESC dscbd; - dscbd.dwSize = sizeof(DSCBUFFERDESC); - dscbd.dwFlags = 0; - dscbd.dwBufferBytes = 480 * 2; - dscbd.dwReserved = 0; - dscbd.lpwfxFormat = &wfx; - dscbd.dwFXCount = 0; - dscbd.lpDSCFXDesc = NULL; - - ComPtr pDSCB; - if (FAILED(hr = dcapture->CreateCaptureBuffer(&dscbd, &pDSCB.get(), NULL))) + void term() override { - INFO_LOG(AUDIO, "DirectSound capture buffer creation failed"); + audioThreadRunning = false; + audioThread.join(); + buffer->Stop(); + + for (HANDLE event : notificationEvents) + CloseHandle(event); + buffer.reset(); + dsound.reset(); + INFO_LOG(AUDIO, "DirectSound playback stopped"); + } + + bool initRecord(u32 sampling_freq) override + { + if (FAILED(DirectSoundCaptureCreate8(&DSDEVID_DefaultVoiceCapture, &dcapture.get(), NULL))) + { + ERROR_LOG(AUDIO, "DirectSound capture device creation 
failed"); + return false; + } + HRESULT hr; + WAVEFORMATEX wfx = + { WAVE_FORMAT_PCM, 1, sampling_freq, sampling_freq * 2, 2, 16, 0 }; + // wFormatTag, nChannels, nSamplesPerSec, nAvgBytesPerSec, + // nBlockAlign, wBitsPerSample, cbSize + + DSCBUFFERDESC dscbd; + dscbd.dwSize = sizeof(DSCBUFFERDESC); + dscbd.dwFlags = 0; + dscbd.dwBufferBytes = 480 * 2; + dscbd.dwReserved = 0; + dscbd.lpwfxFormat = &wfx; + dscbd.dwFXCount = 0; + dscbd.lpDSCFXDesc = NULL; + + ComPtr pDSCB; + if (FAILED(hr = dcapture->CreateCaptureBuffer(&dscbd, &pDSCB.get(), NULL))) + { + ERROR_LOG(AUDIO, "DirectSound capture buffer creation failed"); + dcapture.reset(); + return false; + } + pDSCB.as(capture_buffer); + capture_buffer->Start(DSCBSTART_LOOPING); + INFO_LOG(AUDIO, "DirectSound capture device and buffer created"); + + return true; + } + + u32 record(void *buffer, u32 samples) override + { + DWORD readPos; + capture_buffer->GetCurrentPosition(NULL, &readPos); + void *p1, *p2; + DWORD p1bytes, p2bytes; + capture_buffer->Lock(readPos, samples * 2, &p1, &p1bytes, &p2, &p2bytes, 0); + memcpy(buffer, p1, p1bytes); + if (p2bytes > 0) + memcpy((u8 *)buffer + p1bytes, p2, p2bytes); + capture_buffer->Unlock(p1, p1bytes, p2, p2bytes); + return (p1bytes + p2bytes) / 2; + } + + void termRecord() + { + if (!dcapture) + return; + capture_buffer->Stop(); + capture_buffer.reset(); dcapture.reset(); - return false; } - pDSCB.as(capture_buffer); - capture_buffer->Start(DSCBSTART_LOOPING); - INFO_LOG(AUDIO, "DirectSound capture device and buffer created"); - - return true; -} - -static u32 directsound_record(void *buffer, u32 samples) -{ - DWORD readPos; - capture_buffer->GetCurrentPosition(NULL, &readPos); - void *p1, *p2; - DWORD p1bytes, p2bytes; - capture_buffer->Lock(readPos, samples * 2, &p1, &p1bytes, &p2, &p2bytes, 0); - memcpy(buffer, p1, p1bytes); - if (p2bytes > 0) - memcpy((u8 *)buffer + p1bytes, p2, p2bytes); - capture_buffer->Unlock(p1, p1bytes, p2, p2bytes); - return (p1bytes + p2bytes) / 
2; -} - -static void directsound_term_record() -{ - if (!dcapture) - return; - capture_buffer->Stop(); - capture_buffer.reset(); - dcapture.reset(); -} - -static audiobackend_t audiobackend_directsound = { - "directsound", // Slug - "Microsoft DirectSound", // Name - &directsound_init, - &directsound_push, - &directsound_term, - NULL, - &directsound_init_record, - &directsound_record, - &directsound_term_record }; +static DirectSoundBackend directSoundBackend; -static bool ds = RegisterAudioBackend(&audiobackend_directsound); #endif diff --git a/core/oslib/audiobackend_libao.cpp b/core/oslib/audiobackend_libao.cpp index a08fefd80..f40383b03 100644 --- a/core/oslib/audiobackend_libao.cpp +++ b/core/oslib/audiobackend_libao.cpp @@ -2,49 +2,53 @@ #include "audiostream.h" #include -static ao_device *aodevice; -static ao_sample_format aoformat; - -static void libao_init() +class LibAOBackend : public AudioBackend { - ao_initialize(); - memset(&aoformat, 0, sizeof(aoformat)); + ao_device *aodevice = nullptr; - aoformat.bits = 16; - aoformat.channels = 2; - aoformat.rate = 44100; - aoformat.byte_format = AO_FMT_LITTLE; +public: + LibAOBackend() + : AudioBackend("libao", "libao") {} - aodevice = ao_open_live(ao_default_driver_id(), &aoformat, NULL); // Live output - if (!aodevice) - aodevice = ao_open_live(ao_driver_id("null"), &aoformat, NULL); -} - -static u32 libao_push(const void* frame, u32 samples, bool wait) -{ - if (aodevice) - ao_play(aodevice, (char*)frame, samples * 4); - - return 1; -} - -static void libao_term() -{ - if (aodevice) + bool init() override { - ao_close(aodevice); - ao_shutdown(); + ao_initialize(); + + ao_sample_format aoformat {}; + aoformat.bits = 16; + aoformat.channels = 2; + aoformat.rate = 44100; + aoformat.byte_format = AO_FMT_LITTLE; + + aodevice = ao_open_live(ao_default_driver_id(), &aoformat, NULL); // Live output + if (aodevice == nullptr) + aodevice = ao_open_live(ao_driver_id("null"), &aoformat, NULL); + if (aodevice == nullptr) { + 
ERROR_LOG(AUDIO, "Cannot open libao driver"); + ao_shutdown(); + } + + return aodevice != nullptr; } -} -static audiobackend_t audiobackend_libao = { - "libao", // Slug - "libao", // Name - &libao_init, - &libao_push, - &libao_term, - NULL + u32 push(const void* frame, u32 samples, bool wait) override + { + if (aodevice != nullptr) + ao_play(aodevice, (char*)frame, samples * 4); + + return 1; + } + + void term() override + { + if (aodevice != nullptr) + { + ao_close(aodevice); + aodevice = nullptr; + ao_shutdown(); + } + } }; +static LibAOBackend libAOBackend; -static bool ao = RegisterAudioBackend(&audiobackend_libao); #endif diff --git a/core/oslib/audiobackend_null.cpp b/core/oslib/audiobackend_null.cpp index 0a90c92e1..08a8d876a 100644 --- a/core/oslib/audiobackend_null.cpp +++ b/core/oslib/audiobackend_null.cpp @@ -3,47 +3,59 @@ #include #include -using the_clock = std::chrono::high_resolution_clock; - -static the_clock::time_point last_time; - -static void null_init() +class NullAudioBackend : public AudioBackend { - last_time = the_clock::time_point(); -} + using the_clock = std::chrono::high_resolution_clock; -static void null_term() -{ -} +public: + NullAudioBackend() + : AudioBackend("null", "No Audio") {} -static u32 null_push(const void* frame, u32 samples, bool wait) -{ - if (wait) + bool init() override { - if (last_time.time_since_epoch() != the_clock::duration::zero()) - { - auto fduration = std::chrono::nanoseconds(1000000000L * samples / 44100); - auto duration = fduration - (the_clock::now() - last_time); - std::this_thread::sleep_for(duration); - last_time += fduration; - } - else - last_time = the_clock::now(); + last_time = the_clock::time_point(); + return true; } - return 1; -} -static bool null_init_record(u32 sampling_freq) -{ - return true; -} + void term() override + { + } -static u32 null_record(void *buffer, u32 samples) -{ - memset(buffer, 0, samples * 2); - return samples; -} + u32 push(const void* frame, u32 samples, bool wait) 
override + { + if (wait) + { + if (last_time.time_since_epoch() != the_clock::duration::zero()) + { + auto fduration = std::chrono::nanoseconds(1000000000L * samples / 44100); + auto duration = fduration - (the_clock::now() - last_time); + std::this_thread::sleep_for(duration); + last_time += fduration; + } + else + last_time = the_clock::now(); + } + return 1; + } + bool initRecord(u32 sampling_freq) override + { + return true; + } + + u32 record(void *buffer, u32 samples) override + { + memset(buffer, 0, samples * 2); + return samples; + } + +private: + the_clock::time_point last_time; +}; + +static NullAudioBackend nullBackend; + +/* static audiobackend_t audiobackend_null = { "null", // Slug "No Audio", // Name @@ -57,3 +69,4 @@ static audiobackend_t audiobackend_null = { }; static bool null = RegisterAudioBackend(&audiobackend_null); +*/ diff --git a/core/oslib/audiobackend_oboe.cpp b/core/oslib/audiobackend_oboe.cpp index 7887d0c91..36bfe0963 100644 --- a/core/oslib/audiobackend_oboe.cpp +++ b/core/oslib/audiobackend_oboe.cpp @@ -25,161 +25,163 @@ #include #include "stdclass.h" -static RingBuffer ringBuffer; -static cResetEvent pushWait; - -static std::shared_ptr stream; -static std::shared_ptr recordStream; - -static void audio_init(); -static void audio_term(); - -class AudioCallback : public oboe::AudioStreamDataCallback +class OboeBackend : AudioBackend { -public: - oboe::DataCallbackResult onAudioReady(oboe::AudioStream *audioStream, void *audioData, int32_t numFrames) override - { - if (!ringBuffer.read((u8 *)audioData, numFrames * 4)) - // underrun - memset(audioData, 0, numFrames * 4); - pushWait.Set(); + RingBuffer ringBuffer; + cResetEvent pushWait; - return oboe::DataCallbackResult::Continue; - } -}; -static AudioCallback audioCallback; + std::shared_ptr stream; + std::shared_ptr recordStream; -class AudioErrorCallback : public oboe::AudioStreamErrorCallback -{ -public: - void onErrorAfterClose(oboe::AudioStream *stream, oboe::Result error) 
override { - WARN_LOG(AUDIO, "Audio device lost. Attempting to reopen the audio stream"); - audio_term(); - audio_init(); - } -}; -static AudioErrorCallback errorCallback; - -static void audio_init() -{ - // Actual capacity is size-1 to avoid overrun so add one buffer - ringBuffer.setCapacity((config::AudioBufferSize + SAMPLE_COUNT) * 4); - - oboe::AudioStreamBuilder builder; - oboe::Result result = builder.setDirection(oboe::Direction::Output) - ->setPerformanceMode(oboe::PerformanceMode::LowLatency) - ->setSharingMode(oboe::SharingMode::Exclusive) - ->setFormat(oboe::AudioFormat::I16) - ->setChannelCount(oboe::ChannelCount::Stereo) - ->setSampleRate(44100) - ->setFramesPerCallback(SAMPLE_COUNT) - ->setDataCallback(&audioCallback) - ->setErrorCallback(&errorCallback) - ->setUsage(oboe::Usage::Game) - ->openStream(stream); - if (result != oboe::Result::OK) + class AudioCallback : public oboe::AudioStreamDataCallback { - ERROR_LOG(AUDIO, "Oboe open stream failed: %s", oboe::convertToText(result)); - return; - } + public: + AudioCallback(OboeBackend *backend) : backend(backend) {} - if (stream->getAudioApi() == oboe::AudioApi::AAudio && config::AudioBufferSize < 1764) - { - // Reduce internal buffer for low latency (< 40 ms) - int bufSize = stream->getBufferSizeInFrames(); - int burst = stream->getFramesPerBurst(); - if (bufSize - burst > SAMPLE_COUNT) + oboe::DataCallbackResult onAudioReady(oboe::AudioStream *audioStream, void *audioData, int32_t numFrames) override { - while (bufSize - burst > SAMPLE_COUNT) - bufSize -= burst; - stream->setBufferSizeInFrames(bufSize); + if (!backend->ringBuffer.read((u8 *)audioData, numFrames * 4)) + // underrun + memset(audioData, 0, numFrames * 4); + backend->pushWait.Set(); + + return oboe::DataCallbackResult::Continue; + } + + OboeBackend *backend; + }; + AudioCallback audioCallback; + + class AudioErrorCallback : public oboe::AudioStreamErrorCallback + { + public: + AudioErrorCallback(OboeBackend *backend) : backend(backend) 
{} + + void onErrorAfterClose(oboe::AudioStream *stream, oboe::Result error) override { + WARN_LOG(AUDIO, "Audio device lost. Attempting to reopen the audio stream"); + backend->term(); + backend->init(); + } + + OboeBackend *backend; + }; + AudioErrorCallback errorCallback; + +public: + OboeBackend() + : AudioBackend("Oboe", "Automatic AAudio / OpenSL selection"), audioCallback(this), errorCallback(this) {} + + bool init() override + { + // Actual capacity is size-1 to avoid overrun so add one buffer + ringBuffer.setCapacity((config::AudioBufferSize + SAMPLE_COUNT) * 4); + + oboe::AudioStreamBuilder builder; + oboe::Result result = builder.setDirection(oboe::Direction::Output) + ->setPerformanceMode(oboe::PerformanceMode::LowLatency) + ->setSharingMode(oboe::SharingMode::Exclusive) + ->setFormat(oboe::AudioFormat::I16) + ->setChannelCount(oboe::ChannelCount::Stereo) + ->setSampleRate(44100) + ->setFramesPerCallback(SAMPLE_COUNT) + ->setDataCallback(&audioCallback) + ->setErrorCallback(&errorCallback) + ->setUsage(oboe::Usage::Game) + ->openStream(stream); + if (result != oboe::Result::OK) + { + ERROR_LOG(AUDIO, "Oboe open stream failed: %s", oboe::convertToText(result)); + return false; + } + + if (stream->getAudioApi() == oboe::AudioApi::AAudio && config::AudioBufferSize < 1764) + { + // Reduce internal buffer for low latency (< 40 ms) + int bufSize = stream->getBufferSizeInFrames(); + int burst = stream->getFramesPerBurst(); + if (bufSize - burst > SAMPLE_COUNT) + { + while (bufSize - burst > SAMPLE_COUNT) + bufSize -= burst; + stream->setBufferSizeInFrames(bufSize); + } + } + + stream->requestStart(); + NOTICE_LOG(AUDIO, "Oboe driver started. 
stream capacity: %d frames, size: %d frames, frames/callback: %d, frames/burst: %d", + stream->getBufferCapacityInFrames(), stream->getBufferSizeInFrames(), + stream->getFramesPerCallback(), stream->getFramesPerBurst()); + + return true; + } + + void term() override + { + NOTICE_LOG(AUDIO, "Oboe driver stopping"); + if (stream != nullptr) + { + stream->stop(); + stream->close(); + stream.reset(); } } - stream->requestStart(); - NOTICE_LOG(AUDIO, "Oboe driver started. stream capacity: %d frames, size: %d frames, frames/callback: %d, frames/burst: %d", - stream->getBufferCapacityInFrames(), stream->getBufferSizeInFrames(), - stream->getFramesPerCallback(), stream->getFramesPerBurst()); -} - -static void audio_term() -{ - NOTICE_LOG(AUDIO, "Oboe driver stopping"); - if (stream != nullptr) + u32 push(const void* frame, u32 samples, bool wait) override { - stream->stop(); - stream->close(); - stream.reset(); + while (!ringBuffer.write((const u8 *)frame, samples * 4) && wait) + pushWait.Wait(); + + return 1; } -} -static u32 audio_push(const void* frame, u32 samples, bool wait) { - while (!ringBuffer.write((const u8 *)frame, samples * 4) && wait) - pushWait.Wait(); - - return 1; -} - -static void term_record() -{ - if (recordStream != nullptr) + void termRecord() override { - recordStream->stop(); - recordStream->close(); - recordStream.reset(); + if (recordStream != nullptr) + { + recordStream->stop(); + recordStream->close(); + recordStream.reset(); + } + NOTICE_LOG(AUDIO, "Oboe recorder stopped"); } - NOTICE_LOG(AUDIO, "Oboe recorder stopped"); -} -static bool init_record(u32 sampling_freq) -{ - oboe::AudioStreamBuilder builder; - oboe::Result result = builder.setDirection(oboe::Direction::Input) - ->setPerformanceMode(oboe::PerformanceMode::None) - ->setSharingMode(oboe::SharingMode::Exclusive) - ->setFormat(oboe::AudioFormat::I16) - ->setChannelCount(oboe::ChannelCount::Mono) - ->setSampleRate(sampling_freq) - ->openStream(recordStream); - if (result != 
oboe::Result::OK) + bool initRecord(u32 sampling_freq) override { - ERROR_LOG(AUDIO, "Oboe open record stream failed: %s", oboe::convertToText(result)); - return false; + oboe::AudioStreamBuilder builder; + oboe::Result result = builder.setDirection(oboe::Direction::Input) + ->setPerformanceMode(oboe::PerformanceMode::None) + ->setSharingMode(oboe::SharingMode::Exclusive) + ->setFormat(oboe::AudioFormat::I16) + ->setChannelCount(oboe::ChannelCount::Mono) + ->setSampleRate(sampling_freq) + ->openStream(recordStream); + if (result != oboe::Result::OK) + { + ERROR_LOG(AUDIO, "Oboe open record stream failed: %s", oboe::convertToText(result)); + return false; + } + recordStream->requestStart(); + NOTICE_LOG(AUDIO, "Oboe recorder started. stream capacity: %d frames", + stream->getBufferCapacityInFrames()); + + return true; } - recordStream->requestStart(); - NOTICE_LOG(AUDIO, "Oboe recorder started. stream capacity: %d frames", - stream->getBufferCapacityInFrames()); - return true; -} - -static u32 record(void *data, u32 samples) -{ - if (recordStream == nullptr) - return 0; - oboe::ResultWithValue result = recordStream->read(data, samples, 0); - if (result == oboe::Result::ErrorDisconnected) + u32 record(void *data, u32 samples) override { - WARN_LOG(AUDIO, "Recording device lost. Attempting to reopen the audio stream"); - u32 sampleRate = recordStream->getSampleRate(); - term_record(); - init_record(sampleRate); + if (recordStream == nullptr) + return 0; + oboe::ResultWithValue result = recordStream->read(data, samples, 0); + if (result == oboe::Result::ErrorDisconnected) + { + WARN_LOG(AUDIO, "Recording device lost. 
Attempting to reopen the audio stream"); + u32 sampleRate = recordStream->getSampleRate(); + termRecord(); + initRecord(sampleRate); + } + return std::max(0, result.value()); } - return std::max(0, result.value()); -} - -static audiobackend_t audiobackend_oboe = { - "Oboe", // Slug - "Automatic AAudio / OpenSL selection", // Name - &audio_init, - &audio_push, - &audio_term, - NULL, - &init_record, - &record, - &term_record }; - -static bool oboebe = RegisterAudioBackend(&audiobackend_oboe); +static OboeBackend oboeBackend; #endif diff --git a/core/oslib/audiobackend_omx.cpp b/core/oslib/audiobackend_omx.cpp index 37f305829..3c2be0444 100644 --- a/core/oslib/audiobackend_omx.cpp +++ b/core/oslib/audiobackend_omx.cpp @@ -6,316 +6,331 @@ #define PORT_INDEX 100 #define OUTPUT_FREQ 44100 -static OMX_HANDLETYPE omx_handle; -static OMX_STATETYPE omx_state = OMX_StateInvalid; -static size_t audio_buffer_idx; -static OMX_BUFFERHEADERTYPE** audio_buffers = NULL; -static u32 latency_max; -static u32 buffer_count; -static u32 buffer_size; -static u32 buffer_length; -static pthread_mutex_t audio_lock; -static pthread_cond_t omx_state_cond; - -static OMX_ERRORTYPE EventHandler( - OMX_IN OMX_HANDLETYPE hComponent, - OMX_IN OMX_PTR pAppData, - OMX_IN OMX_EVENTTYPE eEvent, - OMX_IN OMX_U32 nData1, - OMX_IN OMX_U32 nData2, - OMX_IN OMX_PTR pEventData) +class OMXAudioBackend : public AudioBackend { - pthread_mutex_lock(&audio_lock); - if(eEvent == OMX_EventCmdComplete && nData1 == OMX_CommandStateSet) + OMX_HANDLETYPE omx_handle = nullptr; + OMX_STATETYPE omx_state = OMX_StateInvalid; + size_t audio_buffer_idx = 0; + OMX_BUFFERHEADERTYPE** audio_buffers = nullptr; + u32 latency_max = 0; + u32 buffer_count = 0; + u32 buffer_size = 0; + u32 buffer_length = 0; + pthread_mutex_t audio_lock; + pthread_cond_t omx_state_cond; + + static OMX_ERRORTYPE eventHandler( + OMX_IN OMX_HANDLETYPE hComponent, + OMX_IN OMX_PTR pAppData, + OMX_IN OMX_EVENTTYPE eEvent, + OMX_IN OMX_U32 nData1, + OMX_IN 
OMX_U32 nData2, + OMX_IN OMX_PTR pEventData) { - omx_state = (OMX_STATETYPE)nData2; - pthread_cond_signal(&omx_state_cond); - } - pthread_mutex_unlock(&audio_lock); - return OMX_ErrorNone; -} - -static OMX_ERRORTYPE EmptyBufferDone( - OMX_IN OMX_HANDLETYPE hComponent, - OMX_IN OMX_PTR pAppData, - OMX_IN OMX_BUFFERHEADERTYPE* pBuffer) -{ - return OMX_ErrorNone; -} - -static OMX_ERRORTYPE FillBufferDone( - OMX_OUT OMX_HANDLETYPE hComponent, - OMX_OUT OMX_PTR pAppData, - OMX_OUT OMX_BUFFERHEADERTYPE* pBuffer) -{ - return OMX_ErrorNone; -} - -static void omx_wait_for_state(OMX_STATETYPE state) -{ - pthread_mutex_lock(&audio_lock); - while(omx_state != state) - pthread_cond_wait(&omx_state_cond, &audio_lock); - pthread_mutex_unlock(&audio_lock); -} - -static u32 omx_get_latency() -{ - OMX_PARAM_U32TYPE param; - memset(¶m, 0, sizeof(OMX_PARAM_U32TYPE)); - param.nSize = sizeof(OMX_PARAM_U32TYPE); - param.nVersion.nVersion = OMX_VERSION; - param.nPortIndex = PORT_INDEX; - - OMX_ERRORTYPE error = OMX_GetConfig(omx_handle, OMX_IndexConfigAudioRenderingLatency, ¶m); - if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: failed to get OMX configuration (OMX_IndexConfigAudioRenderingLatency). Error 0x%X", error); - - return param.nU32 * 1000 / OUTPUT_FREQ; -} - -static void omx_init() -{ - OMX_ERRORTYPE error; - - error = OMX_Init(); - if(error != OMX_ErrorNone) - { - WARN_LOG(AUDIO, "OMX: OMX_Init() failed. Error 0x%X", error); - return; - } - - // Initialize settings - latency_max = config::OmxAudioLatency; - buffer_size = config::AudioBufferSize * 4; - buffer_count = 2 + OUTPUT_FREQ * latency_max / (buffer_size * 1000); - - OMX_CALLBACKTYPE callbacks; - callbacks.EventHandler = EventHandler; - callbacks.EmptyBufferDone = EmptyBufferDone; - callbacks.EmptyBufferDone = FillBufferDone; - - error = OMX_GetHandle(&omx_handle, (OMX_STRING)"OMX.broadcom.audio_render", NULL, &callbacks); - if(error != OMX_ErrorNone) - { - WARN_LOG(AUDIO, "OMX: OMX_GetHandle() failed. 
Error 0x%X", error); - return; - } - - OMX_PARAM_PORTDEFINITIONTYPE param; - memset(¶m, 0, sizeof(OMX_PARAM_PORTDEFINITIONTYPE)); - param.nSize = sizeof(OMX_PARAM_PORTDEFINITIONTYPE); - param.nVersion.nVersion = OMX_VERSION; - param.nPortIndex = PORT_INDEX; - param.nBufferSize = buffer_size; - param.nBufferCountActual = buffer_count; - param.format.audio.eEncoding = OMX_AUDIO_CodingPCM; - - error = OMX_SetParameter(omx_handle, OMX_IndexParamPortDefinition, ¶m); - if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: failed to set OMX_IndexParamPortDefinition. Error 0x%X", error); - - OMX_AUDIO_PARAM_PCMMODETYPE pcm; - memset(&pcm, 0, sizeof(OMX_AUDIO_PARAM_PCMMODETYPE)); - pcm.nSize = sizeof(OMX_AUDIO_PARAM_PCMMODETYPE); - pcm.nVersion.nVersion = OMX_VERSION; - pcm.nPortIndex = PORT_INDEX; - pcm.nChannels = 2; - pcm.eNumData = OMX_NumericalDataSigned; - pcm.eEndian = OMX_EndianLittle; - pcm.nSamplingRate = OUTPUT_FREQ; - pcm.bInterleaved = OMX_TRUE; - pcm.nBitPerSample = 16; - pcm.ePCMMode = OMX_AUDIO_PCMModeLinear; - pcm.eChannelMapping[1] = OMX_AUDIO_ChannelRF; - pcm.eChannelMapping[0] = OMX_AUDIO_ChannelLF; - - error = OMX_SetParameter(omx_handle, OMX_IndexParamAudioPcm, &pcm); - if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: failed to set OMX_IndexParamAudioPcm. Error 0x%X", error); - - // Disable all ports - error = OMX_SendCommand(omx_handle, OMX_CommandPortDisable, PORT_INDEX, NULL); - if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandPortDisable. Error 0x%X", error); - - OMX_PORT_PARAM_TYPE param2; - memset(¶m2, 0, sizeof(OMX_PORT_PARAM_TYPE)); - param2.nSize = sizeof(OMX_PORT_PARAM_TYPE); - param2.nVersion.nVersion = OMX_VERSION; - error = OMX_GetParameter(omx_handle, OMX_IndexParamOtherInit, ¶m2); - if(error != OMX_ErrorNone) - { - WARN_LOG(AUDIO, "OMX: failed to get OMX_IndexParamOtherInit. 
Error 0x%X", error); - } - else - { - for(u32 i = 0; i < param2.nPorts; i++) + OMXAudioBackend *backend = (OMXAudioBackend *)pAddData; + pthread_mutex_lock(&backend->audio_lock); + if(eEvent == OMX_EventCmdComplete && nData1 == OMX_CommandStateSet) { - u32 port = param2.nStartPortNumber + i; - error = OMX_SendCommand(omx_handle, OMX_CommandPortDisable, port, NULL); + backend->omx_state = (OMX_STATETYPE)nData2; + pthread_cond_signal(&backend->omx_state_cond); + } + pthread_mutex_unlock(&backend->audio_lock); + return OMX_ErrorNone; + } + + static OMX_ERRORTYPE emptyBufferDone( + OMX_IN OMX_HANDLETYPE hComponent, + OMX_IN OMX_PTR pAppData, + OMX_IN OMX_BUFFERHEADERTYPE* pBuffer) + { + return OMX_ErrorNone; + } + + static OMX_ERRORTYPE fillBufferDone( + OMX_OUT OMX_HANDLETYPE hComponent, + OMX_OUT OMX_PTR pAppData, + OMX_OUT OMX_BUFFERHEADERTYPE* pBuffer) + { + return OMX_ErrorNone; + } + + void waitForState(OMX_STATETYPE state) + { + pthread_mutex_lock(&audio_lock); + while(omx_state != state) + pthread_cond_wait(&omx_state_cond, &audio_lock); + pthread_mutex_unlock(&audio_lock); + } + + u32 getLatency() + { + OMX_PARAM_U32TYPE param; + memset(¶m, 0, sizeof(OMX_PARAM_U32TYPE)); + param.nSize = sizeof(OMX_PARAM_U32TYPE); + param.nVersion.nVersion = OMX_VERSION; + param.nPortIndex = PORT_INDEX; + + OMX_ERRORTYPE error = OMX_GetConfig(omx_handle, OMX_IndexConfigAudioRenderingLatency, ¶m); + if(error != OMX_ErrorNone) + WARN_LOG(AUDIO, "OMX: failed to get OMX configuration (OMX_IndexConfigAudioRenderingLatency). Error 0x%X", error); + + return param.nU32 * 1000 / OUTPUT_FREQ; + } + +public: + OMXAudioBackend() + : AudioBackend("omx", "OpenMAX IL") {} + + bool init() override + { + OMX_ERRORTYPE error; + + error = OMX_Init(); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: OMX_Init() failed. 
Error 0x%X", error); + return false; + } + + // Initialize settings + latency_max = config::OmxAudioLatency; + buffer_size = config::AudioBufferSize * 4; + buffer_count = 2 + OUTPUT_FREQ * latency_max / (buffer_size * 1000); + + OMX_CALLBACKTYPE callbacks; + callbacks.EventHandler = eventHandler; + callbacks.EmptyBufferDone = emptyBufferDone; + callbacks.FillBufferDone = fillBufferDone; + + error = OMX_GetHandle(&omx_handle, (OMX_STRING)"OMX.broadcom.audio_render", this, &callbacks); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: OMX_GetHandle() failed. Error 0x%X", error); + OMX_Deinit(); + return false; + } + + OMX_PARAM_PORTDEFINITIONTYPE param; + memset(¶m, 0, sizeof(OMX_PARAM_PORTDEFINITIONTYPE)); + param.nSize = sizeof(OMX_PARAM_PORTDEFINITIONTYPE); + param.nVersion.nVersion = OMX_VERSION; + param.nPortIndex = PORT_INDEX; + param.nBufferSize = buffer_size; + param.nBufferCountActual = buffer_count; + param.format.audio.eEncoding = OMX_AUDIO_CodingPCM; + + error = OMX_SetParameter(omx_handle, OMX_IndexParamPortDefinition, ¶m); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: failed to set OMX_IndexParamPortDefinition. Error 0x%X", error); + OMX_Deinit(); + return false; + } + + OMX_AUDIO_PARAM_PCMMODETYPE pcm; + memset(&pcm, 0, sizeof(OMX_AUDIO_PARAM_PCMMODETYPE)); + pcm.nSize = sizeof(OMX_AUDIO_PARAM_PCMMODETYPE); + pcm.nVersion.nVersion = OMX_VERSION; + pcm.nPortIndex = PORT_INDEX; + pcm.nChannels = 2; + pcm.eNumData = OMX_NumericalDataSigned; + pcm.eEndian = OMX_EndianLittle; + pcm.nSamplingRate = OUTPUT_FREQ; + pcm.bInterleaved = OMX_TRUE; + pcm.nBitPerSample = 16; + pcm.ePCMMode = OMX_AUDIO_PCMModeLinear; + pcm.eChannelMapping[1] = OMX_AUDIO_ChannelRF; + pcm.eChannelMapping[0] = OMX_AUDIO_ChannelLF; + + error = OMX_SetParameter(omx_handle, OMX_IndexParamAudioPcm, &pcm); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: failed to set OMX_IndexParamAudioPcm. 
Error 0x%X", error); + OMX_Deinit(); + return false; + } + + // Disable all ports + error = OMX_SendCommand(omx_handle, OMX_CommandPortDisable, PORT_INDEX, NULL); + if (error != OMX_ErrorNone) + WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandPortDisable. Error 0x%X", error); + + OMX_PORT_PARAM_TYPE param2; + memset(¶m2, 0, sizeof(OMX_PORT_PARAM_TYPE)); + param2.nSize = sizeof(OMX_PORT_PARAM_TYPE); + param2.nVersion.nVersion = OMX_VERSION; + error = OMX_GetParameter(omx_handle, OMX_IndexParamOtherInit, ¶m2); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: failed to get OMX_IndexParamOtherInit. Error 0x%X", error); + } + else + { + for(u32 i = 0; i < param2.nPorts; i++) + { + u32 port = param2.nStartPortNumber + i; + error = OMX_SendCommand(omx_handle, OMX_CommandPortDisable, port, NULL); + if(error != OMX_ErrorNone) + WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandPortDisable on port %u. Error 0x%X", port, error); + } + } + + // Go into idle state + error = OMX_SendCommand(omx_handle, OMX_CommandStateSet, OMX_StateIdle, NULL); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: failed to set OMX_CommandStateSet. Error 0x%X", error); + OMX_Deinit(); + return false; + } + waitForState(OMX_StateIdle); + + // Check if we're in a state able to recieve buffers + OMX_STATETYPE state; + error = OMX_GetState(omx_handle, &state); + if (error != OMX_ErrorNone || !(state == OMX_StateIdle || state == OMX_StateExecuting || state == OMX_StatePause)) + { + WARN_LOG(AUDIO, "OMX: state is incorrect. State 0x%X; Error 0x%X", state, error); + OMX_Deinit(); + return false; + } + + // Create audio buffers + INFO_LOG(AUDIO, "OMX: creating %u buffers", buffer_count); + + // Enable port + error = OMX_SendCommand(omx_handle, OMX_CommandPortEnable, PORT_INDEX, NULL); + if (error != OMX_ErrorNone) + WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandPortEnable. 
Error 0x%X", error); + + // Free audio buffers if they're allocated + if (audio_buffers != NULL) + delete[] audio_buffers; + + // Allocate buffers + audio_buffers = new OMX_BUFFERHEADERTYPE*[buffer_count]; + for (size_t i = 0; i < buffer_count; i++) + { + error = OMX_AllocateBuffer(omx_handle, &audio_buffers[i], PORT_INDEX, NULL, buffer_size); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: failed to allocate buffer[%u]. Error 0x%X", i, error); + OMX_Deinit(); + return false; + } + } + + // Set state to executing + error = OMX_SendCommand(omx_handle, OMX_CommandStateSet, OMX_StateExecuting, NULL); + if (error != OMX_ErrorNone) + { + WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandStateSet. Error 0x%X", error); + OMX_Deinit(); + return false; + } + waitForState(OMX_StateExecuting); + + // Empty buffers + for (size_t i = 0; i < buffer_count; i++) + { + memset(audio_buffers[i]->pBuffer, 0, buffer_size); + audio_buffers[i]->nOffset = 0; + audio_buffers[i]->nFilledLen = buffer_size; + + error = OMX_EmptyThisBuffer(omx_handle, audio_buffers[i]); if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandPortDisable on port %u. Error 0x%X", port, error); + WARN_LOG(AUDIO, "OMX: failed to empty buffer[%u]. Error 0x%X", i, error); } - } - // Go into idle state - error = OMX_SendCommand(omx_handle, OMX_CommandStateSet, OMX_StateIdle, NULL); - if(error != OMX_ErrorNone) - { - WARN_LOG(AUDIO, "OMX: failed to set OMX_CommandStateSet. Error 0x%X", error); - return; - } - omx_wait_for_state(OMX_StateIdle); + const char* output_device = "local"; + if (config::OmxAudioHdmi) + output_device = (const char*)"hdmi"; - // Check if we're in a state able to recieve buffers - OMX_STATETYPE state; - error = OMX_GetState(omx_handle, &state); - if(error != OMX_ErrorNone || !(state == OMX_StateIdle || state == OMX_StateExecuting || state == OMX_StatePause)) - { - WARN_LOG(AUDIO, "OMX: state is incorrect. 
State 0x%X; Error 0x%X", state, error); - return; - } - - // Create audio buffers - INFO_LOG(AUDIO, "OMX: creating %u buffers", buffer_count); - - // Enable port - error = OMX_SendCommand(omx_handle, OMX_CommandPortEnable, PORT_INDEX, NULL); - if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandPortEnable. Error 0x%X", error); - - // Free audio buffers if they're allocated - if(audio_buffers != NULL) - delete[] audio_buffers; - - // Allocate buffers - audio_buffers = new OMX_BUFFERHEADERTYPE*[buffer_count]; - for(size_t i = 0; i < buffer_count; i++) - { - error = OMX_AllocateBuffer(omx_handle, &audio_buffers[i], PORT_INDEX, NULL, buffer_size); - if(error != OMX_ErrorNone) + // Set audio destination + OMX_CONFIG_BRCMAUDIODESTINATIONTYPE ar_dest; + memset(&ar_dest, 0, sizeof(ar_dest)); + ar_dest.nSize = sizeof(OMX_CONFIG_BRCMAUDIODESTINATIONTYPE); + ar_dest.nVersion.nVersion = OMX_VERSION; + strcpy((char *)ar_dest.sName, output_device); + error = OMX_SetConfig(omx_handle, OMX_IndexConfigBrcmAudioDestination, &ar_dest); + if (error != OMX_ErrorNone) { - WARN_LOG(AUDIO, "OMX: failed to allocate buffer[%u]. Error 0x%X", i, error); - return; + WARN_LOG(AUDIO, "OMX: failed to set OMX configuration (OMX_IndexConfigBrcmAudioDestination). Error 0x%X", error); + OMX_Deinit(); + return false; } + + audio_buffer_idx = 0; + buffer_length = 0; + + INFO_LOG(AUDIO, "OMX: audio output to '%s'", ar_dest.sName); + + return true; } - // Set state to executing - error = OMX_SendCommand(omx_handle, OMX_CommandStateSet, OMX_StateExecuting, NULL); - if(error != OMX_ErrorNone) + u32 push(const void* frame, u32 samples, bool wait) override { - WARN_LOG(AUDIO, "OMX: failed to do OMX_CommandStateSet. 
Error 0x%X", error); - return; - } - omx_wait_for_state(OMX_StateExecuting); + if(audio_buffers == NULL) + return 1; - // Empty buffers - for(size_t i = 0; i < buffer_count; i++) - { - memset(audio_buffers[i]->pBuffer, 0, buffer_size); - audio_buffers[i]->nOffset = 0; - audio_buffers[i]->nFilledLen = buffer_size; + size_t data_size = samples * 4; - error = OMX_EmptyThisBuffer(omx_handle, audio_buffers[i]); - if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: failed to empty buffer[%u]. Error 0x%X", i, error); - } + while(data_size > 0) + { + size_t copy_size = std::min(buffer_size - buffer_length, data_size); - const char* output_device = "local"; - if (config::OmxAudioHdmi) - output_device = (const char*)"hdmi"; + // Don't have more than maximum audio latency + u32 latency = getLatency(); + if(latency > latency_max) + { + usleep((latency - latency_max) * 1000); + } + else if(latency == 0) + { + INFO_LOG(AUDIO, "OMX: underrun occurred"); + } - // Set audio destination - OMX_CONFIG_BRCMAUDIODESTINATIONTYPE ar_dest; - memset(&ar_dest, 0, sizeof(ar_dest)); - ar_dest.nSize = sizeof(OMX_CONFIG_BRCMAUDIODESTINATIONTYPE); - ar_dest.nVersion.nVersion = OMX_VERSION; - strcpy((char *)ar_dest.sName, output_device); - error = OMX_SetConfig(omx_handle, OMX_IndexConfigBrcmAudioDestination, &ar_dest); - if(error != OMX_ErrorNone) - { - WARN_LOG(AUDIO, "OMX: failed to set OMX configuration (OMX_IndexConfigBrcmAudioDestination). 
Error 0x%X", error); - return; - } + memcpy(audio_buffers[audio_buffer_idx]->pBuffer + buffer_length, frame, copy_size); + buffer_length += copy_size; + frame = (char *)frame + copy_size; - audio_buffer_idx = 0; - buffer_length = 0; + // Flush buffer and swap + if(buffer_length >= buffer_size) + { + audio_buffers[audio_buffer_idx]->nOffset = 0; + audio_buffers[audio_buffer_idx]->nFilledLen = buffer_size; - INFO_LOG(AUDIO, "OMX: audio output to '%s'", ar_dest.sName); -} + OMX_ERRORTYPE error = OMX_EmptyThisBuffer(omx_handle, audio_buffers[audio_buffer_idx]); + if(error != OMX_ErrorNone) + INFO_LOG(AUDIO, "OMX: failed to empty buffer[%u]. Error 0x%X", audio_buffer_idx, error); + + audio_buffer_idx = (audio_buffer_idx + 1) % buffer_count; + buffer_length = 0; + } + + data_size -= copy_size; + } -static u32 omx_push(const void* frame, u32 samples, bool wait) -{ - if(audio_buffers == NULL) return 1; - - size_t data_size = samples * 4; - - while(data_size > 0) - { - size_t copy_size = std::min(buffer_size - buffer_length, data_size); - - // Don't have more than maximum audio latency - u32 latency = omx_get_latency(); - if(latency > latency_max) - { - usleep((latency - latency_max) * 1000); - } - else if(latency == 0) - { - INFO_LOG(AUDIO, "OMX: underrun occurred"); - } - - memcpy(audio_buffers[audio_buffer_idx]->pBuffer + buffer_length, frame, copy_size); - buffer_length += copy_size; - frame = (char *)frame + copy_size; - - // Flush buffer and swap - if(buffer_length >= buffer_size) - { - audio_buffers[audio_buffer_idx]->nOffset = 0; - audio_buffers[audio_buffer_idx]->nFilledLen = buffer_size; - - OMX_ERRORTYPE error = OMX_EmptyThisBuffer(omx_handle, audio_buffers[audio_buffer_idx]); - if(error != OMX_ErrorNone) - INFO_LOG(AUDIO, "OMX: failed to empty buffer[%u]. 
Error 0x%X", audio_buffer_idx, error); - - audio_buffer_idx = (audio_buffer_idx + 1) % buffer_count; - buffer_length = 0; - } - - data_size -= copy_size; } - return 1; -} - -static void omx_term() -{ - OMX_ERRORTYPE error; - - // Is there anything else that needs to be done for omx? - - error = OMX_Deinit(); - if(error != OMX_ErrorNone) - WARN_LOG(AUDIO, "OMX: OMX_Deinit() failed. Error 0x%X", error); - - if(audio_buffers != NULL) + void term() override { - delete[] audio_buffers; - audio_buffers = NULL; - } -} + OMX_ERRORTYPE error; -static audiobackend_t audiobackend_omx = { - "omx", // Slug - "OpenMAX IL", // Name - &omx_init, - &omx_push, - &omx_term, - NULL + // Is there anything else that needs to be done for omx? + + error = OMX_Deinit(); + if(error != OMX_ErrorNone) + WARN_LOG(AUDIO, "OMX: OMX_Deinit() failed. Error 0x%X", error); + + if(audio_buffers != NULL) + { + delete[] audio_buffers; + audio_buffers = NULL; + } + } }; +static OMXAudioBackend omxAudioBackend; -static bool omx = RegisterAudioBackend(&audiobackend_omx); #endif diff --git a/core/oslib/audiobackend_oss.cpp b/core/oslib/audiobackend_oss.cpp index f6624d197..d4818cab1 100644 --- a/core/oslib/audiobackend_oss.cpp +++ b/core/oslib/audiobackend_oss.cpp @@ -5,117 +5,112 @@ #include #include -static int oss_audio_fd = -1; -static int oss_rec_fd = -1; - -static void oss_init() +class OSSAudioBackend : public AudioBackend { - oss_audio_fd = open("/dev/dsp", O_WRONLY); - if (oss_audio_fd < 0) - { - WARN_LOG(AUDIO, "Couldn't open /dev/dsp."); - } - else + int audioFD = -1; + int recordFD = -1; + +public: + OSSAudioBackend() + : AudioBackend("oss", "Open Sound System") {} + + bool init() override { + audioFD = open("/dev/dsp", O_WRONLY); + if (audioFD < 0) + { + WARN_LOG(AUDIO, "Couldn't open /dev/dsp."); + return false; + } INFO_LOG(AUDIO, "sound enabled, dsp opened for write"); int tmp=44100; int err_ret; - err_ret=ioctl(oss_audio_fd,SNDCTL_DSP_SPEED,&tmp); + 
err_ret=ioctl(audioFD,SNDCTL_DSP_SPEED,&tmp); INFO_LOG(AUDIO, "set Frequency to %i, return %i (rate=%i)", 44100, err_ret, tmp); int channels=2; - err_ret=ioctl(oss_audio_fd, SNDCTL_DSP_CHANNELS, &channels); + err_ret=ioctl(audioFD, SNDCTL_DSP_CHANNELS, &channels); INFO_LOG(AUDIO, "set dsp to stereo (%i => %i)", channels, err_ret); int format=AFMT_S16_LE; - err_ret=ioctl(oss_audio_fd, SNDCTL_DSP_SETFMT, &format); + err_ret=ioctl(audioFD, SNDCTL_DSP_SETFMT, &format); INFO_LOG(AUDIO, "set dsp to %s audio (%i/%i => %i)", "16bits signed", AFMT_S16_LE, format, err_ret); - } -} -static u32 oss_push(const void* frame, u32 samples, bool wait) -{ - write(oss_audio_fd, frame, samples*4); - return 1; -} - -static void oss_term() -{ - if (oss_audio_fd >= 0) - close(oss_audio_fd); - oss_audio_fd = -1; -} - -// recording untested - -static bool oss_init_record(u32 sampling_freq) -{ - int oss_rec_fd = open("/dev/dsp", O_RDONLY); - if (oss_rec_fd < 0) - { - INFO_LOG(AUDIO, "OSS: can't open default audio capture device"); - return false; - } - int tmp = AFMT_S16_NE; // Native 16 bits - if (ioctl(oss_rec_fd, SNDCTL_DSP_SETFMT, &tmp) == -1 || tmp != AFMT_S16_NE) - { - INFO_LOG(AUDIO, "OSS: can't set sample format"); - close(oss_rec_fd); - oss_rec_fd = -1; - return false; - } - tmp = 1; - if (ioctl(oss_rec_fd, SNDCTL_DSP_CHANNELS, &tmp) == -1) - { - INFO_LOG(AUDIO, "OSS: can't set channel count"); - close(oss_rec_fd); - oss_rec_fd = -1; - return false; - } - tmp = sampling_freq; - if (ioctl(oss_rec_fd, SNDCTL_DSP_SPEED, &tmp) == -1) - { - INFO_LOG(AUDIO, "OSS: can't set sample rate"); - close(oss_rec_fd); - oss_rec_fd = -1; - return false; + return true; } - return true; -} - -static void oss_term_record() -{ - if (oss_rec_fd >= 0) - close(oss_rec_fd); - oss_rec_fd = -1; -} - -static u32 oss_record(void *buffer, u32 samples) -{ - samples *= 2; - int l = read(oss_rec_fd, buffer, samples); - if (l < (int)samples) + u32 push(const void* frame, u32 samples, bool wait) override { - if (l < 
0) + write(audioFD, frame, samples * 4); + return 1; + } + + void term() override + { + if (audioFD >= 0) + close(audioFD); + audioFD = -1; + } + + // recording untested + + bool initRecord(u32 sampling_freq) + { + recordFD = open("/dev/dsp", O_RDONLY); + if (recordFD < 0) { - INFO_LOG(AUDIO, "OSS: Recording error"); - l = 0; + INFO_LOG(AUDIO, "OSS: can't open default audio capture device"); + return false; } - memset((u8 *)buffer + l, 0, samples - l); + int tmp = AFMT_S16_NE; // Native 16 bits + if (ioctl(recordFD, SNDCTL_DSP_SETFMT, &tmp) == -1 || tmp != AFMT_S16_NE) + { + INFO_LOG(AUDIO, "OSS: can't set sample format"); + close(recordFD); + recordFD = -1; + return false; + } + tmp = 1; + if (ioctl(recordFD, SNDCTL_DSP_CHANNELS, &tmp) == -1) + { + INFO_LOG(AUDIO, "OSS: can't set channel count"); + close(recordFD); + recordFD = -1; + return false; + } + tmp = sampling_freq; + if (ioctl(recordFD, SNDCTL_DSP_SPEED, &tmp) == -1) + { + INFO_LOG(AUDIO, "OSS: can't set sample rate"); + close(recordFD); + recordFD = -1; + return false; + } + + return true; } - return l / 2; -} -static audiobackend_t audiobackend_oss = { - "oss", // Slug - "Open Sound System", // Name - &oss_init, - &oss_push, - &oss_term, - NULL, - &oss_init_record, - &oss_record, - &oss_term_record + void termRecord() override + { + if (recordFD >= 0) + close(recordFD); + recordFD = -1; + } + + u32 record(void *buffer, u32 samples) override + { + samples *= 2; + int l = read(recordFD, buffer, samples); + if (l < (int)samples) + { + if (l < 0) + { + INFO_LOG(AUDIO, "OSS: Recording error"); + l = 0; + } + memset((u8 *)buffer + l, 0, samples - l); + } + return l / 2; + } }; +static OSSAudioBackend ossAudioBackend; -static bool oss = RegisterAudioBackend(&audiobackend_oss); #endif diff --git a/core/oslib/audiobackend_pulseaudio.cpp b/core/oslib/audiobackend_pulseaudio.cpp index e7b965bfb..5efc77156 100644 --- a/core/oslib/audiobackend_pulseaudio.cpp +++ b/core/oslib/audiobackend_pulseaudio.cpp @@ -1,370 
+1,251 @@ #ifdef USE_PULSEAUDIO #include "audiostream.h" - -#ifdef PULSEAUDIO_SIMPLE -#include -#include - -static pa_simple *pulse_stream; -static pa_simple *pulse_record; - -static void pulseaudio_simple_init() -{ - pa_sample_spec ss; - ss.format = PA_SAMPLE_S16LE; - ss.channels = 2; - ss.rate = 44100; - - /* Create a new playback stream */ - int error; - pulse_stream = pa_simple_new(NULL, "flycast", PA_STREAM_PLAYBACK, NULL, "flycast", &ss, NULL, NULL, &error); - if (!pulse_stream) - WARN_LOG(AUDIO, "PulseAudio: pa_simple_new failed: %s", pa_strerror(error)); -} - -static u32 pulseaudio_simple_push(const void* frame, u32 samples, bool wait) -{ - if (pa_simple_write(pulse_stream, frame, (size_t)samples * 4, NULL) < 0) - WARN_LOG(AUDIO, "PulseAudio: pa_simple_write() failed!"); - - return 0; -} - -static void pulseaudio_simple_term() -{ - if (pulse_stream != NULL) - { - // Make sure that every single sample was played - if (pa_simple_drain(pulse_stream, NULL) < 0) - WARN_LOG(AUDIO, "PulseAudio: pa_simple_drain() failed!"); - pa_simple_free(pulse_stream); - } -} - -static bool pulseaudio_init_record(u32 sampling_freq) -{ - static const pa_sample_spec ss = { - .format = PA_SAMPLE_S16LE, - .rate = sampling_freq, - .channels = 1 - }; - int error; - pulse_record = pa_simple_new(NULL, "flycast", PA_STREAM_RECORD, NULL, "flycast", &ss, NULL, NULL, &error); - if (pulse_record == nullptr) - { - INFO_LOG(AUDIO, "PulseAudio: pa_simple_new() failed: %s", pa_strerror(error)); - return false; - } - INFO_LOG(AUDIO, "PulseAudio: Successfully initialized capture device"); - - return true; -} - -static void pulseaudio_term_record() -{ - if (pulse_record != nullptr) - { - pa_simple_free(pulse_record); - pulse_record = nullptr; - } -} - -static u32 pulseaudio_record(void *buffer, u32 samples) -{ - if (pulse_record == nullptr) - return 0; - int error; - if (pa_simple_read(pulse_record, buffer, samples * 2, &error) < 0) - { - INFO_LOG(AUDIO, "PulseAudio: pa_simple_read() failed: %s", 
pa_strerror(error)); - return 0; - } - return samples; -} - -static audiobackend_t audiobackend_pulseaudio = { - "pulse", // Slug - "PulseAudio", // Name - &pulseaudio_simple_init, - &pulseaudio_simple_push, - &pulseaudio_simple_term, - NULL, - &pulseaudio_init_record, - &pulseaudio_record, - &pulseaudio_term_record, -}; - -#else // !PULSEAUDIO_SIMPLE - #include -pa_threaded_mainloop *mainloop; -pa_context *context; -pa_stream *stream; -pa_stream *record_stream; - -static void context_state_cb(pa_context *c, void *userdata) +class PulseAudioBackend : public AudioBackend { - switch (pa_context_get_state(c)) + pa_threaded_mainloop *mainloop = nullptr; + pa_context *context = nullptr; + pa_stream *stream = nullptr; + pa_stream *record_stream = nullptr; + + static void context_state_cb(pa_context *c, void *userdata) { - case PA_CONTEXT_READY: - case PA_CONTEXT_TERMINATED: - case PA_CONTEXT_FAILED: - pa_threaded_mainloop_signal(mainloop, 0); - break; - default: - break; - } -} - -#if 0 -static void stream_state_cb(pa_stream *s, void *data) -{ - switch (pa_stream_get_state(s)) - { - case PA_STREAM_READY: - case PA_STREAM_FAILED: - case PA_STREAM_TERMINATED: - pa_threaded_mainloop_signal(mainloop, 0); - break; - default: - break; - } -} - -static void stream_latency_update_cb(pa_stream *s, void *data) -{ - pa_threaded_mainloop_signal(mainloop, 0); -} - -static void underrun_update_cb(pa_stream *s, void *data) -{ - //DEBUG_LOG(AUDIO, "PulseAudio: buffer underrun"); -} - -static void buffer_attr_cb(pa_stream *s, void *data) -{ - const pa_buffer_attr *server_attr = pa_stream_get_buffer_attr(s); - if (server_attr) - { - u32 buffer_size = server_attr->tlength; - DEBUG_LOG(AUDIO, "PulseAudio: new buffer size %d", buffer_size); - } -} -#endif - -static void stream_request_cb(pa_stream *s, size_t length, void *data) -{ - pa_threaded_mainloop_signal(mainloop, 0); -} - -static void pulseaudio_init() -{ - mainloop = pa_threaded_mainloop_new(); - if (!mainloop) - { - WARN_LOG(AUDIO, 
"PulseAudio: pa_threaded_mainloop_new failed"); - return; - } - context = pa_context_new(pa_threaded_mainloop_get_api(mainloop), "flycast"); - if (!context) - { - WARN_LOG(AUDIO, "PulseAudio: pa_context_new failed"); - return; - } - pa_context_set_state_callback(context, context_state_cb, nullptr); - - if (pa_context_connect(context, nullptr, PA_CONTEXT_NOFLAGS, nullptr) < 0) - { - WARN_LOG(AUDIO, "PulseAudio: pa_context_connect failed"); - return; - } - - pa_threaded_mainloop_lock(mainloop); - if (pa_threaded_mainloop_start(mainloop) < 0) - { - WARN_LOG(AUDIO, "PulseAudio: pa_threaded_mainloop_start failed"); - return; - } - pa_threaded_mainloop_wait(mainloop); - - if (pa_context_get_state(context) != PA_CONTEXT_READY) - { - WARN_LOG(AUDIO, "PulseAudio: context isn't ready"); - return; - } - pa_sample_spec spec{ PA_SAMPLE_S16LE, 44100, 2 }; - - stream = pa_stream_new(context, "audio", &spec, NULL); - if (!stream) - { - WARN_LOG(AUDIO, "PulseAudio: pa_stream_new failed"); - return; - } - - //pa_stream_set_state_callback(stream, stream_state_cb, nullptr); - pa_stream_set_write_callback(stream, stream_request_cb, nullptr); - //pa_stream_set_latency_update_callback(stream, stream_latency_update_cb, nullptr); - //pa_stream_set_underflow_callback(stream, underrun_update_cb, nullptr); - //pa_stream_set_buffer_attr_callback(stream, buffer_attr_cb, nullptr); - - pa_buffer_attr buffer_attr; - buffer_attr.maxlength = -1; - buffer_attr.tlength = pa_usec_to_bytes(config::AudioBufferSize * PA_USEC_PER_SEC / 44100, &spec); - buffer_attr.prebuf = -1; - buffer_attr.minreq = -1; - buffer_attr.fragsize = -1; - - if (pa_stream_connect_playback(stream, nullptr, &buffer_attr, PA_STREAM_ADJUST_LATENCY, nullptr, nullptr) < 0) - { - WARN_LOG(AUDIO, "PulseAudio: pa_stream_connect_playback failed"); - return; - } - - pa_threaded_mainloop_wait(mainloop); - - if (pa_stream_get_state(stream) != PA_STREAM_READY) - { - WARN_LOG(AUDIO, "PulseAudio: stream isn't ready"); - return; - } - - const 
pa_buffer_attr *server_attr = pa_stream_get_buffer_attr(stream); - if (server_attr) - DEBUG_LOG(AUDIO, "PulseAudio: requested %d samples buffer, got %d", buffer_attr.tlength / 4, server_attr->tlength / 4); - - pa_threaded_mainloop_unlock(mainloop); -} - -static u32 pulseaudio_push(const void* frame, u32 samples, bool wait) -{ - const u8 *buf = (const u8 *)frame; - size_t size = samples * 4; - - pa_threaded_mainloop_lock(mainloop); - while (size) - { - size_t writable = std::min(size, pa_stream_writable_size(stream)); - - if (writable) + PulseAudioBackend *backend = (PulseAudioBackend *)userdata; + switch (pa_context_get_state(c)) { - pa_stream_write(stream, buf, writable, NULL, 0, PA_SEEK_RELATIVE); - buf += writable; - size -= writable; - } - else if (wait) - pa_threaded_mainloop_wait(mainloop); - else + case PA_CONTEXT_READY: + case PA_CONTEXT_TERMINATED: + case PA_CONTEXT_FAILED: + pa_threaded_mainloop_signal(backend->mainloop, 0); break; - } - pa_threaded_mainloop_unlock(mainloop); - - return 0; -} - -static void pulseaudio_term() -{ - if (mainloop) - pa_threaded_mainloop_stop(mainloop); - - if (stream) - { - pa_stream_disconnect(stream); - pa_stream_unref(stream); - } - if (context) - { - pa_context_disconnect(context); - pa_context_unref(context); + default: + break; + } } - if (mainloop) - pa_threaded_mainloop_free(mainloop); -} - -static bool pulseaudio_init_record(u32 sampling_freq) -{ - pa_sample_spec spec{ PA_SAMPLE_S16LE, sampling_freq, 1 }; - - record_stream = pa_stream_new(context, "record", &spec, NULL); - if (!record_stream) + static void stream_request_cb(pa_stream *s, size_t length, void *userdata) { - INFO_LOG(AUDIO, "PulseAudio: pa_stream_new failed"); - return false; + PulseAudioBackend *backend = (PulseAudioBackend *)userdata; + pa_threaded_mainloop_signal(backend->mainloop, 0); } - pa_threaded_mainloop_lock(mainloop); +public: + PulseAudioBackend() + : AudioBackend("pulse", "PulseAudio") {} - pa_buffer_attr buffer_attr; - buffer_attr.fragsize 
= 240 * 2; - buffer_attr.maxlength = buffer_attr.fragsize * 2; - - if (pa_stream_connect_record(record_stream, nullptr, &buffer_attr, PA_STREAM_NOFLAGS) < 0) + bool init() override { - INFO_LOG(AUDIO, "PulseAudio: pa_stream_connect_record failed"); - pa_stream_unref(record_stream); - record_stream = nullptr; - return false; - } - pa_threaded_mainloop_unlock(mainloop); - INFO_LOG(AUDIO, "PulseAudio: Successfully initialized capture device"); + mainloop = pa_threaded_mainloop_new(); + if (!mainloop) + { + WARN_LOG(AUDIO, "PulseAudio: pa_threaded_mainloop_new failed"); + return false; + } + context = pa_context_new(pa_threaded_mainloop_get_api(mainloop), "flycast"); + if (!context) + { + WARN_LOG(AUDIO, "PulseAudio: pa_context_new failed"); + term(); + return false; + } + pa_context_set_state_callback(context, context_state_cb, this); - return true; -} + if (pa_context_connect(context, nullptr, PA_CONTEXT_NOFLAGS, nullptr) < 0) + { + WARN_LOG(AUDIO, "PulseAudio: pa_context_connect failed"); + term(); + return false; + } -static void pulseaudio_term_record() -{ - if (record_stream != nullptr) - { pa_threaded_mainloop_lock(mainloop); - pa_stream_disconnect(record_stream); - pa_stream_unref(record_stream); - record_stream = nullptr; - pa_threaded_mainloop_unlock(mainloop); - } -} + if (pa_threaded_mainloop_start(mainloop) < 0) + { + WARN_LOG(AUDIO, "PulseAudio: pa_threaded_mainloop_start failed"); + pa_threaded_mainloop_unlock(mainloop); + term(); + return false; + } + pa_threaded_mainloop_wait(mainloop); -static u32 pulseaudio_record(void *buffer, u32 samples) -{ - if (record_stream == nullptr) - return 0; - pa_threaded_mainloop_lock(mainloop); - const void *data; - size_t size; - if (pa_stream_peek(record_stream, &data, &size) < 0) + if (pa_context_get_state(context) != PA_CONTEXT_READY) + { + WARN_LOG(AUDIO, "PulseAudio: context isn't ready"); + pa_threaded_mainloop_unlock(mainloop); + term(); + return false; + } + pa_sample_spec spec{ PA_SAMPLE_S16LE, 44100, 2 }; + + 
stream = pa_stream_new(context, "audio", &spec, NULL); + if (!stream) + { + WARN_LOG(AUDIO, "PulseAudio: pa_stream_new failed"); + pa_threaded_mainloop_unlock(mainloop); + term(); + return false; + } + + //pa_stream_set_state_callback(stream, stream_state_cb, this); + pa_stream_set_write_callback(stream, stream_request_cb, this); + //pa_stream_set_latency_update_callback(stream, stream_latency_update_cb, this); + //pa_stream_set_underflow_callback(stream, underrun_update_cb, this); + //pa_stream_set_buffer_attr_callback(stream, buffer_attr_cb, this); + + pa_buffer_attr buffer_attr; + buffer_attr.maxlength = -1; + buffer_attr.tlength = pa_usec_to_bytes(config::AudioBufferSize * PA_USEC_PER_SEC / 44100, &spec); + buffer_attr.prebuf = -1; + buffer_attr.minreq = -1; + buffer_attr.fragsize = -1; + + if (pa_stream_connect_playback(stream, nullptr, &buffer_attr, PA_STREAM_ADJUST_LATENCY, nullptr, nullptr) < 0) + { + WARN_LOG(AUDIO, "PulseAudio: pa_stream_connect_playback failed"); + pa_threaded_mainloop_unlock(mainloop); + term(); + return false; + } + + pa_threaded_mainloop_wait(mainloop); + + if (pa_stream_get_state(stream) != PA_STREAM_READY) + { + WARN_LOG(AUDIO, "PulseAudio: stream isn't ready"); + pa_threaded_mainloop_unlock(mainloop); + term(); + return false; + } + + const pa_buffer_attr *server_attr = pa_stream_get_buffer_attr(stream); + if (server_attr) + DEBUG_LOG(AUDIO, "PulseAudio: requested %d samples buffer, got %d", buffer_attr.tlength / 4, server_attr->tlength / 4); + + pa_threaded_mainloop_unlock(mainloop); + + return true; + } + + u32 push(const void* frame, u32 samples, bool wait) override { + const u8 *buf = (const u8 *)frame; + size_t size = samples * 4; + + pa_threaded_mainloop_lock(mainloop); + while (size) + { + size_t writable = std::min(size, pa_stream_writable_size(stream)); + + if (writable) + { + pa_stream_write(stream, buf, writable, NULL, 0, PA_SEEK_RELATIVE); + buf += writable; + size -= writable; + } + else if (wait) + 
pa_threaded_mainloop_wait(mainloop); + else + break; + } pa_threaded_mainloop_unlock(mainloop); - DEBUG_LOG(AUDIO, "PulseAudio: pa_stream_peek error"); + return 0; } - if (size == 0) + + void term() override { - pa_threaded_mainloop_unlock(mainloop); - return 0; + if (mainloop) + pa_threaded_mainloop_stop(mainloop); + + if (stream) + { + pa_stream_disconnect(stream); + pa_stream_unref(stream); + stream = nullptr; + } + if (context) + { + pa_context_disconnect(context); + pa_context_unref(context); + context = nullptr; + } + + if (mainloop) + pa_threaded_mainloop_free(mainloop); + mainloop = nullptr; } - size = std::min((size_t)samples * 2, size); - if (data != nullptr) - memcpy(buffer, data, size); - else - memset(buffer, 0, size); - pa_stream_drop(record_stream); - pa_threaded_mainloop_unlock(mainloop); - return size / 2; -} + bool initRecord(u32 sampling_freq) override + { + pa_sample_spec spec{ PA_SAMPLE_S16LE, sampling_freq, 1 }; -static audiobackend_t audiobackend_pulseaudio = { - "pulse", // Slug - "PulseAudio", // Name - &pulseaudio_init, - &pulseaudio_push, - &pulseaudio_term, - NULL, - &pulseaudio_init_record, - &pulseaudio_record, - &pulseaudio_term_record, + record_stream = pa_stream_new(context, "record", &spec, NULL); + if (!record_stream) + { + INFO_LOG(AUDIO, "PulseAudio: pa_stream_new failed"); + return false; + } + + pa_threaded_mainloop_lock(mainloop); + + pa_buffer_attr buffer_attr; + buffer_attr.fragsize = 240 * 2; + buffer_attr.maxlength = buffer_attr.fragsize * 2; + + if (pa_stream_connect_record(record_stream, nullptr, &buffer_attr, PA_STREAM_NOFLAGS) < 0) + { + INFO_LOG(AUDIO, "PulseAudio: pa_stream_connect_record failed"); + pa_stream_unref(record_stream); + record_stream = nullptr; + return false; + } + pa_threaded_mainloop_unlock(mainloop); + INFO_LOG(AUDIO, "PulseAudio: Successfully initialized capture device"); + + return true; + } + + void termRecord() override + { + if (record_stream != nullptr) + { + 
pa_threaded_mainloop_lock(mainloop); + pa_stream_disconnect(record_stream); + pa_stream_unref(record_stream); + record_stream = nullptr; + pa_threaded_mainloop_unlock(mainloop); + } + } + + u32 record(void *buffer, u32 samples) override + { + if (record_stream == nullptr) + return 0; + pa_threaded_mainloop_lock(mainloop); + const void *data; + size_t size; + if (pa_stream_peek(record_stream, &data, &size) < 0) + { + pa_threaded_mainloop_unlock(mainloop); + DEBUG_LOG(AUDIO, "PulseAudio: pa_stream_peek error"); + return 0; + } + if (size == 0) + { + pa_threaded_mainloop_unlock(mainloop); + return 0; + } + size = std::min((size_t)samples * 2, size); + if (data != nullptr) + memcpy(buffer, data, size); + else + memset(buffer, 0, size); + pa_stream_drop(record_stream); + pa_threaded_mainloop_unlock(mainloop); + + return size / 2; + } }; -#endif // !PULSEAUDIO_SIMPLE +static PulseAudioBackend pulseAudioBackend; -static bool pulse = RegisterAudioBackend(&audiobackend_pulseaudio); #endif diff --git a/core/oslib/audiobackend_sdl2.cpp b/core/oslib/audiobackend_sdl2.cpp index 53bce42f7..d15c6f7e2 100644 --- a/core/oslib/audiobackend_sdl2.cpp +++ b/core/oslib/audiobackend_sdl2.cpp @@ -7,224 +7,231 @@ #include #include -static SDL_AudioDeviceID audiodev; -static bool needs_resampling; -static cResetEvent read_wait; -static std::mutex stream_mutex; -static struct { - uint32_t prevs; - uint32_t *sample_buffer; -} audiobuf; -static unsigned sample_count = 0; +class SDLAudioBackend : AudioBackend +{ + SDL_AudioDeviceID audiodev {}; + bool needs_resampling = false; + cResetEvent read_wait; + std::mutex stream_mutex; + struct { + uint32_t prevs; + uint32_t *sample_buffer; + } audiobuf; + unsigned sample_count = 0; -static SDL_AudioDeviceID recorddev; -u8 recordbuf[480 * 4]; -std::atomic rec_read; -std::atomic rec_write; + SDL_AudioDeviceID recorddev {}; + u8 recordbuf[480 * 4]; + std::atomic rec_read; + std::atomic rec_write; -// To easily access samples. 
-union Sample { int16_t s[2]; uint32_t l; }; + // To easily access samples. + union Sample { int16_t s[2]; uint32_t l; }; -static float InterpolateCatmull4pt3oX(float x0, float x1, float x2, float x3, float t) { - return 0.45 * ((2 * x1) + t * ((-x0 + x2) + t * ((2 * x0 - 5 * x1 + 4 * x2 - x3) + t * (-x0 + 3 * x1 - 3 * x2 + x3)))); -} - -static void sdl2_audiocb(void* userdata, Uint8* stream, int len) { - stream_mutex.lock(); - // Wait until there's enough samples to feed the kraken - unsigned oslen = len / sizeof(uint32_t); - unsigned islen = needs_resampling ? oslen * 16 / 17 : oslen; - unsigned minlen = needs_resampling ? islen + 2 : islen; // Resampler looks ahead by 2 samples. - - if (sample_count < minlen) { - // No data, just output a bit of silence for the underrun - memset(stream, 0, len); - stream_mutex.unlock(); - read_wait.Set(); - return; + static float InterpolateCatmull4pt3oX(float x0, float x1, float x2, float x3, float t) { + return 0.45 * ((2 * x1) + t * ((-x0 + x2) + t * ((2 * x0 - 5 * x1 + 4 * x2 - x3) + t * (-x0 + 3 * x1 - 3 * x2 + x3)))); } - if (!needs_resampling) { - // Just copy bytes for this case. - memcpy(stream, &audiobuf.sample_buffer[0], len); + static void audioCallback(void* userdata, Uint8* stream, int len) + { + SDLAudioBackend *backend = (SDLAudioBackend *)userdata; + + backend->stream_mutex.lock(); + // Wait until there's enough samples to feed the kraken + unsigned oslen = len / sizeof(uint32_t); + unsigned islen = backend->needs_resampling ? oslen * 16 / 17 : oslen; + unsigned minlen = backend->needs_resampling ? islen + 2 : islen; // Resampler looks ahead by 2 samples. + + if (backend->sample_count < minlen) { + // No data, just output a bit of silence for the underrun + memset(stream, 0, len); + backend->stream_mutex.unlock(); + backend->read_wait.Set(); + return; + } + + if (!backend->needs_resampling) { + // Just copy bytes for this case. 
+ memcpy(stream, &backend->audiobuf.sample_buffer[0], len); + } + else { + // 44.1KHz to 48KHz (actually 46.86KHz) resampling + uint32_t *outbuf = (uint32_t*)stream; + const float ra = 1.0f / 17; + Sample *sbuf = (Sample*)&backend->audiobuf.sample_buffer[0]; // [-1] stores the previous iteration last sample output + for (u32 i = 0; i < islen/16; i++) { + *outbuf++ = sbuf[i*16+ 0].l; // First sample stays at the same location. + for (int k = 1; k < 17; k++) { + Sample r; + // Note we access offset -1 on first iteration, as to access prevs + r.s[0] = InterpolateCatmull4pt3oX(sbuf[i*16+k-2].s[0], sbuf[i*16+k-1].s[0], sbuf[i*16+k].s[0], sbuf[i*16+k+1].s[0], 1 - ra*k); + r.s[1] = InterpolateCatmull4pt3oX(sbuf[i*16+k-2].s[1], sbuf[i*16+k-1].s[1], sbuf[i*16+k].s[1], sbuf[i*16+k+1].s[1], 1 - ra*k); + *outbuf++ = r.l; + } + } + backend->audiobuf.prevs = backend->audiobuf.sample_buffer[islen-1]; + } + + // Move samples in the buffer and consume them + memmove(&backend->audiobuf.sample_buffer[0], &backend->audiobuf.sample_buffer[islen], (backend->sample_count-islen)*sizeof(uint32_t)); + backend->sample_count -= islen; + + backend->stream_mutex.unlock(); + backend->read_wait.Set(); } - else { - // 44.1KHz to 48KHz (actually 46.86KHz) resampling - uint32_t *outbuf = (uint32_t*)stream; - const float ra = 1.0f / 17; - Sample *sbuf = (Sample*)&audiobuf.sample_buffer[0]; // [-1] stores the previous iteration last sample output - for (u32 i = 0; i < islen/16; i++) { - *outbuf++ = sbuf[i*16+ 0].l; // First sample stays at the same location. 
- for (int k = 1; k < 17; k++) { - Sample r; - // Note we access offset -1 on first iteration, as to access prevs - r.s[0] = InterpolateCatmull4pt3oX(sbuf[i*16+k-2].s[0], sbuf[i*16+k-1].s[0], sbuf[i*16+k].s[0], sbuf[i*16+k+1].s[0], 1 - ra*k); - r.s[1] = InterpolateCatmull4pt3oX(sbuf[i*16+k-2].s[1], sbuf[i*16+k-1].s[1], sbuf[i*16+k].s[1], sbuf[i*16+k+1].s[1], 1 - ra*k); - *outbuf++ = r.l; + +public: + SDLAudioBackend() + : AudioBackend("sdl2", "Simple DirectMedia Layer 2 Audio") {} + + bool init() override + { + if (!SDL_WasInit(SDL_INIT_AUDIO)) + { + if (SDL_InitSubSystem(SDL_INIT_AUDIO)) { + ERROR_LOG(AUDIO, "SDL2 error initializing audio subsystem: %s", SDL_GetError()); + return false; } } - audiobuf.prevs = audiobuf.sample_buffer[islen-1]; - } - - // Move samples in the buffer and consume them - memmove(&audiobuf.sample_buffer[0], &audiobuf.sample_buffer[islen], (sample_count-islen)*sizeof(uint32_t)); - sample_count -= islen; - - stream_mutex.unlock(); - read_wait.Set(); -} - -static void sdl2_audio_init() { - if (!SDL_WasInit(SDL_INIT_AUDIO)) - { - if (SDL_InitSubSystem(SDL_INIT_AUDIO)) - ERROR_LOG(AUDIO, "SDL2 error initializing audio subsystem: %s", SDL_GetError()); - } - - audiobuf.sample_buffer = new uint32_t[config::AudioBufferSize](); - - // Support 44.1KHz (native) but also upsampling to 48KHz - SDL_AudioSpec wav_spec, out_spec; - memset(&wav_spec, 0, sizeof(wav_spec)); - wav_spec.freq = 44100; - wav_spec.format = AUDIO_S16; - wav_spec.channels = 2; - wav_spec.samples = SAMPLE_COUNT * 2; // Must be power of two - wav_spec.callback = sdl2_audiocb; - // Try 44.1KHz which should be faster since it's native. 
- audiodev = SDL_OpenAudioDevice(NULL, 0, &wav_spec, &out_spec, 0); - if (!audiodev) - { - WARN_LOG(AUDIO, "SDL2: SDL_OpenAudioDevice failed: %s", SDL_GetError()); - needs_resampling = true; - wav_spec.freq = 48000; + audiobuf.sample_buffer = new uint32_t[config::AudioBufferSize](); + + // Support 44.1KHz (native) but also upsampling to 48KHz + SDL_AudioSpec wav_spec, out_spec; + memset(&wav_spec, 0, sizeof(wav_spec)); + wav_spec.freq = 44100; + wav_spec.format = AUDIO_S16; + wav_spec.channels = 2; + wav_spec.samples = SAMPLE_COUNT * 2; // Must be power of two + wav_spec.callback = audioCallback; + wav_spec.userdata = this; + + // Try 44.1KHz which should be faster since it's native. audiodev = SDL_OpenAudioDevice(NULL, 0, &wav_spec, &out_spec, 0); - if (!audiodev) - ERROR_LOG(AUDIO, "SDL2: SDL_OpenAudioDevice failed: %s", SDL_GetError()); - else - INFO_LOG(AUDIO, "SDL2: Using resampling to 48 KHz"); + if (audiodev == 0) + { + WARN_LOG(AUDIO, "SDL2: SDL_OpenAudioDevice failed: %s", SDL_GetError()); + needs_resampling = true; + wav_spec.freq = 48000; + audiodev = SDL_OpenAudioDevice(NULL, 0, &wav_spec, &out_spec, 0); + if (audiodev == 0) + ERROR_LOG(AUDIO, "SDL2: SDL_OpenAudioDevice failed: %s", SDL_GetError()); + else + INFO_LOG(AUDIO, "SDL2: Using resampling to 48 KHz"); + } + + return audiodev != 0; } -} -static u32 sdl2_audio_push(const void* frame, u32 samples, bool wait) { - // Unpause the device shall it be paused. - if (SDL_GetAudioDeviceStatus(audiodev) != SDL_AUDIO_PLAYING) - SDL_PauseAudioDevice(audiodev, 0); + u32 push(const void* frame, u32 samples, bool wait) override + { + // Unpause the device shall it be paused. + if (SDL_GetAudioDeviceStatus(audiodev) != SDL_AUDIO_PLAYING) + SDL_PauseAudioDevice(audiodev, 0); - // If wait, then wait for the buffer to be smaller than a certain size. 
- stream_mutex.lock(); - if (wait) { - while (sample_count + samples > (u32)config::AudioBufferSize) { - stream_mutex.unlock(); - read_wait.Wait(); - stream_mutex.lock(); + // If wait, then wait for the buffer to be smaller than a certain size. + stream_mutex.lock(); + if (wait) { + while (sample_count + samples > (u32)config::AudioBufferSize) { + stream_mutex.unlock(); + read_wait.Wait(); + stream_mutex.lock(); + } + } + + // Copy as many samples as possible, drop any remaining (this should not happen usually) + unsigned free_samples = config::AudioBufferSize - sample_count; + unsigned tocopy = samples < free_samples ? samples : free_samples; + memcpy(&audiobuf.sample_buffer[sample_count], frame, tocopy * sizeof(uint32_t)); + sample_count += tocopy; + stream_mutex.unlock(); + + return 1; + } + + void term() override + { + if (audiodev) + { + // Stop audio playback. + SDL_PauseAudioDevice(audiodev, 1); + read_wait.Set(); + SDL_CloseAudioDevice(audiodev); + audiodev = SDL_AudioDeviceID(); + } + delete [] audiobuf.sample_buffer; + audiobuf.sample_buffer = nullptr; + } + + static void recordCallback(void *userdata, u8 *stream, int len) + { + SDLAudioBackend *backend = (SDLAudioBackend *)userdata; + DEBUG_LOG(AUDIO, "SDL2: sdl2_record_cb len %d write %zd read %zd", len, (size_t)backend->rec_write, (size_t)backend->rec_read); + while (len > 0) + { + size_t plen = std::min((size_t)len, sizeof(backend->recordbuf) - backend->rec_write); + memcpy(&backend->recordbuf[backend->rec_write], stream, plen); + len -= plen; + backend->rec_write = (backend->rec_write + plen) % sizeof(backend->recordbuf); + stream += plen; } } - // Copy as many samples as possible, drop any remaining (this should not happen usually) - unsigned free_samples = config::AudioBufferSize - sample_count; - unsigned tocopy = samples < free_samples ? 
samples : free_samples; - memcpy(&audiobuf.sample_buffer[sample_count], frame, tocopy * sizeof(uint32_t)); - sample_count += tocopy; - stream_mutex.unlock(); - - return 1; -} - -static void sdl2_audio_term() { - if (audiodev) + bool initRecord(u32 sampling_freq) override { - // Stop audio playback. - SDL_PauseAudioDevice(audiodev, 1); - read_wait.Set(); - SDL_CloseAudioDevice(audiodev); - audiodev = SDL_AudioDeviceID(); - } - delete [] audiobuf.sample_buffer; - audiobuf.sample_buffer = nullptr; -} + rec_write = 0; + rec_read = 0; -void sdl2_record_cb(void *userdata, u8 *stream, int len) -{ - DEBUG_LOG(AUDIO, "SDL2: sdl2_record_cb len %d write %zd read %zd", len, (size_t)rec_write, (size_t)rec_read); - while (len > 0) + SDL_AudioSpec wav_spec, out_spec; + memset(&wav_spec, 0, sizeof(wav_spec)); + wav_spec.freq = sampling_freq; + wav_spec.format = AUDIO_S16; + wav_spec.channels = 1; + wav_spec.samples = 256; // Must be power of two + wav_spec.callback = recordCallback; + wav_spec.userdata = this; + recorddev = SDL_OpenAudioDevice(NULL, 1, &wav_spec, &out_spec, 0); + if (recorddev == 0) + { + ERROR_LOG(AUDIO, "SDL2: Cannot open audio capture device: %s", SDL_GetError()); + return false; + } + SDL_PauseAudioDevice(recorddev, 0); + INFO_LOG(AUDIO, "SDL2: opened audio capture device"); + + return true; + } + + void termRecord() override { - size_t plen = std::min((size_t)len, sizeof(recordbuf) - rec_write); - memcpy(&recordbuf[rec_write], stream, plen); - len -= plen; - rec_write = (rec_write + plen) % sizeof(recordbuf); - stream += plen; + if (recorddev != 0) + { + SDL_PauseAudioDevice(recorddev, 1); + SDL_CloseAudioDevice(recorddev); + recorddev = 0; + } } -} -static bool sdl2_record_init(u32 sampling_freq) -{ - rec_write = 0; - rec_read = 0; - - SDL_AudioSpec wav_spec, out_spec; - memset(&wav_spec, 0, sizeof(wav_spec)); - wav_spec.freq = sampling_freq; - wav_spec.format = AUDIO_S16; - wav_spec.channels = 1; - wav_spec.samples = 256; // Must be power of two - 
wav_spec.callback = sdl2_record_cb; - recorddev = SDL_OpenAudioDevice(NULL, 1, &wav_spec, &out_spec, 0); - if (recorddev == 0) + u32 record(void* frame, u32 samples) override { - INFO_LOG(AUDIO, "SDL2: Cannot open audio capture device: %s", SDL_GetError()); - return false; + u32 count = 0; + samples *= 2; + while (samples > 0) + { + u32 avail = std::min(rec_write - rec_read, sizeof(recordbuf) - rec_read); + if (avail == 0) + break; + avail = std::min(avail, samples); + memcpy((u8 *)frame + count, &recordbuf[rec_read], avail); + rec_read = (rec_read + avail) % sizeof(recordbuf); + samples -= avail; + count += avail; + } + DEBUG_LOG(AUDIO, "SDL2: sdl2_record len %d ret %d write %zd read %zd", samples * 2, count, (size_t)rec_write, (size_t)rec_read); + + return count / 2; } - SDL_PauseAudioDevice(recorddev, 0); - INFO_LOG(AUDIO, "SDL2: opened audio capture device"); - - return true; -} - -static void sdl2_record_term() -{ - if (recorddev != 0) - { - SDL_PauseAudioDevice(recorddev, 1); - SDL_CloseAudioDevice(recorddev); - recorddev = 0; - } -} - -static u32 sdl2_record(void* frame, u32 samples) -{ - u32 count = 0; - samples *= 2; - while (samples > 0) - { - u32 avail = std::min(rec_write - rec_read, sizeof(recordbuf) - rec_read); - if (avail == 0) - break; - avail = std::min(avail, samples); - memcpy((u8 *)frame + count, &recordbuf[rec_read], avail); - rec_read = (rec_read + avail) % sizeof(recordbuf); - samples -= avail; - count += avail; - } - DEBUG_LOG(AUDIO, "SDL2: sdl2_record len %d ret %d write %zd read %zd", samples * 2, count, (size_t)rec_write, (size_t)rec_read); - - return count / 2; -} - -static audiobackend_t audiobackend_sdl2audio = { - "sdl2", // Slug - "Simple DirectMedia Layer 2 Audio", // Name - &sdl2_audio_init, - &sdl2_audio_push, - &sdl2_audio_term, - NULL, - &sdl2_record_init, - &sdl2_record, - &sdl2_record_term }; - -static bool sdl2audiobe = RegisterAudioBackend(&audiobackend_sdl2audio); +static SDLAudioBackend sdlAudioBackend; #endif diff --git 
a/core/oslib/audiostream.cpp b/core/oslib/audiostream.cpp index 2852b739f..0cfc1a94f 100644 --- a/core/oslib/audiostream.cpp +++ b/core/oslib/audiostream.cpp @@ -6,74 +6,42 @@ struct SoundFrame { s16 l; s16 r; }; static SoundFrame Buffer[SAMPLE_COUNT]; static u32 writePtr; // next sample index -static audiobackend_t *audiobackend_current = nullptr; -static std::unique_ptr> audiobackends; // Using a pointer to avoid out of order init +static AudioBackend *currentBackend; +std::vector *AudioBackend::backends; static bool audio_recording_started; static bool eight_khz; -u32 GetAudioBackendCount() +AudioBackend *AudioBackend::getBackend(const std::string& slug) { - return audiobackends != nullptr ? (u32)audiobackends->size() : 0; -} - -audiobackend_t* GetAudioBackend(int num) -{ - return audiobackends->at(num); -} - -static void SortAudioBackends() -{ - if (audiobackends != nullptr) - std::sort(audiobackends->begin(), audiobackends->end(), [](audiobackend_t *b1, audiobackend_t *b2) { return b1->slug < b2->slug; }); -} - -bool RegisterAudioBackend(audiobackend_t *backend) -{ - verify(backend != nullptr); - verify(!backend->slug.empty() && backend->slug != "auto"); - - if (audiobackends == nullptr) - audiobackends = std::unique_ptr>(new std::vector()); - audiobackends->push_back(backend); - SortAudioBackends(); - - return true; -} - -audiobackend_t* GetAudioBackend(const std::string& slug) -{ - if (audiobackends != nullptr && !audiobackends->empty()) + if (backends == nullptr) + return nullptr; + if (slug == "auto") { - if (slug == "auto") + // Prefer sdl2 if available and avoid the null driver + AudioBackend *sdlBackend = nullptr; + AudioBackend *autoBackend = nullptr; + for (auto backend : *backends) { - // Don't select the null or OpenSL/Oboe drivers - audiobackend_t *autoselection = nullptr; - for (auto backend : *audiobackends) - if (backend->slug != "null" && backend->slug != "OpenSL" && backend->slug != "Oboe") - { - autoselection = backend; - break; - } - if 
(autoselection == nullptr) - autoselection = audiobackends->front(); - INFO_LOG(AUDIO, "Auto-selected audio backend \"%s\" (%s).", autoselection->slug.c_str(), autoselection->name.c_str()); - return autoselection; - } - else - { - for (auto backend : *audiobackends) - { - if (backend->slug == slug) - return backend; - } - WARN_LOG(AUDIO, "WARNING: Audio backend \"%s\" not found!", slug.c_str()); + if (backend->slug == "sdl2") + sdlBackend = backend; + if (backend->slug != "null" && autoBackend == nullptr) + autoBackend = backend; } + if (sdlBackend != nullptr) + autoBackend = sdlBackend; + if (autoBackend == nullptr) + autoBackend = backends->front(); + INFO_LOG(AUDIO, "Auto-selected audio backend \"%s\" (%s).", autoBackend->slug.c_str(), autoBackend->name.c_str()); + + return autoBackend; } - else + for (auto backend : *backends) { - WARN_LOG(AUDIO, "WARNING: No audio backends available!"); + if (backend->slug == slug) + return backend; } + WARN_LOG(AUDIO, "WARNING: Audio backend \"%s\" not found!", slug.c_str()); return nullptr; } @@ -84,8 +52,8 @@ void WriteSample(s16 r, s16 l) if (++writePtr == SAMPLE_COUNT) { - if (audiobackend_current != nullptr) - audiobackend_current->push(Buffer, SAMPLE_COUNT, config::LimitFPS); + if (currentBackend != nullptr) + currentBackend->push(Buffer, SAMPLE_COUNT, config::LimitFPS); writePtr = 0; } } @@ -94,17 +62,36 @@ void InitAudio() { TermAudio(); - SortAudioBackends(); + std::string slug = config::AudioBackend; + currentBackend = AudioBackend::getBackend(slug); + if (currentBackend == nullptr && slug != "auto") + { + slug = "auto"; + currentBackend = AudioBackend::getBackend(slug); + } + if (currentBackend != nullptr) + { + INFO_LOG(AUDIO, "Initializing audio backend \"%s\" (%s)...", currentBackend->slug.c_str(), currentBackend->name.c_str()); + if (!currentBackend->init()) + { + currentBackend = nullptr; + if (slug != "auto") + { + WARN_LOG(AUDIO, "Audio driver %s failed to initialize. 
Defaulting to 'auto'", slug.c_str()); + slug = "auto"; + currentBackend = AudioBackend::getBackend(slug); + if (!currentBackend->init()) + currentBackend = nullptr; + } + } + } - std::string audiobackend_slug = config::AudioBackend; - audiobackend_current = GetAudioBackend(audiobackend_slug); - if (audiobackend_current == nullptr) { - INFO_LOG(AUDIO, "WARNING: Running without audio!"); + if (currentBackend == nullptr) + { + WARN_LOG(AUDIO, "Running without audio!"); return; } - INFO_LOG(AUDIO, "Initializing audio backend \"%s\" (%s)...", audiobackend_current->slug.c_str(), audiobackend_current->name.c_str()); - audiobackend_current->init(); if (audio_recording_started) { // Restart recording @@ -115,26 +102,23 @@ void InitAudio() void TermAudio() { - if (audiobackend_current != nullptr) { - // Save recording state before stopping - bool rec_started = audio_recording_started; - StopAudioRecording(); - audio_recording_started = rec_started; - audiobackend_current->term(); - INFO_LOG(AUDIO, "Terminating audio backend \"%s\" (%s)...", audiobackend_current->slug.c_str(), audiobackend_current->name.c_str()); - audiobackend_current = nullptr; - } + if (currentBackend == nullptr) + return; + + // Save recording state before stopping + bool rec_started = audio_recording_started; + StopAudioRecording(); + audio_recording_started = rec_started; + currentBackend->term(); + INFO_LOG(AUDIO, "Terminating audio backend \"%s\" (%s)...", currentBackend->slug.c_str(), currentBackend->name.c_str()); + currentBackend = nullptr; } void StartAudioRecording(bool eight_khz) { ::eight_khz = eight_khz; - if (audiobackend_current != nullptr) - { - audio_recording_started = false; - if (audiobackend_current->init_record != nullptr) - audio_recording_started = audiobackend_current->init_record(eight_khz ? 8000 : 11025); - } + if (currentBackend != nullptr) + audio_recording_started = currentBackend->initRecord(eight_khz ? 
8000 : 11025); else // might be called between TermAudio/InitAudio audio_recording_started = true; @@ -142,15 +126,15 @@ void StartAudioRecording(bool eight_khz) u32 RecordAudio(void *buffer, u32 samples) { - if (!audio_recording_started || audiobackend_current == nullptr) + if (!audio_recording_started || currentBackend == nullptr) return 0; - return audiobackend_current->record(buffer, samples); + return currentBackend->record(buffer, samples); } void StopAudioRecording() { // might be called between TermAudio/InitAudio - if (audio_recording_started && audiobackend_current != nullptr && audiobackend_current->term_record != nullptr) - audiobackend_current->term_record(); + if (audio_recording_started && currentBackend != nullptr) + currentBackend->termRecord(); audio_recording_started = false; } diff --git a/core/oslib/audiostream.h b/core/oslib/audiostream.h index 00cc749af..b80e04815 100644 --- a/core/oslib/audiostream.h +++ b/core/oslib/audiostream.h @@ -5,44 +5,58 @@ #include #include -typedef std::vector (*audio_option_callback_t)(); -enum audio_option_type +class AudioBackend { - integer = 0 -, checkbox = 1 -, list = 2 +public: + virtual ~AudioBackend() = default; + + virtual bool init() = 0; + virtual u32 push(const void *data, u32 frames, bool wait) = 0; + virtual void term() {} + + struct Option { + std::string name; + std::string caption; + enum { integer, checkbox, list } type; + + int minValue; + int maxValue; + std::vector values; + }; + virtual const Option *getOptions(int *count) { + *count = 0; + return nullptr; + } + + virtual bool initRecord(u32 sampling_freq) { return false; } + virtual u32 record(void *, u32) { return 0; } + virtual void termRecord() {} + + std::string slug; + std::string name; + + static size_t getCount() { return backends == nullptr ? 0 : backends->size(); } + static AudioBackend *getBackend(size_t index) { return backends == nullptr ? 
nullptr : (*backends)[index]; } + static AudioBackend *getBackend(const std::string& slug); + +protected: + AudioBackend(const std::string& slug, const std::string& name) + : slug(slug), name(name) { + registerAudioBackend(this); + } + +private: + static void registerAudioBackend(AudioBackend *backend) + { + if (backends == nullptr) + backends = new std::vector(); + backends->push_back(backend); + std::sort(backends->begin(), backends->end(), [](AudioBackend *b1, AudioBackend *b2) { return b1->slug < b2->slug; }); + } + + static std::vector *backends; }; -typedef struct { - std::string cfg_name; - std::string caption; - audio_option_type type; - - // type int_value (spin edit) - int min_value; - int max_value; - - // type list edit (string/char*) - audio_option_callback_t list_callback; -} audio_option_t; - -typedef audio_option_t* (*audio_options_func_t)(int* option_count); - -typedef void (*audio_backend_init_func_t)(); -typedef u32 (*audio_backend_push_func_t)(const void *data, u32 frames, bool wait); -typedef void (*audio_backend_term_func_t)(); -typedef struct { - std::string slug; - std::string name; - audio_backend_init_func_t init; - audio_backend_push_func_t push; - audio_backend_term_func_t term; - audio_options_func_t get_options; - bool (*init_record)(u32 sampling_freq); - u32 (*record)(void *, u32); - audio_backend_term_func_t term_record; -} audiobackend_t; -bool RegisterAudioBackend(audiobackend_t* backend); void InitAudio(); void TermAudio(); void WriteSample(s16 right, s16 left); @@ -51,11 +65,7 @@ void StartAudioRecording(bool eight_khz); u32 RecordAudio(void *buffer, u32 samples); void StopAudioRecording(); -u32 GetAudioBackendCount(); -audiobackend_t* GetAudioBackend(int num); -audiobackend_t* GetAudioBackend(const std::string& slug); - -constexpr u32 SAMPLE_COUNT = 512; // push() is always called with that many frames +constexpr u32 SAMPLE_COUNT = 512; // AudioBackend::push() is always called with that many frames class RingBuffer { diff --git 
a/core/rend/gui.cpp b/core/rend/gui.cpp index ddf66010f..3f09670b4 100644 --- a/core/rend/gui.cpp +++ b/core/rend/gui.cpp @@ -1984,25 +1984,25 @@ static void gui_display_settings() ShowHelpMarker("Sets the maximum audio latency. Not supported by all audio drivers."); } - audiobackend_t* backend = nullptr; + AudioBackend *backend = nullptr; std::string backend_name = config::AudioBackend; if (backend_name != "auto") { - backend = GetAudioBackend(config::AudioBackend); - if (backend != NULL) + backend = AudioBackend::getBackend(config::AudioBackend); + if (backend != nullptr) backend_name = backend->slug; } - audiobackend_t* current_backend = backend; + AudioBackend *current_backend = backend; if (ImGui::BeginCombo("Audio Driver", backend_name.c_str(), ImGuiComboFlags_None)) { bool is_selected = (config::AudioBackend.get() == "auto"); if (ImGui::Selectable("auto - Automatic driver selection", &is_selected)) config::AudioBackend.set("auto"); - for (u32 i = 0; i < GetAudioBackendCount(); i++) + for (u32 i = 0; i < AudioBackend::getCount(); i++) { - backend = GetAudioBackend(i); + backend = AudioBackend::getBackend(i); is_selected = (config::AudioBackend.get() == backend->slug); if (is_selected) @@ -2018,42 +2018,42 @@ static void gui_display_settings() ImGui::SameLine(); ShowHelpMarker("The audio driver to use"); - if (current_backend != NULL && current_backend->get_options != NULL) + if (current_backend != nullptr) { // get backend specific options int option_count; - audio_option_t* options = current_backend->get_options(&option_count); + const AudioBackend::Option *options = current_backend->getOptions(&option_count); for (int o = 0; o < option_count; o++) { - std::string value = cfgLoadStr(current_backend->slug, options->cfg_name, ""); + std::string value = cfgLoadStr(current_backend->slug, options->name, ""); - if (options->type == integer) + if (options->type == AudioBackend::Option::integer) { int val = stoi(value); - if 
(ImGui::SliderInt(options->caption.c_str(), &val, options->min_value, options->max_value)) + if (ImGui::SliderInt(options->caption.c_str(), &val, options->minValue, options->maxValue)) { std::string s = std::to_string(val); - cfgSaveStr(current_backend->slug, options->cfg_name, s); + cfgSaveStr(current_backend->slug, options->name, s); } } - else if (options->type == checkbox) + else if (options->type == AudioBackend::Option::checkbox) { bool check = value == "1"; if (ImGui::Checkbox(options->caption.c_str(), &check)) - cfgSaveStr(current_backend->slug, options->cfg_name, + cfgSaveStr(current_backend->slug, options->name, check ? "1" : "0"); } - else if (options->type == ::list) + else if (options->type == AudioBackend::Option::list) { if (ImGui::BeginCombo(options->caption.c_str(), value.c_str(), ImGuiComboFlags_None)) { bool is_selected = false; - for (const auto& cur : options->list_callback()) + for (const auto& cur : options->values) { is_selected = value == cur; if (ImGui::Selectable(cur.c_str(), &is_selected)) - cfgSaveStr(current_backend->slug, options->cfg_name, cur); + cfgSaveStr(current_backend->slug, options->name, cur); if (is_selected) ImGui::SetItemDefaultFocus(); diff --git a/shell/android-studio/flycast/src/main/java/com/reicast/emulator/emu/AudioBackend.java b/shell/android-studio/flycast/src/main/java/com/reicast/emulator/emu/AudioBackend.java index 3a514fbe8..3654251f7 100644 --- a/shell/android-studio/flycast/src/main/java/com/reicast/emulator/emu/AudioBackend.java +++ b/shell/android-studio/flycast/src/main/java/com/reicast/emulator/emu/AudioBackend.java @@ -24,7 +24,7 @@ public final class AudioBackend { } // Called by native code - private void init(int bufferSize) + private boolean init(int bufferSize) { if (bufferSize == 0) bufferSize = AudioTrack.getMinBufferSize(44100, AudioFormat.CHANNEL_OUT_STEREO, AudioFormat.ENCODING_PCM_16BIT); @@ -42,16 +42,16 @@ public final class AudioBackend { if (audioTrack.getState() != STATE_INITIALIZED) { 
audioTrack = null; release(); - Log.e("reicast", "Error initializing AudioTrack. Disabling sound"); + Log.e("audio", "Error initializing AudioTrack. Disabling sound"); + return false; } - else { - size = bufferSize / 4; - writePosition = 0; + size = bufferSize / 4; + writePosition = 0; - Log.i("audcfg", "Audio streaming: buffer size " + size + " samples / " + size * 1000.0 / 44100.0 + " ms"); - audioTrack.play(); - } + Log.i("audio", "Audio streaming: buffer size " + size + " samples / " + size * 1000.0 / 44100.0 + " ms"); + audioTrack.play(); + return true; } // Called by native code diff --git a/shell/android-studio/flycast/src/main/jni/src/Android.cpp b/shell/android-studio/flycast/src/main/jni/src/Android.cpp index b0a72e136..4f9e0b49c 100644 --- a/shell/android-studio/flycast/src/main/jni/src/Android.cpp +++ b/shell/android-studio/flycast/src/main/jni/src/Android.cpp @@ -416,63 +416,56 @@ extern "C" JNIEXPORT void JNICALL Java_com_reicast_emulator_emu_JNIdc_guiSetInse } // Audio Stuff -static u32 androidaudio_push(const void* frame, u32 amt, bool wait) +class AndroidAudioBackend : AudioBackend { - jvm_attacher.getEnv()->SetShortArrayRegion(jsamples, 0, amt * 2, (jshort *)frame); - return jvm_attacher.getEnv()->CallIntMethod(g_audioBackend, writeBufferMid, jsamples, wait); -} +public: + AndroidAudioBackend() + : AudioBackend("android", "Android Audio") {} -static void androidaudio_init() -{ - jint bufferSize = config::AutoLatency ? 
0 : config::AudioBufferSize; - jvm_attacher.getEnv()->CallVoidMethod(g_audioBackend, audioInitMid, bufferSize); -} + u32 push(const void* frame, u32 amt, bool wait) override + { + jvm_attacher.getEnv()->SetShortArrayRegion(jsamples, 0, amt * 2, (jshort *)frame); + return jvm_attacher.getEnv()->CallIntMethod(g_audioBackend, writeBufferMid, jsamples, wait); + } -static void androidaudio_term() -{ - jvm_attacher.getEnv()->CallVoidMethod(g_audioBackend, audioTermMid); -} + bool init() override + { + jint bufferSize = config::AutoLatency ? 0 : config::AudioBufferSize; + return jvm_attacher.getEnv()->CallBooleanMethod(g_audioBackend, audioInitMid, bufferSize); + } -static bool androidaudio_init_record(u32 sampling_freq) -{ - if (sipemu == nullptr) - return false; - jvm_attacher.getEnv()->CallVoidMethod(sipemu, startRecordingMid, sampling_freq); - return true; -} + void term() override + { + jvm_attacher.getEnv()->CallVoidMethod(g_audioBackend, audioTermMid); + } -static void androidaudio_term_record() -{ - jvm_attacher.getEnv()->CallVoidMethod(sipemu, stopRecordingMid); -} + bool initRecord(u32 sampling_freq) override + { + if (sipemu == nullptr) + return false; + jvm_attacher.getEnv()->CallVoidMethod(sipemu, startRecordingMid, sampling_freq); + return true; + } -static u32 androidaudio_record(void *buffer, u32 samples) -{ - jbyteArray jdata = (jbyteArray)jvm_attacher.getEnv()->CallObjectMethod(sipemu, getmicdata, samples); - if (jdata == NULL) - return 0; - jsize size = jvm_attacher.getEnv()->GetArrayLength(jdata); - samples = std::min(samples, (u32)size * 2); - jvm_attacher.getEnv()->GetByteArrayRegion(jdata, 0, samples * 2, (jbyte*)buffer); - jvm_attacher.getEnv()->DeleteLocalRef(jdata); + void termRecord() override + { + jvm_attacher.getEnv()->CallVoidMethod(sipemu, stopRecordingMid); + } - return samples; -} + u32 record(void *buffer, u32 samples) override + { + jbyteArray jdata = (jbyteArray)jvm_attacher.getEnv()->CallObjectMethod(sipemu, getmicdata, samples); + if 
(jdata == NULL) + return 0; + jsize size = jvm_attacher.getEnv()->GetArrayLength(jdata); + samples = std::min(samples, (u32)size * 2); + jvm_attacher.getEnv()->GetByteArrayRegion(jdata, 0, samples * 2, (jbyte*)buffer); + jvm_attacher.getEnv()->DeleteLocalRef(jdata); -audiobackend_t audiobackend_android = { - "android", // Slug - "Android Audio", // Name - &androidaudio_init, - &androidaudio_push, - &androidaudio_term, - NULL, - &androidaudio_init_record, - &androidaudio_record, - &androidaudio_term_record + return samples; + } }; - -static bool android = RegisterAudioBackend(&audiobackend_android); - +static AndroidAudioBackend androidAudioBackend; extern "C" JNIEXPORT void JNICALL Java_com_reicast_emulator_emu_AudioBackend_setInstance(JNIEnv *env, jobject obj, jobject instance) { @@ -488,7 +481,7 @@ extern "C" JNIEXPORT void JNICALL Java_com_reicast_emulator_emu_AudioBackend_set else { g_audioBackend = env->NewGlobalRef(instance); writeBufferMid = env->GetMethodID(env->GetObjectClass(g_audioBackend), "writeBuffer", "([SZ)I"); - audioInitMid = env->GetMethodID(env->GetObjectClass(g_audioBackend), "init", "(I)V"); + audioInitMid = env->GetMethodID(env->GetObjectClass(g_audioBackend), "init", "(I)Z"); audioTermMid = env->GetMethodID(env->GetObjectClass(g_audioBackend), "term", "()V"); if (jsamples == NULL) { jsamples = env->NewShortArray(SAMPLE_COUNT * 2); From 922df88cdd645fe048f495e682f398d75aff192f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 22 Oct 2022 13:05:11 +0200 Subject: [PATCH 04/34] msvc build fix --- core/oslib/audiobackend_directsound.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/oslib/audiobackend_directsound.cpp b/core/oslib/audiobackend_directsound.cpp index b4c220ca8..bd2773723 100644 --- a/core/oslib/audiobackend_directsound.cpp +++ b/core/oslib/audiobackend_directsound.cpp @@ -140,7 +140,7 @@ public: ringBuffer.setCapacity(config::AudioBufferSize * 4); // Start the thread - audioThread = 
std::thread(&audioThreadMain, this); + audioThread = std::thread(&DirectSoundBackend::audioThreadMain, this); // Play the buffer ! if (FAILED(buffer->Play(0, 0, DSBPLAY_LOOPING))) From decf70c87c219cb8d23d0fd185a3fe62016afddf Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 22 Oct 2022 22:14:19 +0200 Subject: [PATCH 05/34] get rid of INLINE, NOINLINE, __forceinline --- core/hw/aica/sgc_if.cpp | 22 +++++++++++-------- core/hw/arm7/arm7.cpp | 1 - core/hw/pvr/helper_classes.h | 7 ++----- core/hw/pvr/pvr_mem.cpp | 4 ++-- core/hw/pvr/ta.cpp | 5 ++--- core/hw/sh4/interpr/sh4_fpu.cpp | 25 ---------------------- core/hw/sh4/interpr/sh4_opcodes.cpp | 2 +- core/hw/sh4/sh4_core.h | 16 +++++++------- core/hw/sh4/sh4_core_regs.cpp | 4 ++-- core/hw/sh4/sh4_opcode_list.h | 10 ++++----- core/oslib/oslib.h | 2 +- core/rend/TexCache.h | 14 +++++++------ core/rend/gles/gldraw.cpp | 3 +-- core/stdclass.h | 2 +- core/types.h | 29 -------------------------- shell/libretro/libretro_core_options.h | 2 +- 16 files changed, 47 insertions(+), 101 deletions(-) diff --git a/core/hw/aica/sgc_if.cpp b/core/hw/aica/sgc_if.cpp index 2bd3d4787..bb89a5de8 100755 --- a/core/hw/aica/sgc_if.cpp +++ b/core/hw/aica/sgc_if.cpp @@ -398,7 +398,7 @@ struct ChannelEx struct { s32 val; - __forceinline s32 GetValue() { return val >> EG_STEP_BITS;} + s32 GetValue() { return val >> EG_STEP_BITS;} void SetValue(u32 aegb) { val = aegb << EG_STEP_BITS; } _EG_state state=EG_Attack; @@ -413,7 +413,7 @@ struct ChannelEx struct { u32 value; - __forceinline u32 GetValue() { return value >> EG_STEP_BITS;} + u32 GetValue() { return value >> EG_STEP_BITS;} void SetValue(u32 fegb) { value = fegb << EG_STEP_BITS; } _EG_state state = EG_Attack; @@ -439,7 +439,7 @@ struct ChannelEx int *plfo_scale; void (* alfo_calc)(ChannelEx* ch); void (* plfo_calc)(ChannelEx* ch); - __forceinline void Step(ChannelEx* ch) { counter--;if (counter==0) { state++; counter=start_value; alfo_calc(ch);plfo_calc(ch); } } + void Step(ChannelEx* 
ch) { counter--;if (counter==0) { state++; counter=start_value; alfo_calc(ch);plfo_calc(ch); } } void Reset(ChannelEx* ch) { state=0; counter=start_value; alfo_calc(ch); plfo_calc(ch); } void SetStartValue(u32 nv) { start_value = nv;} } lfo; @@ -458,17 +458,20 @@ struct ChannelEx quiet = false; disable(); } + void disable() { enabled=false; SetAegState(EG_Release); AEG.SetValue(0x3FF); } + void enable() { enabled=true; } - __forceinline SampleType InterpolateSample() + + SampleType InterpolateSample() { SampleType rv; u32 fp=step.fp; @@ -477,7 +480,8 @@ struct ChannelEx return rv; } - __forceinline bool Step(SampleType& oLeft, SampleType& oRight, SampleType& oDsp) + + bool Step(SampleType& oLeft, SampleType& oRight, SampleType& oDsp) { if (!enabled) { @@ -545,7 +549,7 @@ struct ChannelEx } } - __forceinline void Step(SampleType& mixl, SampleType& mixr) + void Step(SampleType& mixl, SampleType& mixr) { SampleType oLeft,oRight,oDsp; @@ -559,7 +563,7 @@ struct ChannelEx mixr+=oRight; } - __forceinline static void StepAll(SampleType& mixl, SampleType& mixr) + static void StepAll(SampleType& mixl, SampleType& mixr) { for (ChannelEx& channel : Chans) channel.Step(mixl, mixr); @@ -875,7 +879,7 @@ struct ChannelEx } }; -static __forceinline SampleType DecodeADPCM(u32 sample,s32 prev,s32& quant) +static SampleType DecodeADPCM(u32 sample,s32 prev,s32& quant) { s32 sign=1-2*(sample/8); @@ -895,7 +899,7 @@ static __forceinline SampleType DecodeADPCM(u32 sample,s32 prev,s32& quant) } template -__forceinline void StepDecodeSample(ChannelEx* ch,u32 CA) +void StepDecodeSample(ChannelEx* ch,u32 CA) { if (!last && PCMS<2) return ; diff --git a/core/hw/arm7/arm7.cpp b/core/hw/arm7/arm7.cpp index 8a901e5e6..531f48666 100644 --- a/core/hw/arm7/arm7.cpp +++ b/core/hw/arm7/arm7.cpp @@ -305,7 +305,6 @@ void aicaarm::reset() reg[15].I += 4; } -NOINLINE void CPUFiq() { u32 PC = reg[R15_ARM_NEXT].I+4; diff --git a/core/hw/pvr/helper_classes.h b/core/hw/pvr/helper_classes.h index 
a478401b1..88773b5c0 100644 --- a/core/hw/pvr/helper_classes.h +++ b/core/hw/pvr/helper_classes.h @@ -10,10 +10,9 @@ struct List bool* overrun; const char *list_name; - __forceinline int used() const { return size-avail; } - __forceinline int bytes() const { return used()* sizeof(T); } + int used() const { return size-avail; } + int bytes() const { return used()* sizeof(T); } - NOINLINE T* sig_overrun() { *overrun |= true; @@ -24,7 +23,6 @@ struct List return daty; } - __forceinline T* Append(int n=1) { int ad=avail-n; @@ -40,7 +38,6 @@ struct List return sig_overrun(); } - __forceinline T* LastPtr(int n = 1) const { return daty-n; diff --git a/core/hw/pvr/pvr_mem.cpp b/core/hw/pvr/pvr_mem.cpp index 26ee43cb2..d31642cb6 100644 --- a/core/hw/pvr/pvr_mem.cpp +++ b/core/hw/pvr/pvr_mem.cpp @@ -88,7 +88,7 @@ static void YUV_Block8x8(const u8* inuv, const u8* iny, u8* out) } } -static INLINE void YUV_Block384(const u8 *in, u8 *out) +static void YUV_Block384(const u8 *in, u8 *out) { const u8 *inuv = in; const u8 *iny = in + 128; @@ -100,7 +100,7 @@ static INLINE void YUV_Block384(const u8 *in, u8 *out) YUV_Block8x8(inuv+36,iny+192,p_out+YUV_x_size*8*2+8*2); //(8,8) } -static INLINE void YUV_ConvertMacroBlock(const u8 *datap) +static void YUV_ConvertMacroBlock(const u8 *datap) { //do shit TA_YUV_TEX_CNT++; diff --git a/core/hw/pvr/ta.cpp b/core/hw/pvr/ta.cpp index 4f542a29b..15c2d9ed7 100644 --- a/core/hw/pvr/ta.cpp +++ b/core/hw/pvr/ta.cpp @@ -204,7 +204,7 @@ static const HollyInterruptID ListEndInterrupt[5]= }; -static NOINLINE void DYNACALL ta_handle_cmd(u32 trans) +static void DYNACALL ta_handle_cmd(u32 trans) { Ta_Dma* dat=(Ta_Dma*)(ta_tad.thd_data-32); @@ -511,8 +511,7 @@ void ta_vtx_SoftReset() ta_cur_state = TAS_NS; } -static INLINE -void DYNACALL ta_thd_data32_i(const simd256_t *data) +static void DYNACALL ta_thd_data32_i(const simd256_t *data) { if (ta_ctx == NULL) { diff --git a/core/hw/sh4/interpr/sh4_fpu.cpp b/core/hw/sh4/interpr/sh4_fpu.cpp index 
0d403ca46..09bbbe7f9 100644 --- a/core/hw/sh4/interpr/sh4_fpu.cpp +++ b/core/hw/sh4/interpr/sh4_fpu.cpp @@ -44,34 +44,9 @@ static void iNimp(const char *str); #define WriteMemBOU16(addr,offset,data) WriteMemU16(addr+offset,data) #define WriteMemBOU8(addr,offset,data) WriteMemU8(addr+offset,data) -INLINE void Denorm32(float &value) -{ - if (fpscr.DN) - { - u32* v=(u32*)&value; - if (IS_DENORMAL(v) && (*v&0x7fFFFFFF)!=0) - { - *v&=0x80000000; - //printf("Denormal ..\n"); - } - if ((*v<=0x007FFFFF) && *v>0) - { - *v=0; - INFO_LOG(INTERPRETER, "Fixed +denorm"); - } - else if ((*v<=0x807FFFFF) && *v>0x80000000) - { - *v=0x80000000; - INFO_LOG(INTERPRETER, "Fixed -denorm"); - } - } -} - - #define CHECK_FPU_32(v) v = fixNaN(v) #define CHECK_FPU_64(v) v = fixNaN64(v) - //fadd , sh4op(i1111_nnnn_mmmm_0000) { diff --git a/core/hw/sh4/interpr/sh4_opcodes.cpp b/core/hw/sh4/interpr/sh4_opcodes.cpp index 3ce78c92a..9dbafabf5 100644 --- a/core/hw/sh4/interpr/sh4_opcodes.cpp +++ b/core/hw/sh4/interpr/sh4_opcodes.cpp @@ -1157,7 +1157,7 @@ sh4op(i0000_nnnn_1011_0011) //pref @ template -INLINE void DYNACALL do_sqw(u32 Dest) +void DYNACALL do_sqw(u32 Dest) { //TODO : Check for enabled store queues ? 
u32 Address; diff --git a/core/hw/sh4/sh4_core.h b/core/hw/sh4/sh4_core.h index 86bc96729..94f1091f2 100644 --- a/core/hw/sh4/sh4_core.h +++ b/core/hw/sh4/sh4_core.h @@ -40,7 +40,7 @@ union DoubleReg f32 sgl[2]; }; -static INLINE f64 GetDR(u32 n) +static inline f64 GetDR(u32 n) { #ifdef TRACE if (n>7) @@ -54,7 +54,7 @@ static INLINE f64 GetDR(u32 n) return t.dbl; } -static INLINE f64 GetXD(u32 n) +static inline f64 GetXD(u32 n) { #ifdef TRACE if (n>7) @@ -68,7 +68,7 @@ static INLINE f64 GetXD(u32 n) return t.dbl; } -static INLINE void SetDR(u32 n,f64 val) +static inline void SetDR(u32 n,f64 val) { #ifdef TRACE if (n>7) @@ -82,7 +82,7 @@ static INLINE void SetDR(u32 n,f64 val) fr[(n<<1) | 0]=t.sgl[1]; } -static INLINE void SetXD(u32 n,f64 val) +static inline void SetXD(u32 n,f64 val) { #ifdef TRACE if (n>7) @@ -104,7 +104,7 @@ struct SH4ThrownException { u32 callVect; }; -static INLINE void RaiseFPUDisableException() +static inline void RaiseFPUDisableException() { if (config::FullMMU) { @@ -113,7 +113,7 @@ static INLINE void RaiseFPUDisableException() } } -static INLINE void AdjustDelaySlotException(SH4ThrownException& ex) +static inline void AdjustDelaySlotException(SH4ThrownException& ex) { ex.epc -= 2; if (ex.expEvn == 0x800) // FPU disable exception @@ -123,7 +123,7 @@ static INLINE void AdjustDelaySlotException(SH4ThrownException& ex) } // The SH4 sets the signaling bit to 0 for qNaN (unlike all recent CPUs). Some games rely on this. 
-static INLINE f32 fixNaN(f32 f) +static inline f32 fixNaN(f32 f) { #ifdef STRICT_MODE u32& hex = *(u32 *)&f; @@ -140,7 +140,7 @@ static INLINE f32 fixNaN(f32 f) return f; } -static INLINE f64 fixNaN64(f64 f) +static inline f64 fixNaN64(f64 f) { #ifdef STRICT_MODE u64& hex = *(u64 *)&f; diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp index c488f5e67..6f96ee4b3 100644 --- a/core/hw/sh4/sh4_core_regs.cpp +++ b/core/hw/sh4/sh4_core_regs.cpp @@ -10,12 +10,12 @@ Sh4RCB* p_sh4rcb; sh4_if sh4_cpu; -static INLINE void ChangeGPR() +static void ChangeGPR() { std::swap((u32 (&)[8])r, r_bank); } -static INLINE void ChangeFP() +static void ChangeFP() { std::swap((f32 (&)[16])Sh4cntx.xffr, *(f32 (*)[16])&Sh4cntx.xffr[16]); } diff --git a/core/hw/sh4/sh4_opcode_list.h b/core/hw/sh4/sh4_opcode_list.h index 2335d6b47..e2c215954 100644 --- a/core/hw/sh4/sh4_opcode_list.h +++ b/core/hw/sh4/sh4_opcode_list.h @@ -51,27 +51,27 @@ struct sh4_opcodelistentry strcpy(strout, text.c_str()); } - INLINE bool SetPC() const + bool SetPC() const { return (type & WritesPC)!=0; } - INLINE bool NeedPC() const + bool NeedPC() const { return (type & ReadsPC)!=0; } - INLINE bool SetSR() const + bool SetSR() const { return (type & WritesSR)!=0; } - INLINE bool SetFPSCR() const + bool SetFPSCR() const { return (type & WritesFPSCR)!=0; } - INLINE bool IsFloatingPoint() const + bool IsFloatingPoint() const { return (type & UsesFPU) != 0; } diff --git a/core/oslib/oslib.h b/core/oslib/oslib.h index e5803d16b..3ffb874a5 100644 --- a/core/oslib/oslib.h +++ b/core/oslib/oslib.h @@ -18,7 +18,7 @@ void os_UninstallFaultHandler(); #include #endif -u32 static INLINE bitscanrev(u32 v) +u32 static inline bitscanrev(u32 v) { #ifdef __GNUC__ return 31-__builtin_clz(v); diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index 1d16de3a6..28145ca5b 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -91,31 +91,33 @@ public: pixels_per_line = 1 << level; } - __forceinline 
pixel_type *data(u32 x = 0, u32 y = 0) + pixel_type *data(u32 x = 0, u32 y = 0) { return p_current_mipmap + pixels_per_line * y + x; } - __forceinline void prel(u32 x, pixel_type value) + void prel(u32 x, pixel_type value) { p_current_pixel[x] = value; } - __forceinline void prel(u32 x, u32 y, pixel_type value) + void prel(u32 x, u32 y, pixel_type value) { p_current_pixel[y * pixels_per_line + x] = value; } - __forceinline void rmovex(u32 value) + void rmovex(u32 value) { p_current_pixel += value; } - __forceinline void rmovey(u32 value) + + void rmovey(u32 value) { p_current_line += pixels_per_line * value; p_current_pixel = p_current_line; } - __forceinline void amove(u32 x_m, u32 y_m) + + void amove(u32 x_m, u32 y_m) { //p_current_pixel=p_buffer_start; p_current_line = p_current_mipmap + pixels_per_line * y_m; diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index e9d124f3b..1054afc3f 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -102,8 +102,7 @@ static void SetBaseClipping() } template -__forceinline - void SetGPState(const PolyParam* gp,u32 cflip=0) +void SetGPState(const PolyParam* gp,u32 cflip=0) { if (gp->pcw.Texture && gp->tsp.FilterMode > 1 && Type != ListType_Punch_Through && gp->tcw.MipMapped == 1) { diff --git a/core/stdclass.h b/core/stdclass.h index eebd96ca5..deca86406 100644 --- a/core/stdclass.h +++ b/core/stdclass.h @@ -85,7 +85,7 @@ public: std::memset(data, 0, size); } - INLINE u8& operator [](unsigned i) { + u8& operator [](unsigned i) { #ifdef MEM_BOUND_CHECK if (i >= size) { diff --git a/core/types.h b/core/types.h index cb9c23490..e73d66a99 100644 --- a/core/types.h +++ b/core/types.h @@ -1,17 +1,6 @@ #pragma once - #include "build.h" -#ifndef _MSC_VER -#ifndef __forceinline -#define __forceinline inline -#endif -#ifndef _WIN32 -#define __debugbreak -#endif -#endif - - #if HOST_CPU == CPU_X86 #ifdef _MSC_VER #define DYNACALL __fastcall @@ -42,17 +31,8 @@ typedef uint64_t u64; typedef float f32; 
typedef double f64; -#ifdef _M_X64 -#undef X86 -#define X64 -#endif - typedef size_t unat; -#ifdef X64 -typedef u64 unat; -#endif - //intc function pointer and enums enum HollyInterruptType { @@ -172,15 +152,6 @@ inline static void JITWriteProtect(bool enabled) { #include #include -#define INLINE __forceinline - -//no inline -- fixme -#ifdef _MSC_VER -#define NOINLINE __declspec(noinline) -#else -#define NOINLINE __attribute__ ((noinline)) -#endif - #ifdef _MSC_VER #define likely(x) x #define unlikely(x) x diff --git a/shell/libretro/libretro_core_options.h b/shell/libretro/libretro_core_options.h index 8a4aefc66..7397fc204 100644 --- a/shell/libretro/libretro_core_options.h +++ b/shell/libretro/libretro_core_options.h @@ -1529,7 +1529,7 @@ struct retro_core_options_v2 *options_intl[RETRO_LANGUAGE_LAST] = { * be as painless as possible for core devs) */ -static INLINE void libretro_set_core_options(retro_environment_t environ_cb, +static inline void libretro_set_core_options(retro_environment_t environ_cb, bool *categories_supported) { unsigned version = 0; From d1e48c657e62b04f41f3afb4344e3dc543e481a8 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 23 Oct 2022 15:44:12 +0200 Subject: [PATCH 06/34] missing overrides --- core/oslib/audiobackend_directsound.cpp | 2 +- core/oslib/audiobackend_null.cpp | 21 --------------------- core/oslib/audiobackend_oss.cpp | 2 +- 3 files changed, 2 insertions(+), 23 deletions(-) diff --git a/core/oslib/audiobackend_directsound.cpp b/core/oslib/audiobackend_directsound.cpp index bd2773723..c8e675fb8 100644 --- a/core/oslib/audiobackend_directsound.cpp +++ b/core/oslib/audiobackend_directsound.cpp @@ -225,7 +225,7 @@ public: return (p1bytes + p2bytes) / 2; } - void termRecord() + void termRecord() override { if (!dcapture) return; diff --git a/core/oslib/audiobackend_null.cpp b/core/oslib/audiobackend_null.cpp index 08a8d876a..67c935a7b 100644 --- a/core/oslib/audiobackend_null.cpp +++ b/core/oslib/audiobackend_null.cpp @@ 
-17,10 +17,6 @@ public: return true; } - void term() override - { - } - u32 push(const void* frame, u32 samples, bool wait) override { if (wait) @@ -52,21 +48,4 @@ public: private: the_clock::time_point last_time; }; - static NullAudioBackend nullBackend; - -/* -static audiobackend_t audiobackend_null = { - "null", // Slug - "No Audio", // Name - &null_init, - &null_push, - &null_term, - nullptr, - &null_init_record, - &null_record, - &null_term -}; - -static bool null = RegisterAudioBackend(&audiobackend_null); -*/ diff --git a/core/oslib/audiobackend_oss.cpp b/core/oslib/audiobackend_oss.cpp index d4818cab1..b9f81c601 100644 --- a/core/oslib/audiobackend_oss.cpp +++ b/core/oslib/audiobackend_oss.cpp @@ -52,7 +52,7 @@ public: // recording untested - bool initRecord(u32 sampling_freq) + bool initRecord(u32 sampling_freq) override { recordFD = open("/dev/dsp", O_RDONLY); if (recordFD < 0) From e3c260f4ca9767d28510f3bba0cfe10e58eca266 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 23 Oct 2022 15:49:23 +0200 Subject: [PATCH 07/34] pvr: don't use op list to mark block if tile pointer is null Naomi Doom sets the opaque list block to null in the tile, but op size in TA_ALLOC_CTRL is non-zero. So use the TR list in this case. 
Issue #682 --- core/hw/pvr/ta.cpp | 14 ++++++++++++-- core/hw/pvr/ta_ctx.h | 1 + core/hw/pvr/ta_vtx.cpp | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/core/hw/pvr/ta.cpp b/core/hw/pvr/ta.cpp index 15c2d9ed7..2c88033cc 100644 --- a/core/hw/pvr/ta.cpp +++ b/core/hw/pvr/ta.cpp @@ -467,10 +467,20 @@ static u32 opbSize(int n) static void markObjectListBlocks() { - u32 addr = TA_OL_BASE; + u32 addr; + u32 tile_size; + getRegionTileAddrAndSize(addr, tile_size); + + // Read the opaque pointer of the first tile and check that it's non-null (Naomi doom) + u32 opbAddr = pvr_read32p(addr + 4); + bool emptyOpaqueList = (opbAddr & 0x80000000) != 0; + + addr = TA_OL_BASE; // opaque u32 opBlockSize = opbSize(TA_ALLOC_CTRL & 3); - if (opBlockSize == 0) + if (emptyOpaqueList) + addr += opBlockSize * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1); + if (opBlockSize == 0 || emptyOpaqueList) { // skip modvols OPBs addr += opbSize((TA_ALLOC_CTRL >> 4) & 3) * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1); diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 7b1593876..e9e8ca486 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -415,6 +415,7 @@ void ta_set_tileclip(u32 tileclip); u32 ta_get_list_type(); void ta_set_list_type(u32 listType); void ta_parse_reset(); +void getRegionTileAddrAndSize(u32& address, u32& size); class TAParserException : public FlycastException { diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 55cc40333..844a1d3ae 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -1767,7 +1767,7 @@ void FillBGP(TA_context* ctx) cv[3].v = max_v; } -static void getRegionTileAddrAndSize(u32& address, u32& size) +void getRegionTileAddrAndSize(u32& address, u32& size) { address = REGION_BASE; const bool type1_tile = ((FPU_PARAM_CFG >> 21) & 1) == 0; From 5722dc90f06cc62eb05aa6e70e89be2e8d4718bf Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 23 Oct 
2022 16:32:42 +0200 Subject: [PATCH 08/34] Full framebuffer emulation. Renderer interface changes Helps for: Densha de Go! 2, Issue #171 Vigilante 8, Issue #275 Xtreme Sports Sonic Shuffle The Ring of the Nibelungen --- CMakeLists.txt | 9 +- core/cfg/option.cpp | 1 + core/cfg/option.h | 1 + core/emulator.cpp | 14 +- core/hw/pvr/Renderer_if.cpp | 442 ++++++++++++++---------- core/hw/pvr/Renderer_if.h | 42 ++- core/hw/pvr/pvr_regs.cpp | 14 - core/hw/pvr/pvr_regs.h | 4 +- core/hw/pvr/spg.cpp | 44 +-- core/hw/pvr/spg.h | 2 +- core/hw/pvr/ta_ctx.cpp | 9 +- core/hw/pvr/ta_ctx.h | 12 +- core/hw/pvr/ta_vtx.cpp | 7 +- core/rend/TexCache.cpp | 176 ++++++++-- core/rend/TexCache.h | 10 +- core/rend/dx11/dx11_renderer.cpp | 215 +++++++++--- core/rend/dx11/dx11_renderer.h | 8 +- core/rend/dx11/dx11context.cpp | 1 - core/rend/dx11/oit/dx11_oitrenderer.cpp | 28 +- core/rend/dx9/d3d_renderer.cpp | 318 ++++++++++------- core/rend/dx9/d3d_renderer.h | 11 +- core/rend/dx9/dxcontext.cpp | 1 - core/rend/gl4/gldraw.cpp | 4 +- core/rend/gl4/gles.cpp | 70 ++-- core/rend/gles/gldraw.cpp | 111 +++++- core/rend/gles/gles.cpp | 110 +++--- core/rend/gles/gles.h | 95 +++-- core/rend/gles/gltex.cpp | 265 ++++++-------- core/rend/gles/opengl_driver.cpp | 4 +- core/rend/gles/opengl_driver.h | 4 +- core/rend/gles/postprocess.cpp | 49 +-- core/rend/gles/postprocess.h | 8 +- core/rend/gui.cpp | 3 + core/rend/mainui.cpp | 1 - core/rend/transform_matrix.h | 157 +++++++-- core/rend/vulkan/buffer.cpp | 28 +- core/rend/vulkan/buffer.h | 16 +- core/rend/vulkan/desc_set.h | 41 --- core/rend/vulkan/drawer.cpp | 104 +++++- core/rend/vulkan/drawer.h | 33 +- core/rend/vulkan/oit/oit_drawer.cpp | 24 +- core/rend/vulkan/oit/oit_drawer.h | 18 +- core/rend/vulkan/oit/oit_renderer.cpp | 44 ++- core/rend/vulkan/oit/oit_renderpass.cpp | 2 +- core/rend/vulkan/oit/oit_renderpass.h | 9 +- core/rend/vulkan/oit/oit_shaders.h | 14 + core/rend/vulkan/pipeline.h | 9 + core/rend/vulkan/shaders.h | 15 + 
core/rend/vulkan/texture.cpp | 22 +- core/rend/vulkan/texture.h | 6 + core/rend/vulkan/vk_context_lr.cpp | 2 +- core/rend/vulkan/vk_context_lr.h | 2 - core/rend/vulkan/vmallocator.h | 10 +- core/rend/vulkan/vulkan_context.cpp | 6 +- core/rend/vulkan/vulkan_context.h | 1 - core/rend/vulkan/vulkan_renderer.cpp | 44 ++- core/rend/vulkan/vulkan_renderer.h | 148 ++++---- shell/libretro/libretro.cpp | 8 +- shell/libretro/libretro_core_options.h | 16 +- shell/libretro/option.cpp | 1 + shell/libretro/vmu_xhair.cpp | 2 +- 61 files changed, 1767 insertions(+), 1108 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a564216a..59c336ec8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -473,8 +473,13 @@ if(UNIX AND NOT APPLE AND NOT ANDROID) endif() if(ASAN) - target_compile_options(${PROJECT_NAME} PRIVATE -fsanitize=address -static-libasan) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -static-libasan") + if(ANDROID) + target_compile_options(${PROJECT_NAME} PRIVATE -fsanitize=address -fno-omit-frame-pointer) + set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS -fsanitize=address) + else() + target_compile_options(${PROJECT_NAME} PRIVATE -fsanitize=address -static-libasan) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -static-libasan") + endif() endif() if(ANDROID AND NOT LIBRETRO) diff --git a/core/cfg/option.cpp b/core/cfg/option.cpp index 78028a7e8..bf1a86f0b 100644 --- a/core/cfg/option.cpp +++ b/core/cfg/option.cpp @@ -104,6 +104,7 @@ Option ThreadedRendering("rend.ThreadedRendering", true); Option DupeFrames("rend.DupeFrames", false); Option PerPixelLayers("rend.PerPixelLayers", 32); Option NativeDepthInterpolation("rend.NativeDepthInterpolation", false); +Option EmulateFramebuffer("rend.EmulateFramebuffer", false); // Misc diff --git a/core/cfg/option.h b/core/cfg/option.h index 61a265060..e3d850aea 100644 --- a/core/cfg/option.h +++ b/core/cfg/option.h @@ -466,6 +466,7 @@ extern Option 
TextureFiltering; // 0: default, 1: force nearest, 2: force l extern Option ThreadedRendering; extern Option DupeFrames; extern Option NativeDepthInterpolation; +extern Option EmulateFramebuffer; // Misc diff --git a/core/emulator.cpp b/core/emulator.cpp index 4406fad50..296a49a92 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -223,6 +223,12 @@ static void loadSpecialSettings() NOTICE_LOG(BOOT, "Forcing PAL broadcasting"); config::Broadcast.override(1); } + if (prod_id == "T1102M" // Densha de Go! 2 + || prod_id == "T00000A") // The Ring of the Nibelungen (demo, hack) + { + NOTICE_LOG(BOOT, "Forcing Full Framebuffer Emulation"); + config::EmulateFramebuffer.override(true); + } } else if (settings.platform.isArcade()) { @@ -764,11 +770,12 @@ void Emulator::run() void Emulator::start() { + if (state == Running) + return; verify(state == Loaded); state = Running; SetMemoryHandlers(); settings.aica.NoBatch = config::ForceWindowsCE || config::DSPEnabled || config::GGPOEnable; - rend_resize_renderer(); #if FEAT_SHREC != DYNAREC_NONE if (config::DynarecEnabled) { @@ -829,11 +836,10 @@ bool Emulator::checkStatus() bool Emulator::render() { - rend_resize_renderer_if_needed(); + if (state != Running) + return false; if (!config::ThreadedRendering) { - if (state != Running) - return false; run(); // TODO if stopping due to a user request, no frame has been rendered return !renderTimeout; diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index ea53286fd..23227422c 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -2,12 +2,15 @@ #include "spg.h" #include "hw/pvr/pvr_mem.h" #include "rend/TexCache.h" +#include "rend/transform_matrix.h" #include "cfg/option.h" #include "network/ggpo.h" #include "emulator.h" #include "serialize.h" +#include "hw/holly/holly_intc.h" #include +#include void retro_rend_present(); #ifndef LIBRETRO @@ -17,15 +20,13 @@ void retro_rend_present() sh4_cpu.Stop(); } #endif +void 
retro_resize_renderer(int w, int h); -u32 VertexCount=0; u32 FrameCount=1; Renderer* renderer; -cResetEvent rs, re; -static bool do_swap; -std::mutex swap_mutex; +static cResetEvent renderEnd; u32 fb_w_cur = 1; static cResetEvent vramRollback; @@ -38,74 +39,195 @@ bool fb_dirty; static bool pend_rend; TA_context* _pvrrc; -extern bool rend_needs_resize; -static bool rend_frame(TA_context* ctx) +static bool presented; + +class PvrMessageQueue { - bool proc = renderer->Process(ctx); + using lock_guard = std::lock_guard; - if (!proc || (!ctx->rend.isRTT && !ctx->rend.isRenderFramebuffer)) - // If rendering to texture, continue locking until the frame is rendered - re.Set(); - rend_allow_rollback(); +public: + enum MessageType { NoMessage = -1, Render, RenderFramebuffer, Present, Stop }; + struct Message + { + Message() = default; + Message(MessageType type, FramebufferInfo config) + : type(type), config(config) {} - return proc && renderer->Render(); -} + MessageType type = NoMessage; + FramebufferInfo config; + }; + + void enqueue(MessageType type, FramebufferInfo config = FramebufferInfo()) + { + Message msg { type, config }; + if (config::ThreadedRendering) + { + // FIXME need some synchronization to avoid blinking in densha de go + // or use !threaded rendering for emufb? 
+ // or read framebuffer vram on emu thread + bool dupe; + do { + dupe = false; + { + const lock_guard lock(mutex); + for (const auto& m : queue) + if (m.type == type) { + dupe = true; + break; + } + if (!dupe) + queue.push_back(msg); + } + if (dupe) + { + if (type == Stop) + return; + dequeueEvent.Wait(); + } + } while (dupe); + enqueueEvent.Set(); + } + else + { + // drain the queue after switching to !threaded rendering + while (!queue.empty()) + waitAndExecute(); + execute(msg); + } + } + + bool waitAndExecute(int timeoutMs = -1) + { + return execute(dequeue(timeoutMs)); + } + + void reset() { + const lock_guard lock(mutex); + queue.clear(); + } + + void cancelEnqueue() + { + const lock_guard lock(mutex); + for (auto it = queue.begin(); it != queue.end(); ) + { + if (it->type != Render) + it = queue.erase(it); + else + ++it; + } + dequeueEvent.Set(); + } +private: + Message dequeue(int timeoutMs = -1) + { + Message msg; + while (true) + { + { + const lock_guard lock(mutex); + if (!queue.empty()) + { + msg = queue.front(); + queue.pop_front(); + } + } + if (msg.type != NoMessage) { + dequeueEvent.Set(); + break; + } + if (timeoutMs == -1) + enqueueEvent.Wait(); + else if (!enqueueEvent.Wait(timeoutMs)) + break; + } + return msg; + } + + bool execute(Message msg) + { + switch (msg.type) + { + case Render: + render(); + break; + case RenderFramebuffer: + renderFramebuffer(msg.config); + break; + case Present: + present(); + break; + case Stop: + return false; + default: + break; + } + return true; + } + + void render() + { + _pvrrc = DequeueRender(); + if (_pvrrc == nullptr) + return; + + bool renderToScreen = !_pvrrc->rend.isRTT && !config::EmulateFramebuffer; +#ifdef LIBRETRO + if (renderToScreen) + retro_resize_renderer(_pvrrc->rend.framebufferWidth, _pvrrc->rend.framebufferHeight); +#endif + bool proc = renderer->Process(_pvrrc); + if (!proc || renderToScreen) + // If rendering to texture or in full framebuffer emulation, continue locking until the frame is 
rendered + renderEnd.Set(); + rend_allow_rollback(); + if (proc) + { + renderer->Render(); + if (!renderToScreen) + renderEnd.Set(); + } + + //clear up & free data .. + FinishRender(_pvrrc); + _pvrrc = nullptr; + } + + void renderFramebuffer(const FramebufferInfo& config) + { +#ifdef LIBRETRO + int w, h; + getTAViewport(w, h); // FIXME ? + retro_resize_renderer(w, h); +#endif + renderer->RenderFramebuffer(config); + } + + void present() + { + if (renderer->Present()) + { + presented = true; + retro_rend_present(); + } + } + + std::mutex mutex; + cResetEvent enqueueEvent; + cResetEvent dequeueEvent; + std::deque queue; +}; + +static PvrMessageQueue pvrQueue; bool rend_single_frame(const bool& enabled) { - do - { - if (config::ThreadedRendering && !rs.Wait(50)) + presented = false; + while (enabled && !presented) + if (!pvrQueue.waitAndExecute(50)) return false; - if (do_swap) - { - do_swap = false; - if (renderer->Present()) - { - rs.Set(); // don't miss any render - retro_rend_present(); - return true; - } - } - if (!enabled) - return false; - - _pvrrc = DequeueRender(); - if (!config::ThreadedRendering && _pvrrc == nullptr) - return false; - } - while (_pvrrc == nullptr); - - bool frame_rendered = rend_frame(_pvrrc); - - if (frame_rendered) - { - { - std::lock_guard lock(swap_mutex); - if (config::DelayFrameSwapping && !_pvrrc->rend.isRenderFramebuffer && fb_w_cur != FB_R_SOF1 && !do_swap) - // Delay swap - frame_rendered = false; - else - // Swap now - do_swap = false; - } - if (frame_rendered) - { - frame_rendered = renderer->Present(); - if (frame_rendered) - retro_rend_present(); - } - } - - if (_pvrrc->rend.isRTT) - re.Set(); - - //clear up & free data .. 
- FinishRender(_pvrrc); - _pvrrc = nullptr; - - return frame_rendered; + return true; } Renderer* rend_GLES2(); @@ -181,99 +303,107 @@ void rend_term_renderer() void rend_reset() { FinishRender(DequeueRender()); - do_swap = false; render_called = false; pend_rend = false; FrameCount = 1; - VertexCount = 0; fb_w_cur = 1; + pvrQueue.reset(); } -void rend_start_render(TA_context *ctx) +void rend_start_render() { render_called = true; pend_rend = false; - if (ctx == nullptr) + + TA_context *ctx = nullptr; + u32 addresses[MAX_PASSES]; + int count = getTAContextAddresses(addresses); + if (count > 0) { - u32 addresses[MAX_PASSES]; - int count = getTAContextAddresses(addresses); - if (count > 0) + ctx = tactx_Pop(addresses[0]); + if (ctx != nullptr) { - ctx = tactx_Pop(addresses[0]); - if (ctx != nullptr) + TA_context *linkedCtx = ctx; + for (int i = 1; i < count; i++) { - TA_context *linkedCtx = ctx; - for (int i = 1; i < count; i++) - { - linkedCtx->nextContext = tactx_Pop(addresses[i]); - if (linkedCtx->nextContext != nullptr) - linkedCtx = linkedCtx->nextContext; - } + linkedCtx->nextContext = tactx_Pop(addresses[i]); + if (linkedCtx->nextContext != nullptr) + linkedCtx = linkedCtx->nextContext; } } } - // No end of render interrupt when rendering the framebuffer - if (!ctx || !ctx->rend.isRenderFramebuffer) - SetREP(ctx); + scheduleRenderDone(ctx); - if (ctx) + if (ctx == nullptr) + return; + + FillBGP(ctx); + + ctx->rend.isRTT = (FB_W_SOF1 & 0x1000000) != 0; + ctx->rend.fb_W_SOF1 = FB_W_SOF1; + ctx->rend.fb_W_CTRL.full = FB_W_CTRL.full; + + ctx->rend.fb_X_CLIP = FB_X_CLIP; + ctx->rend.fb_Y_CLIP = FB_Y_CLIP; + ctx->rend.fb_W_LINESTRIDE = FB_W_LINESTRIDE.stride; + + ctx->rend.fog_clamp_min = FOG_CLAMP_MIN; + ctx->rend.fog_clamp_max = FOG_CLAMP_MAX; + + if (!ctx->rend.isRTT) { - if (ctx->rend.isRenderFramebuffer) - { - ctx->rend.isRTT = false; - ctx->rend.fb_X_CLIP.min = 0; - ctx->rend.fb_X_CLIP.max = 639; - ctx->rend.fb_Y_CLIP.min = 0; - ctx->rend.fb_Y_CLIP.max = 479; 
+ int width, height; + getScaledFramebufferSize(width, height); + ctx->rend.framebufferWidth = width; + ctx->rend.framebufferHeight = height; + } - ctx->rend.fog_clamp_min.full = 0; - ctx->rend.fog_clamp_max.full = 0xffffffff; - } - else - { - FillBGP(ctx); - - ctx->rend.isRTT = (FB_W_SOF1 & 0x1000000) != 0; - ctx->rend.fb_W_SOF1 = FB_W_SOF1; - ctx->rend.fb_W_CTRL.full = FB_W_CTRL.full; - - ctx->rend.fb_X_CLIP = FB_X_CLIP; - ctx->rend.fb_Y_CLIP = FB_Y_CLIP; - ctx->rend.fb_W_LINESTRIDE = FB_W_LINESTRIDE.stride; - - ctx->rend.fog_clamp_min = FOG_CLAMP_MIN; - ctx->rend.fog_clamp_max = FOG_CLAMP_MAX; - } - - if (!config::DelayFrameSwapping && !ctx->rend.isRTT) - ggpo::endOfFrame(); + bool present = !config::DelayFrameSwapping && !ctx->rend.isRTT && !config::EmulateFramebuffer; + if (present) + ggpo::endOfFrame(); + if (QueueRender(ctx)) + { palette_update(); - if (QueueRender(ctx)) - { - pend_rend = true; - if (!config::ThreadedRendering) - rend_single_frame(true); - else - rs.Set(); - } + pend_rend = true; + pvrQueue.enqueue(PvrMessageQueue::Render); + if (present) + pvrQueue.enqueue(PvrMessageQueue::Present); } } -void rend_end_render() +int rend_end_render(int tag, int cycles, int jitter) { + if (settings.platform.isNaomi2()) + { + asic_RaiseInterruptBothCLX(holly_RENDER_DONE); + asic_RaiseInterruptBothCLX(holly_RENDER_DONE_isp); + asic_RaiseInterruptBothCLX(holly_RENDER_DONE_vd); + } + else + { + asic_RaiseInterrupt(holly_RENDER_DONE); + asic_RaiseInterrupt(holly_RENDER_DONE_isp); + asic_RaiseInterrupt(holly_RENDER_DONE_vd); + } if (pend_rend && config::ThreadedRendering) - re.Wait(); + renderEnd.Wait(); + + return 0; } void rend_vblank() { - if (!render_called && fb_dirty && FB_R_CTRL.fb_enable) + if (config::EmulateFramebuffer + || (!render_called && fb_dirty && FB_R_CTRL.fb_enable)) { - DEBUG_LOG(PVR, "Direct framebuffer write detected"); - TA_context *ctx = tactx_Alloc(); - ctx->rend.isRenderFramebuffer = true; - rend_start_render(ctx); + FramebufferInfo 
fbInfo; + fbInfo.update(); + pvrQueue.enqueue(PvrMessageQueue::RenderFramebuffer, fbInfo); + pvrQueue.enqueue(PvrMessageQueue::Present); + ggpo::endOfFrame(); + if (!config::EmulateFramebuffer) + DEBUG_LOG(PVR, "Direct framebuffer write detected"); fb_dirty = false; } render_called = false; @@ -293,8 +423,12 @@ void rend_cancel_emu_wait() if (config::ThreadedRendering) { FinishRender(NULL); - re.Set(); + renderEnd.Set(); rend_allow_rollback(); + pvrQueue.cancelEnqueue(); + // Needed for android where this function may be called + // from a thread different from the UI one + pvrQueue.enqueue(PvrMessageQueue::Stop); } } @@ -308,22 +442,12 @@ void rend_set_fb_write_addr(u32 fb_w_sof1) void rend_swap_frame(u32 fb_r_sof) { - swap_mutex.lock(); - if (fb_r_sof == fb_w_cur) + if (!config::EmulateFramebuffer && fb_r_sof == fb_w_cur) { - do_swap = true; - if (config::ThreadedRendering) - rs.Set(); - else - { - swap_mutex.unlock(); - rend_single_frame(true); - swap_mutex.lock(); - } + pvrQueue.enqueue(PvrMessageQueue::Present); if (config::DelayFrameSwapping) ggpo::endOfFrame(); } - swap_mutex.unlock(); } void rend_disable_rollback() @@ -364,42 +488,4 @@ void rend_deserialize(Deserializer& deser) deser >> fb_watch_addr_end; } pend_rend = false; - rend_needs_resize = true; -} - -void rend_resize_renderer() -{ - int fbwidth = 640 / (1 + VO_CONTROL.pixel_double) * (1 + SCALER_CTL.hscale); - int fbheight = FB_R_CTRL.vclk_div == 1 || SPG_CONTROL.interlace == 1 ? 
480 : 240; - if (SPG_CONTROL.interlace == 0 && SCALER_CTL.vscalefactor > 0x400) - fbheight *= std::roundf((float)SCALER_CTL.vscalefactor / 0x400); - - float upscaling = config::RenderResolution / 480.f; - float hres = fbwidth * upscaling; - float vres = fbheight * upscaling; - if (config::Widescreen && !config::Rotate90) - { - if (config::SuperWidescreen) - hres *= (float)settings.display.width / settings.display.height / 4.f * 3.f; - else - hres *= 4.f / 3.f; - } - if (!config::Rotate90) - hres = std::roundf(hres / 2.f) * 2.f; - DEBUG_LOG(RENDERER, "rend_resize_renderer: %d x %d", (int)hres, (int)vres); - if (renderer != nullptr) - renderer->Resize((int)hres, (int)vres); - rend_needs_resize = false; -#ifdef LIBRETRO - void retro_resize_renderer(int w, int h); - - retro_resize_renderer((int)hres, (int)vres); -#endif -} - -void rend_resize_renderer_if_needed() -{ - if (!rend_needs_resize) - return; - rend_resize_renderer(); } diff --git a/core/hw/pvr/Renderer_if.h b/core/hw/pvr/Renderer_if.h index 5e8b80e51..4461cefa8 100644 --- a/core/hw/pvr/Renderer_if.h +++ b/core/hw/pvr/Renderer_if.h @@ -2,14 +2,13 @@ #include "types.h" #include "ta_ctx.h" -extern u32 VertexCount; extern u32 FrameCount; void rend_init_renderer(); void rend_term_renderer(); void rend_vblank(); -void rend_start_render(TA_context *ctx = nullptr); -void rend_end_render(); +void rend_start_render(); +int rend_end_render(int tag, int cycles, int jitter); void rend_cancel_emu_wait(); bool rend_single_frame(const bool& enabled); void rend_swap_frame(u32 fb_r_sof1); @@ -20,25 +19,46 @@ void rend_start_rollback(); void rend_allow_rollback(); void rend_serialize(Serializer& ser); void rend_deserialize(Deserializer& deser); -void rend_resize_renderer(); -void rend_resize_renderer_if_needed(); /////// extern TA_context* _pvrrc; #define pvrrc (_pvrrc->rend) +struct FramebufferInfo +{ + void update() + { + fb_r_size.full = FB_R_SIZE.full; + fb_r_ctrl.full = FB_R_CTRL.full; + spg_control.full = 
SPG_CONTROL.full; + spg_status.full = SPG_STATUS.full; + fb_r_sof1 = FB_R_SOF1; + fb_r_sof2 = FB_R_SOF2; + vo_control.full = VO_CONTROL.full; + vo_border_col.full = VO_BORDER_COL.full; + } + + FB_R_SIZE_type fb_r_size; + FB_R_CTRL_type fb_r_ctrl; + SPG_CONTROL_type spg_control; + SPG_STATUS_type spg_status; + u32 fb_r_sof1; + u32 fb_r_sof2; + VO_CONTROL_type vo_control; + VO_BORDER_COL_type vo_border_col; +}; + struct Renderer { - virtual bool Init()=0; virtual ~Renderer() = default; - - virtual void Resize(int w, int h)=0; - virtual void Term()=0; + virtual bool Init() = 0; + virtual void Term() = 0; - virtual bool Process(TA_context* ctx)=0; - virtual bool Render()=0; + virtual bool Process(TA_context *ctx) = 0; + virtual bool Render() = 0; + virtual void RenderFramebuffer(const FramebufferInfo& info) = 0; virtual bool RenderLastFrame() { return false; } virtual bool Present() { return true; } diff --git a/core/hw/pvr/pvr_regs.cpp b/core/hw/pvr/pvr_regs.cpp index 1975bbb47..261b5babd 100644 --- a/core/hw/pvr/pvr_regs.cpp +++ b/core/hw/pvr/pvr_regs.cpp @@ -7,7 +7,6 @@ bool pal_needs_update=true; bool fog_needs_update=true; -bool rend_needs_resize = true; u8 pvr_regs[pvr_RegSize]; @@ -160,16 +159,6 @@ void pvr_WriteReg(u32 paddr,u32 data) { PvrReg(addr, u32) = data; CalculateSync(); - if (addr == SPG_CONTROL_addr) - rend_needs_resize = true; - } - return; - - case VO_CONTROL_addr: - if (PvrReg(addr, u32) != data) - { - PvrReg(addr, u32) = data; - rend_needs_resize = true; } return; @@ -178,10 +167,7 @@ void pvr_WriteReg(u32 paddr,u32 data) bool vclk_div_changed = (PvrReg(addr, u32) ^ data) & (1 << 23); PvrReg(addr, u32) = data; if (vclk_div_changed) - { CalculateSync(); - rend_needs_resize = true; - } } return; diff --git a/core/hw/pvr/pvr_regs.h b/core/hw/pvr/pvr_regs.h index 3061dab39..9d5896fbb 100644 --- a/core/hw/pvr/pvr_regs.h +++ b/core/hw/pvr/pvr_regs.h @@ -122,14 +122,14 @@ struct RGBAColorTemplate float blue() const { return ((T *)this)->_blue / 255.f; } 
float alpha() const { return ((T *)this)->_alpha / 255.f; } - void getRGBColor(float rgb[3]) + void getRGBColor(float rgb[3]) const { rgb[0] = red(); rgb[1] = green(); rgb[2] = blue(); } - void getRGBAColor(float rgba[4]) + void getRGBAColor(float rgba[4]) const { getRGBColor(rgba); rgba[3] = alpha(); diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index 64a51b5e3..793aaf8bb 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -56,7 +56,7 @@ void CalculateSync() } //called from sh4 context , should update pvr/ta state and everything else -int spg_line_sched(int tag, int cycl, int jit) +static int spg_line_sched(int tag, int cycl, int jit) { clc_pvr_scanline += cycl; @@ -146,22 +146,9 @@ int spg_line_sched(int tag, int cycl, int jit) double spd_cpu=spd_vbs*Frame_Cycles; spd_cpu/=1000000; //mrhz kthx double fullvbs=(spd_vbs/spd_cpu)*200; - double mv=VertexCount/ts/(spd_cpu/200); - char mv_c=' '; Last_FC=FrameCount; - if (mv>750) - { - mv/=1000; //KV - mv_c='K'; - } - if (mv>750) - { - mv/=1000; // - mv_c='M'; - } - VertexCount=0; vblk_cnt=0; const char* mode=0; @@ -184,11 +171,10 @@ int spg_line_sched(int tag, int cycl, int jit) double full_rps = spd_fps + fskip / ts; - INFO_LOG(COMMON, "%s/%c - %4.2f - %4.2f - V: %4.2f (%.2f, %s%s%4.2f) R: %4.2f+%4.2f VTX: %4.2f%c", + INFO_LOG(COMMON, "%s/%c - %4.2f - %4.2f - V: %4.2f (%.2f, %s%s%4.2f) R: %4.2f+%4.2f", VER_SHORTNAME,'n',mspdf,spd_cpu*100/200,spd_vbs, spd_vbs/full_rps,mode,res,fullvbs, - spd_fps,fskip/ts - , mv, mv_c); + spd_fps,fskip/ts); fskip=0; last_fps=os_GetSeconds(); @@ -255,28 +241,10 @@ void read_lightgun_position(int x, int y) } } -int rend_end_sch(int tag, int cycl, int jitt) -{ - if (settings.platform.isNaomi2()) - { - asic_RaiseInterruptBothCLX(holly_RENDER_DONE); - asic_RaiseInterruptBothCLX(holly_RENDER_DONE_isp); - asic_RaiseInterruptBothCLX(holly_RENDER_DONE_vd); - } - else - { - asic_RaiseInterrupt(holly_RENDER_DONE); - asic_RaiseInterrupt(holly_RENDER_DONE_isp); - 
asic_RaiseInterrupt(holly_RENDER_DONE_vd); - } - rend_end_render(); - return 0; -} - bool spg_Init() { - render_end_schid=sh4_sched_register(0,&rend_end_sch); - vblank_schid=sh4_sched_register(0,&spg_line_sched); + render_end_schid = sh4_sched_register(0, &rend_end_render); + vblank_schid = sh4_sched_register(0, &spg_line_sched); return true; } @@ -299,7 +267,7 @@ void spg_Reset(bool hard) real_times.fill(0.0); } -void SetREP(TA_context* cntx) +void scheduleRenderDone(TA_context *cntx) { if (cntx) sh4_sched_request(render_end_schid, 500000 * 3); diff --git a/core/hw/pvr/spg.h b/core/hw/pvr/spg.h index 66f9cba7c..c155bb6df 100644 --- a/core/hw/pvr/spg.h +++ b/core/hw/pvr/spg.h @@ -11,4 +11,4 @@ void spg_Deserialize(Deserializer& deser); void CalculateSync(); void read_lightgun_position(int x, int y); -void SetREP(TA_context* cntx); +void scheduleRenderDone(TA_context *cntx); diff --git a/core/hw/pvr/ta_ctx.cpp b/core/hw/pvr/ta_ctx.cpp index 545860f7b..6ec8880e6 100644 --- a/core/hw/pvr/ta_ctx.cpp +++ b/core/hw/pvr/ta_ctx.cpp @@ -5,7 +5,6 @@ #include "serialize.h" extern u32 fskip; -extern u32 FrameCount; static int RenderCount; TA_context* ta_ctx; @@ -40,8 +39,8 @@ void SetCurrentTARC(u32 addr) } } -TA_context* rqueue; -cResetEvent frame_finished; +static TA_context* rqueue; +static cResetEvent frame_finished; bool QueueRender(TA_context* ctx) { @@ -86,10 +85,6 @@ TA_context* DequeueRender() return rqueue; } -bool rend_framePending() { - return rqueue != nullptr; -} - void FinishRender(TA_context* ctx) { if (ctx != nullptr) diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index e9e8ca486..f21b66721 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -6,8 +6,6 @@ #include "stdclass.h" #include "oslib/oslib.h" -#include - class BaseTextureCacheData; struct N2LightModel; @@ -228,13 +226,14 @@ struct rend_context bool Overrun; bool isRTT; - bool isRenderFramebuffer; FB_X_CLIP_type fb_X_CLIP; FB_Y_CLIP_type fb_Y_CLIP; u32 fb_W_LINESTRIDE; u32 
fb_W_SOF1; FB_W_CTRL_type fb_W_CTRL; + u32 framebufferWidth; + u32 framebufferHeight; RGBAColor fog_clamp_min; RGBAColor fog_clamp_max; @@ -272,13 +271,13 @@ struct rend_context Overrun = false; fZ_min = 1000000.0f; fZ_max = 1.0f; - isRenderFramebuffer = false; matrices.Clear(); lightModels.Clear(); } void newRenderPass(); + // For RTT TODO merge with framebufferWidth/Height u32 getFramebufferWidth() const { u32 w = fb_X_CLIP.max + 1; if (fb_W_LINESTRIDE != 0) @@ -298,8 +297,6 @@ struct TA_context { u32 Address; - std::mutex rend_inuse; - tad_context tad; rend_context rend; @@ -352,10 +349,8 @@ struct TA_context verify(tad.End() - tad.thd_root <= TA_DATA_SIZE); tad.Clear(); nextContext = nullptr; - rend_inuse.lock(); rend.Clear(); rend.proc_end = rend.proc_start = tad.thd_root; - rend_inuse.unlock(); } ~TA_context() @@ -398,7 +393,6 @@ void FinishRender(TA_context* ctx); //must be moved to proper header void FillBGP(TA_context* ctx); -bool rend_framePending(); void SerializeTAContext(Serializer& ser); void DeserializeTAContext(Deserializer& deser); diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 844a1d3ae..e1f33266d 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -1276,7 +1276,6 @@ static void fix_texture_bleeding(const List *list) static bool ta_parse_vdrc(TA_context* ctx) { - ctx->rend_inuse.lock(); bool rv=false; verify(vd_ctx == nullptr); vd_ctx = ctx; @@ -1352,7 +1351,7 @@ static bool ta_parse_vdrc(TA_context* ctx) bool overrun = vd_ctx->rend.Overrun; if (overrun) WARN_LOG(PVR, "ERROR: TA context overrun"); - else if (config::RenderResolution > 480) + else if (config::RenderResolution > 480 && !config::EmulateFramebuffer) { fix_texture_bleeding(&vd_rc.global_param_op); fix_texture_bleeding(&vd_rc.global_param_pt); @@ -1369,7 +1368,6 @@ static bool ta_parse_vdrc(TA_context* ctx) } vd_ctx = nullptr; - ctx->rend_inuse.unlock(); ctx->rend.Overrun = overrun; @@ -1378,8 +1376,6 @@ static bool ta_parse_vdrc(TA_context* ctx) 
static bool ta_parse_naomi2(TA_context* ctx) { - ctx->rend_inuse.lock(); - for (PolyParam& pp : ctx->rend.global_param_op) { if (pp.pcw.Texture) @@ -1434,7 +1430,6 @@ static bool ta_parse_naomi2(TA_context* ctx) ctx->rend.fb_Y_CLIP.min = std::max(ctx->rend.fb_Y_CLIP.min, ymin); ctx->rend.fb_Y_CLIP.max = std::min(ctx->rend.fb_Y_CLIP.max, ymax + 31); } - ctx->rend_inuse.unlock(); return !overrun; } diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index 6654a397c..1eea3efc1 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -812,14 +812,14 @@ void BaseTextureCacheData::SetDirectXColorOrder(bool enabled) { } template -void ReadFramebuffer(PixelBuffer& pb, int& width, int& height) +void ReadFramebuffer(const FramebufferInfo& info, PixelBuffer& pb, int& width, int& height) { - width = (FB_R_SIZE.fb_x_size + 1) << 1; // in 16-bit words - height = FB_R_SIZE.fb_y_size + 1; - int modulus = (FB_R_SIZE.fb_modulus - 1) << 1; + width = (info.fb_r_size.fb_x_size + 1) * 2; // in 16-bit words + height = info.fb_r_size.fb_y_size + 1; + int modulus = (info.fb_r_size.fb_modulus - 1) * 2; int bpp; - switch (FB_R_CTRL.fb_depth) + switch (info.fb_r_ctrl.fb_depth) { case fbde_0555: case fbde_565: @@ -841,10 +841,10 @@ void ReadFramebuffer(PixelBuffer& pb, int& width, int& height) break; } - u32 addr = FB_R_SOF1; - if (SPG_CONTROL.interlace) + u32 addr = info.fb_r_sof1; + if (info.spg_control.interlace) { - if (width == modulus && FB_R_SOF2 == FB_R_SOF1 + width * bpp) + if (width == modulus && info.fb_r_sof2 == info.fb_r_sof1 + width * bpp) { // Typical case alternating even and odd lines -> take the whole buffer at once modulus = 0; @@ -852,14 +852,15 @@ void ReadFramebuffer(PixelBuffer& pb, int& width, int& height) } else { - addr = SPG_STATUS.fieldnum ? FB_R_SOF2 : FB_R_SOF1; + addr = info.spg_status.fieldnum ? 
info.fb_r_sof2 : info.fb_r_sof1; } } pb.init(width, height); u32 *dst = (u32 *)pb.data(); + const u32 fb_concat = info.fb_r_ctrl.fb_concat; - switch (FB_R_CTRL.fb_depth) + switch (info.fb_r_ctrl.fb_depth) { case fbde_0555: // 555 RGB for (int y = 0; y < height; y++) @@ -868,9 +869,9 @@ void ReadFramebuffer(PixelBuffer& pb, int& width, int& height) { u16 src = pvr_read32p(addr); *dst++ = Packer::pack( - (((src >> 10) & 0x1F) << 3) + FB_R_CTRL.fb_concat, - (((src >> 5) & 0x1F) << 3) + FB_R_CTRL.fb_concat, - (((src >> 0) & 0x1F) << 3) + FB_R_CTRL.fb_concat, + (((src >> 10) & 0x1F) << 3) | fb_concat, + (((src >> 5) & 0x1F) << 3) | fb_concat, + (((src >> 0) & 0x1F) << 3) | fb_concat, 0xff); addr += bpp; } @@ -885,9 +886,9 @@ void ReadFramebuffer(PixelBuffer& pb, int& width, int& height) { u16 src = pvr_read32p(addr); *dst++ = Packer::pack( - (((src >> 11) & 0x1F) << 3) + FB_R_CTRL.fb_concat, - (((src >> 5) & 0x3F) << 2) + (FB_R_CTRL.fb_concat & 3), - (((src >> 0) & 0x1F) << 3) + FB_R_CTRL.fb_concat, + (((src >> 11) & 0x1F) << 3) | fb_concat, + (((src >> 5) & 0x3F) << 2) | (fb_concat & 3), + (((src >> 0) & 0x1F) << 3) | fb_concat, 0xFF); addr += bpp; } @@ -933,11 +934,11 @@ void ReadFramebuffer(PixelBuffer& pb, int& width, int& height) break; } } -template void ReadFramebuffer(PixelBuffer& pb, int& width, int& height); -template void ReadFramebuffer(PixelBuffer& pb, int& width, int& height); +template void ReadFramebuffer(const FramebufferInfo& info, PixelBuffer& pb, int& width, int& height); +template void ReadFramebuffer(const FramebufferInfo& info, PixelBuffer& pb, int& width, int& height); template -void WriteTextureToVRam(u32 width, u32 height, u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride) +void WriteTextureToVRam(u32 width, u32 height, const u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride) { u32 padding = linestride; if (padding / 2 > width) @@ -948,7 +949,7 @@ void WriteTextureToVRam(u32 width, u32 height, u8 *data, u16 *dst, 
FB_W_CTRL_typ const u16 kval_bit = (fb_w_ctrl.fb_kval & 0x80) << 8; const u8 fb_alpha_threshold = fb_w_ctrl.fb_alpha_threshold; - u8 *p = data; + const u8 *p = data; for (u32 l = 0; l < height; l++) { switch(fb_w_ctrl.fb_packmode) @@ -981,8 +982,139 @@ void WriteTextureToVRam(u32 width, u32 height, u8 *data, u16 *dst, FB_W_CTRL_typ dst += padding; } } -template void WriteTextureToVRam<0, 1, 2, 3>(u32 width, u32 height, u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride); -template void WriteTextureToVRam<2, 1, 0, 3>(u32 width, u32 height, u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride); +template void WriteTextureToVRam<0, 1, 2, 3>(u32 width, u32 height, const u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride); +template void WriteTextureToVRam<2, 1, 0, 3>(u32 width, u32 height, const u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride); + +template +static inline u8 roundColor(u8 in) +{ + u8 out = in >> (8 - bits); + if (out != 0xffu >> (8 - bits)) + out += (in >> (8 - bits - 1)) & 1; + return out; +} + +template +void WriteFramebuffer(u32 width, u32 height, const u8 *data, u32 dstAddr, FB_W_CTRL_type fb_w_ctrl, u32 linestride, FB_X_CLIP_type xclip, FB_Y_CLIP_type yclip) +{ + int bpp; + switch (fb_w_ctrl.fb_packmode) + { + case 0: + case 1: + case 2: + case 3: + bpp = 2; + break; + case 4: + bpp = 3; + break; + case 5: + case 6: + bpp = 4; + break; + default: + die("Invalid framebuffer format"); + bpp = 4; + break; + } + + u32 padding = linestride; + if (padding > width * bpp) + padding = padding - width * bpp; + else + padding = 0; + + const u16 kval_bit = (fb_w_ctrl.fb_kval & 0x80) << 8; + const u8 fb_alpha_threshold = fb_w_ctrl.fb_alpha_threshold; + + const u8 *p = data + 4 * yclip.min * width; + dstAddr += bpp * yclip.min * (width + padding / bpp); + + for (u32 l = yclip.min; l < height && l <= yclip.max; l++) + { + p += 4 * xclip.min; + dstAddr += bpp * xclip.min; + + switch(fb_w_ctrl.fb_packmode) + { + case 0: // 
0555 KRGB 16 bit (default) Bit 15 is the value of fb_kval[7]. + for (u32 c = xclip.min; c < width && c <= xclip.max; c++) { + pvr_write32p(dstAddr, (u16)((roundColor<5>(p[Red]) << 10) + | (roundColor<5>(p[Green]) << 5) + | roundColor<5>(p[Blue]) + | kval_bit)); + p += 4; + dstAddr += bpp; + } + break; + case 1: // 565 RGB 16 bit + for (u32 c = xclip.min; c < width && c <= xclip.max; c++) { + pvr_write32p(dstAddr, (u16)((roundColor<5>(p[Red]) << 11) + | (roundColor<6>(p[Green]) << 5) + | roundColor<5>(p[Blue]))); + p += 4; + dstAddr += bpp; + } + break; + case 2: // 4444 ARGB 16 bit + for (u32 c = xclip.min; c < width && c <= xclip.max; c++) { + pvr_write32p(dstAddr, (u16)((roundColor<4>(p[Red]) << 8) + | (roundColor<4>(p[Green]) << 4) + | roundColor<4>(p[Blue]) + | (roundColor<4>(p[Alpha]) << 12))); + p += 4; + dstAddr += bpp; + } + break; + case 3: // 1555 ARGB 16 bit The alpha value is determined by comparison with the value of fb_alpha_threshold. + for (u32 c = xclip.min; c < width && c <= xclip.max; c++) { + pvr_write32p(dstAddr, (u16)((roundColor<5>(p[Red]) << 10) + | (roundColor<5>(p[Green]) << 5) + | roundColor<5>(p[Blue]) + | (p[Alpha] > fb_alpha_threshold ? 0x8000 : 0))); + p += 4; + dstAddr += bpp; + } + break; + case 4: // 888 RGB 24 bit packed + for (u32 c = xclip.min; c < width - 3u && c <= xclip.max - 3u; c += 4) { + pvr_write32p(dstAddr, (u32)((p[Blue + 4] << 24) | (p[Red] << 16) | (p[Green] << 8) | p[Blue])); + p += 4; + dstAddr += 4; + pvr_write32p(dstAddr, (u32)((p[Green + 4] << 24) | (p[Blue + 4] << 16) | (p[Red] << 8) | p[Green])); + p += 4; + dstAddr += 4; + pvr_write32p(dstAddr, (u32)((p[Red + 4] << 24) | (p[Green + 4] << 16) | (p[Blue + 4] << 8) | p[Red])); + p += 8; + dstAddr += 4; + } + break; + case 5: // 0888 KRGB 32 bit (K is the value of fk_kval.) 
+ for (u32 c = xclip.min; c < width && c <= xclip.max; c++) { + pvr_write32p(dstAddr, (u32)((p[Red] << 16) | (p[Green] << 8) | p[Blue] | (fb_w_ctrl.fb_kval << 24))); + p += 4; + dstAddr += bpp; + } + break; + case 6: // 8888 ARGB 32 bit + for (u32 c = xclip.min; c < width && c <= xclip.max; c++) { + pvr_write32p(dstAddr, (u32)((p[Red] << 16) | (p[Green] << 8) | p[Blue] | (p[Alpha] << 24))); + p += 4; + dstAddr += bpp; + } + break; + default: + break; + } + dstAddr += padding + (width - xclip.max - 1) * bpp; + p += (width - xclip.max - 1) * 4; + } +} +template void WriteFramebuffer<0, 1, 2, 3>(u32 width, u32 height, const u8 *data, u32 dstAddr, FB_W_CTRL_type fb_w_ctrl, + u32 linestride, FB_X_CLIP_type xclip, FB_Y_CLIP_type yclip); +template void WriteFramebuffer<2, 1, 0, 3>(u32 width, u32 height, const u8 *data, u32 dstAddr, FB_W_CTRL_type fb_w_ctrl, + u32 linestride, FB_X_CLIP_type xclip, FB_Y_CLIP_type yclip); void BaseTextureCacheData::invalidate() { diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index 28145ca5b..e142cdb20 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -798,9 +798,15 @@ protected: }; template -void ReadFramebuffer(PixelBuffer& pb, int& width, int& height); +void ReadFramebuffer(const FramebufferInfo& info, PixelBuffer& pb, int& width, int& height); + +// width and height in pixels. linestride in bytes template -void WriteTextureToVRam(u32 width, u32 height, u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride); +void WriteFramebuffer(u32 width, u32 height, const u8 *data, u32 dstAddr, FB_W_CTRL_type fb_w_ctrl, u32 linestride, FB_X_CLIP_type xclip, FB_Y_CLIP_type yclip); + +// width and height in pixels. 
linestride in bytes +template +void WriteTextureToVRam(u32 width, u32 height, const u8 *data, u16 *dst, FB_W_CTRL_type fb_w_ctrl, u32 linestride); void getRenderToTextureDimensions(u32& width, u32& height, u32& pow2Width, u32& pow2Height); static inline void MakeFogTexture(u8 *tex_data) diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp index bb9da4990..99b21cd10 100644 --- a/core/rend/dx11/dx11_renderer.cpp +++ b/core/rend/dx11/dx11_renderer.cpp @@ -171,6 +171,9 @@ void DX11Renderer::Term() fbTex.reset(); fbTextureView.reset(); fbRenderTarget.reset(); + fbScaledRenderTarget.reset(); + fbScaledTextureView.reset(); + fbScaledTexture.reset(); quad.reset(); deviceContext.reset(); device.reset(); @@ -233,7 +236,7 @@ void DX11Renderer::createTexAndRenderTarget(ComPtr& texture, Co deviceContext->ClearRenderTargetView(renderTarget, black); } -void DX11Renderer::Resize(int w, int h) +void DX11Renderer::resize(int w, int h) { if (width == (u32)w && height == (u32)h) return; @@ -304,15 +307,7 @@ bool DX11Renderer::Process(TA_context* ctx) texCache.Clear(); texCache.Cleanup(); - if (ctx->rend.isRenderFramebuffer) - { - readDCFramebuffer(); - return true; - } - else - { - return ta_parse(ctx); - } + return ta_parse(ctx); } void DX11Renderer::configVertexShader() @@ -413,6 +408,8 @@ void DX11Renderer::setupPixelShaderConstants() bool DX11Renderer::Render() { + resize(pvrrc.framebufferWidth, pvrrc.framebufferHeight); + // make sure to unbind the framebuffer view before setting it as render target ID3D11ShaderResourceView *nullView = nullptr; deviceContext->PSSetShaderResources(0, 1, &nullView); @@ -426,27 +423,24 @@ bool DX11Renderer::Render() deviceContext->IASetInputLayout(mainInputLayout); - if (!pvrrc.isRenderFramebuffer) - { - n2Helper.resetCache(); - uploadGeometryBuffers(); + n2Helper.resetCache(); + uploadGeometryBuffers(); - updateFogTexture(); - updatePaletteTexture(); + updateFogTexture(); + updatePaletteTexture(); - 
setupPixelShaderConstants(); + setupPixelShaderConstants(); - drawStrips(); - } - else - { - renderDCFramebuffer(); - } + drawStrips(); if (is_rtt) { readRttRenderTarget(pvrrc.fb_W_SOF1 & VRAM_MASK); } + else if (config::EmulateFramebuffer) + { + writeFramebufferToVRAM(); + } else { #ifndef LIBRETRO @@ -467,24 +461,6 @@ bool DX11Renderer::Render() return !is_rtt; } -void DX11Renderer::renderDCFramebuffer() -{ - float colors[4]; - VO_BORDER_COL.getRGBColor(colors); - colors[3] = 1.f; - deviceContext->ClearRenderTargetView(fbRenderTarget, colors); - D3D11_VIEWPORT vp{}; - vp.Width = (FLOAT)width; - vp.Height = (FLOAT)height; - vp.MinDepth = 0.f; - vp.MaxDepth = 1.f; - deviceContext->RSSetViewports(1, &vp); - deviceContext->OMSetBlendState(blendStates.getState(false), nullptr, 0xffffffff); - - float bar = (width - height * 640.f / 480.f) / 2.f; - quad->draw(dcfbTextureView, samplers->getSampler(true), nullptr, bar / width * 2.f - 1.f, -1.f, (width - bar * 2.f) / width * 2.f, 2.f); -} - void DX11Renderer::renderFramebuffer() { #ifndef LIBRETRO @@ -899,18 +875,30 @@ bool DX11Renderer::RenderLastFrame() if (!frameRenderedOnce) return false; renderFramebuffer(); - return false; + return true; } -void DX11Renderer::readDCFramebuffer() +void DX11Renderer::RenderFramebuffer(const FramebufferInfo& info) { - if (FB_R_SIZE.fb_x_size == 0 || FB_R_SIZE.fb_y_size == 0) - return; - PixelBuffer pb; int width; int height; - ReadFramebuffer(pb, width, height); + + if (info.fb_r_ctrl.fb_enable == 0 || info.vo_control.blank_video == 1) + { + // Video output disabled + width = height = 1; + pb.init(width, height, false); + u8 *p = (u8 *)pb.data(0, 0); + p[0] = info.vo_border_col._blue; + p[1] = info.vo_border_col._green; + p[2] = info.vo_border_col._red; + p[3] = 255; + } + else + { + ReadFramebuffer(info, pb, width, height); + } if (dcfbTexture) { @@ -947,12 +935,43 @@ void DX11Renderer::readDCFramebuffer() } deviceContext->UpdateSubresource(dcfbTexture, 0, nullptr, pb.data(), width * 
sizeof(u32), width * sizeof(u32) * height); + + deviceContext->OMSetRenderTargets(1, &fbRenderTarget.get(), depthTexView); + float colors[4]; + info.vo_border_col.getRGBColor(colors); + colors[3] = 1.f; + deviceContext->ClearRenderTargetView(fbRenderTarget, colors); + D3D11_VIEWPORT vp{}; + vp.Width = (FLOAT)this->width; + vp.Height = (FLOAT)this->height; + vp.MinDepth = 0.f; + vp.MaxDepth = 1.f; + deviceContext->RSSetViewports(1, &vp); + deviceContext->OMSetBlendState(blendStates.getState(false), nullptr, 0xffffffff); + + float bar = (this->width - this->height * 640.f / 480.f) / 2.f; + quad->draw(dcfbTextureView, samplers->getSampler(true), nullptr, bar / this->width * 2.f - 1.f, -1.f, (this->width - bar * 2.f) / this->width * 2.f, 2.f); + +#ifndef LIBRETRO + deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr); + renderFramebuffer(); + DrawOSD(false); + theDX11Context.setFrameRendered(); +#else + theDX11Context.drawOverlay(this->width, this->height); + ID3D11RenderTargetView *nullView = nullptr; + deviceContext->OMSetRenderTargets(1, &nullView, nullptr); + deviceContext->PSSetShaderResources(0, 1, &fbTextureView.get()); +#endif + frameRendered = true; + frameRenderedOnce = true; } void DX11Renderer::setBaseScissor() { - bool wide_screen_on = !pvrrc.isRTT && config::Widescreen && !matrices.IsClipped() && !config::Rotate90; - if (!wide_screen_on && !pvrrc.isRenderFramebuffer) + bool wide_screen_on = !pvrrc.isRTT && config::Widescreen && !matrices.IsClipped() + && !config::Rotate90 && !config::EmulateFramebuffer; + if (!wide_screen_on) { float fWidth; float fHeight; @@ -1163,6 +1182,104 @@ void DX11Renderer::DrawOSD(bool clear_screen) #endif } +void DX11Renderer::writeFramebufferToVRAM() +{ + u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + + float xscale = SCALER_CTL.hscale == 1 ? 
0.5f : 1.f; + float yscale = 1024.f / SCALER_CTL.vscalefactor; + if (std::abs(yscale - 1.f) < 0.01) + yscale = 1.f; + + ComPtr fbTexture = fbTex; + + if (xscale != 1.f || yscale != 1.f) + { + u32 scaledW = width * xscale; + u32 scaledH = height * yscale; + + if (fbScaledTexture) + { + D3D11_TEXTURE2D_DESC desc; + fbScaledTexture->GetDesc(&desc); + if (desc.Width != scaledW || desc.Height != scaledH) + { + fbScaledTexture.reset(); + fbScaledTextureView.reset(); + fbScaledRenderTarget.reset(); + } + } + if (!fbScaledTexture) + { + createTexAndRenderTarget(fbScaledTexture, fbScaledRenderTarget, scaledW, scaledH); + + D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc{}; + viewDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(fbScaledTexture, &viewDesc, &fbScaledTextureView.get()); + } + D3D11_VIEWPORT vp{}; + vp.Width = (FLOAT)width; + vp.Height = (FLOAT)height; + vp.MinDepth = 0.f; + vp.MaxDepth = 1.f; + deviceContext->RSSetViewports(1, &vp); + deviceContext->OMSetBlendState(blendStates.getState(false), nullptr, 0xffffffff); + quad->draw(fbTextureView, samplers->getSampler(true)); + + width = scaledW; + height = scaledH; + fbTexture = fbScaledTexture; + } + u32 texAddress = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect + u32 linestride = pvrrc.fb_W_LINESTRIDE * 8; + + D3D11_TEXTURE2D_DESC desc; + fbTexture->GetDesc(&desc); + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + + ComPtr stagingTex; + HRESULT hr = device->CreateTexture2D(&desc, nullptr, &stagingTex.get()); + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "Staging RTT texture creation failed"); + return; + } + deviceContext->CopyResource(stagingTex, fbTexture); + + PixelBuffer tmp_buf; + tmp_buf.init(width, height); + u8 *p = (u8 *)tmp_buf.data(); + + 
D3D11_MAPPED_SUBRESOURCE mappedSubres; + hr = deviceContext->Map(stagingTex, 0, D3D11_MAP_READ, 0, &mappedSubres); + if (FAILED(hr)) + { + WARN_LOG(RENDERER, "Failed to map staging RTT texture"); + return; + } + if (width * sizeof(u32) == mappedSubres.RowPitch) + memcpy(p, mappedSubres.pData, width * height * sizeof(u32)); + else + { + u8 *src = (u8 *)mappedSubres.pData; + for (u32 y = 0; y < height; y++) + { + memcpy(p, src, width * sizeof(u32)); + p += width * sizeof(u32); + src += mappedSubres.RowPitch; + } + } + deviceContext->Unmap(stagingTex, 0); + + WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); +} + Renderer *rend_DirectX11() { return new DX11Renderer(); diff --git a/core/rend/dx11/dx11_renderer.h b/core/rend/dx11/dx11_renderer.h index 554287176..63190d131 100644 --- a/core/rend/dx11/dx11_renderer.h +++ b/core/rend/dx11/dx11_renderer.h @@ -35,10 +35,10 @@ struct DX11Renderer : public Renderer { bool Init() override; - void Resize(int w, int h) override; void Term() override; bool Process(TA_context* ctx) override; bool Render() override; + void RenderFramebuffer(const FramebufferInfo& info) override; bool Present() override { @@ -83,6 +83,7 @@ protected: float trilinearAlpha; }; + virtual void resize(int w, int h); bool ensureBufferSize(ComPtr& buffer, D3D11_BIND_FLAG bind, u32& currentSize, u32 minSize); void createDepthTexAndView(ComPtr& texture, ComPtr& view, int width, int height, DXGI_FORMAT format = DXGI_FORMAT_D24_UNORM_S8_UINT, UINT bindFlags = 0); void createTexAndRenderTarget(ComPtr& texture, ComPtr& renderTarget, int width, int height); @@ -91,11 +92,11 @@ protected: void setupPixelShaderConstants(); void updateFogTexture(); void updatePaletteTexture(); - void renderDCFramebuffer(); void readRttRenderTarget(u32 texAddress); void renderFramebuffer(); void setCullMode(int mode); virtual void setRTTSize(int width, int height) {} + void 
writeFramebufferToVRAM(); ComPtr device; ComPtr deviceContext; @@ -153,6 +154,9 @@ private: ComPtr rttDepthTexView; ComPtr whiteTexture; ComPtr whiteTextureView; + ComPtr fbScaledTexture; + ComPtr fbScaledTextureView; + ComPtr fbScaledRenderTarget; ComPtr rasterCullNone, rasterCullFront, rasterCullBack; diff --git a/core/rend/dx11/dx11context.cpp b/core/rend/dx11/dx11context.cpp index c7b6e3592..526d4c9eb 100644 --- a/core/rend/dx11/dx11context.cpp +++ b/core/rend/dx11/dx11context.cpp @@ -302,7 +302,6 @@ void DX11Context::handleDeviceLost() term(); init(true); rend_init_renderer(); - rend_resize_renderer(); } #endif // !LIBRETRO diff --git a/core/rend/dx11/oit/dx11_oitrenderer.cpp b/core/rend/dx11/oit/dx11_oitrenderer.cpp index 5b17f15d0..5944562ad 100644 --- a/core/rend/dx11/oit/dx11_oitrenderer.cpp +++ b/core/rend/dx11/oit/dx11_oitrenderer.cpp @@ -131,8 +131,8 @@ struct DX11OITRenderer : public DX11Renderer } } - void Resize(int w, int h) override { - DX11Renderer::Resize(w, h); + void resize(int w, int h) override { + DX11Renderer::resize(w, h); checkMaxSize(w, h); } @@ -634,6 +634,7 @@ struct DX11OITRenderer : public DX11Renderer bool Render() override { + resize(pvrrc.framebufferWidth, pvrrc.framebufferHeight); if (pixelBufferSize != config::PixelBufferSize) { buffers.init(device, deviceContext); @@ -651,27 +652,24 @@ struct DX11OITRenderer : public DX11Renderer deviceContext->IASetInputLayout(mainInputLayout); - if (!pvrrc.isRenderFramebuffer) - { - n2Helper.resetCache(); - uploadGeometryBuffers(); + n2Helper.resetCache(); + uploadGeometryBuffers(); - updateFogTexture(); - updatePaletteTexture(); + updateFogTexture(); + updatePaletteTexture(); - setupPixelShaderConstants(); + setupPixelShaderConstants(); - drawStrips(); - } - else - { - renderDCFramebuffer(); - } + drawStrips(); if (is_rtt) { readRttRenderTarget(pvrrc.fb_W_SOF1 & VRAM_MASK); } + else if (config::EmulateFramebuffer) + { + writeFramebufferToVRAM(); + } else { #ifndef LIBRETRO diff --git 
a/core/rend/dx9/d3d_renderer.cpp b/core/rend/dx9/d3d_renderer.cpp index 070408331..e4b58bace 100644 --- a/core/rend/dx9/d3d_renderer.cpp +++ b/core/rend/dx9/d3d_renderer.cpp @@ -164,6 +164,8 @@ void D3DRenderer::preReset() rttTexture.reset(); dcfbSurface.reset(); dcfbTexture.reset(); + fbScaledTexture.reset(); + fbScaledSurface.reset(); fogTexture.reset(); paletteTexture.reset(); modVolVtxDecl.reset(); @@ -191,7 +193,7 @@ void D3DRenderer::postReset() u32 h = height; width = 0; height = 0; - Resize(w, h); + resize(w, h); verify(ensureVertexBufferSize(vertexBuffer, vertexBufferSize, 4 * 1024 * 1024)); verify(ensureIndexBufferSize(indexBuffer, indexBufferSize, 120 * 1024 * 4)); verifyWin(device->CreateVertexDeclaration(MainVtxElement, &mainVtxDecl.get())); @@ -233,19 +235,37 @@ BaseTextureCacheData *D3DRenderer::GetTexture(TSP tsp, TCW tcw) return tf; } -void D3DRenderer::readDCFramebuffer() +void D3DRenderer::RenderFramebuffer(const FramebufferInfo& info) { - if (FB_R_SIZE.fb_x_size == 0 || FB_R_SIZE.fb_y_size == 0) - return; - PixelBuffer pb; int width; int height; - ReadFramebuffer(pb, width, height); + if (info.fb_r_ctrl.fb_enable == 0 || info.vo_control.blank_video == 1) + { + // Video output disabled + width = height = 1; + pb.init(width, height, false); + u8 *p = (u8 *)pb.data(0, 0); + p[0] = info.vo_border_col._blue; + p[1] = info.vo_border_col._green; + p[2] = info.vo_border_col._red; + p[3] = 255; + } + else + { + ReadFramebuffer(info, pb, width, height); + } + + if (dcfbTexture) + { + D3DSURFACE_DESC desc; + dcfbTexture->GetLevelDesc(0, &desc); + if ((int)desc.Width != width || (int)desc.Height != height) + dcfbTexture.reset(); + } if (!dcfbTexture) { - // FIXME dimension can change device->CreateTexture(width, height, 1, D3DUSAGE_DYNAMIC, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &dcfbTexture.get(), 0); dcfbTexture->GetSurfaceLevel(0, &dcfbSurface.get()); } @@ -261,14 +281,17 @@ void D3DRenderer::readDCFramebuffer() memcpy(dst + y * rect.Pitch, pb.data() + y * 
width, width * sizeof(u32)); } dcfbTexture->UnlockRect(0); -} -void D3DRenderer::renderDCFramebuffer() -{ - device->ColorFill(framebufferSurface, 0, D3DCOLOR_ARGB(255, VO_BORDER_COL._red, VO_BORDER_COL._green, VO_BORDER_COL._blue)); - u32 bar = (width - height * 640 / 480) / 2; - RECT rd{ (LONG)bar, 0, (LONG)(width - bar), (LONG)height }; + device->ColorFill(framebufferSurface, 0, D3DCOLOR_ARGB(255, info.vo_border_col._red, info.vo_border_col._green, info.vo_border_col._blue)); + u32 bar = (this->width - this->height * 640 / 480) / 2; + RECT rd{ (LONG)bar, 0, (LONG)(this->width - bar), (LONG)this->height }; device->StretchRect(dcfbSurface, nullptr, framebufferSurface, &rd, D3DTEXF_LINEAR); + + displayFramebuffer(); + DrawOSD(false); + frameRendered = true; + frameRenderedOnce = true; + theDXContext.setFrameRendered(); } bool D3DRenderer::Process(TA_context* ctx) @@ -280,15 +303,7 @@ bool D3DRenderer::Process(TA_context* ctx) texCache.Clear(); texCache.Cleanup(); - if (ctx->rend.isRenderFramebuffer) - { - readDCFramebuffer(); - return true; - } - else - { - return ta_parse(ctx); - } + return ta_parse(ctx); } inline void D3DRenderer::setTexMode(D3DSAMPLERSTATETYPE state, u32 clamp, u32 mirror) @@ -789,7 +804,8 @@ void D3DRenderer::drawStrips() void D3DRenderer::setBaseScissor() { - bool wide_screen_on = !pvrrc.isRTT && config::Widescreen && !matrices.IsClipped() && !config::Rotate90; + bool wide_screen_on = !pvrrc.isRTT && config::Widescreen && !matrices.IsClipped() + && !config::Rotate90 && !config::EmulateFramebuffer; if (!wide_screen_on) { float fWidth; @@ -939,6 +955,8 @@ bool D3DRenderer::Render() { if (resetting) return false; + resize(pvrrc.framebufferWidth, pvrrc.framebufferHeight); + bool is_rtt = pvrrc.isRTT; backbuffer.reset(); @@ -970,100 +988,93 @@ bool D3DRenderer::Render() devCache.SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE); device->Clear(0, NULL, D3DCLEAR_STENCIL | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); - if (!pvrrc.isRenderFramebuffer) + 
setFirstProvokingVertex(pvrrc); + // Set clip planes at (-1,0) (1,0) (0,-1) and (0,1). + // Helps avoiding interpolation errors on large triangles. + devCache.SetRenderState(D3DRS_CLIPPLANEENABLE, 15); + float v[4] {}; + v[3] = 1.f; + // left + v[0] = 1.f; + device->SetClipPlane(0, v); + // right + v[0] = -1.f; + device->SetClipPlane(1, v); + // top + v[0] = 0.f; + v[1] = 1.f; + device->SetClipPlane(2, v); + // bottom + v[1] = -1.f; + device->SetClipPlane(3, v); + + verify(ensureVertexBufferSize(vertexBuffer, vertexBufferSize, pvrrc.verts.bytes())); + void *ptr; + verifyWin(vertexBuffer->Lock(0, pvrrc.verts.bytes(), &ptr, D3DLOCK_DISCARD)); + memcpy(ptr, pvrrc.verts.head(), pvrrc.verts.bytes()); + vertexBuffer->Unlock(); + verify(ensureIndexBufferSize(indexBuffer, indexBufferSize, pvrrc.idx.bytes())); + verifyWin(indexBuffer->Lock(0, pvrrc.idx.bytes(), &ptr, D3DLOCK_DISCARD)); + memcpy(ptr, pvrrc.idx.head(), pvrrc.idx.bytes()); + indexBuffer->Unlock(); + + if (config::ModifierVolumes && pvrrc.modtrig.used()) { - setFirstProvokingVertex(pvrrc); - // Set clip planes at (-1,0) (1,0) (0,-1) and (0,1). - // Helps avoiding interpolation errors on large triangles. 
- devCache.SetRenderState(D3DRS_CLIPPLANEENABLE, 15); - float v[4] {}; - v[3] = 1.f; - // left - v[0] = 1.f; - device->SetClipPlane(0, v); - // right - v[0] = -1.f; - device->SetClipPlane(1, v); - // top - v[0] = 0.f; - v[1] = 1.f; - device->SetClipPlane(2, v); - // bottom - v[1] = -1.f; - device->SetClipPlane(3, v); - - verify(ensureVertexBufferSize(vertexBuffer, vertexBufferSize, pvrrc.verts.bytes())); - void *ptr; - verifyWin(vertexBuffer->Lock(0, pvrrc.verts.bytes(), &ptr, D3DLOCK_DISCARD)); - memcpy(ptr, pvrrc.verts.head(), pvrrc.verts.bytes()); - vertexBuffer->Unlock(); - verify(ensureIndexBufferSize(indexBuffer, indexBufferSize, pvrrc.idx.bytes())); - verifyWin(indexBuffer->Lock(0, pvrrc.idx.bytes(), &ptr, D3DLOCK_DISCARD)); - memcpy(ptr, pvrrc.idx.head(), pvrrc.idx.bytes()); - indexBuffer->Unlock(); - - if (config::ModifierVolumes && pvrrc.modtrig.used()) - { - verify(ensureVertexBufferSize(modvolBuffer, modvolBufferSize, pvrrc.modtrig.bytes())); - verifyWin(modvolBuffer->Lock(0, pvrrc.modtrig.bytes(), &ptr, D3DLOCK_DISCARD)); - memcpy(ptr, pvrrc.modtrig.head(), pvrrc.modtrig.bytes()); - modvolBuffer->Unlock(); - } - - updateFogTexture(); - updatePaletteTexture(); - - devCache.SetVertexShader(shaders.getVertexShader(true)); - - // VERT and RAM fog color constants - float ps_FOG_COL_VERT[4]; - float ps_FOG_COL_RAM[4]; - FOG_COL_VERT.getRGBColor(ps_FOG_COL_VERT); - FOG_COL_RAM.getRGBColor(ps_FOG_COL_RAM); - device->SetPixelShaderConstantF(1, ps_FOG_COL_VERT, 1); - device->SetPixelShaderConstantF(2, ps_FOG_COL_RAM, 1); - - // Fog density and scale constants - float fog_den_float = FOG_DENSITY.get() * config::ExtraDepthScale; - float fogDensityAndScale[4]= { fog_den_float, 1.f - FPU_SHAD_SCALE.scale_factor / 256.f, 0, 1 }; - device->SetPixelShaderConstantF(3, fogDensityAndScale, 1); - - // Color clamping - float color_clamp[4]; - pvrrc.fog_clamp_min.getRGBAColor(color_clamp); - device->SetPixelShaderConstantF(6, color_clamp, 1); - 
pvrrc.fog_clamp_max.getRGBAColor(color_clamp); - device->SetPixelShaderConstantF(7, color_clamp, 1); - - devCache.SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE); - - device->SetVertexDeclaration(mainVtxDecl); - device->SetStreamSource(0, vertexBuffer, 0, sizeof(Vertex)); - device->SetIndices(indexBuffer); - - devCache.SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); - devCache.SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); - - devCache.SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE); - devCache.SetRenderState(D3DRS_ALPHATESTENABLE, FALSE); - devCache.SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID); - devCache.SetRenderState(D3DRS_CLIPPING, FALSE); - - setBaseScissor(); - - if (!SUCCEEDED(device->BeginScene())) - { - WARN_LOG(RENDERER, "Render: BeginScene failed!"); - return false; - } - drawStrips(); - device->EndScene(); - devCache.SetRenderState(D3DRS_CLIPPLANEENABLE, 0); + verify(ensureVertexBufferSize(modvolBuffer, modvolBufferSize, pvrrc.modtrig.bytes())); + verifyWin(modvolBuffer->Lock(0, pvrrc.modtrig.bytes(), &ptr, D3DLOCK_DISCARD)); + memcpy(ptr, pvrrc.modtrig.head(), pvrrc.modtrig.bytes()); + modvolBuffer->Unlock(); } - else + + updateFogTexture(); + updatePaletteTexture(); + + devCache.SetVertexShader(shaders.getVertexShader(true)); + + // VERT and RAM fog color constants + float ps_FOG_COL_VERT[4]; + float ps_FOG_COL_RAM[4]; + FOG_COL_VERT.getRGBColor(ps_FOG_COL_VERT); + FOG_COL_RAM.getRGBColor(ps_FOG_COL_RAM); + device->SetPixelShaderConstantF(1, ps_FOG_COL_VERT, 1); + device->SetPixelShaderConstantF(2, ps_FOG_COL_RAM, 1); + + // Fog density and scale constants + float fog_den_float = FOG_DENSITY.get() * config::ExtraDepthScale; + float fogDensityAndScale[4]= { fog_den_float, 1.f - FPU_SHAD_SCALE.scale_factor / 256.f, 0, 1 }; + device->SetPixelShaderConstantF(3, fogDensityAndScale, 1); + + // Color clamping + float color_clamp[4]; + pvrrc.fog_clamp_min.getRGBAColor(color_clamp); + device->SetPixelShaderConstantF(6, color_clamp, 1); + 
pvrrc.fog_clamp_max.getRGBAColor(color_clamp); + device->SetPixelShaderConstantF(7, color_clamp, 1); + + devCache.SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE); + + device->SetVertexDeclaration(mainVtxDecl); + device->SetStreamSource(0, vertexBuffer, 0, sizeof(Vertex)); + device->SetIndices(indexBuffer); + + devCache.SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); + devCache.SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + + devCache.SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE); + devCache.SetRenderState(D3DRS_ALPHATESTENABLE, FALSE); + devCache.SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID); + devCache.SetRenderState(D3DRS_CLIPPING, FALSE); + + setBaseScissor(); + + if (!SUCCEEDED(device->BeginScene())) { - renderDCFramebuffer(); + WARN_LOG(RENDERER, "Render: BeginScene failed!"); + return false; } + drawStrips(); + device->EndScene(); + devCache.SetRenderState(D3DRS_CLIPPLANEENABLE, 0); verifyWin(device->SetRenderTarget(0, backbuffer)); @@ -1071,9 +1082,13 @@ bool D3DRenderer::Render() { readRttRenderTarget(texAddress); } + else if (config::EmulateFramebuffer) + { + writeFramebufferToVRAM(); + } else { - renderFramebuffer(); + displayFramebuffer(); DrawOSD(false); frameRendered = true; frameRenderedOnce = true; @@ -1083,7 +1098,7 @@ bool D3DRenderer::Render() return !is_rtt; } -void D3DRenderer::Resize(int w, int h) +void D3DRenderer::resize(int w, int h) { if (width == (u32)w && height == (u32)h) return; @@ -1099,7 +1114,7 @@ void D3DRenderer::Resize(int w, int h) frameRenderedOnce = false; } -void D3DRenderer::renderFramebuffer() +void D3DRenderer::displayFramebuffer() { devCache.SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE); device->ColorFill(backbuffer, 0, D3DCOLOR_ARGB(255, VO_BORDER_COL._red, VO_BORDER_COL._green, VO_BORDER_COL._blue)); @@ -1164,7 +1179,7 @@ bool D3DRenderer::RenderLastFrame() return false; backbuffer.reset(); verifyWin(device->GetRenderTarget(0, &backbuffer.get())); - renderFramebuffer(); + displayFramebuffer(); return true; } 
@@ -1223,6 +1238,75 @@ void D3DRenderer::DrawOSD(bool clear_screen) theDXContext.setOverlay(false); } +void D3DRenderer::writeFramebufferToVRAM() +{ + u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + + float xscale = SCALER_CTL.hscale == 1 ? 0.5f : 1.f; + float yscale = 1024.f / SCALER_CTL.vscalefactor; + if (std::abs(yscale - 1.f) < 0.01) + yscale = 1.f; + + ComPtr fbSurface = framebufferSurface; + + if (xscale != 1.f || yscale != 1.f) + { + u32 scaledW = width * xscale; + u32 scaledH = height * yscale; + + if (fbScaledTexture) + { + D3DSURFACE_DESC desc; + fbScaledTexture->GetLevelDesc(0, &desc); + if (desc.Width != scaledW || desc.Height != scaledH) + { + fbScaledTexture.reset(); + fbScaledSurface.reset(); + } + } + if (!fbScaledTexture) + { + device->CreateTexture(scaledW, scaledH, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &fbScaledTexture.get(), NULL); + fbScaledTexture->GetSurfaceLevel(0, &fbScaledSurface.get()); + } + device->StretchRect(framebufferSurface, nullptr, fbScaledSurface, nullptr, D3DTEXF_LINEAR); + + width = scaledW; + height = scaledH; + fbSurface = fbScaledSurface; + } + u32 texAddress = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect + u32 linestride = pvrrc.fb_W_LINESTRIDE * 8; + + ComPtr offscreenSurface; + verifyWin(device->CreateOffscreenPlainSurface(width, height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &offscreenSurface.get(), nullptr)); + verifyWin(device->GetRenderTargetData(fbSurface, offscreenSurface)); + + PixelBuffer tmp_buf; + tmp_buf.init(width, height); + + u8 *p = (u8 *)tmp_buf.data(); + D3DLOCKED_RECT rect; + RECT lockRect { 0, 0, (long)width, (long)height }; + verifyWin(offscreenSurface->LockRect(&rect, &lockRect, D3DLOCK_READONLY)); + if ((u32)rect.Pitch == width * sizeof(u32)) + memcpy(p, rect.pBits, width * height * sizeof(u32)); + else + { + u8 *src = (u8 *)rect.pBits; + for (u32 y = 0; y < height; y++) + { + 
memcpy(p, src, width * sizeof(u32)); + src += rect.Pitch; + p += width * sizeof(u32); + } + } + verifyWin(offscreenSurface->UnlockRect()); + + WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); +} + Renderer* rend_DirectX9() { return new D3DRenderer(); diff --git a/core/rend/dx9/d3d_renderer.h b/core/rend/dx9/d3d_renderer.h index 3a9da59e9..936832d27 100644 --- a/core/rend/dx9/d3d_renderer.h +++ b/core/rend/dx9/d3d_renderer.h @@ -100,7 +100,6 @@ public: struct D3DRenderer : public Renderer { bool Init() override; - void Resize(int w, int h) override; void Term() override; bool Process(TA_context* ctx) override; bool Render() override; @@ -117,10 +116,12 @@ struct D3DRenderer : public Renderer BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override; void preReset(); void postReset(); + void RenderFramebuffer(const FramebufferInfo& info) override; private: enum ModifierVolumeMode { Xor, Or, Inclusion, Exclusion, ModeCount }; + void resize(int w, int h); void drawStrips(); template void drawList(const List& gply, int first, int count); @@ -130,9 +131,7 @@ private: bool ensureIndexBufferSize(ComPtr& buffer, u32& currentSize, u32 minSize); void updatePaletteTexture(); void updateFogTexture(); - void renderFramebuffer(); - void readDCFramebuffer(); - void renderDCFramebuffer(); + void displayFramebuffer(); void sortTriangles(int first, int count); void drawSorted(bool multipass); void setMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc); @@ -140,8 +139,8 @@ private: void setTexMode(D3DSAMPLERSTATETYPE state, u32 clamp, u32 mirror); void setBaseScissor(); void prepareRttRenderTarget(u32 texAddress); - void readRttRenderTarget(u32 texAddress); + void writeFramebufferToVRAM(); RenderStateCache devCache; ComPtr device; @@ -166,6 +165,8 @@ private: ComPtr rttTexture; ComPtr rttSurface; ComPtr depthSurface; + ComPtr fbScaledTexture; + ComPtr fbScaledSurface; u32 width = 0; u32 
height = 0; diff --git a/core/rend/dx9/dxcontext.cpp b/core/rend/dx9/dxcontext.cpp index b3bc17260..4882fd202 100644 --- a/core/rend/dx9/dxcontext.cpp +++ b/core/rend/dx9/dxcontext.cpp @@ -120,7 +120,6 @@ void DXContext::Present() { renderer = new D3DRenderer(); renderer->Init(); - rend_resize_renderer(); } } } diff --git a/core/rend/gl4/gldraw.cpp b/core/rend/gl4/gldraw.cpp index 06da7b848..e4b7724e9 100644 --- a/core/rend/gl4/gldraw.cpp +++ b/core/rend/gl4/gldraw.cpp @@ -773,8 +773,8 @@ void gl4DrawVmuTexture(u8 vmu_screen_number) const float vmu_padding = 8.f; const float x_scale = 100.f / config::ScreenStretching; - const float y_scale = (float)gl.ofbo.width / gl.ofbo.height >= 8.f / 3.f - 0.1f ? 0.5f : 1.f; - float x = (config::Widescreen && config::ScreenStretching == 100 ? -1 / gl4ShaderUniforms.ndcMat[0][0] / 4.f : 0) + vmu_padding; + const float y_scale = gl.ofbo.framebuffer && (float)gl.ofbo.framebuffer->getWidth() / gl.ofbo.framebuffer->getHeight() >= 8.f / 3.f - 0.1f ? 0.5f : 1.f; + float x = (config::Widescreen && config::ScreenStretching == 100 && !config::EmulateFramebuffer ? 
-1 / gl4ShaderUniforms.ndcMat[0][0] / 4.f : 0) + vmu_padding; float y = vmu_padding; float w = (float)VMU_SCREEN_WIDTH * vmu_screen_params[vmu_screen_number].vmu_screen_size_mult * x_scale; float h = (float)VMU_SCREEN_HEIGHT * vmu_screen_params[vmu_screen_number].vmu_screen_size_mult * y_scale; diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 7b21ffcad..27e10388c 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -22,6 +22,7 @@ #include "rend/osd.h" #include "glsl.h" #include "gl4naomi2.h" +#include "rend/gles/postprocess.h" //Fragment and vertex shaders code @@ -733,7 +734,6 @@ static void resize(int w, int h) } gl4CreateTextures(max_image_width, max_image_height); reshapeABuffer(max_image_width, max_image_height); - glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo); } } @@ -746,13 +746,7 @@ static bool RenderFrame(int width, int height) const glm::mat4& scissor_mat = matrices.GetScissorMatrix(); ViewportMatrix = matrices.GetViewportMatrix(); -#ifdef LIBRETRO - gl.ofbo.origFbo = glsm_get_current_framebuffer(); -#else - gl.ofbo.origFbo = 0; - glGetIntegerv(GL_FRAMEBUFFER_BINDING, (GLint *)&gl.ofbo.origFbo); -#endif - if (!is_rtt) + if (!is_rtt && !config::EmulateFramebuffer) gcflip = 0; else gcflip = 1; @@ -807,10 +801,10 @@ static bool RenderFrame(int width, int height) else { #ifdef LIBRETRO - gl.ofbo.width = width; - gl.ofbo.height = height; - if (config::PowerVR2Filter && !pvrrc.isRenderFramebuffer) + if (config::PowerVR2Filter) output_fbo = postProcessor.getFramebuffer(width, height); + else if (config::EmulateFramebuffer) + output_fbo = init_output_framebuffer(width, height); else output_fbo = glsm_get_current_framebuffer(); glViewport(0, 0, width, height); @@ -833,7 +827,7 @@ static bool RenderFrame(int width, int height) glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glClear(GL_COLOR_BUFFER_BIT); } - else if (!pvrrc.isRenderFramebuffer) + else { //Main VBO //move vertex to gpu @@ -861,7 +855,7 @@ static bool 
RenderFrame(int width, int height) } glCheck(); - if (is_rtt || !config::Widescreen || matrices.IsClipped() || config::Rotate90) + if (is_rtt || !config::Widescreen || matrices.IsClipped() || config::Rotate90 || config::EmulateFramebuffer) { float fWidth; float fHeight; @@ -932,24 +926,24 @@ static bool RenderFrame(int width, int height) gl4DrawStrips(output_fbo, rendering_width, rendering_height); #ifdef LIBRETRO if (config::PowerVR2Filter && !is_rtt) - postProcessor.render(glsm_get_current_framebuffer()); + { + if (config::EmulateFramebuffer) + postProcessor.render(init_output_framebuffer(width, height)); + else + postProcessor.render(glsm_get_current_framebuffer()); + } #endif } - else - { - glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); - - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glClear(GL_COLOR_BUFFER_BIT); - - DrawFramebuffer(); - } if (is_rtt) ReadRTTBuffer(); + else if (config::EmulateFramebuffer) + writeFramebufferToVRAM(); #ifndef LIBRETRO - else + else { + gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); render_output_framebuffer(); + } #endif glBindVertexArray(0); @@ -963,13 +957,6 @@ struct OpenGL4Renderer : OpenGLRenderer return gl4_init(); } - void Resize(int w, int h) override - { - width = w; - height = h; - resize(w, h); - } - void Term() override { termABuffer(); @@ -995,21 +982,24 @@ struct OpenGL4Renderer : OpenGLRenderer bool Render() override { - RenderFrame(width, height); - if (pvrrc.isRTT) + saveCurrentFramebuffer(); + RenderFrame(pvrrc.framebufferWidth, pvrrc.framebufferHeight); + if (pvrrc.isRTT) { + restoreCurrentFramebuffer(); return false; + } - DrawOSD(false); - frameRendered = true; + if (!config::EmulateFramebuffer) + { + DrawOSD(false); + gl.ofbo2.ready = false; + frameRendered = true; + } + restoreCurrentFramebuffer(); return true; } - bool RenderLastFrame() override - { - return render_output_framebuffer(); - } - GLenum getFogTextureSlot() const override { return GL_TEXTURE5; } diff --git 
a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 1054afc3f..c1f23ed6f 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -698,20 +698,103 @@ void DrawStrips() } } -void DrawFramebuffer() +void OpenGLRenderer::RenderFramebuffer(const FramebufferInfo& info) { - int sx = (int)roundf((gl.ofbo.width - 4.f / 3.f * gl.ofbo.height) / 2.f); - glViewport(sx, 0, gl.ofbo.width - sx * 2, gl.ofbo.height); - drawQuad(fbTextureId, false, true); - glcache.DeleteTextures(1, &fbTextureId); - fbTextureId = 0; + glReadFramebuffer(info); + saveCurrentFramebuffer(); +#ifndef LIBRETRO + if (gl.ofbo2.framebuffer != nullptr + && (gl.dcfb.width != gl.ofbo2.framebuffer->getWidth() || gl.dcfb.height != gl.ofbo2.framebuffer->getHeight())) + gl.ofbo2.framebuffer.reset(); + + if (gl.ofbo2.framebuffer == nullptr) + gl.ofbo2.framebuffer = std::unique_ptr(new GlFramebuffer(gl.dcfb.width, gl.dcfb.height)); + else + gl.ofbo2.framebuffer->bind(); + glCheck(); + gl.ofbo2.ready = true; +#endif + gl.ofbo.aspectRatio = getDCFramebufferAspectRatio(); + + glViewport(0, 0, gl.dcfb.width, gl.dcfb.height); + glcache.Disable(GL_SCISSOR_TEST); + + if (info.fb_r_ctrl.fb_enable == 0 || info.vo_control.blank_video == 1) + { + // Video output disabled + glcache.ClearColor(info.vo_border_col.red(), info.vo_border_col.green(), info.vo_border_col.blue(), 1.f); + glClear(GL_COLOR_BUFFER_BIT); + } + else + { + drawQuad(gl.dcfb.tex, false, true); + } +#ifndef LIBRETRO + render_output_framebuffer(); +#endif + + DrawOSD(false); + frameRendered = true; + restoreCurrentFramebuffer(); +} + +void writeFramebufferToVRAM() +{ + u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + + float xscale = SCALER_CTL.hscale == 1 ? 
0.5f : 1.f; + float yscale = 1024.f / SCALER_CTL.vscalefactor; + if (std::abs(yscale - 1.f) < 0.01) + yscale = 1.f; + + if (xscale != 1.f || yscale != 1.f) + { + u32 scaledW = width * xscale; + u32 scaledH = height * yscale; + + if (gl.fbscaling.framebuffer != nullptr + && (gl.fbscaling.framebuffer->getWidth() != (int)scaledW || gl.fbscaling.framebuffer->getHeight() != (int)scaledH)) + gl.fbscaling.framebuffer.reset(); + if (gl.fbscaling.framebuffer == nullptr) + gl.fbscaling.framebuffer = std::unique_ptr(new GlFramebuffer(scaledW, scaledH)); + + gl.ofbo.framebuffer->bind(GL_READ_FRAMEBUFFER); + gl.fbscaling.framebuffer->bind(GL_DRAW_FRAMEBUFFER); + glBlitFramebuffer(0, 0, width, height, + 0, 0, scaledW, scaledH, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + gl.fbscaling.framebuffer->bind(); + + width = scaledW; + height = scaledH; + } + u32 tex_addr = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect + + glPixelStorei(GL_PACK_ALIGNMENT, 1); + u32 linestride = pvrrc.fb_W_LINESTRIDE * 8; + + PixelBuffer tmp_buf; + tmp_buf.init(width, height); + + u8 *p = (u8 *)tmp_buf.data(); + glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, p); + + WriteFramebuffer(width, height, p, tex_addr, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); + + glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo); + glCheck(); } bool render_output_framebuffer() { + GlFramebuffer *framebuffer = gl.ofbo2.ready ? 
gl.ofbo2.framebuffer.get() : gl.ofbo.framebuffer.get(); + if (framebuffer == nullptr) + return false; + glcache.Disable(GL_SCISSOR_TEST); float screenAR = (float)settings.display.width / settings.display.height; - float renderAR = getOutputFramebufferAspectRatio(); + float renderAR = gl.ofbo.aspectRatio; int dx = 0; int dy = 0; @@ -722,26 +805,22 @@ bool render_output_framebuffer() if (gl.gl_major < 3 || config::Rotate90) { - if (gl.ofbo.tex == 0) - return false; glViewport(dx, dy, settings.display.width - dx * 2, settings.display.height - dy * 2); glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo); glcache.ClearColor(VO_BORDER_COL.red(), VO_BORDER_COL.green(), VO_BORDER_COL.blue(), 1.f); glClear(GL_COLOR_BUFFER_BIT); glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, config::TextureFiltering == 1 ? GL_NEAREST : GL_LINEAR); glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, config::TextureFiltering == 1 ? GL_NEAREST : GL_LINEAR); - drawQuad(gl.ofbo.tex, config::Rotate90); + drawQuad(framebuffer->getTexture(), config::Rotate90); } else { #ifndef GLES2 - if (gl.ofbo.fbo == 0) - return false; - glBindFramebuffer(GL_READ_FRAMEBUFFER, gl.ofbo.fbo); + framebuffer->bind(GL_READ_FRAMEBUFFER); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, gl.ofbo.origFbo); glcache.ClearColor(VO_BORDER_COL.red(), VO_BORDER_COL.green(), VO_BORDER_COL.blue(), 1.f); glClear(GL_COLOR_BUFFER_BIT); - glBlitFramebuffer(0, 0, gl.ofbo.width, gl.ofbo.height, + glBlitFramebuffer(0, 0, framebuffer->getWidth(), framebuffer->getHeight(), dx, dy, settings.display.width - dx, settings.display.height - dy, GL_COLOR_BUFFER_BIT, config::TextureFiltering == 1 ? GL_NEAREST : GL_LINEAR); glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo); @@ -822,8 +901,8 @@ void DrawVmuTexture(u8 vmu_screen_number) const float vmu_padding = 8.f; const float x_scale = 100.f / config::ScreenStretching; - const float y_scale = (float)gl.ofbo.width / gl.ofbo.height >= 8.f / 3.f - 0.1f ? 
0.5f : 1.f; - float x = (config::Widescreen && config::ScreenStretching == 100 ? -1 / ShaderUniforms.ndcMat[0][0] / 4.f : 0) + vmu_padding; + const float y_scale = gl.ofbo.framebuffer && (float)gl.ofbo.framebuffer->getWidth() / gl.ofbo.framebuffer->getHeight() >= 8.f / 3.f - 0.1f ? 0.5f : 1.f; + float x = (config::Widescreen && config::ScreenStretching == 100 && !config::EmulateFramebuffer ? -1 / ShaderUniforms.ndcMat[0][0] / 4.f : 0) + vmu_padding; float y = vmu_padding; float w = (float)VMU_SCREEN_WIDTH * vmu_screen_params[vmu_screen_number].vmu_screen_size_mult * x_scale; float h = (float)VMU_SCREEN_HEIGHT * vmu_screen_params[vmu_screen_number].vmu_screen_size_mult * y_scale; diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 0e0a586c4..230dde528 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -13,6 +13,7 @@ #include "wsi/gl_context.h" #include "emulator.h" #include "naomi2.h" +#include "rend/gles/postprocess.h" #include @@ -356,6 +357,8 @@ void main() } )"; +static void gl_free_osd_resources(); + GLCache glcache; gl_ctx gl; @@ -428,18 +431,15 @@ void termGLCommon() glDeleteBuffers(1, &gl.rtt.pbo); gl.rtt.pbo = 0; gl.rtt.pboSize = 0; - glDeleteFramebuffers(1, &gl.rtt.fbo); - gl.rtt.fbo = 0; - glcache.DeleteTextures(1, &gl.rtt.tex); - gl.rtt.tex = 0; - glDeleteRenderbuffers(1, &gl.rtt.depthb); - gl.rtt.depthb = 0; + gl.rtt.framebuffer.reset(); gl.rtt.texAddress = ~0; gl_free_osd_resources(); - free_output_framebuffer(); - glcache.DeleteTextures(1, &fbTextureId); - fbTextureId = 0; + gl.ofbo.framebuffer.reset(); + glcache.DeleteTextures(1, &gl.dcfb.tex); + gl.dcfb.tex = 0; + gl.ofbo2.framebuffer.reset(); + gl.fbscaling.framebuffer.reset(); #ifdef LIBRETRO termVmuLightgun(); #endif @@ -799,6 +799,7 @@ bool CompilePipelineShader(PipelineShader* s) return glIsProgram(s->program)==GL_TRUE; } +#ifdef __ANDROID__ static void SetupOSDVBO() { #ifndef GLES2 @@ -828,7 +829,7 @@ static void SetupOSDVBO() bindVertexArray(0); } -void 
gl_load_osd_resources() +static void gl_load_osd_resources() { OpenGlSource vertexSource; vertexSource.addSource(VertexCompatShader) @@ -841,7 +842,6 @@ void gl_load_osd_resources() gl.OSD_SHADER.scale = glGetUniformLocation(gl.OSD_SHADER.program, "scale"); glUniform1i(glGetUniformLocation(gl.OSD_SHADER.program, "tex"), 0); //bind osd texture to slot 0 -#ifdef __ANDROID__ if (gl.OSD_SHADER.osd_tex == 0) { int width, height; @@ -854,11 +854,11 @@ void gl_load_osd_resources() delete[] image_data; } -#endif SetupOSDVBO(); } +#endif -void gl_free_osd_resources() +static void gl_free_osd_resources() { if (gl.OSD_SHADER.program != 0) { @@ -1001,7 +1001,7 @@ bool gles_init() } -void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format) +static void updateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format) { glActiveTexture(texture_slot); if (fogTextureId == 0) @@ -1026,7 +1026,7 @@ void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format glActiveTexture(GL_TEXTURE0); } -void UpdatePaletteTexture(GLenum texture_slot) +static void updatePaletteTexture(GLenum texture_slot) { glActiveTexture(texture_slot); if (paletteTextureId == 0) @@ -1139,25 +1139,17 @@ bool OpenGLRenderer::Process(TA_context* ctx) TexCache.Clear(); TexCache.Cleanup(); - if (ctx->rend.isRenderFramebuffer) + if (fog_needs_update && config::Fog) { - RenderFramebuffer(); - return true; + fog_needs_update = false; + updateFogTexture((u8 *)FOG_TABLE, getFogTextureSlot(), gl.single_channel_format); } - else + if (palette_updated) { - if (fog_needs_update && config::Fog) - { - fog_needs_update = false; - UpdateFogTexture((u8 *)FOG_TABLE, getFogTextureSlot(), gl.single_channel_format); - } - if (palette_updated) - { - UpdatePaletteTexture(getPaletteTextureSlot()); - palette_updated = false; - } - return ta_parse(ctx); + updatePaletteTexture(getPaletteTextureSlot()); + palette_updated = false; } + return ta_parse(ctx); } static void 
upload_vertex_indices() @@ -1199,7 +1191,7 @@ bool RenderFrame(int width, int height) const glm::mat4& scissor_mat = matrices.GetScissorMatrix(); ViewportMatrix = matrices.GetViewportMatrix(); - if (!is_rtt) + if (!is_rtt && !config::EmulateFramebuffer) gcflip = 0; else gcflip = 1; @@ -1245,12 +1237,6 @@ bool RenderFrame(int width, int height) } //setup render target first -#ifdef LIBRETRO - gl.ofbo.origFbo = glsm_get_current_framebuffer(); -#else - gl.ofbo.origFbo = 0; - glGetIntegerv(GL_FRAMEBUFFER_BINDING, (GLint *)&gl.ofbo.origFbo); -#endif if (is_rtt) { if (BindRTT() == 0) @@ -1259,12 +1245,17 @@ bool RenderFrame(int width, int height) else { #ifdef LIBRETRO - gl.ofbo.width = width; - gl.ofbo.height = height; - if (config::PowerVR2Filter && !pvrrc.isRenderFramebuffer) + if (config::PowerVR2Filter) glBindFramebuffer(GL_FRAMEBUFFER, postProcessor.getFramebuffer(width, height)); + else if (config::EmulateFramebuffer) + { + if (init_output_framebuffer(width, height) == 0) + return false; + } else + { glBindFramebuffer(GL_FRAMEBUFFER, glsm_get_current_framebuffer()); + } glViewport(0, 0, width, height); #else if (init_output_framebuffer(width, height) == 0) @@ -1272,7 +1263,8 @@ bool RenderFrame(int width, int height) #endif } - bool wide_screen_on = !is_rtt && config::Widescreen && !matrices.IsClipped() && !config::Rotate90; + bool wide_screen_on = !is_rtt && config::Widescreen && !matrices.IsClipped() + && !config::Rotate90 && !config::EmulateFramebuffer; //Color is cleared by the background plane @@ -1291,7 +1283,7 @@ bool RenderFrame(int width, int height) // Video output disabled glClear(GL_COLOR_BUFFER_BIT); } - else if (!pvrrc.isRenderFramebuffer) + else { //move vertex to gpu //Main VBO @@ -1373,20 +1365,24 @@ bool RenderFrame(int width, int height) DrawStrips(); #ifdef LIBRETRO if (config::PowerVR2Filter && !is_rtt) - postProcessor.render(glsm_get_current_framebuffer()); + { + if (config::EmulateFramebuffer) + 
postProcessor.render(init_output_framebuffer(width, height)); + else + postProcessor.render(glsm_get_current_framebuffer()); + } #endif } - else - { - glClear(GL_COLOR_BUFFER_BIT); - DrawFramebuffer(); - } if (is_rtt) ReadRTTBuffer(); + else if (config::EmulateFramebuffer) + writeFramebufferToVRAM(); #ifndef LIBRETRO - else + else { + gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); render_output_framebuffer(); + } #endif bindVertexArray(0); @@ -1406,12 +1402,20 @@ void OpenGLRenderer::Term() bool OpenGLRenderer::Render() { - RenderFrame(width, height); - if (pvrrc.isRTT) + saveCurrentFramebuffer(); + RenderFrame(pvrrc.framebufferWidth, pvrrc.framebufferHeight); + if (pvrrc.isRTT) { + restoreCurrentFramebuffer(); return false; + } - DrawOSD(false); - frameRendered = true; + if (!config::EmulateFramebuffer) + { + DrawOSD(false); + frameRendered = true; + gl.ofbo2.ready = false; + } + restoreCurrentFramebuffer(); return true; } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 97638f696..dc043f004 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -5,7 +5,6 @@ #include "rend/TexCache.h" #include "wsi/gl_context.h" #include "glcache.h" -#include "postprocess.h" #include "rend/shader_util.h" #ifndef LIBRETRO #include "rend/imgui_driver.h" @@ -154,6 +153,36 @@ private: GLuint name; }; +class GlFramebuffer +{ +public: + GlFramebuffer(int width, int height, bool withDepth = false, GLuint texture = 0); + ~GlFramebuffer(); + + void bind(GLenum type = GL_FRAMEBUFFER) const { + glBindFramebuffer(type, framebuffer); + } + + int getWidth() const { return width; } + int getHeight() const { return height; } + + GLuint getTexture() const { return texture; } + GLuint detachTexture() { + GLuint t = texture; + texture = 0; + return t; + } + GLuint getFramebuffer() const { return framebuffer; } + +private: + int width; + int height; + GLuint texture; + GLuint framebuffer = 0; + GLuint colorBuffer = 0; + GLuint depthBuffer = 0; +}; + struct gl_ctx 
{ struct @@ -201,9 +230,6 @@ struct gl_ctx struct { u32 texAddress = ~0; - GLuint depthb; - GLuint tex; - GLuint fbo; GLuint pbo; u32 pboSize; bool directXfer; @@ -211,19 +237,34 @@ struct gl_ctx u32 height; FB_W_CTRL_type fb_w_ctrl; u32 linestride; + std::unique_ptr framebuffer; } rtt; struct { - GLuint depthb; - GLuint colorb; - GLuint tex; - GLuint fbo; - int width; - int height; + std::unique_ptr framebuffer; + float aspectRatio; GLuint origFbo; } ofbo; + struct + { + GLuint tex; + int width; + int height; + } dcfb; + + struct + { + std::unique_ptr framebuffer; + } fbscaling; + + struct + { + std::unique_ptr framebuffer; + bool ready = false; + } ofbo2; + const char *gl_version; const char *glsl_version_header; int gl_major; @@ -242,36 +283,23 @@ struct gl_ctx }; extern gl_ctx gl; -extern GLuint fbTextureId; BaseTextureCacheData *gl_GetTexture(TSP tsp, TCW tcw); enum ModifierVolumeMode { Xor, Or, Inclusion, Exclusion, ModeCount }; -void gl_load_osd_resources(); -void gl_free_osd_resources(); -bool ProcessFrame(TA_context* ctx); -void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format); -void UpdatePaletteTexture(GLenum texture_slot); void termGLCommon(); void findGLVersion(); -void GetFramebufferScaling(float& scale_x, float& scale_y, float& scissoring_scale_x, float& scissoring_scale_y); -void GetFramebufferSize(float& dc_width, float& dc_height); -void SetupMatrices(float dc_width, float dc_height, - float scale_x, float scale_y, float scissoring_scale_x, float scissoring_scale_y, - float &ds2s_offs_x, glm::mat4& ndcMat, glm::mat4& scissor_mat); void SetCull(u32 CullMode); -s32 SetTileClip(u32 val, GLint uniform); void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc); GLuint BindRTT(bool withDepthBuffer = true); void ReadRTTBuffer(); -void RenderFramebuffer(); -void DrawFramebuffer(); +void glReadFramebuffer(const FramebufferInfo& info); GLuint init_output_framebuffer(int width, int height); bool render_output_framebuffer(); 
-void free_output_framebuffer(); +void writeFramebufferToVRAM(); void OSD_DRAW(bool clear_screen); PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, @@ -375,13 +403,14 @@ extern const u32 SrcBlendGL[], DstBlendGL[]; struct OpenGLRenderer : Renderer { bool Init() override; - void Resize(int w, int h) override { width = w; height = h; } void Term() override; bool Process(TA_context* ctx) override; bool Render() override; + void RenderFramebuffer(const FramebufferInfo& info) override; + bool RenderLastFrame() override; void DrawOSD(bool clear_screen) override { OSD_DRAW(clear_screen); } @@ -409,9 +438,19 @@ struct OpenGLRenderer : Renderer return GL_TEXTURE2; } + void saveCurrentFramebuffer() { +#ifdef LIBRETRO + gl.ofbo.origFbo = glsm_get_current_framebuffer(); +#else + gl.ofbo.origFbo = 0; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, (GLint *)&gl.ofbo.origFbo); +#endif + } + void restoreCurrentFramebuffer() { + glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo); + } + bool frameRendered = false; - int width; - int height; }; void initQuad(); diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index 7e85a3608..c77b3a92c 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -165,12 +165,7 @@ GLuint BindRTT(bool withDepthBuffer) readAsyncPixelBuffer(gl.rtt.texAddress); gl.rtt.texAddress = texAddress; - if (gl.rtt.fbo != 0) - glDeleteFramebuffers(1, &gl.rtt.fbo); - if (gl.rtt.tex != 0) - glcache.DeleteTextures(1, &gl.rtt.tex); - if (gl.rtt.depthb != 0) - glDeleteRenderbuffers(1, &gl.rtt.depthb); + gl.rtt.framebuffer.reset(); u32 fbw2; u32 fbh2; @@ -193,64 +188,15 @@ GLuint BindRTT(bool withDepthBuffer) #endif // Create a texture for rendering to - gl.rtt.tex = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, gl.rtt.tex); + GLuint texture = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, texture); glTexImage2D(GL_TEXTURE_2D, 0, channels, fbw2, fbh2, 0, channels, format, 0); - // Create the object that 
will allow us to render to the aforementioned texture - glGenFramebuffers(1, &gl.rtt.fbo); - glBindFramebuffer(GL_FRAMEBUFFER, gl.rtt.fbo); - - // Attach the texture to the FBO - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, gl.rtt.tex, 0); - - if (withDepthBuffer) - { - // Generate and bind a render buffer which will become a depth buffer - glGenRenderbuffers(1, &gl.rtt.depthb); - glBindRenderbuffer(GL_RENDERBUFFER, gl.rtt.depthb); - - // Currently it is unknown to GL that we want our new render buffer to be a depth buffer. - // glRenderbufferStorage will fix this and will allocate a depth buffer - if (gl.is_gles) - { -#if defined(GL_DEPTH24_STENCIL8) - if (gl.gl_major >= 3) - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, fbw2, fbh2); - else -#endif -#if defined(GL_DEPTH24_STENCIL8_OES) - if (gl.GL_OES_packed_depth_stencil_supported) - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8_OES, fbw2, fbh2); - else -#endif -#if defined(GL_DEPTH_COMPONENT24_OES) - if (gl.GL_OES_depth24_supported) - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, fbw2, fbh2); - else -#endif - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16, fbw2, fbh2); - } -#ifdef GL_DEPTH24_STENCIL8 - else - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, fbw2, fbh2); -#endif - - // Attach the depth buffer we just created to our FBO. 
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, gl.rtt.depthb); - - if (!gl.is_gles || gl.gl_major >= 3 || gl.GL_OES_packed_depth_stencil_supported) - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, gl.rtt.depthb); - } - - // Check that our FBO creation was successful - GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); - - verify(uStatus == GL_FRAMEBUFFER_COMPLETE); + gl.rtt.framebuffer = std::unique_ptr(new GlFramebuffer((int)fbw2, (int)fbh2, withDepthBuffer, texture)); glViewport(0, 0, fbw, fbh); - return gl.rtt.fbo; + return gl.rtt.framebuffer->getFramebuffer(); } void ReadRTTBuffer() @@ -329,8 +275,7 @@ void ReadRTTBuffer() { TextureCacheData *texture_data = TexCache.getRTTexture(gl.rtt.texAddress, fb_packmode, w, h); glcache.DeleteTextures(1, &texture_data->texID); - texture_data->texID = gl.rtt.tex; - gl.rtt.tex = 0; + texture_data->texID = gl.rtt.framebuffer->detachTexture(); texture_data->dirty = 0; texture_data->unprotectVRam(); } @@ -409,22 +354,15 @@ BaseTextureCacheData *gl_GetTexture(TSP tsp, TCW tcw) return tf; } -GLuint fbTextureId; - -void RenderFramebuffer() +void glReadFramebuffer(const FramebufferInfo& info) { - if (FB_R_SIZE.fb_x_size == 0 || FB_R_SIZE.fb_y_size == 0) - return; - PixelBuffer pb; - int width; - int height; - ReadFramebuffer(pb, width, height); + ReadFramebuffer(info, pb, gl.dcfb.width, gl.dcfb.height); - if (fbTextureId == 0) - fbTextureId = glcache.GenTexture(); + if (gl.dcfb.tex == 0) + gl.dcfb.tex = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, fbTextureId); + glcache.BindTexture(GL_TEXTURE_2D, gl.dcfb.tex); //set texture repeat mode glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); @@ -432,55 +370,59 @@ void RenderFramebuffer() glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 
width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, pb.data()); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, gl.dcfb.width, gl.dcfb.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, pb.data()); } GLuint init_output_framebuffer(int width, int height) { - if (width != gl.ofbo.width || height != gl.ofbo.height - // if the rotate90 setting has changed - || (gl.gl_major >= 3 && (gl.ofbo.tex == 0) == config::Rotate90)) + if (gl.ofbo.framebuffer != nullptr + && (width != gl.ofbo.framebuffer->getWidth() || height != gl.ofbo.framebuffer->getHeight() + // if the rotate90 setting has changed + || (gl.gl_major >= 3 && (gl.ofbo.framebuffer->getTexture() == 0) == config::Rotate90))) { - free_output_framebuffer(); - gl.ofbo.width = width; - gl.ofbo.height = height; + gl.ofbo.framebuffer.reset(); } - if (gl.ofbo.fbo == 0) + if (gl.ofbo.framebuffer == nullptr) { - // Create the depth+stencil renderbuffer - glGenRenderbuffers(1, &gl.ofbo.depthb); - glBindRenderbuffer(GL_RENDERBUFFER, gl.ofbo.depthb); - - if (gl.is_gles) - { -#if defined(GL_DEPTH24_STENCIL8) - if (gl.gl_major >= 3) - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, width, height); - else -#endif -#if defined(GL_DEPTH24_STENCIL8_OES) - if (gl.GL_OES_packed_depth_stencil_supported) - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8_OES, width, height); - else -#endif -#if defined(GL_DEPTH_COMPONENT24_OES) - if (gl.GL_OES_depth24_supported) - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, width, height); - else -#endif - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16, width, height); - } -#ifdef GL_DEPTH24_STENCIL8 - else - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, width, height); -#endif - - if (gl.gl_major < 3 || config::Rotate90) + GLuint texture = 0; + if (config::Rotate90) { // Create a texture for rendering to - gl.ofbo.tex = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, gl.ofbo.tex); + texture = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, 
texture); + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + gl.ofbo.framebuffer = std::unique_ptr(new GlFramebuffer(width, height, true, texture)); + + glcache.Disable(GL_SCISSOR_TEST); + glcache.ClearColor(0.f, 0.f, 0.f, 0.f); + glClear(GL_COLOR_BUFFER_BIT); + } + else + gl.ofbo.framebuffer->bind(); + + glViewport(0, 0, width, height); + glCheck(); + + return gl.ofbo.framebuffer->getFramebuffer(); +} + +GlFramebuffer::GlFramebuffer(int width, int height, bool withDepth, GLuint texture) + : width(width), height(height), texture(texture) +{ + if (texture == 0) + { + if (gl.gl_major < 3) + { + // Create a texture for rendering to + texture = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, texture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -491,65 +433,68 @@ GLuint init_output_framebuffer(int width, int height) else { // Use a renderbuffer and glBlitFramebuffer - glGenRenderbuffers(1, &gl.ofbo.colorb); - glBindRenderbuffer(GL_RENDERBUFFER, gl.ofbo.colorb); + glGenRenderbuffers(1, &colorBuffer); + glBindRenderbuffer(GL_RENDERBUFFER, colorBuffer); #ifdef GL_RGBA8 glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); #endif } + } - // Create the framebuffer - glGenFramebuffers(1, &gl.ofbo.fbo); - glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.fbo); + // Create the framebuffer + glGenFramebuffers(1, &framebuffer); + bind(); - // Attach the depth buffer to our FBO. 
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, gl.ofbo.depthb); + if (withDepth) + { + // Generate and bind a render buffer which will become a depth buffer + glGenRenderbuffers(1, &depthBuffer); + glBindRenderbuffer(GL_RENDERBUFFER, depthBuffer); + + // Currently it is unknown to GL that we want our new render buffer to be a depth buffer. + // glRenderbufferStorage will fix this and will allocate a depth buffer + if (gl.is_gles) + { +#if defined(GL_DEPTH24_STENCIL8) + if (gl.gl_major >= 3) + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, width, height); + else +#endif +#if defined(GL_DEPTH24_STENCIL8_OES) + if (gl.GL_OES_packed_depth_stencil_supported) + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8_OES, width, height); + else +#endif +#if defined(GL_DEPTH_COMPONENT24_OES) + if (gl.GL_OES_depth24_supported) + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, width, height); + else +#endif + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16, width, height); + } +#ifdef GL_DEPTH24_STENCIL8 + else + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, width, height); +#endif + + // Attach the depth buffer we just created to our FBO. 
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depthBuffer); if (!gl.is_gles || gl.gl_major >= 3 || gl.GL_OES_packed_depth_stencil_supported) - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, gl.ofbo.depthb); - - // Attach the texture/renderbuffer to the FBO - if (gl.ofbo.tex != 0) - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, gl.ofbo.tex, 0); - else - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, gl.ofbo.colorb); - - // Check that our FBO creation was successful - GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); - - if (uStatus != GL_FRAMEBUFFER_COMPLETE) - return 0; - - glcache.Disable(GL_SCISSOR_TEST); - glcache.ClearColor(0.f, 0.f, 0.f, 0.f); - glClear(GL_COLOR_BUFFER_BIT); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthBuffer); } + + // Attach the texture/renderbuffer to the FBO + if (texture != 0) + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0); else - glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.fbo); - - glViewport(0, 0, width, height); - glCheck(); - - return gl.ofbo.fbo; + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, colorBuffer); } -void free_output_framebuffer() +GlFramebuffer::~GlFramebuffer() { - if (gl.ofbo.fbo != 0) - { - glDeleteFramebuffers(1, &gl.ofbo.fbo); - gl.ofbo.fbo = 0; - glDeleteRenderbuffers(1, &gl.ofbo.depthb); - gl.ofbo.depthb = 0; - if (gl.ofbo.tex != 0) - { - glcache.DeleteTextures(1, &gl.ofbo.tex); - gl.ofbo.tex = 0; - } - if (gl.ofbo.colorb != 0) - { - glDeleteRenderbuffers(1, &gl.ofbo.colorb); - gl.ofbo.colorb = 0; - } - } + glDeleteFramebuffers(1, &framebuffer); + glDeleteRenderbuffers(1, &depthBuffer); + glcache.DeleteTextures(1, &texture); + glDeleteRenderbuffers(1, &colorBuffer); } diff --git a/core/rend/gles/opengl_driver.cpp b/core/rend/gles/opengl_driver.cpp index 
8641195cd..f035dc95e 100644 --- a/core/rend/gles/opengl_driver.cpp +++ b/core/rend/gles/opengl_driver.cpp @@ -50,8 +50,8 @@ OpenGLDriver::OpenGLDriver() for (auto& tex : vmu_lcd_tex_ids) tex = ImTextureID(); ImGui_ImplOpenGL3_Init(); - EventManager::listen(Event::Resume, emuEventCallback, this); - EventManager::listen(Event::Pause, emuEventCallback, this); + EventManager::listen(Event::Start, emuEventCallback, this); + EventManager::listen(Event::Terminate, emuEventCallback, this); } OpenGLDriver::~OpenGLDriver() diff --git a/core/rend/gles/opengl_driver.h b/core/rend/gles/opengl_driver.h index c419e45e4..2f543383f 100644 --- a/core/rend/gles/opengl_driver.h +++ b/core/rend/gles/opengl_driver.h @@ -53,10 +53,10 @@ private: { switch (event) { - case Event::Resume: + case Event::Start: gameStarted = true; break; - case Event::Pause: + case Event::Terminate: gameStarted = false; break; default: diff --git a/core/rend/gles/postprocess.cpp b/core/rend/gles/postprocess.cpp index 6a07049ca..f29cf789f 100644 --- a/core/rend/gles/postprocess.cpp +++ b/core/rend/gles/postprocess.cpp @@ -217,32 +217,9 @@ private: std::array PostProcessShader::shaders; -void PostProcessor::init() +void PostProcessor::init(int width, int height) { - glGenFramebuffers(1, &framebuffer); - glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); - - texture = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, texture); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0); - - glGenRenderbuffers(1, &depthBuffer); - glBindRenderbuffer(GL_RENDERBUFFER, depthBuffer); - -#ifdef GLES2 - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8_OES, width, height); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, 
GL_RENDERBUFFER, depthBuffer); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthBuffer); -#else - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, width, height); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthBuffer); -#endif - GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); - verify(uStatus == GL_FRAMEBUFFER_COMPLETE); - glcache.BindTexture(GL_TEXTURE_2D, 0); + framebuffer = std::unique_ptr(new GlFramebuffer(width, height)); float vertices[] = { -1, 1, 1, @@ -271,12 +248,7 @@ void PostProcessor::init() void PostProcessor::term() { - glcache.DeleteTextures(1, &texture); - texture = 0; - glDeleteFramebuffers(1, &framebuffer); - framebuffer = 0; - glDeleteRenderbuffers(1, &depthBuffer); - depthBuffer = 0; + framebuffer.reset(); glDeleteBuffers(1, &vertexBuffer); vertexBuffer = 0; deleteVertexArray(vertexArray); @@ -288,15 +260,14 @@ void PostProcessor::term() GLuint PostProcessor::getFramebuffer(int width, int height) { - if (width != this->width || height != this->height) + if (framebuffer != nullptr + && (width != framebuffer->getWidth() || height != framebuffer->getHeight())) term(); - if (framebuffer == 0) { - this->width = width; - this->height = height; - init(); - } - return framebuffer; + if (framebuffer == nullptr) + init(width, height); + + return framebuffer->getFramebuffer(); } void PostProcessor::render(GLuint output_fbo) @@ -322,7 +293,7 @@ void PostProcessor::render(GLuint output_fbo) glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); glActiveTexture(GL_TEXTURE0); - glcache.BindTexture(GL_TEXTURE_2D, texture); + glcache.BindTexture(GL_TEXTURE_2D, framebuffer->getTexture()); glcache.ClearColor(0.f, 0.f, 0.f, 0.f); glClear(GL_COLOR_BUFFER_BIT); diff --git a/core/rend/gles/postprocess.h b/core/rend/gles/postprocess.h index 5179e0b9b..a5b9eee5d 100644 --- a/core/rend/gles/postprocess.h +++ b/core/rend/gles/postprocess.h @@ -27,15 +27,11 @@ public: 
GLuint getFramebuffer(int width, int height); private: - void init(); + void init(int width, int height); - GLuint texture = 0; - GLuint framebuffer = 0; - GLuint depthBuffer = 0; GLuint vertexBuffer = 0; GLuint vertexArray = 0; - float width = 0; - float height = 0; + std::unique_ptr framebuffer; }; extern PostProcessor postProcessor; diff --git a/core/rend/gui.cpp b/core/rend/gui.cpp index 3f09670b4..19f4b41fa 100644 --- a/core/rend/gui.cpp +++ b/core/rend/gui.cpp @@ -1771,6 +1771,9 @@ static void gui_display_settings() "Useful to avoid flashing screen or glitchy videos. Not recommended on slow platforms"); OptionCheckbox("Native Depth Interpolation", config::NativeDepthInterpolation, "Helps with texture corruption and depth issues on AMD GPUs. Can also help Intel GPUs in some cases."); + OptionCheckbox("Full Framebuffer Emulation", config::EmulateFramebuffer, + "Fully accurate VRAM framebuffer emulation. Helps games that directly access the framebuffer for special effects. " + "Very slow and incompatible with upscaling and wide screen."); constexpr int apiCount = 0 #ifdef USE_VULKAN + 1 diff --git a/core/rend/mainui.cpp b/core/rend/mainui.cpp index a450b5087..230899912 100644 --- a/core/rend/mainui.cpp +++ b/core/rend/mainui.cpp @@ -67,7 +67,6 @@ bool mainui_rend_frame() void mainui_init() { rend_init_renderer(); - rend_resize_renderer(); } void mainui_term() diff --git a/core/rend/transform_matrix.h b/core/rend/transform_matrix.h index 086e0f978..35df79cd6 100644 --- a/core/rend/transform_matrix.h +++ b/core/rend/transform_matrix.h @@ -26,6 +26,29 @@ #include #include +inline static void getTAViewport(int& width, int& height) +{ + width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; +} + +inline static void getPvrFramebufferSize(int& width, int& height) +{ + getTAViewport(width, height); + if (!config::EmulateFramebuffer) + { + int maxHeight = FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0 ? 
240 : 480; + if (SCALER_CTL.vscalefactor != 0 + && (SCALER_CTL.vscalefactor > 1025 || SCALER_CTL.vscalefactor < 1024) + && SPG_CONTROL.interlace == 0) + maxHeight /= 1024.f / SCALER_CTL.vscalefactor; + if (FB_R_CTRL.fb_line_double) + maxHeight /= 2; + height = std::min(maxHeight, height); + // TODO Use FB_R_SIZE too? + } +} + // Dreamcast: // +Y is down // OpenGL: @@ -48,10 +71,12 @@ public: bool IsClipped() const { + int width, height; + getTAViewport(width, height); + float sx, sy; + GetScissorScaling(sx, sy); return renderingContext->fb_X_CLIP.min != 0 - || lroundf((renderingContext->fb_X_CLIP.max + 1) / scale_x) != 640L - || renderingContext->fb_Y_CLIP.min != 0 - || lroundf((renderingContext->fb_Y_CLIP.max + 1) / scale_y) != 480L; + || lroundf((renderingContext->fb_X_CLIP.max + 1) / sx) != width; } const glm::mat4& GetNormalMatrix() const { @@ -76,15 +101,13 @@ public: void CalcMatrices(const rend_context *renderingContext, int width = 0, int height = 0) { - constexpr int screenFlipY = System == COORD_OPENGL || System == COORD_DIRECTX ? -1 : 1; + const int screenFlipY = (System == COORD_OPENGL && !config::EmulateFramebuffer) || System == COORD_DIRECTX ? -1 : 1; constexpr int rttFlipY = System == COORD_DIRECTX ? -1 : 1; constexpr int framebufferFlipY = System == COORD_DIRECTX ? -1 : 1; renderViewport = { width == 0 ? settings.display.width : width, height == 0 ? 
settings.display.height : height }; this->renderingContext = renderingContext; - GetFramebufferScaling(false, scale_x, scale_y); - if (renderingContext->isRTT) { dcViewport.x = (float)(renderingContext->fb_X_CLIP.max - renderingContext->fb_X_CLIP.min + 1); @@ -96,8 +119,10 @@ public: } else { - dcViewport.x = 640.f * scale_x; - dcViewport.y = 480.f * scale_y; + int w, h; + getPvrFramebufferSize(w, h); + dcViewport.x = w; + dcViewport.y = h; float startx = 0; float starty = 0; @@ -142,9 +167,9 @@ public: scissorMatrix = normalMatrix; float scissoring_scale_x, scissoring_scale_y; - GetFramebufferScaling(true, scissoring_scale_x, scissoring_scale_y); + GetScissorScaling(scissoring_scale_x, scissoring_scale_y); - if (config::Widescreen && !config::Rotate90) + if (config::Widescreen && !config::Rotate90 && !config::EmulateFramebuffer) { sidebarWidth = (1 - dcViewport.x / dcViewport.y * renderViewport.y / renderViewport.x) / 2; if (config::SuperWidescreen) @@ -185,34 +210,25 @@ public: } private: - void GetFramebufferScaling(bool scissor, float& scale_x, float& scale_y) + void GetScissorScaling(float& scale_x, float& scale_y) const { scale_x = 1.f; scale_y = 1.f; - if (!renderingContext->isRTT && !renderingContext->isRenderFramebuffer) + if (!renderingContext->isRTT && !config::EmulateFramebuffer) { - if (!scissor && (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0)) - scale_y /= 2.f; if (SCALER_CTL.vscalefactor > 0x400) - { - // Interlace mode A (single framebuffer) - if (SCALER_CTL.interlace == 0) - scale_y *= roundf((float)SCALER_CTL.vscalefactor / 0x400); - else if (SCALER_CTL.interlace == 1 && scissor) - // Interlace mode B (alternating framebuffers) - scale_y *= roundf((float)SCALER_CTL.vscalefactor / 0x400); - } - - // VO pixel doubling is done after fb rendering/clipping - if (VO_CONTROL.pixel_double && !scissor) - scale_x /= 2.f; - - // the X Scaler halves the horizontal resolution but - // before clipping/scissoring + scale_y *= 
std::round(SCALER_CTL.vscalefactor / 1024.f); if (SCALER_CTL.hscale) scale_x *= 2.f; } + else if (config::EmulateFramebuffer) + { + if (SCALER_CTL.hscale) + scale_x *= 2.f; + if (SCALER_CTL.vscalefactor > 0x401 || SCALER_CTL.vscalefactor < 0x400) + scale_y *= SCALER_CTL.vscalefactor / 1024.f; + } } const rend_context *renderingContext = nullptr; @@ -222,29 +238,96 @@ private: glm::mat4 viewportMatrix; glm::vec2 dcViewport; glm::vec2 renderViewport; - float scale_x = 0; - float scale_y = 0; float sidebarWidth = 0; }; +inline static void getScaledFramebufferSize(int& width, int& height) +{ + getPvrFramebufferSize(width, height); + if (!config::EmulateFramebuffer) + { + float upscaling = config::RenderResolution / 480.f; + float w = width * upscaling; + float h = height * upscaling; + if (config::Widescreen && !config::Rotate90) + { + if (config::SuperWidescreen) + w *= (float)settings.display.width / settings.display.height / 4.f * 3.f; + else + w *= 4.f / 3.f; + } + if (!config::Rotate90) + w = std::round(w / 2.f) * 2.f; + h = std::round(h); + width = w; + height = h; + } +} + inline static float getOutputFramebufferAspectRatio() { - float renderAR; + int w,h; + getPvrFramebufferSize(w, h); + + float width = w; + float height = h; + width *= 1 + VO_CONTROL.pixel_double; + width /= 1 + SCALER_CTL.hscale; + height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0); + height *= 1 + (FB_R_CTRL.fb_line_double); + if (SCALER_CTL.vscalefactor != 0 + && (SCALER_CTL.vscalefactor > 1025 || SCALER_CTL.vscalefactor < 1024) + && SPG_CONTROL.interlace == 0) + { + if (config::EmulateFramebuffer) + height *= 1024.f / SCALER_CTL.vscalefactor; + else if (SCALER_CTL.vscalefactor > 1025) + height *= std::round(1024.f / SCALER_CTL.vscalefactor); + + } + + float renderAR = width / height; if (config::Rotate90) { - renderAR = 3.f / 4.f; + renderAR = 1 / renderAR; } else { - if (config::Widescreen) + if (config::Widescreen && !config::EmulateFramebuffer) { if 
(config::SuperWidescreen) renderAR = (float)settings.display.width / settings.display.height; else - renderAR = 16.f / 9.f; + renderAR *= 4 / 3.f; } - else - renderAR = 4.f / 3.f; } return renderAR * config::ScreenStretching / 100.f; } + +inline static float getDCFramebufferAspectRatio() +{ + int width = FB_R_SIZE.fb_x_size + 1; // in 32-bit words + int height = FB_R_SIZE.fb_y_size + 1; + + switch (FB_R_CTRL.fb_depth) + { + case fbde_0555: + case fbde_565: + width *= 2; + break; + case fbde_888: + width = width * 4 / 3; + break; + case fbde_C888: + default: + break; + } + width *= 1 + VO_CONTROL.pixel_double; + height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0); + height *= 1 + (FB_R_CTRL.fb_line_double); + height *= 1 + SPG_CONTROL.interlace; + float aspectRatio = (float)width / height; + if (config::Rotate90) + aspectRatio = 1 / aspectRatio; + return aspectRatio * config::ScreenStretching / 100.f; +} diff --git a/core/rend/vulkan/buffer.cpp b/core/rend/vulkan/buffer.cpp index af052fbe0..58ca65ccb 100644 --- a/core/rend/vulkan/buffer.cpp +++ b/core/rend/vulkan/buffer.cpp @@ -22,20 +22,34 @@ #include "utils.h" #include "vulkan_context.h" -BufferData::BufferData(vk::DeviceSize size, const vk::BufferUsageFlags& usage, const vk::MemoryPropertyFlags& propertyFlags) - : bufferSize(size), m_usage(usage), m_propertyFlags(propertyFlags) +BufferData::BufferData(vk::DeviceSize size, vk::BufferUsageFlags usage, vk::MemoryPropertyFlags propertyFlags) + : bufferSize(size), m_usage(usage) { VulkanContext *context = VulkanContext::Instance(); buffer = context->GetDevice().createBufferUnique(vk::BufferCreateInfo(vk::BufferCreateFlags(), size, usage)); - VmaAllocationCreateInfo allocInfo = { - (propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) ? VMA_ALLOCATION_CREATE_MAPPED_BIT : (VmaAllocationCreateFlags)0, - (propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) ? 
VmaMemoryUsage::VMA_MEMORY_USAGE_GPU_ONLY : VmaMemoryUsage::VMA_MEMORY_USAGE_CPU_TO_GPU - }; + VmaAllocationCreateInfo allocInfo {}; + if (propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) + { + allocInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + else + { + // FIXME VMA_ALLOCATION_CREATE_MAPPED_BIT ? #ifdef __APPLE__ - if (!(propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal)) // cpu memory management is fucked up with moltenvk allocInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + // host coherent memory not supported on apple platforms + propertyFlags &= ~vk::MemoryPropertyFlagBits::eHostCoherent; #endif + if (propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible) + { + allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if (propertyFlags & vk::MemoryPropertyFlagBits::eHostCached) + allocInfo.preferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + if (propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) + allocInfo.preferredFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + } + } allocation = context->GetAllocator().AllocateForBuffer(*buffer, allocInfo); } diff --git a/core/rend/vulkan/buffer.h b/core/rend/vulkan/buffer.h index aac074391..2f199f28e 100644 --- a/core/rend/vulkan/buffer.h +++ b/core/rend/vulkan/buffer.h @@ -25,14 +25,9 @@ struct BufferData { - BufferData(vk::DeviceSize size, const vk::BufferUsageFlags& usage, - const vk::MemoryPropertyFlags& propertyFlags = - vk::MemoryPropertyFlagBits::eHostVisible -#ifndef __APPLE__ - // host coherent memory not supported on apple platforms - | vk::MemoryPropertyFlagBits::eHostCoherent -#endif - ); + BufferData(vk::DeviceSize size, vk::BufferUsageFlags usage, + vk::MemoryPropertyFlags propertyFlags = + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); ~BufferData() { buffer.reset(); @@ -40,7 +35,6 @@ struct BufferData void upload(u32 size, const void *data, u32 bufOffset = 0) const { - verify((bool)(m_propertyFlags & 
vk::MemoryPropertyFlagBits::eHostVisible)); verify(bufOffset + size <= bufferSize); void* dataPtr = (u8 *)allocation.MapMemory() + bufOffset; @@ -50,8 +44,6 @@ struct BufferData void upload(size_t count, const u32 *sizes, const void * const *data, u32 bufOffset = 0) const { - verify((bool)(m_propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)); - u32 totalSize = 0; for (size_t i = 0; i < count; i++) totalSize += sizes[i]; @@ -68,7 +60,6 @@ struct BufferData void download(u32 size, void *data, u32 bufOffset = 0) const { - verify((bool)(m_propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)); verify(bufOffset + size <= bufferSize); void* dataPtr = (u8 *)allocation.MapMemory() + bufOffset; @@ -91,7 +82,6 @@ struct BufferData private: vk::BufferUsageFlags m_usage; - vk::MemoryPropertyFlags m_propertyFlags; }; class BufferPacker diff --git a/core/rend/vulkan/desc_set.h b/core/rend/vulkan/desc_set.h index b71b226f9..396b90679 100644 --- a/core/rend/vulkan/desc_set.h +++ b/core/rend/vulkan/desc_set.h @@ -18,47 +18,6 @@ */ #pragma once #include "vulkan_context.h" -#include - -template -class DescSetAlloc -{ -public: - void setLayout(vk::DescriptorSetLayout layout) { - this->layout = layout; - } - void setAllocChunk(int size) { - this->allocChunk = size; - } - - void nextFrame() - { - index = (index + 1) % Size; - for (auto& descset : descSetsInFlight[index]) - descSets.emplace_back(std::move(descset)); - descSetsInFlight[index].clear(); - } - - vk::DescriptorSet alloc() - { - if (descSets.empty()) - { - std::vector layouts(allocChunk, layout); - descSets = VulkanContext::Instance()->GetDevice().allocateDescriptorSetsUnique( - vk::DescriptorSetAllocateInfo(VulkanContext::Instance()->GetDescriptorPool(), (u32)layouts.size(), &layouts[0])); - } - descSetsInFlight[index].emplace_back(std::move(descSets.back())); - descSets.pop_back(); - return *descSetsInFlight[index].back(); - } - -private: - vk::DescriptorSetLayout layout; - std::vector descSets; - std::array, 
Size> descSetsInFlight; - int index = 0; - int allocChunk = 10; -}; class DynamicDescSetAlloc { diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index ce49e6c77..74873a7a4 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -62,8 +62,8 @@ TileClipping BaseDrawer::SetTileClip(u32 val, vk::Rect2D& clipRect) void BaseDrawer::SetBaseScissor(const vk::Extent2D& viewport) { - bool wide_screen_on = config::Widescreen && !pvrrc.isRenderFramebuffer - && !matrices.IsClipped() && !config::Rotate90; + bool wide_screen_on = config::Widescreen + && !matrices.IsClipped() && !config::Rotate90 && !config::EmulateFramebuffer; if (!wide_screen_on) { float width; @@ -103,6 +103,76 @@ void BaseDrawer::SetBaseScissor(const vk::Extent2D& viewport) } } +void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, FramebufferAttachment *finalFB) +{ + u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + + float xscale = SCALER_CTL.hscale == 1 ? 
0.5f : 1.f; + float yscale = 1024.f / SCALER_CTL.vscalefactor; + if (std::abs(yscale - 1.f) < 0.01f) + yscale = 1.f; + + FramebufferAttachment *scaledFB = nullptr; + + if (xscale != 1.f || yscale != 1.f) + { + u32 scaledW = width * xscale; + u32 scaledH = height * yscale; + + scaledFB = new FramebufferAttachment(GetContext()->GetPhysicalDevice(), GetContext()->GetDevice()); + scaledFB->Init(scaledW, scaledH, vk::Format::eR8G8B8A8Unorm, vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst); + + setImageLayout(commandBuffer, scaledFB->GetImage(), vk::Format::eR8G8B8A8Unorm, 1, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal); + + vk::ImageBlit imageBlit; + imageBlit.setSrcOffsets({ vk::Offset3D(0, 0, 0), vk::Offset3D(width, height, 1) }); + imageBlit.setSrcSubresource(vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1)); + imageBlit.setDstOffsets({ vk::Offset3D(0, 0, 0), vk::Offset3D(scaledW, scaledH, 1) }); + imageBlit.setDstSubresource(imageBlit.srcSubresource); + commandBuffer.blitImage(finalFB->GetImage(), vk::ImageLayout::eTransferSrcOptimal, scaledFB->GetImage(), vk::ImageLayout::eTransferDstOptimal, + 1, &imageBlit, vk::Filter::eLinear); + + setImageLayout(commandBuffer, scaledFB->GetImage(), vk::Format::eR8G8B8A8Unorm, 1, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eTransferSrcOptimal); + + finalFB = scaledFB; + width = scaledW; + height = scaledH; + } + + vk::BufferImageCopy copyRegion(0, width, height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), vk::Offset3D(0, 0, 0), + vk::Extent3D(width, height, 1)); + commandBuffer.copyImageToBuffer(finalFB->GetImage(), vk::ImageLayout::eTransferSrcOptimal, + *finalFB->GetBufferData()->buffer, copyRegion); + + vk::BufferMemoryBarrier bufferMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, + vk::AccessFlagBits::eHostRead, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + *finalFB->GetBufferData()->buffer, + 0, + 
VK_WHOLE_SIZE); + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eHost, {}, nullptr, bufferMemoryBarrier, nullptr); + + commandBuffer.end(); + commandPool->EndFrame(); + + vk::Fence fence = commandPool->GetCurrentFence(); + GetContext()->GetDevice().waitForFences(1, &fence, true, UINT64_MAX); + PixelBuffer tmpBuf; + tmpBuf.init(width, height); + finalFB->GetBufferData()->download(width * height * 4, tmpBuf.data()); + + WriteFramebuffer(width, height, (u8 *)tmpBuf.data(), pvrrc.fb_W_SOF1 & VRAM_MASK, + pvrrc.fb_W_CTRL, pvrrc.fb_W_LINESTRIDE * 8, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); + + delete scaledFB; +} + void Drawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, const PolyParam& poly, u32 first, u32 count) { vk::Rect2D scissorRect; @@ -557,10 +627,11 @@ void ScreenDrawer::Init(SamplerManager *samplerManager, ShaderManager *shaderMan { vk::AttachmentDescription attachmentDescriptions[] = { // Color attachment - vk::AttachmentDescription(vk::AttachmentDescriptionFlags(), GetContext()->GetColorFormat(), vk::SampleCountFlagBits::e1, + vk::AttachmentDescription(vk::AttachmentDescriptionFlags(), vk::Format::eR8G8B8A8Unorm, vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, vk::AttachmentStoreOp::eDontCare, - vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal), + config::EmulateFramebuffer ? vk::ImageLayout::eTransferSrcOptimal : vk::ImageLayout::eShaderReadOnlyOptimal, + config::EmulateFramebuffer ? 
vk::ImageLayout::eTransferSrcOptimal : vk::ImageLayout::eShaderReadOnlyOptimal), // Depth attachment vk::AttachmentDescription(vk::AttachmentDescriptionFlags(), GetContext()->GetDepthFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear, vk::AttachmentStoreOp::eDontCare, @@ -610,8 +681,12 @@ void ScreenDrawer::Init(SamplerManager *samplerManager, ShaderManager *shaderMan { colorAttachments.push_back(std::unique_ptr( new FramebufferAttachment(GetContext()->GetPhysicalDevice(), GetContext()->GetDevice()))); - colorAttachments.back()->Init(viewport.width, viewport.height, GetContext()->GetColorFormat(), - vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled); + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eColorAttachment; + if (config::EmulateFramebuffer) + usage |= vk::ImageUsageFlagBits::eTransferSrc; + else + usage |= vk::ImageUsageFlagBits::eSampled; + colorAttachments.back()->Init(viewport.width, viewport.height, vk::Format::eR8G8B8A8Unorm, usage); attachments[0] = colorAttachments.back()->GetImageView(); vk::FramebufferCreateInfo createInfo(vk::FramebufferCreateFlags(), *renderPassLoad, ARRAY_SIZE(attachments), attachments, viewport.width, viewport.height, 1); @@ -630,13 +705,15 @@ void ScreenDrawer::Init(SamplerManager *samplerManager, ShaderManager *shaderMan vk::CommandBuffer ScreenDrawer::BeginRenderPass() { + NewImage(); vk::CommandBuffer commandBuffer = commandPool->Allocate(); commandBuffer.begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)); if (transitionNeeded[GetCurrentImage()]) { - setImageLayout(commandBuffer, colorAttachments[GetCurrentImage()]->GetImage(), GetContext()->GetColorFormat(), - 1, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal); + setImageLayout(commandBuffer, colorAttachments[GetCurrentImage()]->GetImage(), vk::Format::eR8G8B8A8Unorm, + 1, vk::ImageLayout::eUndefined, + config::EmulateFramebuffer ? 
vk::ImageLayout::eTransferSrcOptimal : vk::ImageLayout::eShaderReadOnlyOptimal); transitionNeeded[GetCurrentImage()] = false; } @@ -659,9 +736,16 @@ vk::CommandBuffer ScreenDrawer::BeginRenderPass() void ScreenDrawer::EndRenderPass() { currentCommandBuffer.endRenderPass(); - currentCommandBuffer.end(); + if (config::EmulateFramebuffer) + { + scaleAndWriteFramebuffer(currentCommandBuffer, colorAttachments[GetCurrentImage()].get()); + } + else + { + currentCommandBuffer.end(); + commandPool->EndFrame(); + } currentCommandBuffer = nullptr; - commandPool->EndFrame(); Drawer::EndRenderPass(); frameRendered = true; } diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index 823a048d7..b6ad62da1 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -42,8 +42,9 @@ protected: VulkanContext *GetContext() const { return VulkanContext::Instance(); } TileClipping SetTileClip(u32 val, vk::Rect2D& clipRect); void SetBaseScissor(const vk::Extent2D& viewport = vk::Extent2D()); + void scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, FramebufferAttachment *finalFB); - void SetScissor(const vk::CommandBuffer& cmdBuffer, const vk::Rect2D& scissor) + void SetScissor(vk::CommandBuffer cmdBuffer, const vk::Rect2D& scissor) { if (scissor != currentScissor) { @@ -170,6 +171,13 @@ class Drawer : public BaseDrawer { public: virtual ~Drawer() = default; + + void Term() + { + descriptorSets.term(); + mainBuffers.clear(); + } + bool Draw(const Texture *fogTexture, const Texture *paletteTexture); virtual void EndRenderPass() { renderPass++; } vk::CommandBuffer GetCurrentCommandBuffer() const { return currentCommandBuffer; } @@ -259,6 +267,18 @@ class ScreenDrawer : public Drawer { public: void Init(SamplerManager *samplerManager, ShaderManager *shaderManager, const vk::Extent2D& viewport); + + void Term() + { + screenPipelineManager.reset(); + renderPassLoad.reset(); + renderPassClear.reset(); + framebuffers.clear(); + colorAttachments.clear(); + 
depthAttachment.reset(); + Drawer::Term(); + } + vk::RenderPass GetRenderPass() const { return *renderPassClear; } void EndRenderPass() override; bool PresentFrame() @@ -268,7 +288,6 @@ public: frameRendered = false; GetContext()->PresentFrame(colorAttachments[GetCurrentImage()]->GetImage(), colorAttachments[GetCurrentImage()]->GetImageView(), viewport); - NewImage(); return true; } @@ -296,6 +315,16 @@ class TextureDrawer : public Drawer { public: void Init(SamplerManager *samplerManager, ShaderManager *shaderManager, TextureCache *textureCache); + + void Term() + { + rttPipelineManager.reset(); + framebuffers.clear(); + colorAttachment.reset(); + depthAttachment.reset(); + Drawer::Term(); + } + void EndRenderPass() override; protected: diff --git a/core/rend/vulkan/oit/oit_drawer.cpp b/core/rend/vulkan/oit/oit_drawer.cpp index e134722a5..fb8205c21 100644 --- a/core/rend/vulkan/oit/oit_drawer.cpp +++ b/core/rend/vulkan/oit/oit_drawer.cpp @@ -444,7 +444,7 @@ void OITDrawer::MakeBuffers(int width, int height) attachment.reset(); attachment = std::unique_ptr( new FramebufferAttachment(GetContext()->GetPhysicalDevice(), GetContext()->GetDevice())); - attachment->Init(maxWidth, maxHeight, GetColorFormat(), + attachment->Init(maxWidth, maxHeight, vk::Format::eR8G8B8A8Unorm, vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment); } @@ -483,12 +483,18 @@ void OITScreenDrawer::MakeFramebuffers(const vk::Extent2D& viewport) finalColorAttachments.clear(); transitionNeeded.clear(); clearNeeded.clear(); + + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eColorAttachment; + if (config::EmulateFramebuffer) + usage |= vk::ImageUsageFlagBits::eTransferSrc; + else + usage |= vk::ImageUsageFlagBits::eSampled; while (finalColorAttachments.size() < GetSwapChainSize()) { finalColorAttachments.push_back(std::unique_ptr( new FramebufferAttachment(GetContext()->GetPhysicalDevice(), GetContext()->GetDevice()))); - 
finalColorAttachments.back()->Init(viewport.width, viewport.height, GetContext()->GetColorFormat(), - vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled); + finalColorAttachments.back()->Init(viewport.width, viewport.height, vk::Format::eR8G8B8A8Unorm, + usage); vk::ImageView attachments[] = { finalColorAttachments.back()->GetImageView(), colorAttachments[0]->GetImageView(), @@ -659,18 +665,16 @@ void OITTextureDrawer::EndFrame() vk::CommandBuffer OITScreenDrawer::NewFrame() { - if (frameRendered) - { - // in case the previous image was never presented - frameRendered = false; - NewImage(); - } + frameRendered = false; + NewImage(); vk::CommandBuffer commandBuffer = commandPool->Allocate(); commandBuffer.begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)); if (transitionNeeded[GetCurrentImage()]) { - setImageLayout(commandBuffer, finalColorAttachments[GetCurrentImage()]->GetImage(), GetColorFormat(), 1, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal); + setImageLayout(commandBuffer, finalColorAttachments[GetCurrentImage()]->GetImage(), vk::Format::eR8G8B8A8Unorm, 1, + vk::ImageLayout::eUndefined, + config::EmulateFramebuffer ? 
vk::ImageLayout::eTransferSrcOptimal : vk::ImageLayout::eShaderReadOnlyOptimal); transitionNeeded[GetCurrentImage()] = false; } matrices.CalcMatrices(&pvrrc, viewport.extent.width, viewport.extent.height); diff --git a/core/rend/vulkan/oit/oit_drawer.h b/core/rend/vulkan/oit/oit_drawer.h index d97f5840b..e9fbccea1 100644 --- a/core/rend/vulkan/oit/oit_drawer.h +++ b/core/rend/vulkan/oit/oit_drawer.h @@ -70,6 +70,8 @@ protected: depthAttachments[1].reset(); mainBuffers.clear(); descriptorSets.term(); + maxWidth = 0; + maxHeight = 0; } int GetCurrentImage() const { return imageIndex; } @@ -104,7 +106,6 @@ protected: }; void MakeBuffers(int width, int height); - virtual vk::Format GetColorFormat() const = 0; virtual vk::Framebuffer GetFinalFramebuffer() const = 0; vk::Rect2D viewport; @@ -177,12 +178,20 @@ public: } vk::CommandBuffer NewFrame() override; + void EndFrame() override { currentCommandBuffer.endRenderPass(); - currentCommandBuffer.end(); + if (config::EmulateFramebuffer) + { + scaleAndWriteFramebuffer(currentCommandBuffer, finalColorAttachments[GetCurrentImage()].get()); + } + else + { + currentCommandBuffer.end(); + commandPool->EndFrame(); + } currentCommandBuffer = nullptr; - commandPool->EndFrame(); OITDrawer::EndFrame(); frameRendered = true; } @@ -194,7 +203,6 @@ public: frameRendered = false; GetContext()->PresentFrame(finalColorAttachments[GetCurrentImage()]->GetImage(), finalColorAttachments[GetCurrentImage()]->GetImageView(), viewport.extent); - NewImage(); return true; } @@ -203,7 +211,6 @@ public: protected: vk::Framebuffer GetFinalFramebuffer() const override { return *framebuffers[GetCurrentImage()]; } - vk::Format GetColorFormat() const override { return GetContext()->GetColorFormat(); } private: void MakeFramebuffers(const vk::Extent2D& viewport); @@ -241,7 +248,6 @@ public: protected: vk::CommandBuffer NewFrame() override; vk::Framebuffer GetFinalFramebuffer() const override { return *framebuffers[GetCurrentImage()]; } - vk::Format 
GetColorFormat() const override { return vk::Format::eR8G8B8A8Unorm; } private: u32 textureAddr = 0; diff --git a/core/rend/vulkan/oit/oit_renderer.cpp b/core/rend/vulkan/oit/oit_renderer.cpp index 3c2fdb542..55e1544be 100644 --- a/core/rend/vulkan/oit/oit_renderer.cpp +++ b/core/rend/vulkan/oit/oit_renderer.cpp @@ -38,7 +38,7 @@ public: screenDrawer.Init(&samplerManager, &oitShaderManager, &oitBuffers, viewport); screenDrawer.SetCommandPool(&texCommandPool); BaseInit(screenDrawer.GetRenderPass(), 2); - + emulateFramebuffer = config::EmulateFramebuffer; return true; } @@ -49,15 +49,6 @@ public: return false; } - void Resize(int w, int h) override - { - if ((u32)w == viewport.width && (u32)h == viewport.height) - return; - BaseVulkanRenderer::Resize(w, h); - GetContext()->WaitIdle(); - screenDrawer.Init(&samplerManager, &oitShaderManager, &oitBuffers, viewport); - } - void Term() override { DEBUG_LOG(RENDERER, "OITVulkanRenderer::Term"); @@ -65,22 +56,35 @@ public: screenDrawer.Term(); textureDrawer.Term(); oitBuffers.Term(); + oitShaderManager.term(); + samplerManager.term(); BaseVulkanRenderer::Term(); } bool Render() override { try { + if (emulateFramebuffer != config::EmulateFramebuffer) + { + VulkanContext::Instance()->WaitIdle(); + screenDrawer.Term(); + screenDrawer.Init(&samplerManager, &oitShaderManager, &oitBuffers, viewport); + BaseInit(screenDrawer.GetRenderPass(), 2); + emulateFramebuffer = config::EmulateFramebuffer; + } OITDrawer *drawer; if (pvrrc.isRTT) drawer = &textureDrawer; - else + else { + resize(pvrrc.framebufferWidth, pvrrc.framebufferHeight); drawer = &screenDrawer; + } drawer->Draw(fogTexture.get(), paletteTexture.get()); #ifdef LIBRETRO if (!pvrrc.isRTT) - overlay->Draw(screenDrawer.GetCurrentCommandBuffer(), viewport, (int)config::RenderResolution / 480.f, true, true); + overlay->Draw(screenDrawer.GetCurrentCommandBuffer(), viewport, + config::EmulateFramebuffer ? 
1 : (int)config::RenderResolution / 480.f, true, true); #endif drawer->EndFrame(); @@ -96,7 +100,20 @@ public: bool Present() override { - return screenDrawer.PresentFrame(); + if (config::EmulateFramebuffer) + return presentFramebuffer(); + else + return screenDrawer.PresentFrame(); + } + +protected: + void resize(int w, int h) override + { + if ((u32)w == viewport.width && (u32)h == viewport.height) + return; + BaseVulkanRenderer::resize(w, h); + GetContext()->WaitIdle(); + screenDrawer.Init(&samplerManager, &oitShaderManager, &oitBuffers, viewport); } private: @@ -105,6 +122,7 @@ private: OITShaderManager oitShaderManager; OITScreenDrawer screenDrawer; OITTextureDrawer textureDrawer; + bool emulateFramebuffer = false; }; Renderer* rend_OITVulkan() diff --git a/core/rend/vulkan/oit/oit_renderpass.cpp b/core/rend/vulkan/oit/oit_renderpass.cpp index c3e0bd4cc..89722ff21 100644 --- a/core/rend/vulkan/oit/oit_renderpass.cpp +++ b/core/rend/vulkan/oit/oit_renderpass.cpp @@ -26,7 +26,7 @@ vk::UniqueRenderPass RenderPasses::MakeRenderPass(bool initial, bool last) // Swap chain image GetAttachment0Description(initial, last), // OP+PT color attachment - vk::AttachmentDescription(vk::AttachmentDescriptionFlags(), GetColorFormat(), vk::SampleCountFlagBits::e1, + vk::AttachmentDescription(vk::AttachmentDescriptionFlags(), vk::Format::eR8G8B8A8Unorm, vk::SampleCountFlagBits::e1, initial ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, last ? 
vk::AttachmentStoreOp::eDontCare : vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, vk::AttachmentStoreOp::eDontCare, diff --git a/core/rend/vulkan/oit/oit_renderpass.h b/core/rend/vulkan/oit/oit_renderpass.h index f90eb7ff7..a67af4f11 100644 --- a/core/rend/vulkan/oit/oit_renderpass.h +++ b/core/rend/vulkan/oit/oit_renderpass.h @@ -43,12 +43,13 @@ protected: vk::UniqueRenderPass MakeRenderPass(bool initial, bool last); virtual vk::AttachmentDescription GetAttachment0Description(bool initial, bool last) const { - return vk::AttachmentDescription(vk::AttachmentDescriptionFlags(), GetContext()->GetColorFormat(), vk::SampleCountFlagBits::e1, + return vk::AttachmentDescription(vk::AttachmentDescriptionFlags(), vk::Format::eR8G8B8A8Unorm, vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, vk::AttachmentStoreOp::eDontCare, - vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal); + config::EmulateFramebuffer && last ? vk::ImageLayout::eTransferSrcOptimal : vk::ImageLayout::eShaderReadOnlyOptimal, + config::EmulateFramebuffer && last ? vk::ImageLayout::eTransferSrcOptimal : vk::ImageLayout::eShaderReadOnlyOptimal); } - virtual vk::Format GetColorFormat() const { return GetContext()->GetColorFormat(); } + virtual std::vector GetSubpassDependencies() const { std::vector deps; @@ -72,7 +73,7 @@ protected: vk::ImageLayout::eUndefined, config::RenderToTextureBuffer && last ? 
vk::ImageLayout::eTransferSrcOptimal : vk::ImageLayout::eShaderReadOnlyOptimal); } - vk::Format GetColorFormat() const override { return vk::Format::eR8G8B8A8Unorm; } + std::vector GetSubpassDependencies() const override { std::vector deps; diff --git a/core/rend/vulkan/oit/oit_shaders.h b/core/rend/vulkan/oit/oit_shaders.h index 356cae7b2..98ceb85ef 100644 --- a/core/rend/vulkan/oit/oit_shaders.h +++ b/core/rend/vulkan/oit/oit_shaders.h @@ -124,6 +124,20 @@ public: return *clearShader; } + void term() + { + vertexShaders.clear(); + fragmentShaders.clear(); + modVolVertexShaders.clear(); + modVolShaders[0].reset(); + modVolShaders[1].reset(); + trModVolShaders.clear(); + + finalVertexShader.reset(); + finalFragmentShader.reset(); + clearShader.reset(); + } + private: template vk::ShaderModule getShader(std::map& map, T params) diff --git a/core/rend/vulkan/pipeline.h b/core/rend/vulkan/pipeline.h index cf83b84fc..9c96b2c58 100644 --- a/core/rend/vulkan/pipeline.h +++ b/core/rend/vulkan/pipeline.h @@ -420,6 +420,15 @@ public: GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); } + void Term() + { + descriptorSet.reset(); + pipeline.reset(); + sampler.reset(); + pipelineLayout.reset(); + descSetLayout.reset(); + } + vk::Pipeline GetPipeline() { if (!pipeline) diff --git a/core/rend/vulkan/shaders.h b/core/rend/vulkan/shaders.h index f58ac0d44..148122335 100644 --- a/core/rend/vulkan/shaders.h +++ b/core/rend/vulkan/shaders.h @@ -158,6 +158,21 @@ public: return *osdFragmentShader; } + void term() + { + vertexShaders.clear(); + fragmentShaders.clear(); + modVolVertexShaders.clear(); + modVolShaders[0].reset(); + modVolShaders[1].reset(); + quadVertexShader.reset(); + quadRotateVertexShader.reset(); + quadFragmentShader.reset(); + quadNoAlphaFragmentShader.reset(); + osdVertexShader.reset(); + osdFragmentShader.reset(); + } + private: template vk::ShaderModule getShader(std::map& map, T params) diff --git a/core/rend/vulkan/texture.cpp 
b/core/rend/vulkan/texture.cpp index 2ccb57536..8d15e241c 100644 --- a/core/rend/vulkan/texture.cpp +++ b/core/rend/vulkan/texture.cpp @@ -406,7 +406,8 @@ void FramebufferAttachment::Init(u32 width, u32 height, vk::Format format, const if (usage & vk::ImageUsageFlagBits::eTransferSrc) { stagingBufferData = std::unique_ptr(new BufferData(width * height * 4, - vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst)); + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCached | vk::MemoryPropertyFlagBits::eHostCoherent)); } vk::ImageCreateInfo imageCreateInfo(vk::ImageCreateFlags(), vk::ImageType::e2D, format, vk::Extent3D(extent, 1), 1, 1, vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal, usage, @@ -418,15 +419,18 @@ void FramebufferAttachment::Init(u32 width, u32 height, vk::Format format, const allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; allocation = VulkanContext::Instance()->GetAllocator().AllocateForImage(*image, allocCreateInfo); - vk::ImageViewCreateInfo imageViewCreateInfo(vk::ImageViewCreateFlags(), image.get(), vk::ImageViewType::e2D, - format, vk::ComponentMapping(), vk::ImageSubresourceRange(depth ? 
vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1)); - imageView = device.createImageViewUnique(imageViewCreateInfo); - - if ((usage & vk::ImageUsageFlagBits::eDepthStencilAttachment) && (usage & vk::ImageUsageFlagBits::eInputAttachment)) + if ((usage & vk::ImageUsageFlagBits::eColorAttachment) || (usage & vk::ImageUsageFlagBits::eDepthStencilAttachment)) { - // Also create an imageView for the stencil - imageViewCreateInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); - stencilView = device.createImageViewUnique(imageViewCreateInfo); + vk::ImageViewCreateInfo imageViewCreateInfo(vk::ImageViewCreateFlags(), image.get(), vk::ImageViewType::e2D, + format, vk::ComponentMapping(), vk::ImageSubresourceRange(depth ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1)); + imageView = device.createImageViewUnique(imageViewCreateInfo); + + if ((usage & vk::ImageUsageFlagBits::eDepthStencilAttachment) && (usage & vk::ImageUsageFlagBits::eInputAttachment)) + { + // Also create an imageView for the stencil + imageViewCreateInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); + stencilView = device.createImageViewUnique(imageViewCreateInfo); + } } } diff --git a/core/rend/vulkan/texture.h b/core/rend/vulkan/texture.h index 0a3d356cb..a924d3f42 100644 --- a/core/rend/vulkan/texture.h +++ b/core/rend/vulkan/texture.h @@ -59,9 +59,11 @@ public: u64 GetIntId() { return (u64)reinterpret_cast(this); } std::string GetId() override { char s[20]; sprintf(s, "%p", this); return s; } vk::ImageView GetImageView() const { return *imageView; } + vk::Image GetImage() const { return *image; } vk::ImageView GetReadOnlyImageView() const { return readOnlyImageView ? 
readOnlyImageView : *imageView; } void SetCommandBuffer(vk::CommandBuffer commandBuffer) { this->commandBuffer = commandBuffer; } bool Force32BitTexture(TextureType type) const override { return !VulkanContext::Instance()->IsFormatSupported(type); } + vk::Extent2D getSize() const { return extent; } private: void Init(u32 width, u32 height, vk::Format format ,u32 dataSize, bool mipmapped, bool mipmapsIncluded); @@ -93,6 +95,10 @@ private: class SamplerManager { public: + void term() { + samplers.clear(); + } + vk::Sampler GetSampler(TSP tsp) { u32 samplerHash = tsp.full & TSP_Mask; // MipMapD, FilterMode, ClampU, ClampV, FlipU, FlipV diff --git a/core/rend/vulkan/vk_context_lr.cpp b/core/rend/vulkan/vk_context_lr.cpp index 512db32fa..66262c0f0 100644 --- a/core/rend/vulkan/vk_context_lr.cpp +++ b/core/rend/vulkan/vk_context_lr.cpp @@ -274,7 +274,7 @@ bool VulkanContext::init(retro_hw_render_interface_vulkan *retro_render_if) retro_image.image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; retro_image.create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; retro_image.create_info.pNext = nullptr; - retro_image.create_info.format = (VkFormat)colorFormat; + retro_image.create_info.format = VK_FORMAT_R8G8B8A8_UNORM; retro_image.create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; retro_image.create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; retro_image.create_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; diff --git a/core/rend/vulkan/vk_context_lr.h b/core/rend/vulkan/vk_context_lr.h index 069c3d103..80c1e92e7 100644 --- a/core/rend/vulkan/vk_context_lr.h +++ b/core/rend/vulkan/vk_context_lr.h @@ -78,7 +78,6 @@ public: + "." + std::to_string(VK_VERSION_MINOR(props.driverVersion)) + "." 
+ std::to_string(VK_VERSION_PATCH(props.driverVersion)); } - vk::Format GetColorFormat() const { return colorFormat; } vk::Format GetDepthFormat() const { return depthFormat; } static VulkanContext *Instance() { return contextInstance; } bool SupportsSamplerAnisotropy() const { return samplerAnisotropy; } @@ -113,7 +112,6 @@ public: bool dedicatedAllocationSupported = false; private: u32 vendorID = 0; - vk::Format colorFormat = vk::Format::eR8G8B8A8Unorm; vk::UniqueDescriptorPool descriptorPool; diff --git a/core/rend/vulkan/vmallocator.h b/core/rend/vulkan/vmallocator.h index 49b0c2123..c6e89d4c2 100644 --- a/core/rend/vulkan/vmallocator.h +++ b/core/rend/vulkan/vmallocator.h @@ -64,14 +64,20 @@ public: return allocInfo.pMappedData; void *p; vmaMapMemory(allocator, allocation, &p); + VkMemoryPropertyFlags flags; + vmaGetMemoryTypeProperties(allocator, allocInfo.memoryType, &flags); + if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) && (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0) + vmaInvalidateAllocation(allocator, allocation, allocInfo.offset, allocInfo.size); return p; } void UnmapMemory() const { if (allocInfo.pMappedData != nullptr) return; - // Only needed (and useful) for non-host coherent memory - vmaFlushAllocation(allocator, allocation, allocInfo.offset, allocInfo.size); + VkMemoryPropertyFlags flags; + vmaGetMemoryTypeProperties(allocator, allocInfo.memoryType, &flags); + if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) && (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0) + vmaFlushAllocation(allocator, allocation, allocInfo.offset, allocInfo.size); vmaUnmapMemory(allocator, allocation); } diff --git a/core/rend/vulkan/vulkan_context.cpp b/core/rend/vulkan/vulkan_context.cpp index d0e284980..68d37ac69 100644 --- a/core/rend/vulkan/vulkan_context.cpp +++ b/core/rend/vulkan/vulkan_context.cpp @@ -392,17 +392,17 @@ bool VulkanContext::InitDevice() #ifdef VK_DEBUG else if (!strcmp(property.extensionName, VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) { - 
NOTICE_LOG(RENDERER, "Debug extension %s available", property.extensionName); + NOTICE_LOG(RENDERER, "Debug extension %s available", property.extensionName.data()); deviceExtensions.push_back(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); } else if(!strcmp(property.extensionName, VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { - NOTICE_LOG(RENDERER, "Debug extension %s available", property.extensionName); + NOTICE_LOG(RENDERER, "Debug extension %s available", property.extensionName.data()); deviceExtensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); } else if (!strcmp(property.extensionName, VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - NOTICE_LOG(RENDERER, "Debug extension %s available", property.extensionName); + NOTICE_LOG(RENDERER, "Debug extension %s available", property.extensionName.data()); deviceExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } #endif diff --git a/core/rend/vulkan/vulkan_context.h b/core/rend/vulkan/vulkan_context.h index cee9b8fd2..e60436c9e 100644 --- a/core/rend/vulkan/vulkan_context.h +++ b/core/rend/vulkan/vulkan_context.h @@ -91,7 +91,6 @@ public: } std::string getDriverName() override; std::string getDriverVersion() override; - vk::Format GetColorFormat() const { return colorFormat; } vk::Format GetDepthFormat() const { return depthFormat; } static VulkanContext *Instance() { return contextInstance; } bool SupportsSamplerAnisotropy() const { return samplerAnisotropy; } diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index 40fd76f79..2e41d9829 100644 --- a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -36,40 +36,46 @@ public: screenDrawer.Init(&samplerManager, &shaderManager, viewport); screenDrawer.SetCommandPool(&texCommandPool); BaseInit(screenDrawer.GetRenderPass()); + emulateFramebuffer = config::EmulateFramebuffer; return true; } - void Resize(int w, int h) override - { - if ((u32)w == viewport.width && (u32)h == viewport.height) - return; - 
BaseVulkanRenderer::Resize(w, h); - GetContext()->WaitIdle(); - screenDrawer.Init(&samplerManager, &shaderManager, viewport); - } - void Term() override { DEBUG_LOG(RENDERER, "VulkanRenderer::Term"); GetContext()->WaitIdle(); + screenDrawer.Term(); + textureDrawer.Term(); + samplerManager.term(); BaseVulkanRenderer::Term(); } bool Render() override { try { + if (emulateFramebuffer != config::EmulateFramebuffer) + { + VulkanContext::Instance()->WaitIdle(); + screenDrawer.Term(); + screenDrawer.Init(&samplerManager, &shaderManager, viewport); + BaseInit(screenDrawer.GetRenderPass()); + emulateFramebuffer = config::EmulateFramebuffer; + } Drawer *drawer; if (pvrrc.isRTT) drawer = &textureDrawer; - else + else { + resize(pvrrc.framebufferWidth, pvrrc.framebufferHeight); drawer = &screenDrawer; + } drawer->Draw(fogTexture.get(), paletteTexture.get()); #ifdef LIBRETRO if (!pvrrc.isRTT) - overlay->Draw(screenDrawer.GetCurrentCommandBuffer(), viewport, (int)config::RenderResolution / 480.f, true, true); + overlay->Draw(screenDrawer.GetCurrentCommandBuffer(), viewport, + config::EmulateFramebuffer ? 
1 : (int)config::RenderResolution / 480.f, true, true); #endif drawer->EndRenderPass(); @@ -85,13 +91,27 @@ public: bool Present() override { - return screenDrawer.PresentFrame(); + if (config::EmulateFramebuffer) + return presentFramebuffer(); + else + return screenDrawer.PresentFrame(); + } + +protected: + void resize(int w, int h) override + { + if ((u32)w == viewport.width && (u32)h == viewport.height) + return; + BaseVulkanRenderer::resize(w, h); + GetContext()->WaitIdle(); + screenDrawer.Init(&samplerManager, &shaderManager, viewport); } private: SamplerManager samplerManager; ScreenDrawer screenDrawer; TextureDrawer textureDrawer; + bool emulateFramebuffer = false; }; Renderer* rend_Vulkan() diff --git a/core/rend/vulkan/vulkan_renderer.h b/core/rend/vulkan/vulkan_renderer.h index fefe620c2..ff1479fe6 100644 --- a/core/rend/vulkan/vulkan_renderer.h +++ b/core/rend/vulkan/vulkan_renderer.h @@ -36,6 +36,7 @@ protected: bool BaseInit(vk::RenderPass renderPass, int subpass = 0) { texCommandPool.Init(); + fbCommandPool.Init(); #if defined(__ANDROID__) && !defined(LIBRETRO) if (!vjoyTexture) @@ -61,9 +62,11 @@ protected: vk::BufferUsageFlagBits::eVertexBuffer)); } #endif -#ifdef LIBRETRO quadPipeline = std::unique_ptr(new QuadPipeline(false, false)); quadPipeline->Init(&shaderManager, renderPass, subpass); + framebufferDrawer = std::unique_ptr(new QuadDrawer()); + framebufferDrawer->Init(quadPipeline.get()); +#ifdef LIBRETRO overlay = std::unique_ptr(new VulkanOverlay()); overlay->Init(quadPipeline.get()); #endif @@ -78,15 +81,19 @@ public: #ifdef LIBRETRO overlay->Term(); overlay.reset(); - quadPipeline.reset(); #endif + framebufferDrawer.reset(); + quadPipeline.reset(); osdBuffer.reset(); + osdPipeline.Term(); vjoyTexture.reset(); textureCache.Clear(); fogTexture = nullptr; paletteTexture = nullptr; texCommandPool.Term(); + fbCommandPool.Term(); framebufferTextures.clear(); + shaderManager.term(); } BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override @@ 
-126,11 +133,7 @@ public: texCommandBuffer = texCommandPool.Allocate(); texCommandBuffer.begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)); - bool result; - if (ctx->rend.isRenderFramebuffer) - result = RenderFramebuffer(ctx); - else - result = ta_parse(ctx); + bool result = ta_parse(ctx); if (result) { @@ -151,15 +154,10 @@ public: return result; } - void Resize(int w, int h) override - { - viewport.width = w; - viewport.height = h; - } - void ReInitOSD() { texCommandPool.Init(); + fbCommandPool.Init(); #if defined(__ANDROID__) && !defined(LIBRETRO) osdPipeline.Init(&shaderManager, vjoyTexture->GetImageView(), GetContext()->GetRenderPass()); #endif @@ -212,21 +210,8 @@ public: #endif } -protected: - BaseVulkanRenderer() : viewport(640, 480) {} - - VulkanContext *GetContext() const { return VulkanContext::Instance(); } - - bool RenderFramebuffer(TA_context* ctx) + void RenderFramebuffer(const FramebufferInfo& info) override { - if (FB_R_SIZE.fb_x_size == 0 || FB_R_SIZE.fb_y_size == 0) - return false; - - PixelBuffer pb; - int width; - int height; - ReadFramebuffer(pb, width, height); - if (framebufferTextures.size() != GetContext()->GetSwapChainSize()) framebufferTextures.resize(GetContext()->GetSwapChainSize()); std::unique_ptr& curTexture = framebufferTextures[GetContext()->GetCurrentImageIndex()]; @@ -235,72 +220,42 @@ protected: curTexture = std::unique_ptr(new Texture()); curTexture->tex_type = TextureType::_8888; } - curTexture->SetCommandBuffer(texCommandBuffer); - curTexture->UploadToGPU(width, height, (u8*)pb.data(), false); + + fbCommandPool.BeginFrame(); + vk::CommandBuffer commandBuffer = fbCommandPool.Allocate(); + commandBuffer.begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)); + curTexture->SetCommandBuffer(commandBuffer); + + if (info.fb_r_ctrl.fb_enable == 0 || info.vo_control.blank_video == 1) + { + // Video output disabled + u8 rgba[] { (u8)info.vo_border_col._red, 
(u8)info.vo_border_col._green, (u8)info.vo_border_col._blue, 255 }; + curTexture->UploadToGPU(1, 1, rgba, false); + } + else + { + PixelBuffer pb; + int width; + int height; + ReadFramebuffer(info, pb, width, height); + + curTexture->UploadToGPU(width, height, (u8*)pb.data(), false); + } + curTexture->SetCommandBuffer(nullptr); + commandBuffer.end(); + fbCommandPool.EndFrame(); + } - // Use background poly vtx and param - Vertex *vtx = ctx->rend.verts.head(); - vtx[0].x = 0.f; - vtx[0].y = 0.f; - vtx[0].z = 0.1f; - vtx[0].u = 0.f; - vtx[0].v = 0.f; +protected: + BaseVulkanRenderer() : viewport(640, 480) {} - vtx[1] = vtx[0]; - vtx[1].x = 640.f; - vtx[1].u = 1.f; + VulkanContext *GetContext() const { return VulkanContext::Instance(); } - vtx[2] = vtx[0]; - vtx[2].y = 480.f; - vtx[2].v = 1.f; - - vtx[3] = vtx[0]; - vtx[3].x = 640.f; - vtx[3].y = 480.f; - vtx[3].u = 1.f; - vtx[3].v = 1.f; - - u32 *idx = ctx->rend.idx.Append(4); - idx[0] = 0; - idx[1] = 1; - idx[2] = 2; - idx[3] = 3; - - PolyParam *pp = ctx->rend.global_param_op.head(); - pp->first = 0; - pp->count = 4; - - pp->isp.full = 0; - pp->isp.DepthMode = 7; - - pp->pcw.full = 0; - pp->pcw.Gouraud = 1; - pp->pcw.Texture = 1; - - pp->tcw.full = 0; - pp->tcw.TexAddr = 0x1fffff; - pp->tcw1.full = (u32)-1; - - pp->tsp.full = 0; - pp->tsp.FilterMode = 1; - pp->tsp.FogCtrl = 2; - pp->tsp.SrcInstr = 1; - pp->tsp1.full = (u32)-1; - - pp->texture = curTexture.get(); - pp->texture1 = nullptr; - pp->tileclip = 0; - - RenderPass *pass = ctx->rend.render_passes.Append(1); - pass->autosort = false; - pass->mvo_count = 0; - pass->mvo_tr_count = 0; - pass->op_count = 1; - pass->pt_count = 0; - pass->tr_count = 0; - - return true; + virtual void resize(int w, int h) + { + viewport.width = w; + viewport.height = h; } void CheckFogTexture() @@ -342,6 +297,17 @@ protected: paletteTexture->SetCommandBuffer(nullptr); } + bool presentFramebuffer() + { + if (GetContext()->GetCurrentImageIndex() >= (int)framebufferTextures.size()) + 
return false; + Texture *fbTexture = framebufferTextures[GetContext()->GetCurrentImageIndex()].get(); + if (fbTexture == nullptr) + return false; + GetContext()->PresentFrame(fbTexture->GetImage(), fbTexture->GetImageView(), fbTexture->getSize()); + return true; + } + ShaderManager shaderManager; std::unique_ptr fogTexture; std::unique_ptr paletteTexture; @@ -353,8 +319,10 @@ protected: TextureCache textureCache; vk::Extent2D viewport; vk::CommandBuffer texCommandBuffer; + std::unique_ptr quadPipeline; + std::unique_ptr framebufferDrawer; + CommandPool fbCommandPool; #ifdef LIBRETRO std::unique_ptr overlay; - std::unique_ptr quadPipeline; #endif }; diff --git a/shell/libretro/libretro.cpp b/shell/libretro/libretro.cpp index 1d5caed43..bd45ccd13 100644 --- a/shell/libretro/libretro.cpp +++ b/shell/libretro/libretro.cpp @@ -769,7 +769,6 @@ static void update_variables(bool first_startup) if (!first_startup && previous_renderer != config::RendererType) { rend_term_renderer(); rend_init_renderer(); - rend_resize_renderer(); } #if defined(HAVE_OIT) || defined(HAVE_VULKAN) || defined(HAVE_D3D11) @@ -1030,7 +1029,6 @@ static void update_variables(bool first_startup) rotate_screen ^= rotate_game; if (rotate_game) config::Widescreen.override(false); - rend_resize_renderer(); if ((libretro_detect_vsync_swap_interval != prevDetectVsyncSwapInterval) && !libretro_detect_vsync_swap_interval && @@ -1127,7 +1125,6 @@ void retro_reset() config::Widescreen.override(false); config::Rotate90 = false; - rend_resize_renderer(); retro_game_geometry geometry; setGameGeometry(geometry); environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &geometry); @@ -1147,7 +1144,6 @@ static void context_reset() rend_term_renderer(); theGLContext.init(); rend_init_renderer(); - rend_resize_renderer(); } static void context_destroy() @@ -1613,7 +1609,6 @@ static void retro_vk_context_reset() theVulkanContext.init((retro_hw_render_interface_vulkan *)vulkan); rend_term_renderer(); rend_init_renderer(); - 
rend_resize_renderer(); } static void retro_vk_context_destroy() @@ -1747,7 +1742,6 @@ static void dx11_context_reset() else if (config::RendererType != RenderType::DirectX11_OIT) config::RendererType = RenderType::DirectX11; rend_init_renderer(); - rend_resize_renderer(); } static void dx11_context_destroy() @@ -2413,7 +2407,7 @@ static void updateLightgunCoordinates(u32 port) { int x = input_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X); int y = input_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y); - if (config::Widescreen && config::ScreenStretching == 100) + if (config::Widescreen && config::ScreenStretching == 100 && !config::EmulateFramebuffer) mo_x_abs[port] = 640.f * ((x + 0x8000) * 4.f / 3.f / 0x10000 - (4.f / 3.f - 1.f) / 2.f); else mo_x_abs[port] = (x + 0x8000) * 640.f / 0x10000; diff --git a/shell/libretro/libretro_core_options.h b/shell/libretro/libretro_core_options.h index 7397fc204..8d241de5d 100644 --- a/shell/libretro/libretro_core_options.h +++ b/shell/libretro/libretro_core_options.h @@ -371,11 +371,25 @@ struct retro_core_option_v2_definition option_defs_us[] = { "32", }, #endif + { + CORE_OPTION_NAME "_emulate_framebuffer", + "Full framebuffer emulation", + NULL, + "Enable full framebuffer emulation in VRAM. This is useful for games that directly read or write the framebuffer in VRAM. When enabled, Internal Resolution is forced to 640x480 and performance may be severely impacted.", + NULL, + "video", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, {/* TODO: needs explanation */ CORE_OPTION_NAME "_enable_rttb", "Enable RTT (Render To Texture) Buffer", NULL, - "", + "Copy rendered textures back from the GPU to VRAM. This option is normally enabled for games that require it. 
When enabled, texture rendering upscaling is disabled and performance may be impacted.", NULL, "video", { diff --git a/shell/libretro/option.cpp b/shell/libretro/option.cpp index 6412bdc10..221e8b388 100644 --- a/shell/libretro/option.cpp +++ b/shell/libretro/option.cpp @@ -91,6 +91,7 @@ Option PowerVR2Filter(CORE_OPTION_NAME "_pvr2_filtering"); Option PixelBufferSize("", 512 * 1024 * 1024); IntOption PerPixelLayers(CORE_OPTION_NAME "_oit_layers"); Option NativeDepthInterpolation(CORE_OPTION_NAME "_native_depth_interpolation"); +Option EmulateFramebuffer(CORE_OPTION_NAME "_emulate_framebuffer", false); // Misc diff --git a/shell/libretro/vmu_xhair.cpp b/shell/libretro/vmu_xhair.cpp index 850376d5f..5a1cf7030 100644 --- a/shell/libretro/vmu_xhair.cpp +++ b/shell/libretro/vmu_xhair.cpp @@ -117,7 +117,7 @@ std::pair getCrosshairPosition(int playerNum) { float fx = lightgun_params[playerNum].x * config::RenderResolution * config::ScreenStretching / 480.f / 100.f; float fy = lightgun_params[playerNum].y * config::RenderResolution / 480.f; - if (config::Widescreen && config::ScreenStretching == 100) + if (config::Widescreen && config::ScreenStretching == 100 && !config::EmulateFramebuffer) fx += (480.f * 16.f / 9.f - 640.f) / 2.f * config::RenderResolution / 480.f; return std::make_pair(fx, fy); From c1f0dd81d2596d32e777571bb18f72e335b7c23f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 25 Oct 2022 15:00:04 +0200 Subject: [PATCH 09/34] pvr: copy pvr regs at start_render time to avoid concurrent update copy TA_GLOB_TILE_CLIP and SCALER_CTL to rend context when starting render. Use rend context copy to calculate framebuffer size. Fixes framebuffer size glitches in vf4 water stages. lr: pass aspect ratio when resizing. dx11: Wrong sizing of rotated games. gl: test automation fixes. 
--- core/hw/pvr/Renderer_if.cpp | 13 +++-- core/hw/pvr/ta_ctx.h | 2 + core/rend/dx11/dx11_renderer.cpp | 18 ++++--- core/rend/dx11/dx11_renderer.h | 3 +- core/rend/dx11/oit/dx11_oitrenderer.cpp | 3 +- core/rend/dx9/d3d_renderer.cpp | 9 ++-- core/rend/dx9/d3d_renderer.h | 1 + core/rend/gl4/gles.cpp | 2 +- core/rend/gles/gldraw.cpp | 2 +- core/rend/gles/gles.cpp | 18 ++++--- core/rend/gles/gles.h | 3 ++ core/rend/gles/gltex.cpp | 31 ++++++++++++ core/rend/transform_matrix.h | 66 ++++++++++++++----------- core/rend/vulkan/drawer.cpp | 1 + core/rend/vulkan/drawer.h | 3 +- core/rend/vulkan/oit/oit_drawer.h | 6 ++- core/rend/vulkan/vk_context_lr.cpp | 2 +- core/rend/vulkan/vk_context_lr.h | 2 +- core/rend/vulkan/vulkan_context.cpp | 18 +++---- core/rend/vulkan/vulkan_context.h | 5 +- core/rend/vulkan/vulkan_renderer.h | 6 ++- shell/libretro/libretro.cpp | 6 ++- 22 files changed, 143 insertions(+), 77 deletions(-) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index 23227422c..056e91e1f 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -20,7 +20,7 @@ void retro_rend_present() sh4_cpu.Stop(); } #endif -void retro_resize_renderer(int w, int h); +void retro_resize_renderer(int w, int h, float aspectRatio); u32 FrameCount=1; @@ -175,7 +175,8 @@ private: bool renderToScreen = !_pvrrc->rend.isRTT && !config::EmulateFramebuffer; #ifdef LIBRETRO if (renderToScreen) - retro_resize_renderer(_pvrrc->rend.framebufferWidth, _pvrrc->rend.framebufferHeight); + retro_resize_renderer(_pvrrc->rend.framebufferWidth, _pvrrc->rend.framebufferHeight, + getOutputFramebufferAspectRatio(_pvrrc->rend)); #endif bool proc = renderer->Process(_pvrrc); if (!proc || renderToScreen) @@ -198,8 +199,8 @@ private: { #ifdef LIBRETRO int w, h; - getTAViewport(w, h); // FIXME ? 
- retro_resize_renderer(w, h); + getDCFramebufferReadSize(w, h); + retro_resize_renderer(w, h, getDCFramebufferAspectRatio()); #endif renderer->RenderFramebuffer(config); } @@ -344,6 +345,8 @@ void rend_start_render() ctx->rend.fb_W_SOF1 = FB_W_SOF1; ctx->rend.fb_W_CTRL.full = FB_W_CTRL.full; + ctx->rend.ta_GLOB_TILE_CLIP = TA_GLOB_TILE_CLIP; + ctx->rend.scaler_ctl = SCALER_CTL; ctx->rend.fb_X_CLIP = FB_X_CLIP; ctx->rend.fb_Y_CLIP = FB_Y_CLIP; ctx->rend.fb_W_LINESTRIDE = FB_W_LINESTRIDE.stride; @@ -354,7 +357,7 @@ void rend_start_render() if (!ctx->rend.isRTT) { int width, height; - getScaledFramebufferSize(width, height); + getScaledFramebufferSize(ctx->rend, width, height); ctx->rend.framebufferWidth = width; ctx->rend.framebufferHeight = height; } diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index f21b66721..36da3e60b 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -227,6 +227,8 @@ struct rend_context bool Overrun; bool isRTT; + TA_GLOB_TILE_CLIP_type ta_GLOB_TILE_CLIP; + SCALER_CTL_type scaler_ctl; FB_X_CLIP_type fb_X_CLIP; FB_Y_CLIP_type fb_Y_CLIP; u32 fb_W_LINESTRIDE; diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp index 99b21cd10..cfbf006d3 100644 --- a/core/rend/dx11/dx11_renderer.cpp +++ b/core/rend/dx11/dx11_renderer.cpp @@ -443,9 +443,10 @@ bool DX11Renderer::Render() } else { + aspectRatio = getOutputFramebufferAspectRatio(pvrrc); #ifndef LIBRETRO deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr); - renderFramebuffer(); + displayFramebuffer(); DrawOSD(false); theDX11Context.setFrameRendered(); #else @@ -461,7 +462,7 @@ bool DX11Renderer::Render() return !is_rtt; } -void DX11Renderer::renderFramebuffer() +void DX11Renderer::displayFramebuffer() { #ifndef LIBRETRO D3D11_VIEWPORT vp{}; @@ -479,10 +480,12 @@ void DX11Renderer::renderFramebuffer() deviceContext->ClearRenderTargetView(theDX11Context.getRenderTarget(), colors); int outwidth = 
settings.display.width; int outheight = settings.display.height; - float renderAR = getOutputFramebufferAspectRatio(); - float screenAR = (float)outwidth / outheight; - if (config::Rotate90) + float renderAR = aspectRatio; + if (config::Rotate90) { std::swap(outwidth, outheight); + renderAR = 1 / renderAR; + } + float screenAR = (float)outwidth / outheight; int dy = 0; int dx = 0; if (renderAR > screenAR) @@ -874,7 +877,7 @@ bool DX11Renderer::RenderLastFrame() { if (!frameRenderedOnce) return false; - renderFramebuffer(); + displayFramebuffer(); return true; } @@ -952,9 +955,10 @@ void DX11Renderer::RenderFramebuffer(const FramebufferInfo& info) float bar = (this->width - this->height * 640.f / 480.f) / 2.f; quad->draw(dcfbTextureView, samplers->getSampler(true), nullptr, bar / this->width * 2.f - 1.f, -1.f, (this->width - bar * 2.f) / this->width * 2.f, 2.f); + aspectRatio = getDCFramebufferAspectRatio(); #ifndef LIBRETRO deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr); - renderFramebuffer(); + displayFramebuffer(); DrawOSD(false); theDX11Context.setFrameRendered(); #else diff --git a/core/rend/dx11/dx11_renderer.h b/core/rend/dx11/dx11_renderer.h index 63190d131..559c86173 100644 --- a/core/rend/dx11/dx11_renderer.h +++ b/core/rend/dx11/dx11_renderer.h @@ -93,7 +93,7 @@ protected: void updateFogTexture(); void updatePaletteTexture(); void readRttRenderTarget(u32 texAddress); - void renderFramebuffer(); + void displayFramebuffer(); void setCullMode(int mode); virtual void setRTTSize(int width, int height) {} void writeFramebufferToVRAM(); @@ -122,6 +122,7 @@ protected: bool frameRendered = false; bool frameRenderedOnce = false; Naomi2Helper n2Helper; + float aspectRatio = 4.f / 3.f; private: void readDCFramebuffer(); diff --git a/core/rend/dx11/oit/dx11_oitrenderer.cpp b/core/rend/dx11/oit/dx11_oitrenderer.cpp index 5944562ad..3780ce8d0 100644 --- a/core/rend/dx11/oit/dx11_oitrenderer.cpp +++ 
b/core/rend/dx11/oit/dx11_oitrenderer.cpp @@ -672,9 +672,10 @@ struct DX11OITRenderer : public DX11Renderer } else { + aspectRatio = getOutputFramebufferAspectRatio(pvrrc); #ifndef LIBRETRO deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr); - renderFramebuffer(); + displayFramebuffer(); DrawOSD(false); theDX11Context.setFrameRendered(); #else diff --git a/core/rend/dx9/d3d_renderer.cpp b/core/rend/dx9/d3d_renderer.cpp index e4b58bace..6d1cdbab5 100644 --- a/core/rend/dx9/d3d_renderer.cpp +++ b/core/rend/dx9/d3d_renderer.cpp @@ -287,6 +287,7 @@ void D3DRenderer::RenderFramebuffer(const FramebufferInfo& info) RECT rd{ (LONG)bar, 0, (LONG)(this->width - bar), (LONG)this->height }; device->StretchRect(dcfbSurface, nullptr, framebufferSurface, &rd, D3DTEXF_LINEAR); + aspectRatio = getDCFramebufferAspectRatio(); displayFramebuffer(); DrawOSD(false); frameRendered = true; @@ -1088,6 +1089,7 @@ bool D3DRenderer::Render() } else { + aspectRatio = getOutputFramebufferAspectRatio(pvrrc); displayFramebuffer(); DrawOSD(false); frameRendered = true; @@ -1118,14 +1120,13 @@ void D3DRenderer::displayFramebuffer() { devCache.SetRenderState(D3DRS_SCISSORTESTENABLE, FALSE); device->ColorFill(backbuffer, 0, D3DCOLOR_ARGB(255, VO_BORDER_COL._red, VO_BORDER_COL._green, VO_BORDER_COL._blue)); - float renderAR = getOutputFramebufferAspectRatio(); float screenAR = (float)settings.display.width / settings.display.height; int dx = 0; int dy = 0; - if (renderAR > screenAR) - dy = (int)roundf(settings.display.height * (1 - screenAR / renderAR) / 2.f); + if (aspectRatio > screenAR) + dy = (int)roundf(settings.display.height * (1 - screenAR / aspectRatio) / 2.f); else - dx = (int)roundf(settings.display.width * (1 - renderAR / screenAR) / 2.f); + dx = (int)roundf(settings.display.width * (1 - aspectRatio / screenAR) / 2.f); if (!config::Rotate90) { diff --git a/core/rend/dx9/d3d_renderer.h b/core/rend/dx9/d3d_renderer.h index 936832d27..e36f21881 100644 --- 
a/core/rend/dx9/d3d_renderer.h +++ b/core/rend/dx9/d3d_renderer.h @@ -180,5 +180,6 @@ private: bool frameRendered = false; bool frameRenderedOnce = false; int maxAnisotropy = 1; + float aspectRatio = 4.f / 3.f; }; diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 27e10388c..041b676fc 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -941,7 +941,7 @@ static bool RenderFrame(int width, int height) writeFramebufferToVRAM(); #ifndef LIBRETRO else { - gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); + gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); render_output_framebuffer(); } #endif diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index c1f23ed6f..74473efd2 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -708,7 +708,7 @@ void OpenGLRenderer::RenderFramebuffer(const FramebufferInfo& info) gl.ofbo2.framebuffer.reset(); if (gl.ofbo2.framebuffer == nullptr) - gl.ofbo2.framebuffer = std::unique_ptr(new GlFramebuffer(gl.dcfb.width, gl.dcfb.height)); + gl.ofbo2.framebuffer = std::unique_ptr(new GlFramebuffer(gl.dcfb.width, gl.dcfb.height, false, true)); else gl.ofbo2.framebuffer->bind(); glCheck(); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 230dde528..5ce133d60 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -373,13 +373,16 @@ void do_swap_automation() static FILE* video_file = fopen(cfgLoadStr("record", "rawvid","").c_str(), "wb"); extern bool do_screenshot; + GlFramebuffer *framebuffer = gl.ofbo2.ready ? 
gl.ofbo2.framebuffer.get() : gl.ofbo.framebuffer.get(); + if (framebuffer == nullptr) + return; + int bytesz = framebuffer->getWidth() * framebuffer->getHeight() * 3; if (video_file) { - int bytesz = gl.ofbo.width * gl.ofbo.height * 3; u8* img = new u8[bytesz]; - glBindFramebuffer(GL_READ_FRAMEBUFFER, gl.ofbo.fbo); - glReadPixels(0, 0, gl.ofbo.width, gl.ofbo.height, GL_RGB, GL_UNSIGNED_BYTE, img); + framebuffer->bind(GL_READ_FRAMEBUFFER); + glReadPixels(0, 0, framebuffer->getWidth(), framebuffer->getHeight(), GL_RGB, GL_UNSIGNED_BYTE, img); fwrite(img, 1, bytesz, video_file); delete[] img; fflush(video_file); @@ -387,13 +390,12 @@ void do_swap_automation() if (do_screenshot) { - int bytesz = gl.ofbo.width * gl.ofbo.height * 3; u8* img = new u8[bytesz]; - glBindFramebuffer(GL_READ_FRAMEBUFFER, gl.ofbo.fbo); + framebuffer->bind(GL_READ_FRAMEBUFFER); glPixelStorei(GL_PACK_ALIGNMENT, 1); - glReadPixels(0, 0, gl.ofbo.width, gl.ofbo.height, GL_RGB, GL_UNSIGNED_BYTE, img); - dump_screenshot(img, gl.ofbo.width, gl.ofbo.height); + glReadPixels(0, 0, framebuffer->getWidth(), framebuffer->getHeight(), GL_RGB, GL_UNSIGNED_BYTE, img); + dump_screenshot(img, framebuffer->getWidth(), framebuffer->getHeight()); delete[] img; dc_exit(); flycast_term(); @@ -1380,7 +1382,7 @@ bool RenderFrame(int width, int height) writeFramebufferToVRAM(); #ifndef LIBRETRO else { - gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); + gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); render_output_framebuffer(); } #endif diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index dc043f004..23c60d952 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -157,6 +157,7 @@ class GlFramebuffer { public: GlFramebuffer(int width, int height, bool withDepth = false, GLuint texture = 0); + GlFramebuffer(int width, int height, bool withDepth, bool withTexture); ~GlFramebuffer(); void bind(GLenum type = GL_FRAMEBUFFER) const { @@ -175,6 +176,8 @@ public: GLuint 
getFramebuffer() const { return framebuffer; } private: + void makeFramebuffer(bool withDepth); + int width; int height; GLuint texture; diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index c77b3a92c..a708e4227 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -440,7 +440,11 @@ GlFramebuffer::GlFramebuffer(int width, int height, bool withDepth, GLuint textu #endif } } + makeFramebuffer(withDepth); +} +void GlFramebuffer::makeFramebuffer(bool withDepth) +{ // Create the framebuffer glGenFramebuffers(1, &framebuffer); bind(); @@ -491,6 +495,33 @@ GlFramebuffer::GlFramebuffer(int width, int height, bool withDepth, GLuint textu glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, colorBuffer); } +GlFramebuffer::GlFramebuffer(int width, int height, bool withDepth, bool withTexture) +{ + if (gl.gl_major < 3 || withTexture) + { + // Create a texture for rendering to + texture = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, texture); + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + else + { + // Use a renderbuffer and glBlitFramebuffer + glGenRenderbuffers(1, &colorBuffer); + glBindRenderbuffer(GL_RENDERBUFFER, colorBuffer); +#ifdef GL_RGBA8 + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); +#endif + } + + makeFramebuffer(withDepth); +} + GlFramebuffer::~GlFramebuffer() { glDeleteFramebuffers(1, &framebuffer); diff --git a/core/rend/transform_matrix.h b/core/rend/transform_matrix.h index 35df79cd6..0663dff79 100644 --- a/core/rend/transform_matrix.h +++ b/core/rend/transform_matrix.h @@ -26,22 +26,22 @@ #include #include -inline 
static void getTAViewport(int& width, int& height) +inline static void getTAViewport(const rend_context& rendCtx, int& width, int& height) { - width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; - height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + width = (rendCtx.ta_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + height = (rendCtx.ta_GLOB_TILE_CLIP.tile_y_num + 1) * 32; } -inline static void getPvrFramebufferSize(int& width, int& height) +inline static void getPvrFramebufferSize(const rend_context& rendCtx, int& width, int& height) { - getTAViewport(width, height); + getTAViewport(rendCtx, width, height); if (!config::EmulateFramebuffer) { int maxHeight = FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0 ? 240 : 480; - if (SCALER_CTL.vscalefactor != 0 - && (SCALER_CTL.vscalefactor > 1025 || SCALER_CTL.vscalefactor < 1024) + if (rendCtx.scaler_ctl.vscalefactor != 0 + && (rendCtx.scaler_ctl.vscalefactor > 1025 || rendCtx.scaler_ctl.vscalefactor < 1024) && SPG_CONTROL.interlace == 0) - maxHeight /= 1024.f / SCALER_CTL.vscalefactor; + maxHeight /= 1024.f / rendCtx.scaler_ctl.vscalefactor; if (FB_R_CTRL.fb_line_double) maxHeight /= 2; height = std::min(maxHeight, height); @@ -72,7 +72,7 @@ public: bool IsClipped() const { int width, height; - getTAViewport(width, height); + getTAViewport(*renderingContext, width, height); float sx, sy; GetScissorScaling(sx, sy); return renderingContext->fb_X_CLIP.min != 0 @@ -120,7 +120,7 @@ public: else { int w, h; - getPvrFramebufferSize(w, h); + getPvrFramebufferSize(*renderingContext, w, h); dcViewport.x = w; dcViewport.y = h; @@ -217,17 +217,17 @@ private: if (!renderingContext->isRTT && !config::EmulateFramebuffer) { - if (SCALER_CTL.vscalefactor > 0x400) - scale_y *= std::round(SCALER_CTL.vscalefactor / 1024.f); - if (SCALER_CTL.hscale) + if (renderingContext->scaler_ctl.vscalefactor > 0x400) + scale_y *= std::round(renderingContext->scaler_ctl.vscalefactor / 1024.f); + if (renderingContext->scaler_ctl.hscale) scale_x *= 2.f; } else if 
(config::EmulateFramebuffer) { - if (SCALER_CTL.hscale) + if (renderingContext->scaler_ctl.hscale) scale_x *= 2.f; - if (SCALER_CTL.vscalefactor > 0x401 || SCALER_CTL.vscalefactor < 0x400) - scale_y *= SCALER_CTL.vscalefactor / 1024.f; + if (renderingContext->scaler_ctl.vscalefactor > 0x401 || renderingContext->scaler_ctl.vscalefactor < 0x400) + scale_y *= renderingContext->scaler_ctl.vscalefactor / 1024.f; } } @@ -241,9 +241,9 @@ private: float sidebarWidth = 0; }; -inline static void getScaledFramebufferSize(int& width, int& height) +inline static void getScaledFramebufferSize(const rend_context& rendCtx, int& width, int& height) { - getPvrFramebufferSize(width, height); + getPvrFramebufferSize(rendCtx, width, height); if (!config::EmulateFramebuffer) { float upscaling = config::RenderResolution / 480.f; @@ -264,25 +264,25 @@ inline static void getScaledFramebufferSize(int& width, int& height) } } -inline static float getOutputFramebufferAspectRatio() +inline static float getOutputFramebufferAspectRatio(const rend_context& rendCtx) { int w,h; - getPvrFramebufferSize(w, h); + getPvrFramebufferSize(rendCtx, w, h); float width = w; float height = h; width *= 1 + VO_CONTROL.pixel_double; - width /= 1 + SCALER_CTL.hscale; + width /= 1 + rendCtx.scaler_ctl.hscale; height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0); height *= 1 + (FB_R_CTRL.fb_line_double); - if (SCALER_CTL.vscalefactor != 0 - && (SCALER_CTL.vscalefactor > 1025 || SCALER_CTL.vscalefactor < 1024) + if (rendCtx.scaler_ctl.vscalefactor != 0 + && (rendCtx.scaler_ctl.vscalefactor > 1025 || rendCtx.scaler_ctl.vscalefactor < 1024) && SPG_CONTROL.interlace == 0) { if (config::EmulateFramebuffer) - height *= 1024.f / SCALER_CTL.vscalefactor; - else if (SCALER_CTL.vscalefactor > 1025) - height *= std::round(1024.f / SCALER_CTL.vscalefactor); + height *= 1024.f / rendCtx.scaler_ctl.vscalefactor; + else if (rendCtx.scaler_ctl.vscalefactor > 1025) + height *= std::round(1024.f / 
rendCtx.scaler_ctl.vscalefactor); } @@ -304,10 +304,10 @@ inline static float getOutputFramebufferAspectRatio() return renderAR * config::ScreenStretching / 100.f; } -inline static float getDCFramebufferAspectRatio() +inline static void getDCFramebufferReadSize(int& width, int& height) { - int width = FB_R_SIZE.fb_x_size + 1; // in 32-bit words - int height = FB_R_SIZE.fb_y_size + 1; + width = FB_R_SIZE.fb_x_size + 1; // in 32-bit words + height = FB_R_SIZE.fb_y_size + 1; switch (FB_R_CTRL.fb_depth) { @@ -322,6 +322,14 @@ inline static float getDCFramebufferAspectRatio() default: break; } +} + +inline static float getDCFramebufferAspectRatio() +{ + int width; + int height; + getDCFramebufferReadSize(width, height); + width *= 1 + VO_CONTROL.pixel_double; height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0); height *= 1 + (FB_R_CTRL.fb_line_double); diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index 74873a7a4..4530608dd 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -744,6 +744,7 @@ void ScreenDrawer::EndRenderPass() { currentCommandBuffer.end(); commandPool->EndFrame(); + aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); } currentCommandBuffer = nullptr; Drawer::EndRenderPass(); diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index b6ad62da1..cba0f6ad1 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -287,7 +287,7 @@ public: return false; frameRendered = false; GetContext()->PresentFrame(colorAttachments[GetCurrentImage()]->GetImage(), - colorAttachments[GetCurrentImage()]->GetImageView(), viewport); + colorAttachments[GetCurrentImage()]->GetImageView(), viewport, aspectRatio); return true; } @@ -309,6 +309,7 @@ private: std::vector transitionNeeded; std::vector clearNeeded; bool frameRendered = false; + float aspectRatio = 0.f; }; class TextureDrawer : public Drawer diff --git a/core/rend/vulkan/oit/oit_drawer.h 
b/core/rend/vulkan/oit/oit_drawer.h index e9fbccea1..b07b24ffc 100644 --- a/core/rend/vulkan/oit/oit_drawer.h +++ b/core/rend/vulkan/oit/oit_drawer.h @@ -167,7 +167,7 @@ public: OITDrawer::Init(samplerManager, screenPipelineManager.get(), oitBuffers); MakeFramebuffers(viewport); - GetContext()->PresentFrame(vk::Image(), vk::ImageView(), viewport); + GetContext()->PresentFrame(vk::Image(), vk::ImageView(), viewport, 0); } void Term() { @@ -190,6 +190,7 @@ public: { currentCommandBuffer.end(); commandPool->EndFrame(); + aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); } currentCommandBuffer = nullptr; OITDrawer::EndFrame(); @@ -202,7 +203,7 @@ public: return false; frameRendered = false; GetContext()->PresentFrame(finalColorAttachments[GetCurrentImage()]->GetImage(), - finalColorAttachments[GetCurrentImage()]->GetImageView(), viewport.extent); + finalColorAttachments[GetCurrentImage()]->GetImageView(), viewport.extent, aspectRatio); return true; } @@ -220,6 +221,7 @@ private: std::unique_ptr screenPipelineManager; std::vector transitionNeeded; bool frameRendered = false; + float aspectRatio = 0.f; }; class OITTextureDrawer : public OITDrawer diff --git a/core/rend/vulkan/vk_context_lr.cpp b/core/rend/vulkan/vk_context_lr.cpp index 66262c0f0..71d9528c3 100644 --- a/core/rend/vulkan/vk_context_lr.cpp +++ b/core/rend/vulkan/vk_context_lr.cpp @@ -290,7 +290,7 @@ bool VulkanContext::init(retro_hw_render_interface_vulkan *retro_render_if) return true; } -void VulkanContext::PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent) +void VulkanContext::PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent, float aspectRatio) { retro_image.image_view = (VkImageView)imageView; retro_image.create_info.image = (VkImage)image; diff --git a/core/rend/vulkan/vk_context_lr.h b/core/rend/vulkan/vk_context_lr.h index 80c1e92e7..f05f3a28a 100644 --- a/core/rend/vulkan/vk_context_lr.h +++ 
b/core/rend/vulkan/vk_context_lr.h @@ -38,7 +38,7 @@ public: void term() override; u32 GetGraphicsQueueFamilyIndex() const { return retro_render_if->queue_index; } - void PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent); + void PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent, float aspectRatio); vk::PhysicalDevice GetPhysicalDevice() const { return physicalDevice; } vk::Device GetDevice() const { return device; } diff --git a/core/rend/vulkan/vulkan_context.cpp b/core/rend/vulkan/vulkan_context.cpp index 68d37ac69..5a43f12f5 100644 --- a/core/rend/vulkan/vulkan_context.cpp +++ b/core/rend/vulkan/vulkan_context.cpp @@ -823,7 +823,7 @@ void VulkanContext::Present() noexcept if (lastFrameView && IsValid() && !gui_is_open()) for (int i = 1; i < swapInterval; i++) { - PresentFrame(vk::Image(), lastFrameView, lastFrameExtent); + PresentFrame(vk::Image(), lastFrameView, lastFrameExtent, lastFrameAR); presentQueue.presentKHR(vk::PresentInfoKHR(1, &(*renderCompleteSemaphores[currentSemaphore]), 1, &(*swapChain), &currentImage)); currentSemaphore = (currentSemaphore + 1) % imageViews.size(); } @@ -847,7 +847,7 @@ void VulkanContext::Present() noexcept } } -void VulkanContext::DrawFrame(vk::ImageView imageView, const vk::Extent2D& extent) +void VulkanContext::DrawFrame(vk::ImageView imageView, const vk::Extent2D& extent, float aspectRatio) { QuadVertex vtx[] = { { { -1, -1, 0 }, { 0, 0 } }, @@ -862,14 +862,13 @@ void VulkanContext::DrawFrame(vk::ImageView imageView, const vk::Extent2D& exten else quadPipeline->BindPipeline(commandBuffer); - float renderAR = getOutputFramebufferAspectRatio(); float screenAR = (float)width / height; float dx = 0; float dy = 0; - if (renderAR > screenAR) - dy = height * (1 - screenAR / renderAR) / 2; + if (aspectRatio > screenAR) + dy = height * (1 - screenAR / aspectRatio) / 2; else - dx = width * (1 - renderAR / screenAR) / 2; 
vk::Viewport viewport(dx, dy, width - dx * 2, height - dy * 2); commandBuffer.setViewport(0, 1, &viewport); @@ -928,10 +927,11 @@ vk::CommandBuffer VulkanContext::PrepareOverlay(bool vmu, bool crosshair) extern Renderer *renderer; -void VulkanContext::PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent) noexcept +void VulkanContext::PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent, float aspectRatio) noexcept { lastFrameView = imageView; lastFrameExtent = extent; + lastFrameAR = aspectRatio; if (imageView && IsValid()) { @@ -942,7 +942,7 @@ void VulkanContext::PresentFrame(vk::Image image, vk::ImageView imageView, const BeginRenderPass(); if (lastFrameView) // Might have been nullified if swap chain recreated - DrawFrame(imageView, extent); + DrawFrame(imageView, extent, aspectRatio); DrawOverlay(settings.display.uiScale, config::FloatVMUs, true); renderer->DrawOSD(false); @@ -955,7 +955,7 @@ void VulkanContext::PresentFrame(vk::Image image, vk::ImageView imageView, const void VulkanContext::PresentLastFrame() { if (lastFrameView && IsValid()) - DrawFrame(lastFrameView, lastFrameExtent); + DrawFrame(lastFrameView, lastFrameExtent, lastFrameAR); } void VulkanContext::term() diff --git a/core/rend/vulkan/vulkan_context.h b/core/rend/vulkan/vulkan_context.h index e60436c9e..a4ce9ac2e 100644 --- a/core/rend/vulkan/vulkan_context.h +++ b/core/rend/vulkan/vulkan_context.h @@ -59,7 +59,7 @@ public: void BeginRenderPass(); void EndFrame(vk::CommandBuffer cmdBuffer = vk::CommandBuffer()); void Present() noexcept; - void PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent) noexcept; + void PresentFrame(vk::Image image, vk::ImageView imageView, const vk::Extent2D& extent, float aspectRatio) noexcept; void PresentLastFrame(); vk::PhysicalDevice GetPhysicalDevice() const { return physicalDevice; } @@ -136,7 +136,7 @@ private: bool InitInstance(const char** extensions, uint32_t 
extensions_count); void InitImgui(); void DoSwapAutomation(); - void DrawFrame(vk::ImageView imageView, const vk::Extent2D& extent); + void DrawFrame(vk::ImageView imageView, const vk::Extent2D& extent, float aspectRatio); vk::SurfaceKHR GetSurface() const { return *surface; } bool HasSurfaceDimensionChanged() const; @@ -206,6 +206,7 @@ private: vk::ImageView lastFrameView; vk::Extent2D lastFrameExtent; + float lastFrameAR = 0.f; std::unique_ptr overlay; // only used to delay the destruction of overlay textures diff --git a/core/rend/vulkan/vulkan_renderer.h b/core/rend/vulkan/vulkan_renderer.h index ff1479fe6..4fefccbda 100644 --- a/core/rend/vulkan/vulkan_renderer.h +++ b/core/rend/vulkan/vulkan_renderer.h @@ -23,6 +23,7 @@ #include "pipeline.h" #include "rend/osd.h" #include "overlay.h" +#include "rend/transform_matrix.h" #ifndef LIBRETRO #include "rend/gui.h" #endif @@ -77,7 +78,7 @@ public: void Term() override { GetContext()->WaitIdle(); - GetContext()->PresentFrame(nullptr, nullptr, vk::Extent2D()); + GetContext()->PresentFrame(nullptr, nullptr, vk::Extent2D(), 0); #ifdef LIBRETRO overlay->Term(); overlay.reset(); @@ -304,7 +305,8 @@ protected: Texture *fbTexture = framebufferTextures[GetContext()->GetCurrentImageIndex()].get(); if (fbTexture == nullptr) return false; - GetContext()->PresentFrame(fbTexture->GetImage(), fbTexture->GetImageView(), fbTexture->getSize()); + GetContext()->PresentFrame(fbTexture->GetImage(), fbTexture->GetImageView(), fbTexture->getSize(), + getDCFramebufferAspectRatio()); return true; } diff --git a/shell/libretro/libretro.cpp b/shell/libretro/libretro.cpp index bd45ccd13..d693194cc 100644 --- a/shell/libretro/libretro.cpp +++ b/shell/libretro/libretro.cpp @@ -165,6 +165,7 @@ static int framebufferWidth; static int framebufferHeight; static int maxFramebufferWidth; static int maxFramebufferHeight; +static float framebufferAspectRatio = 4.f / 3.f; float libretro_expected_audio_samples_per_run; unsigned libretro_vsync_swap_interval 
= 1; @@ -600,7 +601,7 @@ static bool set_variable_visibility(void) static void setGameGeometry(retro_game_geometry& geometry) { - geometry.aspect_ratio = getOutputFramebufferAspectRatio(); + geometry.aspect_ratio = framebufferAspectRatio; if (rotate_screen) geometry.aspect_ratio = 1 / geometry.aspect_ratio; geometry.max_width = std::max(framebufferHeight * 16 / 9, framebufferWidth); @@ -622,12 +623,13 @@ void setAVInfo(retro_system_av_info& avinfo) libretro_expected_audio_samples_per_run = sample_rate / fps; } -void retro_resize_renderer(int w, int h) +void retro_resize_renderer(int w, int h, float aspectRatio) { if (w == framebufferWidth && h == framebufferHeight) return; framebufferWidth = w; framebufferHeight = h; + framebufferAspectRatio = aspectRatio; bool avinfoNeeded = framebufferHeight > maxFramebufferHeight || framebufferWidth > maxFramebufferWidth; maxFramebufferHeight = std::max(maxFramebufferHeight, framebufferHeight); maxFramebufferWidth = std::max(maxFramebufferWidth, framebufferWidth); From b935bef9068fc76af5798722e23f641950186b34 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 25 Oct 2022 17:50:41 +0200 Subject: [PATCH 10/34] gl: missing init of width and height in GlFramebuffer gl,vk,dx9,dx11: always use the TA context copy for TA_GLOB_TILE_CLIP and SCALER_CTL --- core/rend/dx11/dx11_renderer.cpp | 8 ++++---- core/rend/dx9/d3d_renderer.cpp | 8 ++++---- core/rend/gles/gldraw.cpp | 8 ++++---- core/rend/gles/gltex.cpp | 1 + core/rend/vulkan/drawer.cpp | 8 ++++---- 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp index cfbf006d3..b88702a7d 100644 --- a/core/rend/dx11/dx11_renderer.cpp +++ b/core/rend/dx11/dx11_renderer.cpp @@ -1188,11 +1188,11 @@ void DX11Renderer::DrawOSD(bool clear_screen) void DX11Renderer::writeFramebufferToVRAM() { - u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; - u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + u32 width = 
(pvrrc.ta_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (pvrrc.ta_GLOB_TILE_CLIP.tile_y_num + 1) * 32; - float xscale = SCALER_CTL.hscale == 1 ? 0.5f : 1.f; - float yscale = 1024.f / SCALER_CTL.vscalefactor; + float xscale = pvrrc.scaler_ctl.hscale == 1 ? 0.5f : 1.f; + float yscale = 1024.f / pvrrc.scaler_ctl.vscalefactor; if (std::abs(yscale - 1.f) < 0.01) yscale = 1.f; diff --git a/core/rend/dx9/d3d_renderer.cpp b/core/rend/dx9/d3d_renderer.cpp index 6d1cdbab5..e7d545c48 100644 --- a/core/rend/dx9/d3d_renderer.cpp +++ b/core/rend/dx9/d3d_renderer.cpp @@ -1241,11 +1241,11 @@ void D3DRenderer::DrawOSD(bool clear_screen) void D3DRenderer::writeFramebufferToVRAM() { - u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; - u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + u32 width = (pvrrc.ta_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (pvrrc.ta_GLOB_TILE_CLIP.tile_y_num + 1) * 32; - float xscale = SCALER_CTL.hscale == 1 ? 0.5f : 1.f; - float yscale = 1024.f / SCALER_CTL.vscalefactor; + float xscale = pvrrc.scaler_ctl.hscale == 1 ? 0.5f : 1.f; + float yscale = 1024.f / pvrrc.scaler_ctl.vscalefactor; if (std::abs(yscale - 1.f) < 0.01) yscale = 1.f; diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 74473efd2..81ed17e04 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -740,11 +740,11 @@ void OpenGLRenderer::RenderFramebuffer(const FramebufferInfo& info) void writeFramebufferToVRAM() { - u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; - u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + u32 width = (pvrrc.ta_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (pvrrc.ta_GLOB_TILE_CLIP.tile_y_num + 1) * 32; - float xscale = SCALER_CTL.hscale == 1 ? 0.5f : 1.f; - float yscale = 1024.f / SCALER_CTL.vscalefactor; + float xscale = pvrrc.scaler_ctl.hscale == 1 ? 
0.5f : 1.f; + float yscale = 1024.f / pvrrc.scaler_ctl.vscalefactor; if (std::abs(yscale - 1.f) < 0.01) yscale = 1.f; diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index a708e4227..197437c69 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -496,6 +496,7 @@ void GlFramebuffer::makeFramebuffer(bool withDepth) } GlFramebuffer::GlFramebuffer(int width, int height, bool withDepth, bool withTexture) + : width(width), height(height), texture(0) { if (gl.gl_major < 3 || withTexture) { diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index 4530608dd..5c7848ae4 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -105,11 +105,11 @@ void BaseDrawer::SetBaseScissor(const vk::Extent2D& viewport) void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, FramebufferAttachment *finalFB) { - u32 width = (TA_GLOB_TILE_CLIP.tile_x_num + 1) * 32; - u32 height = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * 32; + u32 width = (pvrrc.ta_GLOB_TILE_CLIP.tile_x_num + 1) * 32; + u32 height = (pvrrc.ta_GLOB_TILE_CLIP.tile_y_num + 1) * 32; - float xscale = SCALER_CTL.hscale == 1 ? 0.5f : 1.f; - float yscale = 1024.f / SCALER_CTL.vscalefactor; + float xscale = pvrrc.scaler_ctl.hscale == 1 ? 0.5f : 1.f; + float yscale = 1024.f / pvrrc.scaler_ctl.vscalefactor; if (std::abs(yscale - 1.f) < 0.01f) yscale = 1.f; From 40dbf5379294e7d4d3bc47e89f25d7333f59674c Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 30 Oct 2022 12:10:24 +0100 Subject: [PATCH 11/34] pvr: use render pass # to read the right tile when marking blocks Fixes Unreal Tournament flashing/black screen. gl: mark vram area in rtt to avoid overwriting data (Worms World Party) Force Worms World Party to use CopyToVRam (regression due to 6a5db32d5d2f1973804ab200d1644aaeb697abac) Don't calculate precise aspect ratio and use 4/3 instead (or more if widescreen or stretch). Fix clipping issue when using SCALER_CTL.vscalefactor. 
Avoid crash when clip values are >= width or height. vulkan: RenderFramebuffer wasn't working at all if !EmulateFramebuffer --- core/emulator.cpp | 4 +- core/hw/pvr/Renderer_if.cpp | 6 ++- core/hw/pvr/pvr.cpp | 6 +++ core/hw/pvr/pvr_regs.cpp | 4 +- core/hw/pvr/ta.cpp | 23 +++++++---- core/hw/pvr/ta.h | 2 +- core/rend/dx11/dx11_renderer.cpp | 15 ++++++- core/rend/dx11/oit/dx11_oitrenderer.cpp | 2 +- core/rend/dx9/d3d_renderer.cpp | 15 ++++++- core/rend/gl4/gles.cpp | 2 +- core/rend/gles/gldraw.cpp | 14 ++++++- core/rend/gles/gles.cpp | 2 +- core/rend/gles/gltex.cpp | 22 ++++++---- core/rend/transform_matrix.h | 55 ++++++++----------------- core/rend/vulkan/drawer.cpp | 15 ++++++- core/rend/vulkan/oit/oit_drawer.h | 2 +- core/rend/vulkan/oit/oit_renderer.cpp | 2 +- core/rend/vulkan/vulkan_renderer.cpp | 2 +- core/rend/vulkan/vulkan_renderer.h | 4 ++ core/serialize.h | 3 +- 20 files changed, 129 insertions(+), 71 deletions(-) diff --git a/core/emulator.cpp b/core/emulator.cpp index 296a49a92..5fafac70f 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -93,7 +93,9 @@ static void loadSpecialSettings() // JSR (JP) || prod_id == "HDR-0078" // JSR (EU) - || prod_id == "MK-5105850") + || prod_id == "MK-5105850" + // Worms World Party + || prod_id == "T7016D 50") { INFO_LOG(BOOT, "Enabling RTT Copy to VRAM for game %s", prod_id.c_str()); config::RenderToTextureBuffer.override(true); diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index 056e91e1f..c13418c3d 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -176,7 +176,7 @@ private: #ifdef LIBRETRO if (renderToScreen) retro_resize_renderer(_pvrrc->rend.framebufferWidth, _pvrrc->rend.framebufferHeight, - getOutputFramebufferAspectRatio(_pvrrc->rend)); + getOutputFramebufferAspectRatio()); #endif bool proc = renderer->Process(_pvrrc); if (!proc || renderToScreen) @@ -330,8 +330,12 @@ void rend_start_render() linkedCtx->nextContext = tactx_Pop(addresses[i]); if 
(linkedCtx->nextContext != nullptr) linkedCtx = linkedCtx->nextContext; + else + INFO_LOG(PVR, "rend_start_render: Context%d @ %x not found", i, addresses[i]); } } + else + INFO_LOG(PVR, "rend_start_render: Context0 @ %x not found", addresses[0]); } scheduleRenderDone(ctx); diff --git a/core/hw/pvr/pvr.cpp b/core/hw/pvr/pvr.cpp index 18a0c3048..3be80598e 100644 --- a/core/hw/pvr/pvr.cpp +++ b/core/hw/pvr/pvr.cpp @@ -29,6 +29,7 @@ // ta.cpp extern u8 ta_fsm[2049]; //[2048] stores the current state extern u32 ta_fsm_cl; +extern u32 taRenderPass; // pvr_regs.cpp extern bool fog_needs_update; extern bool pal_needs_update; @@ -72,6 +73,7 @@ void serialize(Serializer& ser) ser << ta_fsm[2048]; ser << ta_fsm_cl; + ser << taRenderPass; SerializeTAContext(ser); @@ -106,6 +108,10 @@ void deserialize(Deserializer& deser) deser >> ta_fsm[2048]; deser >> ta_fsm_cl; + if (deser.version() >= Deserializer::V29) + deser >> taRenderPass; + else + taRenderPass = 0; if (deser.version() >= Deserializer::V5_LIBRETRO && deser.version() < Deserializer::V9_LIBRETRO) { deser.skip(); // pal_needs_update diff --git a/core/hw/pvr/pvr_regs.cpp b/core/hw/pvr/pvr_regs.cpp index 261b5babd..59bb0696f 100644 --- a/core/hw/pvr/pvr_regs.cpp +++ b/core/hw/pvr/pvr_regs.cpp @@ -137,7 +137,7 @@ void pvr_WriteReg(u32 paddr,u32 data) case TA_LIST_INIT_addr: if (data >> 31) { - ta_vtx_ListInit(); + ta_vtx_ListInit(false); TA_NEXT_OPB = TA_NEXT_OPB_INIT; TA_ITP_CURRENT = TA_ISP_BASE; } @@ -150,7 +150,7 @@ void pvr_WriteReg(u32 paddr,u32 data) case TA_LIST_CONT_addr: //a write of anything works ? 
- ta_vtx_ListInit(); + ta_vtx_ListInit(true); break; case SPG_CONTROL_addr: diff --git a/core/hw/pvr/ta.cpp b/core/hw/pvr/ta.cpp index 2c88033cc..719dbd858 100644 --- a/core/hw/pvr/ta.cpp +++ b/core/hw/pvr/ta.cpp @@ -72,6 +72,7 @@ enum ta_state u8 ta_fsm[2049]; //[2048] stores the current state u32 ta_fsm_cl=7; +u32 taRenderPass; static void fill_fsm(ta_state st, s8 pt, s8 obj, ta_state next, u32 proc=0, u32 sz64=0) { @@ -465,35 +466,39 @@ static u32 opbSize(int n) return n == 0 ? 0 : 16 << n; } -static void markObjectListBlocks() +static void markObjectListBlocks(int renderPass) { u32 addr; u32 tile_size; getRegionTileAddrAndSize(addr, tile_size); + addr += tile_size * renderPass; // Read the opaque pointer of the first tile and check that it's non-null (Naomi doom) u32 opbAddr = pvr_read32p(addr + 4); bool emptyOpaqueList = (opbAddr & 0x80000000) != 0; addr = TA_OL_BASE; + const int tileCount = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1); // opaque u32 opBlockSize = opbSize(TA_ALLOC_CTRL & 3); if (emptyOpaqueList) - addr += opBlockSize * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1); + addr += opBlockSize * tileCount; if (opBlockSize == 0 || emptyOpaqueList) { // skip modvols OPBs - addr += opbSize((TA_ALLOC_CTRL >> 4) & 3) * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1); + addr += opbSize((TA_ALLOC_CTRL >> 4) & 3) * tileCount; // transparent opBlockSize = opbSize((TA_ALLOC_CTRL >> 8) & 3); if (opBlockSize == 0) { // skip TR modvols OPBs - addr += opbSize((TA_ALLOC_CTRL >> 12) & 3) * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1); + addr += opbSize((TA_ALLOC_CTRL >> 12) & 3) * tileCount; // punch-through opBlockSize = opbSize((TA_ALLOC_CTRL >> 16) & 3); - if (opBlockSize == 0) + if (opBlockSize == 0) { + INFO_LOG(PVR, "markObjectListBlocks: all lists are empty"); return; + } } } for (int y = 0; y <= TA_GLOB_TILE_CLIP.tile_y_num; y++) @@ -504,11 +509,15 @@ 
static void markObjectListBlocks() } } -void ta_vtx_ListInit() +void ta_vtx_ListInit(bool continuation) { + if (!continuation) + taRenderPass = 0; + else + taRenderPass++; SetCurrentTARC(TA_OL_BASE); ta_tad.ClearPartial(); - markObjectListBlocks(); + markObjectListBlocks(taRenderPass); ta_cur_state = TAS_NS; ta_fsm_cl = 7; diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index 4a4d91e80..6aba412c9 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -8,7 +8,7 @@ constexpr u32 SZ64 = 2; struct TA_context; -void ta_vtx_ListInit(); +void ta_vtx_ListInit(bool continuation); void ta_vtx_SoftReset(); void DYNACALL ta_vtx_data32(const SQBuffer *data); diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp index b88702a7d..f88134d9f 100644 --- a/core/rend/dx11/dx11_renderer.cpp +++ b/core/rend/dx11/dx11_renderer.cpp @@ -443,7 +443,7 @@ bool DX11Renderer::Render() } else { - aspectRatio = getOutputFramebufferAspectRatio(pvrrc); + aspectRatio = getOutputFramebufferAspectRatio(); #ifndef LIBRETRO deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr); displayFramebuffer(); @@ -1197,6 +1197,8 @@ void DX11Renderer::writeFramebufferToVRAM() yscale = 1.f; ComPtr fbTexture = fbTex; + FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP; + FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP; if (xscale != 1.f || yscale != 1.f) { @@ -1236,6 +1238,11 @@ void DX11Renderer::writeFramebufferToVRAM() width = scaledW; height = scaledH; fbTexture = fbScaledTexture; + // FB_Y_CLIP is applied before vscalefactor if > 1, so it must be scaled here + if (yscale > 1) { + yClip.min = std::round(yClip.min * yscale); + yClip.max = std::round(yClip.max * yscale); + } } u32 texAddress = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect u32 linestride = pvrrc.fb_W_LINESTRIDE * 8; @@ -1281,7 +1288,11 @@ void DX11Renderer::writeFramebufferToVRAM() } deviceContext->Unmap(stagingTex, 0); - WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 
*)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); + xClip.min = std::min(xClip.min, width - 1); + xClip.max = std::min(xClip.max, width - 1); + yClip.min = std::min(yClip.min, height - 1); + yClip.max = std::min(yClip.max, height - 1); + WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, xClip, yClip); } Renderer *rend_DirectX11() diff --git a/core/rend/dx11/oit/dx11_oitrenderer.cpp b/core/rend/dx11/oit/dx11_oitrenderer.cpp index 3780ce8d0..44d5ee2c6 100644 --- a/core/rend/dx11/oit/dx11_oitrenderer.cpp +++ b/core/rend/dx11/oit/dx11_oitrenderer.cpp @@ -672,7 +672,7 @@ struct DX11OITRenderer : public DX11Renderer } else { - aspectRatio = getOutputFramebufferAspectRatio(pvrrc); + aspectRatio = getOutputFramebufferAspectRatio(); #ifndef LIBRETRO deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr); displayFramebuffer(); diff --git a/core/rend/dx9/d3d_renderer.cpp b/core/rend/dx9/d3d_renderer.cpp index e7d545c48..70e8b0b6f 100644 --- a/core/rend/dx9/d3d_renderer.cpp +++ b/core/rend/dx9/d3d_renderer.cpp @@ -1089,7 +1089,7 @@ bool D3DRenderer::Render() } else { - aspectRatio = getOutputFramebufferAspectRatio(pvrrc); + aspectRatio = getOutputFramebufferAspectRatio(); displayFramebuffer(); DrawOSD(false); frameRendered = true; @@ -1250,6 +1250,8 @@ void D3DRenderer::writeFramebufferToVRAM() yscale = 1.f; ComPtr fbSurface = framebufferSurface; + FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP; + FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP; if (xscale != 1.f || yscale != 1.f) { @@ -1276,6 +1278,11 @@ void D3DRenderer::writeFramebufferToVRAM() width = scaledW; height = scaledH; fbSurface = fbScaledSurface; + // FB_Y_CLIP is applied before vscalefactor if > 1, so it must be scaled here + if (yscale > 1) { + yClip.min = std::round(yClip.min * yscale); + yClip.max = std::round(yClip.max * yscale); + } } u32 texAddress = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO 
SCALER_CTL.interlace, SCALER_CTL.fieldselect u32 linestride = pvrrc.fb_W_LINESTRIDE * 8; @@ -1305,7 +1312,11 @@ void D3DRenderer::writeFramebufferToVRAM() } verifyWin(offscreenSurface->UnlockRect()); - WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); + xClip.min = std::min(xClip.min, width - 1); + xClip.max = std::min(xClip.max, width - 1); + yClip.min = std::min(yClip.min, height - 1); + yClip.max = std::min(yClip.max, height - 1); + WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, xClip, yClip); } Renderer* rend_DirectX9() diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 041b676fc..27e10388c 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -941,7 +941,7 @@ static bool RenderFrame(int width, int height) writeFramebufferToVRAM(); #ifndef LIBRETRO else { - gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); + gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); render_output_framebuffer(); } #endif diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 81ed17e04..19aefff33 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -747,6 +747,8 @@ void writeFramebufferToVRAM() float yscale = 1024.f / pvrrc.scaler_ctl.vscalefactor; if (std::abs(yscale - 1.f) < 0.01) yscale = 1.f; + FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP; + FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP; if (xscale != 1.f || yscale != 1.f) { @@ -761,6 +763,7 @@ void writeFramebufferToVRAM() gl.ofbo.framebuffer->bind(GL_READ_FRAMEBUFFER); gl.fbscaling.framebuffer->bind(GL_DRAW_FRAMEBUFFER); + glcache.Disable(GL_SCISSOR_TEST); glBlitFramebuffer(0, 0, width, height, 0, 0, scaledW, scaledH, GL_COLOR_BUFFER_BIT, GL_LINEAR); @@ -768,6 +771,11 @@ void writeFramebufferToVRAM() width = scaledW; height = scaledH; + // FB_Y_CLIP is applied before vscalefactor if > 1, so it must be 
scaled here + if (yscale > 1) { + yClip.min = std::round(yClip.min * yscale); + yClip.max = std::round(yClip.max * yscale); + } } u32 tex_addr = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect @@ -780,7 +788,11 @@ void writeFramebufferToVRAM() u8 *p = (u8 *)tmp_buf.data(); glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, p); - WriteFramebuffer(width, height, p, tex_addr, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); + xClip.min = std::min(xClip.min, width - 1); + xClip.max = std::min(xClip.max, width - 1); + yClip.min = std::min(yClip.min, height - 1); + yClip.max = std::min(yClip.max, height - 1); + WriteFramebuffer(width, height, p, tex_addr, pvrrc.fb_W_CTRL, linestride, xClip, yClip); glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo); glCheck(); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5ce133d60..0458e9357 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -1382,7 +1382,7 @@ bool RenderFrame(int width, int height) writeFramebufferToVRAM(); #ifndef LIBRETRO else { - gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); + gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); render_output_framebuffer(); } #endif diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index 197437c69..a516271cf 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -199,6 +199,8 @@ GLuint BindRTT(bool withDepthBuffer) return gl.rtt.framebuffer->getFramebuffer(); } +constexpr u32 MAGIC_NUMBER = 0xbaadf00d; + void ReadRTTBuffer() { u32 w = pvrrc.getFramebufferWidth(); @@ -242,6 +244,8 @@ void ReadRTTBuffer() { gl.rtt.directXfer = true; glReadPixels(0, 0, w, h, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, dst); + if (dst == nullptr) + *(u32 *)&vram[tex_addr] = MAGIC_NUMBER; } else { @@ -250,6 +254,7 @@ void ReadRTTBuffer() { gl.rtt.fb_w_ctrl = pvrrc.fb_W_CTRL; glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, 0); + *(u32 *)&vram[tex_addr] = 
MAGIC_NUMBER; } else { @@ -303,13 +308,16 @@ static void readAsyncPixelBuffer(u32 addr) return; } u16 *dst = (u16 *)&vram[tex_addr]; - - if (gl.rtt.directXfer) - // Can be read directly into vram - memcpy(dst, ptr, gl.rtt.width * gl.rtt.height * 2); - else - WriteTextureToVRam(gl.rtt.width, gl.rtt.height, ptr, dst, gl.rtt.fb_w_ctrl, gl.rtt.linestride); - + // Make sure the vram region hasn't been overwritten already, otherwise we skip the copy + // (Worms World Party intro) + if (*(u32 *)dst == MAGIC_NUMBER) + { + if (gl.rtt.directXfer) + // Can be read directly into vram + memcpy(dst, ptr, gl.rtt.width * gl.rtt.height * 2); + else + WriteTextureToVRam(gl.rtt.width, gl.rtt.height, ptr, dst, gl.rtt.fb_w_ctrl, gl.rtt.linestride); + } glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); #endif diff --git a/core/rend/transform_matrix.h b/core/rend/transform_matrix.h index 0663dff79..fb1560a51 100644 --- a/core/rend/transform_matrix.h +++ b/core/rend/transform_matrix.h @@ -226,8 +226,13 @@ private: { if (renderingContext->scaler_ctl.hscale) scale_x *= 2.f; + // vscalefactor is applied after scissoring if > 1 if (renderingContext->scaler_ctl.vscalefactor > 0x401 || renderingContext->scaler_ctl.vscalefactor < 0x400) - scale_y *= renderingContext->scaler_ctl.vscalefactor / 1024.f; + { + float vscalefactor = 1024.f / renderingContext->scaler_ctl.vscalefactor; + if (vscalefactor < 1) + scale_y /= vscalefactor; + } } } @@ -264,44 +269,28 @@ inline static void getScaledFramebufferSize(const rend_context& rendCtx, int& wi } } -inline static float getOutputFramebufferAspectRatio(const rend_context& rendCtx) +inline static float getOutputFramebufferAspectRatio() { - int w,h; - getPvrFramebufferSize(rendCtx, w, h); - - float width = w; - float height = h; - width *= 1 + VO_CONTROL.pixel_double; - width /= 1 + rendCtx.scaler_ctl.hscale; - height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0); - height *= 1 + (FB_R_CTRL.fb_line_double); - if 
(rendCtx.scaler_ctl.vscalefactor != 0 - && (rendCtx.scaler_ctl.vscalefactor > 1025 || rendCtx.scaler_ctl.vscalefactor < 1024) - && SPG_CONTROL.interlace == 0) - { - if (config::EmulateFramebuffer) - height *= 1024.f / rendCtx.scaler_ctl.vscalefactor; - else if (rendCtx.scaler_ctl.vscalefactor > 1025) - height *= std::round(1024.f / rendCtx.scaler_ctl.vscalefactor); - - } - - float renderAR = width / height; + float aspectRatio; if (config::Rotate90) { - renderAR = 1 / renderAR; + aspectRatio = 3.f / 4.f; } else { if (config::Widescreen && !config::EmulateFramebuffer) { if (config::SuperWidescreen) - renderAR = (float)settings.display.width / settings.display.height; + aspectRatio = (float)settings.display.width / settings.display.height; else - renderAR *= 4 / 3.f; + aspectRatio = 16.f / 9.f; + } + else + { + aspectRatio = 4.f / 3.f; } } - return renderAR * config::ScreenStretching / 100.f; + return aspectRatio * config::ScreenStretching / 100.f; } inline static void getDCFramebufferReadSize(int& width, int& height) @@ -326,16 +315,6 @@ inline static void getDCFramebufferReadSize(int& width, int& height) inline static float getDCFramebufferAspectRatio() { - int width; - int height; - getDCFramebufferReadSize(width, height); - - width *= 1 + VO_CONTROL.pixel_double; - height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0); - height *= 1 + (FB_R_CTRL.fb_line_double); - height *= 1 + SPG_CONTROL.interlace; - float aspectRatio = (float)width / height; - if (config::Rotate90) - aspectRatio = 1 / aspectRatio; + float aspectRatio = config::Rotate90 ? 
3.f / 4.f : 4.f / 3.f; return aspectRatio * config::ScreenStretching / 100.f; } diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index 5c7848ae4..129b99be3 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -114,6 +114,8 @@ void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, Frame yscale = 1.f; FramebufferAttachment *scaledFB = nullptr; + FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP; + FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP; if (xscale != 1.f || yscale != 1.f) { @@ -140,6 +142,11 @@ void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, Frame finalFB = scaledFB; width = scaledW; height = scaledH; + // FB_Y_CLIP is applied before vscalefactor if > 1, so it must be scaled here + if (yscale > 1) { + yClip.min = std::round(yClip.min * yscale); + yClip.max = std::round(yClip.max * yscale); + } } vk::BufferImageCopy copyRegion(0, width, height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), vk::Offset3D(0, 0, 0), @@ -167,8 +174,12 @@ void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, Frame tmpBuf.init(width, height); finalFB->GetBufferData()->download(width * height * 4, tmpBuf.data()); + xClip.min = std::min(xClip.min, width - 1); + xClip.max = std::min(xClip.max, width - 1); + yClip.min = std::min(yClip.min, height - 1); + yClip.max = std::min(yClip.max, height - 1); WriteFramebuffer(width, height, (u8 *)tmpBuf.data(), pvrrc.fb_W_SOF1 & VRAM_MASK, - pvrrc.fb_W_CTRL, pvrrc.fb_W_LINESTRIDE * 8, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP); + pvrrc.fb_W_CTRL, pvrrc.fb_W_LINESTRIDE * 8, xClip, yClip); delete scaledFB; } @@ -744,7 +755,7 @@ void ScreenDrawer::EndRenderPass() { currentCommandBuffer.end(); commandPool->EndFrame(); - aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); + aspectRatio = getOutputFramebufferAspectRatio(); } currentCommandBuffer = nullptr; Drawer::EndRenderPass(); diff --git a/core/rend/vulkan/oit/oit_drawer.h 
b/core/rend/vulkan/oit/oit_drawer.h index b07b24ffc..1035a2bea 100644 --- a/core/rend/vulkan/oit/oit_drawer.h +++ b/core/rend/vulkan/oit/oit_drawer.h @@ -190,7 +190,7 @@ public: { currentCommandBuffer.end(); commandPool->EndFrame(); - aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend); + aspectRatio = getOutputFramebufferAspectRatio(); } currentCommandBuffer = nullptr; OITDrawer::EndFrame(); diff --git a/core/rend/vulkan/oit/oit_renderer.cpp b/core/rend/vulkan/oit/oit_renderer.cpp index 55e1544be..4de01ce8d 100644 --- a/core/rend/vulkan/oit/oit_renderer.cpp +++ b/core/rend/vulkan/oit/oit_renderer.cpp @@ -100,7 +100,7 @@ public: bool Present() override { - if (config::EmulateFramebuffer) + if (config::EmulateFramebuffer || framebufferRendered) return presentFramebuffer(); else return screenDrawer.PresentFrame(); diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index 2e41d9829..61c9d718a 100644 --- a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -91,7 +91,7 @@ public: bool Present() override { - if (config::EmulateFramebuffer) + if (config::EmulateFramebuffer || framebufferRendered) return presentFramebuffer(); else return screenDrawer.PresentFrame(); diff --git a/core/rend/vulkan/vulkan_renderer.h b/core/rend/vulkan/vulkan_renderer.h index 4fefccbda..aa239bd4f 100644 --- a/core/rend/vulkan/vulkan_renderer.h +++ b/core/rend/vulkan/vulkan_renderer.h @@ -145,6 +145,8 @@ public: CheckFogTexture(); CheckPaletteTexture(); texCommandBuffer.end(); + if (!ctx->rend.isRTT) + framebufferRendered = false; } else { @@ -246,6 +248,7 @@ public: curTexture->SetCommandBuffer(nullptr); commandBuffer.end(); fbCommandPool.EndFrame(); + framebufferRendered = true; } protected: @@ -324,6 +327,7 @@ protected: std::unique_ptr quadPipeline; std::unique_ptr framebufferDrawer; CommandPool fbCommandPool; + bool framebufferRendered = false; #ifdef LIBRETRO std::unique_ptr overlay; #endif diff --git 
a/core/serialize.h b/core/serialize.h index 232dec742..70421e2ac 100644 --- a/core/serialize.h +++ b/core/serialize.h @@ -64,7 +64,8 @@ public: V26, V27, V28, - Current = V28, + V29, + Current = V29, Next = Current + 1, }; From c88cdde9ecaf5c2a364d92a8971fa1015677a449 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 30 Oct 2022 12:47:39 +0100 Subject: [PATCH 12/34] test fix --- tests/src/serialize_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/serialize_test.cpp b/tests/src/serialize_test.cpp index 9c5ebb460..93314d135 100644 --- a/tests/src/serialize_test.cpp +++ b/tests/src/serialize_test.cpp @@ -31,7 +31,7 @@ TEST_F(SerializeTest, SizeTest) std::vector data(30000000); Serializer ser(data.data(), data.size()); dc_serialize(ser); - ASSERT_EQ(28191599u, ser.size()); + ASSERT_EQ(28191603u, ser.size()); } From 5a1a766bd4a975a43a392fe394efb744ee5531d2 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 1 Nov 2022 18:23:54 +0100 Subject: [PATCH 13/34] pvr: emu unresponsive if no render. Present when FB_R_SOF = FB_W_SOF Fix no input polling while waiting for present. Present frame immediately if FB_R_SOF = FB_W_SOF. 
--- core/hw/pvr/Renderer_if.cpp | 28 +++++++++++++++------------- shell/libretro/libretro.cpp | 3 --- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index c13418c3d..c85efd9d8 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -12,15 +12,10 @@ #include #include +#ifdef LIBRETRO void retro_rend_present(); -#ifndef LIBRETRO -void retro_rend_present() -{ - if (!config::ThreadedRendering) - sh4_cpu.Stop(); -} -#endif void retro_resize_renderer(int w, int h, float aspectRatio); +#endif u32 FrameCount=1; @@ -151,19 +146,18 @@ private: { case Render: render(); - break; + return true; case RenderFramebuffer: renderFramebuffer(msg.config); - break; + return true; case Present: present(); - break; + return true; case Stop: - return false; + case NoMessage: default: - break; + return false; } - return true; } void render() @@ -188,6 +182,8 @@ private: renderer->Render(); if (!renderToScreen) renderEnd.Set(); + else if (config::DelayFrameSwapping && fb_w_cur == FB_R_SOF1) + present(); } //clear up & free data .. 
@@ -210,7 +206,11 @@ private: if (renderer->Present()) { presented = true; + if (!config::ThreadedRendering) + sh4_cpu.Stop(); +#ifdef LIBRETRO retro_rend_present(); +#endif } } @@ -337,6 +337,8 @@ void rend_start_render() else INFO_LOG(PVR, "rend_start_render: Context0 @ %x not found", addresses[0]); } + else + INFO_LOG(PVR, "rend_start_render: No context not found"); scheduleRenderDone(ctx); diff --git a/shell/libretro/libretro.cpp b/shell/libretro/libretro.cpp index d693194cc..9c53511b5 100644 --- a/shell/libretro/libretro.cpp +++ b/shell/libretro/libretro.cpp @@ -2231,10 +2231,7 @@ unsigned retro_api_version() void retro_rend_present() { if (!config::ThreadedRendering) - { is_dupe = false; - sh4_cpu.Stop(); - } } static uint32_t get_time_ms() From 0b0e64304e5e011eb891286c606d6b9ac33c6429 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 1 Nov 2022 18:29:44 +0100 Subject: [PATCH 14/34] pvr: adaptive end-of-render interrupt delay EoR interrupt delay now depends on the TA context size. 
Issue #634 Fixes issues with FMV in some WinCE games (Resident Evil 2, Next Tetris, Nightmare Creatures 2) --- core/hw/pvr/spg.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index 793aaf8bb..c2583a83d 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -270,7 +270,20 @@ void spg_Reset(bool hard) void scheduleRenderDone(TA_context *cntx) { if (cntx) - sh4_sched_request(render_end_schid, 500000 * 3); + { + int cycles; + if (settings.platform.isNaomi2()) { + cycles = 1500000; + } + else + { + int size = 0; + for (TA_context *c = cntx; c != nullptr; c = c->nextContext) + size += c->tad.thd_data - c->tad.thd_root; + cycles = std::min(100000 + size * 2, 1500000); + } + sh4_sched_request(render_end_schid, cycles); + } else sh4_sched_request(render_end_schid, 4096); } From 2332884d8b689a5d28ca040c0a9b52b833f4736e Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 4 Nov 2022 11:22:50 +0100 Subject: [PATCH 15/34] maple: lower xfer rate to 1 Mbps. More accurate payload size calculation Maple bus max xfer rate is 2 Mbps but actual speed is 1 Mbps. Fixes Slave Zero (PAL) freeze with HLE. Some improvements on payload size calc. Clean up. 
--- core/hw/maple/maple_if.cpp | 91 +++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 51 deletions(-) diff --git a/core/hw/maple/maple_if.cpp b/core/hw/maple/maple_if.cpp index dbde1f800..319638712 100644 --- a/core/hw/maple/maple_if.cpp +++ b/core/hw/maple/maple_if.cpp @@ -72,17 +72,17 @@ void maple_vblank() static void maple_SB_MSHTCL_Write(u32 addr, u32 data) { - if (data&1) - maple_ddt_pending_reset=false; + if (data & 1) + maple_ddt_pending_reset = false; } static void maple_SB_MDST_Write(u32 addr, u32 data) { - if (data & 0x1) + if (data & 1) { - if (SB_MDEN &1) + if (SB_MDEN & 1) { - SB_MDST=1; + SB_MDST = 1; maple_DoDma(); } } @@ -90,12 +90,10 @@ static void maple_SB_MDST_Write(u32 addr, u32 data) static void maple_SB_MDEN_Write(u32 addr, u32 data) { - SB_MDEN=data&1; + SB_MDEN = data & 1; - if ((data & 0x1)==0 && SB_MDST) - { + if ((data & 1) == 0 && SB_MDST) INFO_LOG(MAPLE, "Maple DMA abort ?"); - } } #ifdef STRICT_MODE @@ -121,23 +119,10 @@ static void maple_SB_MDSTAR_Write(u32 addr, u32 data) } #endif -bool IsOnSh4Ram(u32 addr) -{ - if (((addr>>26)&0x7)==3) - { - if ((((addr>>29) &0x7)!=7)) - { - return true; - } - } - - return false; -} - static void maple_DoDma() { - verify(SB_MDEN &1); - verify(SB_MDST &1); + verify(SB_MDEN & 1); + verify(SB_MDST & 1); DEBUG_LOG(MAPLE, "Maple: DoMapleDma SB_MDSTAR=%x", SB_MDSTAR); u32 addr = SB_MDSTAR; @@ -153,18 +138,17 @@ static void maple_DoDma() ggpo::getInput(mapleInputState); const bool swap_msb = (SB_MMSEL == 0); - u32 xfer_count=0; + u32 xfer_count = 0; bool last = false; bool occupy = false; - while (last != true) + while (!last) { u32 header_1 = ReadMem32_nommu(addr); - u32 header_2 = ReadMem32_nommu(addr + 4) &0x1FFFFFE0; + u32 header_2 = ReadMem32_nommu(addr + 4) & 0x1FFFFFE0; - last = (header_1 >> 31) == 1;//is last transfer ? 
- u32 plen = (header_1 & 0xFF )+1;//transfer length (32-bit unit) - u32 maple_op=(header_1>>8)&7; // Pattern selection: 0 - START, 2 - SDCKB occupy permission, 3 - RESET, 4 - SDCKB occupy cancel, 7 - NOP - xfer_count+=plen*4; + last = (header_1 >> 31) == 1; // is last transfer ? + u32 plen = (header_1 & 0xFF) + 1; // transfer length (32-bit unit) + u32 maple_op = (header_1 >> 8) & 7; // Pattern selection: 0 - START, 2 - SDCKB occupy permission, 3 - RESET, 4 - SDCKB occupy cancel, 7 - NOP //this is kinda wrong .. but meh //really need to properly process the commands at some point @@ -181,17 +165,17 @@ static void maple_DoDma() return; } #else - if (!IsOnSh4Ram(header_2)) + if (GetMemPtr(header_2, 1) == nullptr) { - INFO_LOG(MAPLE, "MAPLE ERROR : DESTINATION NOT ON SH4 RAM 0x%X", header_2); - header_2&=0xFFFFFF; - header_2|=(3<<26); + WARN_LOG(MAPLE, "MAPLE ERROR : DESTINATION NOT ON SH4 RAM %x", header_2); + header_2 &= RAM_MASK; + header_2 |= 3 << 26; } #endif u32* p_data = (u32 *)GetMemPtr(addr + 8, plen * sizeof(u32)); if (p_data == nullptr) { - INFO_LOG(MAPLE, "MAPLE ERROR : INVALID SB_MDSTAR value 0x%X", addr); + WARN_LOG(MAPLE, "MAPLE ERROR : INVALID SB_MDSTAR value 0x%X", addr); SB_MDST = 0; mapleDmaOut.clear(); return; @@ -207,8 +191,8 @@ static void maple_DoDma() //Number of additional words in frame u32 inlen = (frame_header >> 24) & 0xFF; - u32 port=maple_GetPort(reci); - u32 bus=maple_GetBusId(reci); + u32 port = maple_GetPort(reci); + u32 bus = maple_GetBusId(reci); if (MapleDevices[bus][5] && MapleDevices[bus][port]) { @@ -220,9 +204,10 @@ static void maple_DoDma() maple_in_buf[i] = SWAP32(p_data[i]); p_data = maple_in_buf; } + inlen = (inlen + 1) * 4; u32 outbuf[1024 / 4]; - u32 outlen = MapleDevices[bus][port]->RawDma(&p_data[0], inlen * 4 + 4, outbuf); - xfer_count += outlen; + u32 outlen = MapleDevices[bus][port]->RawDma(&p_data[0], inlen, outbuf); + xfer_count += inlen + 3 + outlen + 3; // start, parity and stop bytes #ifdef STRICT_MODE if 
(!check_mdapro(header_2 + outlen - 1)) { @@ -252,8 +237,10 @@ static void maple_DoDma() case MP_SDCKBOccupy: { u32 bus = (header_1 >> 16) & 3; - if (MapleDevices[bus][5]) + if (MapleDevices[bus][5]) { occupy = MapleDevices[bus][5]->get_lightgun_pos(); + xfer_count++; + } addr += 1 * 4; } break; @@ -264,6 +251,7 @@ static void maple_DoDma() case MP_Reset: addr += 1 * 4; + xfer_count++; break; case MP_NOP: @@ -276,9 +264,10 @@ static void maple_DoDma() } } - //printf("Maple XFER size %d bytes - %.2f ms\n", xfer_count, xfer_count * 1000.0f / (2 * 1024 * 1024 / 8)); + // Maple bus max speed: 2 Mb/s, actual speed: 1 Mb/s + //printf("Maple XFER size %d bytes - %.2f ms\n", xfer_count, xfer_count * 1000.0f / (128 * 1024)); if (!occupy) - sh4_sched_request(maple_schid, std::min((u64)xfer_count * (SH4_MAIN_CLOCK / (2 * 1024 * 1024 / 8)), (u64)SH4_MAIN_CLOCK)); + sh4_sched_request(maple_schid, std::min((u64)xfer_count * (SH4_MAIN_CLOCK / (128 * 1024)), (u64)SH4_MAIN_CLOCK)); } static int maple_schd(int tag, int c, int j) @@ -304,7 +293,7 @@ static int maple_schd(int tag, int c, int j) return 0; } -void maple_SB_MDAPRO_Write(u32 addr, u32 data) +static void maple_SB_MDAPRO_Write(u32 addr, u32 data) { if ((data >> 16) == 0x6155) SB_MDAPRO = data & 0x00007f7f; @@ -313,20 +302,20 @@ void maple_SB_MDAPRO_Write(u32 addr, u32 data) //Init registers :) void maple_Init() { - sb_rio_register(SB_MDST_addr,RIO_WF,0,&maple_SB_MDST_Write); - sb_rio_register(SB_MDEN_addr,RIO_WF,0,&maple_SB_MDEN_Write); - sb_rio_register(SB_MSHTCL_addr,RIO_WF,0,&maple_SB_MSHTCL_Write); - sb_rio_register(SB_MDAPRO_addr, RIO_WO_FUNC, nullptr, &maple_SB_MDAPRO_Write); + sb_rio_register(SB_MDST_addr, RIO_WF, nullptr, maple_SB_MDST_Write); + sb_rio_register(SB_MDEN_addr, RIO_WF, nullptr, maple_SB_MDEN_Write); + sb_rio_register(SB_MSHTCL_addr, RIO_WF, nullptr, maple_SB_MSHTCL_Write); + sb_rio_register(SB_MDAPRO_addr, RIO_WO_FUNC, nullptr, maple_SB_MDAPRO_Write); #ifdef STRICT_MODE - sb_rio_register(SB_MDSTAR_addr, 
RIO_WF, nullptr, &maple_SB_MDSTAR_Write); + sb_rio_register(SB_MDSTAR_addr, RIO_WF, nullptr, maple_SB_MDSTAR_Write); #endif - maple_schid=sh4_sched_register(0,&maple_schd); + maple_schid = sh4_sched_register(0, maple_schd); } void maple_Reset(bool hard) { - maple_ddt_pending_reset=false; + maple_ddt_pending_reset = false; SB_MDTSEL = 0; SB_MDEN = 0; SB_MDST = 0; From 19204882c45e35c30eff0de206dc1503d71e30b4 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 4 Nov 2022 18:34:43 +0100 Subject: [PATCH 16/34] pvr: reschedule spg when SPG_HBLANK_INT is updated Fixes Triggerheart Exelica input lag Issue #691 --- core/hw/pvr/pvr_regs.cpp | 8 +++++ core/hw/pvr/spg.cpp | 65 +++++++++++++++++++++------------------- core/hw/pvr/spg.h | 1 + 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/core/hw/pvr/pvr_regs.cpp b/core/hw/pvr/pvr_regs.cpp index 59bb0696f..235c728fb 100644 --- a/core/hw/pvr/pvr_regs.cpp +++ b/core/hw/pvr/pvr_regs.cpp @@ -205,6 +205,14 @@ void pvr_WriteReg(u32 paddr,u32 data) data &= 0x01fffffc; break; + case SPG_HBLANK_INT_addr: + data &= 0x03FF33FF; + if (data != SPG_HBLANK_INT.full) { + SPG_HBLANK_INT.full = data; + rescheduleSPG(); + } + return; + case PAL_RAM_CTRL_addr: pal_needs_update = pal_needs_update || ((data ^ PAL_RAM_CTRL) & 3) != 0; break; diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index c2583a83d..406818071 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -55,6 +55,39 @@ void CalculateSync() sh4_sched_request(vblank_schid, Line_Cycles); } +static int getNextSpgInterrupt() +{ + u32 min_scanline = prv_cur_scanline + 1; + u32 min_active = pvr_numscanlines; + + if (min_scanline < SPG_VBLANK_INT.vblank_in_interrupt_line_number) + min_active = std::min(min_active, SPG_VBLANK_INT.vblank_in_interrupt_line_number); + + if (min_scanline < SPG_VBLANK_INT.vblank_out_interrupt_line_number) + min_active = std::min(min_active, SPG_VBLANK_INT.vblank_out_interrupt_line_number); + + if (min_scanline < SPG_VBLANK.vstart) + 
min_active = std::min(min_active, SPG_VBLANK.vstart); + + if (min_scanline < SPG_VBLANK.vbend) + min_active = std::min(min_active, SPG_VBLANK.vbend); + + if (lightgun_line != 0xffff && min_scanline < lightgun_line) + min_active = std::min(min_active, lightgun_line); + + if (SPG_HBLANK_INT.hblank_int_mode == 0 && min_scanline < SPG_HBLANK_INT.line_comp_val) + min_active = std::min(min_active, SPG_HBLANK_INT.line_comp_val); + + min_active = std::max(min_active, min_scanline); + + return (min_active - prv_cur_scanline) * Line_Cycles; +} + +void rescheduleSPG() +{ + sh4_sched_request(vblank_schid, getNextSpgInterrupt()); +} + //called from sh4 context , should update pvr/ta state and everything else static int spg_line_sched(int tag, int cycl, int jit) { @@ -190,37 +223,7 @@ static int spg_line_sched(int tag, int cycl, int jit) } } - //interrupts - //0 - //vblank_in_interrupt_line_number - //vblank_out_interrupt_line_number - //vstart - //vbend - //pvr_numscanlines - u32 min_scanline=prv_cur_scanline+1; - u32 min_active=pvr_numscanlines; - - if (min_scanline Date: Mon, 5 Dec 2022 21:24:27 +0100 Subject: [PATCH 17/34] race condition causing emu thread exceptions to be ignored in multithreaded mode, checkStatus() *must* be called to report exceptions thrown by the emu thread --- core/emulator.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/emulator.cpp b/core/emulator.cpp index 22cd26a53..0bf549bf6 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -842,16 +842,18 @@ bool Emulator::checkStatus() bool Emulator::render() { - if (state != Running) - return false; if (!config::ThreadedRendering) { + if (state != Running) + return false; run(); // TODO if stopping due to a user request, no frame has been rendered return !renderTimeout; } if (!checkStatus()) return false; + if (state != Running) + return false; return rend_single_frame(true); // FIXME stop flag? 
} From 8001af97433ef9aff00f71cf55920c43b32e42cd Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 6 Dec 2022 17:58:30 +0100 Subject: [PATCH 18/34] sh4: proper write masks for memory-mapped registers --- core/hw/sh4/modules/bsc.cpp | 80 +++-- core/hw/sh4/modules/ccn.cpp | 53 +-- core/hw/sh4/modules/cpg.cpp | 24 +- core/hw/sh4/modules/dmac.cpp | 43 ++- core/hw/sh4/modules/intc.cpp | 34 +- core/hw/sh4/modules/mmu.cpp | 3 +- core/hw/sh4/modules/mmu.h | 2 + core/hw/sh4/modules/rtc.cpp | 38 +-- core/hw/sh4/modules/serial.cpp | 38 +-- core/hw/sh4/modules/tmu.cpp | 60 ++-- core/hw/sh4/modules/ubc.cpp | 20 +- core/hw/sh4/sh4_mmr.cpp | 577 +++++++++++++++++---------------- core/hw/sh4/sh4_mmr.h | 48 ++- core/hw/sh4/sh4_sched.cpp | 1 - 14 files changed, 535 insertions(+), 486 deletions(-) diff --git a/core/hw/sh4/modules/bsc.cpp b/core/hw/sh4/modules/bsc.cpp index 90cec4ff9..b45964cb8 100644 --- a/core/hw/sh4/modules/bsc.cpp +++ b/core/hw/sh4/modules/bsc.cpp @@ -8,8 +8,7 @@ BSC_PDTRA_type BSC_PDTRA; - -void write_BSC_PCTRA(u32 addr, u32 data) +static void write_BSC_PCTRA(u32 addr, u32 data) { BSC_PCTRA.full = data; if (settings.platform.isNaomi()) @@ -17,17 +16,18 @@ void write_BSC_PCTRA(u32 addr, u32 data) //else //printf("C:BSC_PCTRA = %08X\n",data); } + //u32 port_out_data; -void write_BSC_PDTRA(u32 addr, u32 data) +static void write_BSC_PDTRA(u32 addr, u32 data) { - BSC_PDTRA.full=(u16)data; + BSC_PDTRA.full = (u16)data; //printf("D:BSC_PDTRA = %04x\n", (u16)data); if (settings.platform.isNaomi()) NaomiBoardIDWrite((u16)data); } -u32 read_BSC_PDTRA(u32 addr) +static u32 read_BSC_PDTRA(u32 addr) { if (settings.platform.isNaomi()) { @@ -41,16 +41,16 @@ u32 read_BSC_PDTRA(u32 addr) u32 tfinal=0; // magic values - if ((tpctra&0xf) == 0x8) + if ((tpctra & 0xf) == 0x8) tfinal = 3; - else if ((tpctra&0xf) == 0xB) + else if ((tpctra & 0xf) == 0xB) tfinal = 3; else tfinal = 0; - if ((tpctra&0xf) == 0xB && (tpdtra&0xf) == 2) + if ((tpctra & 0xf) == 0xB && (tpdtra & 0xf) == 2) 
tfinal = 0; - else if ((tpctra&0xf) == 0xC && (tpdtra&0xf) == 2) + else if ((tpctra & 0xf) == 0xC && (tpdtra & 0xf) == 2) tfinal = 3; tfinal |= config::Cable << 8; @@ -63,55 +63,53 @@ u32 read_BSC_PDTRA(u32 addr) void bsc_init() { //BSC BCR1 0xFF800000 0x1F800000 32 0x00000000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_BCR1_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //BSC BCR2 0xFF800004 0x1F800004 16 0x3FFC Held Held Held Bclk - sh4_rio_reg(BSC,BSC_BCR2_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //BSC WCR1 0xFF800008 0x1F800008 32 0x77777777 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_WCR1_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //BSC WCR2 0xFF80000C 0x1F80000C 32 0xFFFEEFFF Held Held Held Bclk - sh4_rio_reg(BSC,BSC_WCR2_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //BSC WCR3 0xFF800010 0x1F800010 32 0x07777777 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_WCR3_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //BSC MCR 0xFF800014 0x1F800014 32 0x00000000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_MCR_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //BSC PCR 0xFF800018 0x1F800018 16 0x0000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_PCR_addr,RIO_DATA,16); + sh4_rio_reg16(); //BSC RTCSR 0xFF80001C 0x1F80001C 16 0x0000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_RTCSR_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //BSC RTCNT 0xFF800020 0x1F800020 16 0x0000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_RTCNT_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //BSC RTCOR 0xFF800024 0x1F800024 16 0x0000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_RTCOR_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //BSC RFCR 0xFF800028 0x1F800028 16 0x0000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_RFCR_addr,RIO_DATA,16); + // forced to 0x17 to help naomi/aw boot + sh4_rio_reg(BSC, BSC_RFCR_addr, RIO_RO); //BSC PCTRA 0xFF80002C 0x1F80002C 32 0x00000000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_PCTRA_addr,RIO_WF,16,0,write_BSC_PCTRA); + sh4_rio_reg(BSC, BSC_PCTRA_addr, RIO_WF, nullptr, write_BSC_PCTRA); //BSC PDTRA 0xFF800030 0x1F800030 
16 Undefined Held Held Held Bclk - sh4_rio_reg(BSC,BSC_PDTRA_addr,RIO_FUNC,16,&read_BSC_PDTRA,&write_BSC_PDTRA); + sh4_rio_reg(BSC, BSC_PDTRA_addr, RIO_FUNC, read_BSC_PDTRA, write_BSC_PDTRA); //BSC PCTRB 0xFF800040 0x1F800040 32 0x00000000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_PCTRB_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //BSC PDTRB 0xFF800044 0x1F800044 16 Undefined Held Held Held Bclk - sh4_rio_reg(BSC,BSC_PDTRB_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //BSC GPIOIC 0xFF800048 0x1F800048 16 0x00000000 Held Held Held Bclk - sh4_rio_reg(BSC,BSC_GPIOIC_addr,RIO_DATA,16); - - //note: naomi//aw might depend on rfcr - sh4_rio_reg(BSC, BSC_RFCR_addr, RIO_RO, 16); + sh4_rio_reg16(); } void bsc_reset(bool hard) @@ -137,24 +135,24 @@ void bsc_reset(bool hard) BSC SDMR2 H'FF90 xxxx H'1F90 xxxx 8 Write-only Bclk BSC SDMR3 H'FF94 xxxx H'1F94 xxxx 8 Bclk */ - BSC_BCR1.full=0x0; - BSC_BCR2.full=0x3FFC; - BSC_WCR1.full=0x77777777; - BSC_WCR2.full=0xFFFEEFFF; - BSC_WCR3.full=0x07777777; + BSC_BCR1.full = 0; + BSC_BCR2.full = 0x3FFC; + BSC_WCR1.full = 0x77777777; + BSC_WCR2.full = 0xFFFEEFFF; + BSC_WCR3.full = 0x07777777; - BSC_MCR.full=0x0; - BSC_PCR.full=0x0; - BSC_RTCSR.full=0x0; - BSC_RTCNT.full=0x0; - BSC_RTCOR.full=0x0; - BSC_PCTRA.full=0x0; + BSC_MCR.full = 0; + BSC_PCR.full = 0; + BSC_RTCSR.full = 0; + BSC_RTCNT.full = 0; + BSC_RTCOR.full = 0; + BSC_PCTRA.full = 0; if (hard) BSC_PDTRA.full = 0; - BSC_PCTRB.full=0x0; + BSC_PCTRB.full = 0; if (hard) BSC_PDTRB.full = 0; - BSC_GPIOIC.full=0x0; + BSC_GPIOIC.full = 0; BSC_RFCR.full = 17; } diff --git a/core/hw/sh4/modules/ccn.cpp b/core/hw/sh4/modules/ccn.cpp index a35ba5553..fc5c3cca7 100644 --- a/core/hw/sh4/modules/ccn.cpp +++ b/core/hw/sh4/modules/ccn.cpp @@ -18,9 +18,9 @@ template void CCN_QACR_write(u32 addr, u32 value) { if (idx == 0) - CCN_QACR0.reg_data = value; + CCN_QACR0.reg_data = value & 0x1c; else - CCN_QACR1.reg_data = value; + CCN_QACR1.reg_data = value & 0x1c; u32 area = ((CCN_QACR_type&)value).Area; @@ 
-45,20 +45,22 @@ void CCN_QACR_write(u32 addr, u32 value) } } -void CCN_PTEH_write(u32 addr, u32 value) +static void CCN_PTEH_write(u32 addr, u32 value) { CCN_PTEH_type temp; - temp.reg_data = value; + temp.reg_data = value & 0xfffffcff; +#ifdef FAST_MMU if (temp.ASID != CCN_PTEH.ASID) mmuAddressLUTFlush(false); +#endif CCN_PTEH = temp; } -void CCN_MMUCR_write(u32 addr, u32 value) +static void CCN_MMUCR_write(u32 addr, u32 value) { CCN_MMUCR_type temp; - temp.reg_data=value; + temp.reg_data = value & 0xfcfcff05; bool mmu_changed_state = temp.AT != CCN_MMUCR.AT; @@ -69,7 +71,7 @@ void CCN_MMUCR_write(u32 addr, u32 value) temp.TI = 0; } - CCN_MMUCR=temp; + CCN_MMUCR = temp; if (mmu_changed_state) { @@ -78,10 +80,11 @@ void CCN_MMUCR_write(u32 addr, u32 value) mmu_set_state(); } } -void CCN_CCR_write(u32 addr, u32 value) + +static void CCN_CCR_write(u32 addr, u32 value) { CCN_CCR_type temp; - temp.reg_data=value; + temp.reg_data = value & 0x89AF; if (temp.ICI) { DEBUG_LOG(SH4, "Sh4: i-cache invalidation %08X", curr_pc); @@ -114,52 +117,52 @@ static u32 CCN_PRR_read(u32 addr) void ccn_init() { //CCN PTEH 0xFF000000 0x1F000000 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_PTEH_addr,RIO_WF,32,0,&CCN_PTEH_write); + sh4_rio_reg(CCN, CCN_PTEH_addr, RIO_WF, nullptr, CCN_PTEH_write); //CCN PTEL 0xFF000004 0x1F000004 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_PTEL_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //CCN TTB 0xFF000008 0x1F000008 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_TTB_addr,RIO_DATA,32); + sh4_rio_reg(CCN, CCN_TTB_addr, RIO_DATA); //CCN TEA 0xFF00000C 0x1F00000C 32 Undefined Held Held Held Iclk - sh4_rio_reg(CCN,CCN_TEA_addr,RIO_DATA,32); + sh4_rio_reg(CCN, CCN_TEA_addr, RIO_DATA); //CCN MMUCR 0xFF000010 0x1F000010 32 0x00000000 0x00000000 Held Held Iclk - sh4_rio_reg(CCN,CCN_MMUCR_addr,RIO_WF,32,0,&CCN_MMUCR_write); + sh4_rio_reg(CCN, CCN_MMUCR_addr, RIO_WF, nullptr, CCN_MMUCR_write); //CCN BASRA 0xFF000014 
0x1F000014 8 Undefined Held Held Held Iclk - sh4_rio_reg(CCN,CCN_BASRA_addr,RIO_DATA,8); + sh4_rio_reg8(); //CCN BASRB 0xFF000018 0x1F000018 8 Undefined Held Held Held Iclk - sh4_rio_reg(CCN,CCN_BASRB_addr,RIO_DATA,8); + sh4_rio_reg8(); //CCN CCR 0xFF00001C 0x1F00001C 32 0x00000000 0x00000000 Held Held Iclk - sh4_rio_reg(CCN,CCN_CCR_addr,RIO_WF,32,0,&CCN_CCR_write); + sh4_rio_reg(CCN, CCN_CCR_addr, RIO_WF, nullptr, CCN_CCR_write); //CCN TRA 0xFF000020 0x1F000020 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_TRA_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //CCN EXPEVT 0xFF000024 0x1F000024 32 0x00000000 0x00000020 Held Held Iclk - sh4_rio_reg(CCN,CCN_EXPEVT_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //CCN INTEVT 0xFF000028 0x1F000028 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_INTEVT_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); // CPU VERSION 0xFF000030 0x1F000030 (undocumented) - sh4_rio_reg(CCN,CPU_VERSION_addr, RIO_RO_FUNC, 32, &CPU_VERSION_read, 0); + sh4_rio_reg(CCN, CPU_VERSION_addr, RIO_RO_FUNC, CPU_VERSION_read, nullptr); //CCN PTEA 0xFF000034 0x1F000034 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_PTEA_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //CCN QACR0 0xFF000038 0x1F000038 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_QACR0_addr,RIO_WF,32,0,&CCN_QACR_write<0>); + sh4_rio_reg(CCN, CCN_QACR0_addr, RIO_WF, nullptr, CCN_QACR_write<0>); //CCN QACR1 0xFF00003C 0x1F00003C 32 Undefined Undefined Held Held Iclk - sh4_rio_reg(CCN,CCN_QACR1_addr,RIO_WF,32,0,&CCN_QACR_write<1>); + sh4_rio_reg(CCN, CCN_QACR1_addr, RIO_WF, nullptr, CCN_QACR_write<1>); // CCN PRR 0xFF000044 0x1F000044 (undocumented) - sh4_rio_reg(CCN,CCN_PRR_addr, RIO_RO_FUNC, 32, &CCN_PRR_read, 0); + sh4_rio_reg(CCN,CCN_PRR_addr, RIO_RO_FUNC, &CCN_PRR_read, 0); } diff --git a/core/hw/sh4/modules/cpg.cpp b/core/hw/sh4/modules/cpg.cpp index fc016f967..94984ff5a 100644 --- a/core/hw/sh4/modules/cpg.cpp +++ b/core/hw/sh4/modules/cpg.cpp @@ -1,33 +1,27 
@@ #include "types.h" #include "hw/sh4/sh4_mmr.h" - -/* -u16 CPG_FRQCR; -u8 CPG_STBCR; -u16 CPG_WTCNT; -u16 CPG_WTCSR; -u8 CPG_STBCR2; -*/ - //Init term res void cpg_init() { //CPG FRQCR H'FFC0 0000 H'1FC0 0000 16 *2 Held Held Held Pclk - sh4_rio_reg(CPG,CPG_FRQCR_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //CPG STBCR H'FFC0 0004 H'1FC0 0004 8 H'00 Held Held Held Pclk - sh4_rio_reg(CPG,CPG_STBCR_addr,RIO_DATA,8); + sh4_rio_reg(CPG, CPG_STBCR_addr, RIO_DATA); //CPG WTCNT H'FFC0 0008 H'1FC0 0008 8/16*3 H'00 Held Held Held Pclk - sh4_rio_reg(CPG,CPG_WTCNT_addr,RIO_DATA,16); + // Need special pattern 0x5A in upper 8 bits on write. Not currently checked + sh4_rio_reg8(); //CPG WTCSR H'FFC0 000C H'1FC0 000C 8/16*3 H'00 Held Held Held Pclk - sh4_rio_reg(CPG,CPG_WTCSR_addr,RIO_DATA,16); + // Need special pattern 0x5A in upper 8 bits on write. Not currently checked + sh4_rio_reg8(); //CPG STBCR2 H'FFC0 0010 H'1FC0 0010 8 H'00 Held Held Held Pclk - sh4_rio_reg(CPG,CPG_STBCR2_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); } + void cpg_reset() { /* @@ -46,4 +40,4 @@ void cpg_reset() void cpg_term() { -} \ No newline at end of file +} diff --git a/core/hw/sh4/modules/dmac.cpp b/core/hw/sh4/modules/dmac.cpp index 5dc958bb3..ffbdab428 100644 --- a/core/hw/sh4/modules/dmac.cpp +++ b/core/hw/sh4/modules/dmac.cpp @@ -101,7 +101,7 @@ void DMAC_Ch2St() static const InterruptID dmac_itr[] = { sh4_DMAC_DMTE0, sh4_DMAC_DMTE1, sh4_DMAC_DMTE2, sh4_DMAC_DMTE3 }; template -void WriteCHCR(u32 addr, u32 data) +static void WriteCHCR(u32 addr, u32 data) { if (ch == 0 || ch == 1) DMAC_CHCR(ch).full = data & 0xff0ffff7; @@ -140,65 +140,61 @@ void WriteCHCR(u32 addr, u32 data) } } -void WriteDMAOR(u32 addr, u32 data) -{ - DMAC_DMAOR.full = data; -} - //Init term res void dmac_init() { //DMAC SAR0 0xFFA00000 0x1FA00000 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_SAR0_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, DMAC_SAR0_addr, RIO_DATA); //DMAC DAR0 0xFFA00004 0x1FA00004 32 Undefined Undefined 
Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DAR0_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, DMAC_DAR0_addr, RIO_DATA); //DMAC DMATCR0 0xFFA00008 0x1FA00008 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DMATCR0_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //DMAC CHCR0 0xFFA0000C 0x1FA0000C 32 0x00000000 0x00000000 Held Held Bclk - sh4_rio_reg(DMAC,DMAC_CHCR0_addr,RIO_WF,32,0,&WriteCHCR<0>); + sh4_rio_reg(DMAC, DMAC_CHCR0_addr, RIO_WF, nullptr, WriteCHCR<0>); //DMAC SAR1 0xFFA00010 0x1FA00010 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_SAR1_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, DMAC_SAR1_addr, RIO_DATA); //DMAC DAR1 0xFFA00014 0x1FA00014 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DAR1_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, DMAC_DAR1_addr, RIO_DATA); //DMAC DMATCR1 0xFFA00018 0x1FA00018 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DMATCR1_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //DMAC CHCR1 0xFFA0001C 0x1FA0001C 32 0x00000000 0x00000000 Held Held Bclk - sh4_rio_reg(DMAC,DMAC_CHCR1_addr,RIO_WF,32,0,&WriteCHCR<1>); + sh4_rio_reg(DMAC, DMAC_CHCR1_addr, RIO_WF, nullptr, WriteCHCR<1>); //DMAC SAR2 0xFFA00020 0x1FA00020 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_SAR2_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, DMAC_SAR2_addr, RIO_DATA); //DMAC DAR2 0xFFA00024 0x1FA00024 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DAR2_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, DMAC_DAR2_addr, RIO_DATA); //DMAC DMATCR2 0xFFA00028 0x1FA00028 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DMATCR2_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //DMAC CHCR2 0xFFA0002C 0x1FA0002C 32 0x00000000 0x00000000 Held Held Bclk - sh4_rio_reg(DMAC,DMAC_CHCR2_addr,RIO_WF,32,0,&WriteCHCR<2>); + sh4_rio_reg(DMAC, DMAC_CHCR2_addr, RIO_WF, nullptr, WriteCHCR<2>); //DMAC SAR3 0xFFA00030 0x1FA00030 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_SAR3_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, 
DMAC_SAR3_addr, RIO_DATA); //DMAC DAR3 0xFFA00034 0x1FA00034 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DAR3_addr,RIO_DATA,32); + sh4_rio_reg(DMAC, DMAC_DAR3_addr, RIO_DATA); //DMAC DMATCR3 0xFFA00038 0x1FA00038 32 Undefined Undefined Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DMATCR3_addr,RIO_DATA,32); + sh4_rio_reg_wmask(); //DMAC CHCR3 0xFFA0003C 0x1FA0003C 32 0x00000000 0x00000000 Held Held Bclk - sh4_rio_reg(DMAC,DMAC_CHCR3_addr,RIO_WF,32,0,&WriteCHCR<3>); + sh4_rio_reg(DMAC, DMAC_CHCR3_addr, RIO_WF, nullptr, WriteCHCR<3>); //DMAC DMAOR 0xFFA00040 0x1FA00040 32 0x00000000 0x00000000 Held Held Bclk - sh4_rio_reg(DMAC,DMAC_DMAOR_addr,RIO_WF,32,0,&WriteDMAOR); + sh4_rio_reg_wmask(); } + void dmac_reset() { /* @@ -226,6 +222,7 @@ void dmac_reset() DMAC_CHCR(3).full = 0x0; DMAC_DMAOR.full = 0x0; } + void dmac_term() { } diff --git a/core/hw/sh4/modules/intc.cpp b/core/hw/sh4/modules/intc.cpp index 2b9d6da97..81a3e7e51 100644 --- a/core/hw/sh4/modules/intc.cpp +++ b/core/hw/sh4/modules/intc.cpp @@ -11,33 +11,35 @@ #include "../sh4_interrupts.h" #include "../sh4_mmr.h" - //Register writes need interrupt re-testing ! 
static void write_INTC_IPRA(u32 addr, u32 data) { - if (INTC_IPRA.reg_data!=(u16)data) + if (INTC_IPRA.reg_data != (u16)data) { - INTC_IPRA.reg_data=(u16)data; + INTC_IPRA.reg_data = (u16)data; SIIDRebuild(); //we need to rebuild the table } } + static void write_INTC_IPRB(u32 addr, u32 data) { - if (INTC_IPRB.reg_data!=(u16)data) + if (INTC_IPRB.reg_data != (u16)data) { - INTC_IPRB.reg_data=(u16)data; + INTC_IPRB.reg_data = (u16)data; SIIDRebuild(); //we need to rebuild the table } } + static void write_INTC_IPRC(u32 addr, u32 data) { - if (INTC_IPRC.reg_data!=(u16)data) + if (INTC_IPRC.reg_data != (u16)data) { - INTC_IPRC.reg_data=(u16)data; + INTC_IPRC.reg_data = (u16)data; SIIDRebuild(); //we need to rebuild the table } } + static u32 read_INTC_IPRD(u32 addr) { return 0; @@ -47,29 +49,29 @@ static u32 read_INTC_IPRD(u32 addr) void intc_init() { //INTC ICR 0xFFD00000 0x1FD00000 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(INTC,INTC_ICR_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //INTC IPRA 0xFFD00004 0x1FD00004 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(INTC,INTC_IPRA_addr,RIO_WF,16,0,&write_INTC_IPRA); + sh4_rio_reg(INTC, INTC_IPRA_addr, RIO_WF, nullptr, write_INTC_IPRA); //INTC IPRB 0xFFD00008 0x1FD00008 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(INTC,INTC_IPRB_addr,RIO_WF,16,0,&write_INTC_IPRB); + sh4_rio_reg(INTC, INTC_IPRB_addr, RIO_WF, nullptr, write_INTC_IPRB); //INTC IPRC 0xFFD0000C 0x1FD0000C 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(INTC,INTC_IPRC_addr,RIO_WF,16,0,&write_INTC_IPRC); + sh4_rio_reg(INTC, INTC_IPRC_addr, RIO_WF, nullptr, write_INTC_IPRC); //INTC IPRD 0xFFD00010 0x1FD00010 16 0xDA74 0xDA74 Held Held Pclk (SH7750S, SH7750R only) - sh4_rio_reg(INTC,INTC_IPRD_addr,RIO_RO_FUNC,16,&read_INTC_IPRD); + sh4_rio_reg(INTC, INTC_IPRD_addr, RIO_RO_FUNC, read_INTC_IPRD); interrupts_init(); } void intc_reset() { - INTC_ICR.reg_data = 0x0; - INTC_IPRA.reg_data = 0x0; - INTC_IPRB.reg_data = 0x0; - INTC_IPRC.reg_data = 0x0; + INTC_ICR.reg_data = 
0; + INTC_IPRA.reg_data = 0; + INTC_IPRB.reg_data = 0; + INTC_IPRC.reg_data = 0; interrupts_reset(); } diff --git a/core/hw/sh4/modules/mmu.cpp b/core/hw/sh4/modules/mmu.cpp index b592dfcb1..36aee1ff5 100644 --- a/core/hw/sh4/modules/mmu.cpp +++ b/core/hw/sh4/modules/mmu.cpp @@ -529,9 +529,11 @@ void MMU_init() } } mmu_set_state(); +#ifdef FAST_MMU // pre-fill kernel memory for (u32 vpn = ARRAY_SIZE(mmuAddressLUT) / 2; vpn < ARRAY_SIZE(mmuAddressLUT); vpn++) mmuAddressLUT[vpn] = vpn << 12; +#endif } @@ -559,7 +561,6 @@ void mmu_flush_table() for (u32 i = 0; i < 64; i++) UTLB[i].Data.V = 0; - mmuAddressLUTFlush(true); } #endif diff --git a/core/hw/sh4/modules/mmu.h b/core/hw/sh4/modules/mmu.h index 8dad1bc6b..1f8242f66 100644 --- a/core/hw/sh4/modules/mmu.h +++ b/core/hw/sh4/modules/mmu.h @@ -152,8 +152,10 @@ static inline u32 mmuDynarecLookup(u32 vaddr, u32 write, u32 pc) // not reached return 0; } +#ifdef FAST_MMU if (vaddr >> 31 == 0) mmuAddressLUT[vaddr >> 12] = paddr & ~0xfff; +#endif return paddr; } diff --git a/core/hw/sh4/modules/rtc.cpp b/core/hw/sh4/modules/rtc.cpp index 8bd8c0e45..9cd9a350f 100644 --- a/core/hw/sh4/modules/rtc.cpp +++ b/core/hw/sh4/modules/rtc.cpp @@ -10,52 +10,52 @@ void rtc_init() // NAOMI reads from at least RTC_R64CNT //RTC R64CNT 0xFFC80000 0x1FC80000 8 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_R64CNT_addr,RIO_DATA,8); + sh4_rio_reg(RTC, RTC_R64CNT_addr, RIO_RO); //RTC RSECCNT H'FFC8 0004 H'1FC8 0004 8 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RSECCNT_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RMINCNT H'FFC8 0008 H'1FC8 0008 8 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RMINCNT_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RHRCNT H'FFC8 000C H'1FC8 000C 8 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RHRCNT_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RWKCNT H'FFC8 0010 H'1FC8 0010 8 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RWKCNT_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RDAYCNT H'FFC8 0014 H'1FC8 
0014 8 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RDAYCNT_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RMONCNT H'FFC8 0018 H'1FC8 0018 8 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RMONCNT_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RYRCNT H'FFC8 001C H'1FC8 001C 16 Held Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RYRCNT_addr,RIO_DATA,16); + sh4_rio_reg16(); //RTC RSECAR H'FFC8 0020 H'1FC8 0020 8 Held *2 Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RSECAR_addr,RIO_DATA,8); + sh4_rio_reg8(); //RTC RMINAR H'FFC8 0024 H'1FC8 0024 8 Held *2 Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RMINAR_addr,RIO_DATA,8); + sh4_rio_reg8(); //RTC RHRAR H'FFC8 0028 H'1FC8 0028 8 Held *2 Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RHRAR_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RWKAR H'FFC8 002C H'1FC8 002C 8 Held *2 Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RWKAR_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RDAYAR H'FFC8 0030 H'1FC8 0030 8 Held *2 Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RDAYAR_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RMONAR H'FFC8 0034 H'1FC8 0034 8 Held *2 Held Held Held Pclk - sh4_rio_reg(RTC,RTC_RMONAR_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RCR1 H'FFC8 0038 H'1FC8 0038 8 H'00*2 H'00*2 Held Held Pclk - sh4_rio_reg(RTC,RTC_RCR1_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //RTC RCR2 H'FFC8 003C H'1FC8 003C 8 H'09*2 H'00*2 Held Held Pclk - sh4_rio_reg(RTC,RTC_RCR2_addr,RIO_DATA,8); + sh4_rio_reg8(); } void rtc_reset() @@ -78,10 +78,10 @@ void rtc_reset() RTC RCR1 H'FFC8 0038 H'1FC8 0038 8 H'00*2 H'00*2 Held Held Pclk RTC RCR2 H'FFC8 003C H'1FC8 003C 8 H'09*2 H'00*2 Held Held Pclk */ - RTC_RCR1=0x00; - RTC_RCR2=0x09; + RTC_RCR1 = 0; + RTC_RCR2 = 9; } void rtc_term() { -} \ No newline at end of file +} diff --git a/core/hw/sh4/modules/serial.cpp b/core/hw/sh4/modules/serial.cpp index 1a7689e4c..4e9cdf949 100644 --- a/core/hw/sh4/modules/serial.cpp +++ b/core/hw/sh4/modules/serial.cpp @@ -98,7 +98,7 @@ static void WriteSerialStatus(u32 addr,u32 
data) if (!SCIF_SCFSR2.BRK) data &= ~0x10; - SCIF_SCFSR2.full = data & ~3; + SCIF_SCFSR2.full = data & 0x00f0; SCIF_SCFSR2.TDFE = 1; SCIF_SCFSR2.TEND = 1; @@ -134,7 +134,7 @@ static u32 SCSCR2_read(u32 addr) static void SCSCR2_write(u32 addr, u32 data) { - SCIF_SCSCR2.full = data; + SCIF_SCSCR2.full = data & 0x00fa; Serial_UpdateInterrupts(); } @@ -204,46 +204,46 @@ void serial_init() // Serial Communication Interface with FIFO //SCIF SCSMR2 0xFFE80000 0x1FE80000 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCSMR2_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //SCIF SCBRR2 0xFFE80004 0x1FE80004 8 0xFF 0xFF Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCBRR2_addr,RIO_DATA,8); + sh4_rio_reg8(); //SCIF SCSCR2 0xFFE80008 0x1FE80008 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(SCIF, SCIF_SCSCR2_addr, RIO_FUNC, 16, &SCSCR2_read, &SCSCR2_write); + sh4_rio_reg(SCIF, SCIF_SCSCR2_addr, RIO_FUNC, SCSCR2_read, SCSCR2_write); //Write only //SCIF SCFTDR2 0xFFE8000C 0x1FE8000C 8 Undefined Undefined Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCFTDR2_addr,RIO_WF,8,0,&SerialWrite); + sh4_rio_reg(SCIF, SCIF_SCFTDR2_addr, RIO_WF, nullptr, SerialWrite); //SCIF SCFSR2 0xFFE80010 0x1FE80010 16 0x0060 0x0060 Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCFSR2_addr,RIO_FUNC,16,&ReadSerialStatus,&WriteSerialStatus); + sh4_rio_reg(SCIF, SCIF_SCFSR2_addr, RIO_FUNC, ReadSerialStatus, WriteSerialStatus); //READ only //SCIF SCFRDR2 0xFFE80014 0x1FE80014 8 Undefined Undefined Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCFRDR2_addr,RIO_RO_FUNC,8,&ReadSerialData); + sh4_rio_reg(SCIF, SCIF_SCFRDR2_addr, RIO_RO_FUNC, ReadSerialData); //SCIF SCFCR2 0xFFE80018 0x1FE80018 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCFCR2_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //Read only //SCIF SCFDR2 0xFFE8001C 0x1FE8001C 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCFDR2_addr,RIO_RO_FUNC,16,&Read_SCFDR2); + sh4_rio_reg(SCIF, SCIF_SCFDR2_addr, RIO_RO_FUNC, Read_SCFDR2); //SCIF SCSPTR2 0xFFE80020 
0x1FE80020 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCSPTR2_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //SCIF SCLSR2 0xFFE80024 0x1FE80024 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(SCIF,SCIF_SCLSR2_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); // Serial Communication Interface - sh4_rio_reg(SCI, SCI_SCSMR1_addr, RIO_DATA, 8); - sh4_rio_reg(SCI, SCI_SCBRR1_addr, RIO_DATA, 8); - sh4_rio_reg(SCI, SCI_SCSCR1_addr, RIO_DATA, 8); - sh4_rio_reg(SCI, SCI_SCTDR1_addr, RIO_DATA, 8); - sh4_rio_reg(SCI, SCI_SCSSR1_addr, RIO_DATA, 8); - sh4_rio_reg(SCI, SCI_SCRDR1_addr, RIO_RO, 8); - sh4_rio_reg(SCI, SCI_SCSPTR1_addr, RIO_DATA, 8); + sh4_rio_reg8(); + sh4_rio_reg8(); + sh4_rio_reg8(); + sh4_rio_reg8(); + sh4_rio_reg_wmask(); + sh4_rio_reg(SCI, SCI_SCRDR1_addr, RIO_RO); + sh4_rio_reg_wmask(); } void serial_reset(bool hard) diff --git a/core/hw/sh4/modules/tmu.cpp b/core/hw/sh4/modules/tmu.cpp index d3d77406c..cc53bacba 100644 --- a/core/hw/sh4/modules/tmu.cpp +++ b/core/hw/sh4/modules/tmu.cpp @@ -15,9 +15,9 @@ u32 tmu_shift[3]; u32 tmu_mask[3]; u64 tmu_mask64[3]; -u32 old_mode[3] = {0xFFFF,0xFFFF,0xFFFF}; +u32 old_mode[3] = { 0xFFFF, 0xFFFF, 0xFFFF}; -static const InterruptID tmu_intID[3]={sh4_TMU0_TUNI0,sh4_TMU1_TUNI1,sh4_TMU2_TUNI2}; +static const InterruptID tmu_intID[3] = { sh4_TMU0_TUNI0, sh4_TMU1_TUNI1, sh4_TMU2_TUNI2 }; int tmu_sched[3]; #if 0 @@ -102,7 +102,7 @@ static void sched_chan_tick(int ch) cycles = SH4_MAIN_CLOCK; if (tmu_mask[ch]) - sh4_sched_request(tmu_sched[ch], cycles ); + sh4_sched_request(tmu_sched[ch], cycles); else sh4_sched_request(tmu_sched[ch], -1); } @@ -117,13 +117,13 @@ static void write_TMU_TCNTch(u32 ch, u32 data) } template -u32 read_TMU_TCNT(u32 addr) +static u32 read_TMU_TCNT(u32 addr) { return read_TMU_TCNTch(ch); } template -void write_TMU_TCNT(u32 addr, u32 data) +static void write_TMU_TCNT(u32 addr, u32 data) { write_TMU_TCNTch(ch,data); } @@ -188,9 +188,12 @@ static void UpdateTMUCounts(u32 reg) //Write to status registers 
template -void TMU_TCR_write(u32 addr, u32 data) +static void TMU_TCR_write(u32 addr, u32 data) { - TMU_TCR(ch)=(u16)data; + if (ch == 2) + TMU_TCR(ch) = data & 0x03ff; + else + TMU_TCR(ch) = data & 0x013f; UpdateTMUCounts(ch); } @@ -208,11 +211,10 @@ static void TMU_TCPR2_write(u32 addr, u32 data) static void write_TMU_TSTR(u32 addr, u32 data) { - TMU_TSTR=data; - //? + TMU_TSTR = data & 7; - for (int i=0;i<3;i++) - turn_on_off_ch(i,data&(1<(); //TMU TSTR 0xFFD80004 0x1FD80004 8 0x00 0x00 Held 0x00 Pclk - sh4_rio_reg(TMU,TMU_TSTR_addr,RIO_WF,8,0,&write_TMU_TSTR); + sh4_rio_reg(TMU, TMU_TSTR_addr, RIO_WF, nullptr, write_TMU_TSTR); //TMU TCOR0 0xFFD80008 0x1FD80008 32 0xFFFFFFFF 0xFFFFFFFF Held Held Pclk - sh4_rio_reg(TMU,TMU_TCOR0_addr,RIO_DATA,32); + sh4_rio_reg(TMU, TMU_TCOR0_addr, RIO_DATA); //TMU TCNT0 0xFFD8000C 0x1FD8000C 32 0xFFFFFFFF 0xFFFFFFFF Held Held Pclk - sh4_rio_reg(TMU,TMU_TCNT0_addr,RIO_FUNC,32,&read_TMU_TCNT<0>,&write_TMU_TCNT<0>); + sh4_rio_reg(TMU, TMU_TCNT0_addr, RIO_FUNC, read_TMU_TCNT<0>, write_TMU_TCNT<0>); //TMU TCR0 0xFFD80010 0x1FD80010 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(TMU,TMU_TCR0_addr,RIO_WF,16,0,&TMU_TCR_write<0>); + sh4_rio_reg(TMU, TMU_TCR0_addr, RIO_WF, nullptr, TMU_TCR_write<0>); //TMU TCOR1 0xFFD80014 0x1FD80014 32 0xFFFFFFFF 0xFFFFFFFF Held Held Pclk - sh4_rio_reg(TMU,TMU_TCOR1_addr,RIO_DATA,32); + sh4_rio_reg(TMU, TMU_TCOR1_addr, RIO_DATA); //TMU TCNT1 0xFFD80018 0x1FD80018 32 0xFFFFFFFF 0xFFFFFFFF Held Held Pclk - sh4_rio_reg(TMU,TMU_TCNT1_addr,RIO_FUNC,32,&read_TMU_TCNT<1>,&write_TMU_TCNT<1>); + sh4_rio_reg(TMU, TMU_TCNT1_addr, RIO_FUNC, read_TMU_TCNT<1>, write_TMU_TCNT<1>); //TMU TCR1 0xFFD8001C 0x1FD8001C 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(TMU,TMU_TCR1_addr,RIO_WF,16,0,&TMU_TCR_write<1>); + sh4_rio_reg(TMU, TMU_TCR1_addr, RIO_WF, nullptr, TMU_TCR_write<1>); //TMU TCOR2 0xFFD80020 0x1FD80020 32 0xFFFFFFFF 0xFFFFFFFF Held Held Pclk - sh4_rio_reg(TMU,TMU_TCOR2_addr,RIO_DATA,32); + sh4_rio_reg(TMU, 
TMU_TCOR2_addr, RIO_DATA); //TMU TCNT2 0xFFD80024 0x1FD80024 32 0xFFFFFFFF 0xFFFFFFFF Held Held Pclk - sh4_rio_reg(TMU,TMU_TCNT2_addr,RIO_FUNC,32,&read_TMU_TCNT<2>,&write_TMU_TCNT<2>); + sh4_rio_reg(TMU, TMU_TCNT2_addr, RIO_FUNC, read_TMU_TCNT<2>, write_TMU_TCNT<2>); //TMU TCR2 0xFFD80028 0x1FD80028 16 0x0000 0x0000 Held Held Pclk - sh4_rio_reg(TMU,TMU_TCR2_addr,RIO_WF,16,0,&TMU_TCR_write<2>); + sh4_rio_reg(TMU, TMU_TCR2_addr, RIO_WF, nullptr, TMU_TCR_write<2>); //TMU TCPR2 0xFFD8002C 0x1FD8002C 32 Held Held Held Held Pclk - sh4_rio_reg(TMU,TMU_TCPR2_addr,RIO_FUNC,32,&TMU_TCPR2_read,&TMU_TCPR2_write); + sh4_rio_reg(TMU, TMU_TCPR2_addr, RIO_FUNC, &TMU_TCPR2_read, &TMU_TCPR2_write); - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 3; i++) tmu_sched[i] = sh4_sched_register(i, &sched_tmu_cb); - sh4_sched_request(tmu_sched[i], -1); - } } @@ -306,7 +306,7 @@ void tmu_reset(bool hard) memset(tmu_ch_base, 0, sizeof(tmu_ch_base)); memset(tmu_ch_base64, 0, sizeof(tmu_ch_base64)); } - TMU_TOCR=TMU_TSTR=0; + TMU_TOCR = TMU_TSTR = 0; TMU_TCOR(0) = TMU_TCOR(1) = TMU_TCOR(2) = 0xffffffff; TMU_TCR(0) = TMU_TCR(1) = TMU_TCR(2) = 0; @@ -314,10 +314,10 @@ void tmu_reset(bool hard) UpdateTMUCounts(1); UpdateTMUCounts(2); - write_TMU_TSTR(0,0); + write_TMU_TSTR(0, 0); - for (int i=0;i<3;i++) - write_TMU_TCNTch(i,0xffffffff); + for (int i = 0; i < 3; i++) + write_TMU_TCNTch(i, 0xffffffff); } void tmu_term() diff --git a/core/hw/sh4/modules/ubc.cpp b/core/hw/sh4/modules/ubc.cpp index 32b28a0dc..8ee1ebf7d 100644 --- a/core/hw/sh4/modules/ubc.cpp +++ b/core/hw/sh4/modules/ubc.cpp @@ -3,37 +3,37 @@ #include "types.h" #include "hw/sh4/sh4_mmr.h" - //Init term res void ubc_init() { //UBC BARA 0xFF200000 0x1F200000 32 Undefined Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BARA_addr,RIO_DATA,32); + sh4_rio_reg(UBC, UBC_BARA_addr, RIO_DATA); //UBC BAMRA 0xFF200004 0x1F200004 8 Undefined Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BAMRA_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //UBC BBRA 
0xFF200008 0x1F200008 16 0x0000 Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BBRA_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //UBC BARB 0xFF20000C 0x1F20000C 32 Undefined Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BARB_addr,RIO_DATA,32); + sh4_rio_reg(UBC, UBC_BARB_addr, RIO_DATA); //UBC BAMRB 0xFF200010 0x1F200010 8 Undefined Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BAMRB_addr,RIO_DATA,8); + sh4_rio_reg_wmask(); //UBC BBRB 0xFF200014 0x1F200014 16 0x0000 Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BBRB_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); //UBC BDRB 0xFF200018 0x1F200018 32 Undefined Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BDRB_addr,RIO_DATA,32); + sh4_rio_reg(UBC, UBC_BDRB_addr, RIO_DATA); //UBC BDMRB 0xFF20001C 0x1F20001C 32 Undefined Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BDMRB_addr,RIO_DATA,32); + sh4_rio_reg(UBC, UBC_BDMRB_addr, RIO_DATA); //UBC BRCR 0xFF200020 0x1F200020 16 0x0000 Held Held Held Iclk - sh4_rio_reg(UBC,UBC_BRCR_addr,RIO_DATA,16); + sh4_rio_reg_wmask(); } + void ubc_reset() { /* diff --git a/core/hw/sh4/sh4_mmr.cpp b/core/hw/sh4/sh4_mmr.cpp index e4480b526..f4faa2552 100644 --- a/core/hw/sh4/sh4_mmr.cpp +++ b/core/hw/sh4/sh4_mmr.cpp @@ -17,149 +17,239 @@ std::array OnChipRAM; //All registers are 4 byte aligned -std::array CCN; -std::array UBC; -std::array BSC; -std::array DMAC; -std::array CPG; -std::array RTC; -std::array INTC; -std::array TMU; -std::array SCI; -std::array SCIF; +RegisterStruct CCN[18]; +RegisterStruct UBC[9]; +RegisterStruct BSC[19]; +RegisterStruct DMAC[17]; +RegisterStruct CPG[5]; +RegisterStruct RTC[16]; +RegisterStruct INTC[5]; +RegisterStruct TMU[12]; +RegisterStruct SCI[8]; +RegisterStruct SCIF[10]; static u32 sh4io_read_noacc(u32 addr) { - INFO_LOG(SH4, "sh4io: Invalid read access @@ %08X", addr); + INFO_LOG(SH4, "sh4io: Invalid read access @ %08X", addr); return 0; } + static void sh4io_write_noacc(u32 addr, u32 data) { - INFO_LOG(SH4, "sh4io: Invalid write access @@ %08X %08X", addr, data); + INFO_LOG(SH4, 
"sh4io: Invalid write access @ %08X %08X", addr, data); } + static void sh4io_write_const(u32 addr, u32 data) { - INFO_LOG(SH4, "sh4io: Const write ignored @@ %08X <- %08X", addr, data); + INFO_LOG(SH4, "sh4io: Const write ignored @ %08X <- %08X", addr, data); } -template -void sh4_rio_reg(T& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf) +void sh4_rio_reg(RegisterStruct *arr, u32 addr, RegIO flags, RegReadAddrFP* rf, RegWriteAddrFP* wf) { - u32 idx=(addr&255)/4; - - verify(idx < arr.size()); + u32 idx = (addr & 255) / 4; arr[idx].flags = flags; if (flags == RIO_NO_ACCESS) { - arr[idx].readFunctionAddr=&sh4io_read_noacc; - arr[idx].writeFunctionAddr=&sh4io_write_noacc; + arr[idx].readFunctionAddr = sh4io_read_noacc; + arr[idx].writeFunctionAddr = sh4io_write_noacc; } - else if (flags == RIO_CONST) + else if (flags == RIO_RO) { - arr[idx].writeFunctionAddr=&sh4io_write_const; + arr[idx].writeFunctionAddr = sh4io_write_const; + arr[idx].data32 = 0; } else { - arr[idx].data32=0; - if (flags & REG_RF) - arr[idx].readFunctionAddr=rf; + arr[idx].readFunctionAddr = rf; + else + arr[idx].data32 = 0; if (flags & REG_WF) - arr[idx].writeFunctionAddr=wf==0?&sh4io_write_noacc:wf; + arr[idx].writeFunctionAddr = wf == nullptr ? 
&sh4io_write_noacc : wf; } } -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template void sh4_rio_reg(std::array& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf, RegWriteAddrFP* wf); -template -u32 sh4_rio_read(T& regs, u32 addr) +template +T sh4_rio_read(RegisterStruct *regs, u32 addr) { - u32 offset = addr&255; + u32 offset = addr & 255; #ifdef TRACE - if (offset & 3/*(size-1)*/) //4 is min align size + if (offset & 3) //4 is min align size { - INFO_LOG(SH4, "Unaligned System Bus register read"); + WARN_LOG(SH4, "Unaligned System Bus register read @ %x", addr); } #endif - offset>>=2; + offset >>= 2; -#ifdef TRACE - if (regs[offset].flags & sz) - { -#endif - if (!(regs[offset].flags & REG_RF) ) - { - if (sz==4) - return regs[offset].data32; - else if (sz==2) - return regs[offset].data16; - else - return regs[offset].data8; - } - else - { - return regs[offset].readFunctionAddr(addr); - } -#ifdef TRACE - } + if (!(regs[offset].flags & REG_RF)) + return (T)regs[offset].data32; else - { - INFO_LOG(SH4, "ERROR [wrong size read on register]"); - } -#endif - return 0; + return 
(T)regs[offset].readFunctionAddr(addr); } -template -void sh4_rio_write(T& regs, u32 addr, u32 data) +template +void sh4_rio_write(RegisterStruct *regs, u32 addr, T data) { - u32 offset = addr&255; + u32 offset = addr & 255; #ifdef TRACE - if (offset & 3/*(size-1)*/) //4 is min align size + if (offset & 3) //4 is min align size { - INFO_LOG(SH4, "Unaligned System bus register write"); + WARN_LOG(SH4, "Unaligned System bus register write @ %x", addr); } #endif -offset>>=2; -#ifdef TRACE - if (regs[offset].flags & sz) - { -#endif - if (!(regs[offset].flags & REG_WF) ) - { - if (sz==4) - regs[offset].data32=data; - else if (sz==2) - regs[offset].data16=(u16)data; - else - regs[offset].data8=(u8)data; - return; - } - else - { - //printf("RSW: %08X\n",addr); - regs[offset].writeFunctionAddr(addr,data); - return; - } -#ifdef TRACE + offset >>= 2; + + if (!(regs[offset].flags & REG_WF)) + regs[offset].data32 = data; + else + regs[offset].writeFunctionAddr(addr, data); +} + +#define SH4_REG_NAME(r) { r##_addr, #r }, +const std::map sh4_reg_names = { + SH4_REG_NAME(CCN_PTEH) + SH4_REG_NAME(CCN_PTEL) + SH4_REG_NAME(CCN_TTB) + SH4_REG_NAME(CCN_TEA) + SH4_REG_NAME(CCN_CCR) + SH4_REG_NAME(CCN_TRA) + SH4_REG_NAME(CCN_EXPEVT) + SH4_REG_NAME(CCN_INTEVT) + SH4_REG_NAME(CPU_VERSION) + SH4_REG_NAME(CCN_PTEA) + SH4_REG_NAME(CCN_QACR0) + SH4_REG_NAME(CCN_QACR1) + SH4_REG_NAME(CCN_PRR) + + SH4_REG_NAME(UBC_BARA) + SH4_REG_NAME(UBC_BAMRA) + SH4_REG_NAME(UBC_BBRA) + SH4_REG_NAME(UBC_BARB) + SH4_REG_NAME(UBC_BAMRB) + SH4_REG_NAME(UBC_BBRB) + SH4_REG_NAME(UBC_BDRB) + SH4_REG_NAME(UBC_BDMRB) + SH4_REG_NAME(UBC_BRCR) + + SH4_REG_NAME(BSC_BCR1) + SH4_REG_NAME(BSC_BCR2) + SH4_REG_NAME(BSC_WCR1) + SH4_REG_NAME(BSC_WCR2) + SH4_REG_NAME(BSC_WCR3) + SH4_REG_NAME(BSC_MCR) + SH4_REG_NAME(BSC_PCR) + SH4_REG_NAME(BSC_RTCSR) + SH4_REG_NAME(BSC_RTCNT) + SH4_REG_NAME(BSC_RTCOR) + SH4_REG_NAME(BSC_RFCR) + SH4_REG_NAME(BSC_PCTRA) + SH4_REG_NAME(BSC_PDTRA) + SH4_REG_NAME(BSC_PCTRB) + SH4_REG_NAME(BSC_PDTRB) 
+ SH4_REG_NAME(BSC_GPIOIC) + SH4_REG_NAME(BSC_SDMR2) + SH4_REG_NAME(BSC_SDMR3) + + SH4_REG_NAME(DMAC_SAR0) + SH4_REG_NAME(DMAC_DAR0) + SH4_REG_NAME(DMAC_DMATCR0) + SH4_REG_NAME(DMAC_CHCR0) + SH4_REG_NAME(DMAC_SAR1) + SH4_REG_NAME(DMAC_DAR1) + SH4_REG_NAME(DMAC_DMATCR1) + SH4_REG_NAME(DMAC_CHCR1) + SH4_REG_NAME(DMAC_SAR2) + SH4_REG_NAME(DMAC_DAR2) + SH4_REG_NAME(DMAC_DMATCR2) + SH4_REG_NAME(DMAC_CHCR2) + SH4_REG_NAME(DMAC_SAR3) + SH4_REG_NAME(DMAC_DAR3) + SH4_REG_NAME(DMAC_DMATCR3) + SH4_REG_NAME(DMAC_CHCR3) + SH4_REG_NAME(DMAC_DMAOR) + + SH4_REG_NAME(CPG_FRQCR) + SH4_REG_NAME(CPG_STBCR) + SH4_REG_NAME(CPG_WTCNT) + SH4_REG_NAME(CPG_WTCSR) + SH4_REG_NAME(CPG_STBCR2) + + SH4_REG_NAME(RTC_R64CNT) + SH4_REG_NAME(RTC_RSECCNT) + SH4_REG_NAME(RTC_RMINCNT) + SH4_REG_NAME(RTC_RHRCNT) + SH4_REG_NAME(RTC_RWKCNT) + SH4_REG_NAME(RTC_RDAYCNT) + SH4_REG_NAME(RTC_RMONCNT) + SH4_REG_NAME(RTC_RYRCNT) + SH4_REG_NAME(RTC_RSECAR) + SH4_REG_NAME(RTC_RMINAR) + SH4_REG_NAME(RTC_RHRAR) + SH4_REG_NAME(RTC_RWKAR) + SH4_REG_NAME(RTC_RDAYAR) + SH4_REG_NAME(RTC_RMONAR) + SH4_REG_NAME(RTC_RCR1) + SH4_REG_NAME(RTC_RCR2) + + SH4_REG_NAME(INTC_ICR) + SH4_REG_NAME(INTC_IPRA) + SH4_REG_NAME(INTC_IPRB) + SH4_REG_NAME(INTC_IPRC) + SH4_REG_NAME(INTC_IPRD) + + SH4_REG_NAME(TMU_TOCR) + SH4_REG_NAME(TMU_TSTR) + SH4_REG_NAME(TMU_TCOR0) + SH4_REG_NAME(TMU_TCNT0) + SH4_REG_NAME(TMU_TCR0) + SH4_REG_NAME(TMU_TCOR1) + SH4_REG_NAME(TMU_TCNT1) + SH4_REG_NAME(TMU_TCR1) + SH4_REG_NAME(TMU_TCOR2) + SH4_REG_NAME(TMU_TCNT2) + SH4_REG_NAME(TMU_TCR2) + SH4_REG_NAME(TMU_TCPR2) + + SH4_REG_NAME(SCI_SCSMR1) + SH4_REG_NAME(SCI_SCBRR1) + SH4_REG_NAME(SCI_SCSCR1) + SH4_REG_NAME(SCI_SCTDR1) + SH4_REG_NAME(SCI_SCSSR1) + SH4_REG_NAME(SCI_SCRDR1) + SH4_REG_NAME(SCI_SCSCMR1) + SH4_REG_NAME(SCI_SCSPTR1) + + SH4_REG_NAME(SCIF_SCSMR2) + SH4_REG_NAME(SCIF_SCBRR2) + SH4_REG_NAME(SCIF_SCSCR2) + SH4_REG_NAME(SCIF_SCFTDR2) + SH4_REG_NAME(SCIF_SCFSR2) + SH4_REG_NAME(SCIF_SCFRDR2) + SH4_REG_NAME(SCIF_SCFCR2) + SH4_REG_NAME(SCIF_SCFDR2) + 
SH4_REG_NAME(SCIF_SCSPTR2) + SH4_REG_NAME(SCIF_SCLSR2) + + SH4_REG_NAME(UDI_SDIR) + SH4_REG_NAME(UDI_SDDR) +}; +#undef SH4_REG_NAME + +static const char *regName(u32 paddr) +{ + u32 addr = paddr & 0x1fffffff; + static char regName[32]; + auto it = sh4_reg_names.find(addr); + if (it == sh4_reg_names.end()) { + sprintf(regName, "?%08x", paddr); + return regName; } else - { - INFO_LOG(SH4, "ERROR: Wrong size write on register - offset=%x, data=%x, size=%d",offset,data,sz); - } -#endif - + return it->second; } //Region P4 @@ -168,7 +258,7 @@ template T DYNACALL ReadMem_P4(u32 addr) { constexpr size_t sz = sizeof(T); - switch((addr>>24)&0xFF) + switch ((addr >> 24) & 0xFF) { case 0xE0: @@ -194,13 +284,13 @@ T DYNACALL ReadMem_P4(u32 addr) case 0xF2: { - u32 entry=(addr>>8)&3; - return ITLB[entry].Address.reg_data | (ITLB[entry].Data.V<<8); + u32 entry = (addr >> 8) & 3; + return ITLB[entry].Address.reg_data | (ITLB[entry].Data.V << 8); } case 0xF3: { - u32 entry=(addr>>8)&3; + u32 entry = (addr >> 8) & 3; return ITLB[entry].Data.reg_data; } @@ -220,16 +310,16 @@ T DYNACALL ReadMem_P4(u32 addr) case 0xF6: { - u32 entry=(addr>>8)&63; - u32 rv=UTLB[entry].Address.reg_data; - rv|=UTLB[entry].Data.D<<9; - rv|=UTLB[entry].Data.V<<8; + u32 entry = (addr >> 8) & 63; + u32 rv = UTLB[entry].Address.reg_data; + rv |= UTLB[entry].Data.D << 9; + rv |= UTLB[entry].Data.V << 8; return rv; } case 0xF7: { - u32 entry=(addr>>8)&63; + u32 entry = (addr >> 8) & 63; return UTLB[entry].Data.reg_data; } @@ -251,9 +341,8 @@ template void DYNACALL WriteMem_P4(u32 addr,T data) { constexpr size_t sz = sizeof(T); - switch((addr>>24)&0xFF) + switch ((addr >> 24) & 0xFF) { - case 0xE0: case 0xE1: case 0xE2: @@ -275,28 +364,23 @@ void DYNACALL WriteMem_P4(u32 addr,T data) case 0xF2: { - u32 entry=(addr>>8)&3; - ITLB[entry].Address.reg_data=data & 0xFFFFFCFF; - ITLB[entry].Data.V=(data>>8) & 1; + u32 entry = (addr >> 8) & 3; + ITLB[entry].Address.reg_data = data & 0xFFFFFCFF; + ITLB[entry].Data.V = 
(data >> 8) & 1; ITLB_Sync(entry); - return; } + return; case 0xF3: { - u32 entry=(addr>>8)&3; - if (addr&0x800000) - { + u32 entry = (addr >> 8) & 3; + if (addr & 0x800000) ITLB[entry].Assistance.reg_data = data & 0xf; - } else - { ITLB[entry].Data.reg_data=data; - } ITLB_Sync(entry); - - return; } + return; case 0xF4: // DEBUG_LOG(SH4, "OC Address write %08x = %x", addr, data); @@ -311,61 +395,53 @@ void DYNACALL WriteMem_P4(u32 addr,T data) return; case 0xF6: + if (addr & 0x80) { - if (addr&0x80) + CCN_PTEH_type t; + t.reg_data = data; + + u32 va = t.VPN << 10; + + for (int i = 0; i < 64; i++) { - CCN_PTEH_type t; - t.reg_data=data; - - u32 va=t.VPN<<10; - - for (int i=0;i<64;i++) + if (mmu_match(va, UTLB[i].Address, UTLB[i].Data)) { - if (mmu_match(va,UTLB[i].Address,UTLB[i].Data)) - { - UTLB[i].Data.V=((u32)data>>8)&1; - UTLB[i].Data.D=((u32)data>>9)&1; - UTLB_Sync(i); - } - } - - for (int i=0;i<4;i++) - { - if (mmu_match(va,ITLB[i].Address,ITLB[i].Data)) - { - ITLB[i].Data.V=((u32)data>>8)&1; - ITLB[i].Data.D=((u32)data>>9)&1; - ITLB_Sync(i); - } + UTLB[i].Data.V = ((u32)data >> 8) & 1; + UTLB[i].Data.D = ((u32)data >> 9) & 1; + UTLB_Sync(i); } } - else + + for (int i = 0; i < 4; i++) { - u32 entry=(addr>>8)&63; - UTLB[entry].Address.reg_data=data & 0xFFFFFCFF; - UTLB[entry].Data.D=(data>>9)&1; - UTLB[entry].Data.V=(data>>8)&1; - UTLB_Sync(entry); + if (mmu_match(va, ITLB[i].Address, ITLB[i].Data)) + { + ITLB[i].Data.V = ((u32)data >> 8) & 1; + ITLB[i].Data.D = ((u32)data >> 9) & 1; + ITLB_Sync(i); + } } - return; } - break; + else + { + u32 entry = (addr >> 8) & 63; + UTLB[entry].Address.reg_data = data & 0xFFFFFCFF; + UTLB[entry].Data.D = (data >> 9) & 1; + UTLB[entry].Data.V = (data >> 8) & 1; + UTLB_Sync(entry); + } + return; case 0xF7: { - u32 entry=(addr>>8)&63; - if (addr&0x800000) - { + u32 entry = (addr >> 8) & 63; + if (addr & 0x800000) UTLB[entry].Assistance.reg_data = data & 0xf; - } else - { - UTLB[entry].Data.reg_data=data; - } + 
UTLB[entry].Data.reg_data = data; UTLB_Sync(entry); - - return; } + return; case 0xFF: INFO_LOG(SH4, "Unhandled p4 Write [area7] 0x%x = %x", addr, data); @@ -388,33 +464,27 @@ void DYNACALL WriteMem_P4(u32 addr,T data) template T DYNACALL ReadMem_p4mmr(u32 addr) { - constexpr size_t sz = sizeof(T); - /* - if (likely(addr==0xffd80024)) - { - return TMU_TCNT(2); - } - else if (likely(addr==0xFFD8000C)) - { - return TMU_TCNT(0); - } - else */if (likely(addr==0xFF000028)) - { - return CCN_INTEVT; - } - else if (likely(addr==0xFFA0002C)) - { - return DMAC_CHCR(2).full; - } + DEBUG_LOG(SH4, "read %s", regName(addr)); - addr&=0x1FFFFFFF; - u32 map_base=addr>>16; + /* + if (likely(addr == 0xffd80024)) + return TMU_TCNT(2); + if (likely(addr == 0xFFD8000C)) + return TMU_TCNT(0); + */ + if (likely(addr == 0xFF000028)) + return (T)CCN_INTEVT; + if (likely(addr == 0xFFA0002C)) + return (T)DMAC_CHCR(2).full; + + addr &= 0x1FFFFFFF; + u32 map_base = addr >> 16; switch (expected(map_base, A7_REG_HASH(TMU_BASE_addr))) { case A7_REG_HASH(CCN_BASE_addr): - if (addr<=0x1F000044) + if (addr <= 0x1F000044) { - return (T)sh4_rio_read(CCN, addr); + return sh4_rio_read(CCN, addr); } else { @@ -424,9 +494,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(UBC_BASE_addr): - if (addr<=0x1F200020) + if (addr <= 0x1F200020) { - return (T)sh4_rio_read(UBC, addr); + return sh4_rio_read(UBC, addr); } else { @@ -436,9 +506,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(BSC_BASE_addr): - if (addr<=0x1F800048) + if (addr <= 0x1F800048) { - return (T)sh4_rio_read(BSC, addr); + return sh4_rio_read(BSC, addr); } else { @@ -457,9 +527,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) return 0; case A7_REG_HASH(DMAC_BASE_addr): - if (addr<=0x1FA00040) + if (addr <= 0x1FA00040) { - return (T)sh4_rio_read(DMAC, addr); + return sh4_rio_read(DMAC, addr); } else { @@ -469,9 +539,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(CPG_BASE_addr): - if (addr<=0x1FC00010) + if 
(addr <= 0x1FC00010) { - return (T)sh4_rio_read(CPG, addr); + return sh4_rio_read(CPG, addr); } else { @@ -481,9 +551,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(RTC_BASE_addr): - if (addr<=0x1FC8003C) + if (addr <= 0x1FC8003C) { - return (T)sh4_rio_read(RTC, addr); + return sh4_rio_read(RTC, addr); } else { @@ -493,9 +563,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(INTC_BASE_addr): - if (addr<=0x1FD00010) + if (addr <= 0x1FD00010) { - return (T)sh4_rio_read(INTC, addr); + return sh4_rio_read(INTC, addr); } else { @@ -505,9 +575,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(TMU_BASE_addr): - if (addr<=0x1FD8002C) + if (addr <= 0x1FD8002C) { - return (T)sh4_rio_read(TMU, addr); + return sh4_rio_read(TMU, addr); } else { @@ -517,9 +587,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(SCI_BASE_addr): - if (addr<=0x1FE0001C) + if (addr <= 0x1FE0001C) { - return (T)sh4_rio_read(SCI, addr); + return sh4_rio_read(SCI, addr); } else { @@ -529,9 +599,9 @@ T DYNACALL ReadMem_p4mmr(u32 addr) break; case A7_REG_HASH(SCIF_BASE_addr): - if (addr<=0x1FE80024) + if (addr <= 0x1FE80024) { - return (T)sh4_rio_read(SCIF, addr); + return sh4_rio_read(SCIF, addr); } else { @@ -562,56 +632,45 @@ T DYNACALL ReadMem_p4mmr(u32 addr) //Write P4 memory-mapped registers template -void DYNACALL WriteMem_p4mmr(u32 addr,T data) +void DYNACALL WriteMem_p4mmr(u32 addr, T data) { - constexpr size_t sz = sizeof(T); - if (likely(addr==0xFF000038)) + DEBUG_LOG(SH4, "write %s = %x", regName(addr), (int)data); + + if (likely(addr == 0xFF000038)) { - CCN_QACR_write<0>(addr,data); + CCN_QACR_write<0>(addr, data); return; } - else if (likely(addr==0xFF00003C)) + if (likely(addr == 0xFF00003C)) { - CCN_QACR_write<1>(addr,data); + CCN_QACR_write<1>(addr, data); return; } - addr&=0x1FFFFFFF; - u32 map_base=addr>>16; + addr &= 0x1FFFFFFF; + u32 map_base = addr >> 16; switch (map_base) { case A7_REG_HASH(CCN_BASE_addr): - if 
(addr<=0x1F00003C) - { - sh4_rio_write(CCN, addr, data); - } + if (addr <= 0x1F00003C) + sh4_rio_write(CCN, addr, data); else - { OUT_OF_RANGE("CCN"); - } return; case A7_REG_HASH(UBC_BASE_addr): - if (addr<=0x1F200020) - { - sh4_rio_write(UBC, addr, data); - } + if (addr <= 0x1F200020) + sh4_rio_write(UBC, addr, data); else - { OUT_OF_RANGE("UBC"); - } return; case A7_REG_HASH(BSC_BASE_addr): - if (addr<=0x1F800048) - { - sh4_rio_write(BSC, addr, data); - } + if (addr <= 0x1F800048) + sh4_rio_write(BSC, addr, data); else - { OUT_OF_RANGE("BSC"); - } return; case A7_REG_HASH(BSC_SDMR2_addr): //dram settings 2 / write only @@ -622,80 +681,52 @@ void DYNACALL WriteMem_p4mmr(u32 addr,T data) return; case A7_REG_HASH(DMAC_BASE_addr): - if (addr<=0x1FA00040) - { - sh4_rio_write(DMAC, addr, data); - } + if (addr <= 0x1FA00040) + sh4_rio_write(DMAC, addr, data); else - { OUT_OF_RANGE("DMAC"); - } return; case A7_REG_HASH(CPG_BASE_addr): - if (addr<=0x1FC00010) - { - sh4_rio_write(CPG, addr, data); - } + if (addr <= 0x1FC00010) + sh4_rio_write(CPG, addr, data); else - { OUT_OF_RANGE("CPG"); - } return; case A7_REG_HASH(RTC_BASE_addr): - if (addr<=0x1FC8003C) - { - sh4_rio_write(RTC, addr, data); - } + if (addr <= 0x1FC8003C) + sh4_rio_write(RTC, addr, data); else - { OUT_OF_RANGE("RTC"); - } return; case A7_REG_HASH(INTC_BASE_addr): - if (addr<=0x1FD0000C) - { - sh4_rio_write(INTC, addr, data); - } + if (addr <= 0x1FD00010) + sh4_rio_write(INTC, addr, data); else - { OUT_OF_RANGE("INTC"); - } return; case A7_REG_HASH(TMU_BASE_addr): - if (addr<=0x1FD8002C) - { - sh4_rio_write(TMU, addr, data); - } + if (addr <= 0x1FD8002C) + sh4_rio_write(TMU, addr, data); else - { OUT_OF_RANGE("TMU"); - } return; case A7_REG_HASH(SCI_BASE_addr): - if (addr<=0x1FE0001C) - { - sh4_rio_write(SCI, addr, data); - } + if (addr <= 0x1FE0001C) + sh4_rio_write(SCI, addr, data); else - { OUT_OF_RANGE("SCI"); - } return; case A7_REG_HASH(SCIF_BASE_addr): - if (addr<=0x1FE80024) - { - 
sh4_rio_write(SCIF, addr, data); - } + if (addr <= 0x1FE80024) + sh4_rio_write(SCIF, addr, data); else - { OUT_OF_RANGE("SCIF"); - } return; //who really cares about ht-udi ? it's not existent on dc iirc .. @@ -858,13 +889,13 @@ void map_p4() //register this before mmr and SQ so they overwrite it and handle em //default P4 handler //0xE0000000-0xFFFFFFFF - _vmem_map_handler(p4_handler,0xE0,0xFF); + _vmem_map_handler(p4_handler, 0xE0, 0xFF); //Store Queues -- Write only 32bit - _vmem_map_block(sq_both,0xE0,0xE0,63); - _vmem_map_block(sq_both,0xE1,0xE1,63); - _vmem_map_block(sq_both,0xE2,0xE2,63); - _vmem_map_block(sq_both,0xE3,0xE3,63); + _vmem_map_block(sq_both, 0xE0, 0xE0, 63); + _vmem_map_block(sq_both, 0xE1, 0xE1, 63); + _vmem_map_block(sq_both, 0xE2, 0xE2, 63); + _vmem_map_block(sq_both, 0xE3, 0xE3, 63); _vmem_map_handler(p4mmr_handler, 0xFF, 0xFF); } diff --git a/core/hw/sh4/sh4_mmr.h b/core/hw/sh4/sh4_mmr.h index fa19d1975..fac9cee81 100644 --- a/core/hw/sh4/sh4_mmr.h +++ b/core/hw/sh4/sh4_mmr.h @@ -1,5 +1,4 @@ #pragma once -#include #include "types.h" #include "sh4_if.h" #include "hw/hwreg.h" @@ -14,23 +13,22 @@ void map_p4(); #define sq_both (sh4rcb.sq_buffer) -extern std::array CCN; -extern std::array UBC; -extern std::array BSC; -extern std::array DMAC; -extern std::array CPG; -extern std::array RTC; -extern std::array INTC; -extern std::array TMU; -extern std::array SCI; -extern std::array SCIF; +extern RegisterStruct CCN[18]; +extern RegisterStruct UBC[9]; +extern RegisterStruct BSC[19]; +extern RegisterStruct DMAC[17]; +extern RegisterStruct CPG[5]; +extern RegisterStruct RTC[16]; +extern RegisterStruct INTC[5]; +extern RegisterStruct TMU[12]; +extern RegisterStruct SCI[8]; +extern RegisterStruct SCIF[10]; void sh4_mmr_init(); void sh4_mmr_reset(bool hard); void sh4_mmr_term(); -template -void sh4_rio_reg(T& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf=0, RegWriteAddrFP* wf=0); +void sh4_rio_reg(RegisterStruct *arr, u32 addr, RegIO flags, 
RegReadAddrFP *rf = nullptr, RegWriteAddrFP *wf = nullptr); #define SH4IO_REGN(mod, addr, size) ((mod)[((addr) & 255) / 4].data##size) #define SH4IO_REG(mod, name, size) SH4IO_REGN(mod, mod##_##name##_addr, size) @@ -39,6 +37,30 @@ void sh4_rio_reg(T& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rf=0, Reg #define SH4IO_REG_OFS(mod, name, o, s, size) SH4IO_REGN(mod, mod##_##name##0_addr + (o) * (s), size) #define SH4IO_REG_T_OFS(mod, name, o, s, size) ((mod##_##name##_type&)SH4IO_REG_OFS(mod, name, o, s, size)) +template +void sh4_write_reg(u32 addr, u32 data) +{ + SH4IO_REGN(Module, Addr, 32) = (data & Mask) | OrMask; +} + +template +void sh4_rio_reg_wmask() +{ + sh4_rio_reg(Module, Addr, RIO_WF, nullptr, sh4_write_reg); +}; + +template +void sh4_rio_reg16() +{ + sh4_rio_reg_wmask(); +}; + +template +void sh4_rio_reg8() +{ + sh4_rio_reg_wmask(); +}; + //CCN module registers base #define CCN_BASE_addr 0x1F000000 diff --git a/core/hw/sh4/sh4_sched.cpp b/core/hw/sh4/sh4_sched.cpp index 34ddfd14c..52c606c17 100755 --- a/core/hw/sh4/sh4_sched.cpp +++ b/core/hw/sh4/sh4_sched.cpp @@ -164,7 +164,6 @@ void sh4_sched_tick(int cycles) for (sched_list& sched : sch_list) { int remaining = sh4_sched_remaining(sched, fztime); - verify(remaining >= 0 || remaining == -1); if (remaining >= 0 && remaining <= (int)cycles) handle_cb(sched); } From a0529d1cfcbce398d84a9b0aefb8dccf2b4aeb9d Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 6 Dec 2022 18:09:43 +0100 Subject: [PATCH 19/34] drop support for legacy .reicast config dir (linux, macOS) --- core/linux-dist/main.cpp | 128 ++++-------------- .../emulator-osx/emulator-osx/osx-main.mm | 4 +- 2 files changed, 31 insertions(+), 101 deletions(-) diff --git a/core/linux-dist/main.cpp b/core/linux-dist/main.cpp index 2443e61fb..4753b5370 100644 --- a/core/linux-dist/main.cpp +++ b/core/linux-dist/main.cpp @@ -109,51 +109,31 @@ void os_CreateWindow() void common_linux_setup(); // Find the user config directory. 
-// The following folders are checked in this order: -// $HOME/.reicast -// $HOME/.config/flycast -// $HOME/.config/reicast -// If no folder exists, $HOME/.config/flycast is created and used. +// $HOME/.config/flycast on linux std::string find_user_config_dir() { #ifdef __SWITCH__ flycast::mkdir("/flycast", 0755); return "/flycast/"; #else - struct stat info; std::string xdg_home; - if (nowide::getenv("HOME") != NULL) - { - // Support for the legacy config dir at "$HOME/.reicast" - std::string legacy_home = (std::string)nowide::getenv("HOME") + "/.reicast/"; - if (flycast::stat(legacy_home.c_str(), &info) == 0 && (info.st_mode & S_IFDIR)) - // "$HOME/.reicast" already exists, let's use it! - return legacy_home; - + if (nowide::getenv("XDG_CONFIG_HOME") != nullptr) + // If XDG_CONFIG_HOME is set explicitly, we'll use that instead of $HOME/.config + xdg_home = (std::string)nowide::getenv("XDG_CONFIG_HOME"); + else if (nowide::getenv("HOME") != nullptr) /* If $XDG_CONFIG_HOME is not set, we're supposed to use "$HOME/.config" instead. 
* Consult the XDG Base Directory Specification for details: * http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html#variables */ xdg_home = (std::string)nowide::getenv("HOME") + "/.config"; - } - if (nowide::getenv("XDG_CONFIG_HOME") != NULL) - // If XDG_CONFIG_HOME is set explicitly, we'll use that instead of $HOME/.config - xdg_home = (std::string)nowide::getenv("XDG_CONFIG_HOME"); if (!xdg_home.empty()) { std::string fullpath = xdg_home + "/flycast/"; - if (flycast::stat(fullpath.c_str(), &info) == 0 && (info.st_mode & S_IFDIR)) - // Found .config/flycast - return fullpath; - fullpath = xdg_home + "/reicast/"; - if (flycast::stat(fullpath.c_str(), &info) == 0 && (info.st_mode & S_IFDIR)) - // Found .config/reicast - return fullpath; - - // Create .config/flycast - fullpath = xdg_home + "/flycast/"; - flycast::mkdir(fullpath.c_str(), 0755); + struct stat info; + if (flycast::stat(fullpath.c_str(), &info) != 0 || (info.st_mode & S_IFDIR) == 0) + // Create .config/flycast + flycast::mkdir(fullpath.c_str(), 0755); return fullpath; } @@ -163,51 +143,31 @@ std::string find_user_config_dir() } // Find the user data directory. -// The following folders are checked in this order: -// $HOME/.reicast/data -// $HOME/.local/share/flycast -// $HOME/.local/share/reicast -// If no folder exists, $HOME/.local/share/flycast is created and used. +// $HOME/.local/share/flycast on linux std::string find_user_data_dir() { #ifdef __SWITCH__ flycast::mkdir("/flycast/data", 0755); return "/flycast/data/"; #else - struct stat info; std::string xdg_home; - if (nowide::getenv("HOME") != NULL) - { - // Support for the legacy config dir at "$HOME/.reicast/data" - std::string legacy_data = (std::string)nowide::getenv("HOME") + "/.reicast/data/"; - if (flycast::stat(legacy_data.c_str(), &info) == 0 && (info.st_mode & S_IFDIR)) - // "$HOME/.reicast/data" already exists, let's use it! 
- return legacy_data; - + if (nowide::getenv("XDG_DATA_HOME") != nullptr) + // If XDG_DATA_HOME is set explicitly, we'll use that instead of $HOME/.local/share + xdg_home = (std::string)nowide::getenv("XDG_DATA_HOME"); + else if (nowide::getenv("HOME") != nullptr) /* If $XDG_DATA_HOME is not set, we're supposed to use "$HOME/.local/share" instead. * Consult the XDG Base Directory Specification for details: * http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html#variables */ xdg_home = (std::string)nowide::getenv("HOME") + "/.local/share"; - } - if (nowide::getenv("XDG_DATA_HOME") != NULL) - // If XDG_DATA_HOME is set explicitly, we'll use that instead of $HOME/.local/share - xdg_home = (std::string)nowide::getenv("XDG_DATA_HOME"); if (!xdg_home.empty()) { std::string fullpath = xdg_home + "/flycast/"; - if (flycast::stat(fullpath.c_str(), &info) == 0 && (info.st_mode & S_IFDIR)) - // Found .local/share/flycast - return fullpath; - fullpath = xdg_home + "/reicast/"; - if (flycast::stat(fullpath.c_str(), &info) == 0 && (info.st_mode & S_IFDIR)) - // Found .local/share/reicast - return fullpath; - - // Create .local/share/flycast - fullpath = xdg_home + "/flycast/"; - flycast::mkdir(fullpath.c_str(), 0755); + struct stat info; + if (flycast::stat(fullpath.c_str(), &info) != 0 || (info.st_mode & S_IFDIR) == 0) + // Create .local/share/flycast + flycast::mkdir(fullpath.c_str(), 0755); return fullpath; } @@ -234,12 +194,9 @@ static void addDirectoriesFromPath(std::vector& dirs, const std::st // Find a file in the user and system config directories. 
// The following folders are checked in this order: -// $HOME/.reicast // $HOME/.config/flycast -// $HOME/.config/reicast // if XDG_CONFIG_DIRS is defined: // <$XDG_CONFIG_DIRS>/flycast -// <$XDG_CONFIG_DIRS>/reicast // else // /etc/flycast/ // /etc/xdg/flycast/ @@ -252,32 +209,24 @@ std::vector find_system_config_dirs() dirs.push_back("/flycast/"); #else std::string xdg_home; - if (nowide::getenv("HOME") != NULL) - { - // Support for the legacy config dir at "$HOME/.reicast" - dirs.push_back((std::string)nowide::getenv("HOME") + "/.reicast/"); - xdg_home = (std::string)nowide::getenv("HOME") + "/.config"; - } - if (nowide::getenv("XDG_CONFIG_HOME") != NULL) + if (nowide::getenv("XDG_CONFIG_HOME") != nullptr) // If XDG_CONFIG_HOME is set explicitly, we'll use that instead of $HOME/.config xdg_home = (std::string)nowide::getenv("XDG_CONFIG_HOME"); + else if (nowide::getenv("HOME") != nullptr) + xdg_home = (std::string)nowide::getenv("HOME") + "/.config"; if (!xdg_home.empty()) - { // XDG config locations dirs.push_back(xdg_home + "/flycast/"); - dirs.push_back(xdg_home + "/reicast/"); - } - if (nowide::getenv("XDG_CONFIG_DIRS") != NULL) + if (nowide::getenv("XDG_CONFIG_DIRS") != nullptr) { std::string path = (std::string)nowide::getenv("XDG_CONFIG_DIRS"); addDirectoriesFromPath(dirs, path, "/flycast/"); - addDirectoriesFromPath(dirs, path, "/reicast/"); } else { #ifdef FLYCAST_SYSCONFDIR - const std::string config_dir (FLYCAST_SYSCONFDIR); + const std::string config_dir(FLYCAST_SYSCONFDIR); dirs.push_back(config_dir); #endif dirs.push_back("/etc/flycast/"); // This isn't part of the XDG spec, but much more common than /etc/xdg/ @@ -291,17 +240,12 @@ std::vector find_system_config_dirs() // Find a file in the user data directories. 
// The following folders are checked in this order: -// $HOME/.reicast/data // $HOME/.local/share/flycast -// $HOME/.local/share/reicast // if XDG_DATA_DIRS is defined: // <$XDG_DATA_DIRS>/flycast -// <$XDG_DATA_DIRS>/reicast // else // /usr/local/share/flycast // /usr/share/flycast -// /usr/local/share/reicast -// /usr/share/reicast // <$FLYCAST_BIOS_PATH> // ./ // ./data @@ -313,42 +257,30 @@ std::vector find_system_data_dirs() dirs.push_back("/flycast/data/"); #else std::string xdg_home; - if (nowide::getenv("HOME") != NULL) - { - // Support for the legacy data dir at "$HOME/.reicast/data" - dirs.push_back((std::string)nowide::getenv("HOME") + "/.reicast/data/"); - xdg_home = (std::string)nowide::getenv("HOME") + "/.local/share"; - } - if (nowide::getenv("XDG_DATA_HOME") != NULL) + if (nowide::getenv("XDG_DATA_HOME") != nullptr) // If XDG_DATA_HOME is set explicitly, we'll use that instead of $HOME/.local/share xdg_home = (std::string)nowide::getenv("XDG_DATA_HOME"); + else if (nowide::getenv("HOME") != nullptr) + xdg_home = (std::string)nowide::getenv("HOME") + "/.local/share"; if (!xdg_home.empty()) - { // XDG data locations dirs.push_back(xdg_home + "/flycast/"); - dirs.push_back(xdg_home + "/reicast/"); - dirs.push_back(xdg_home + "/reicast/data/"); - } - if (nowide::getenv("XDG_DATA_DIRS") != NULL) + if (nowide::getenv("XDG_DATA_DIRS") != nullptr) { std::string path = (std::string)nowide::getenv("XDG_DATA_DIRS"); - addDirectoriesFromPath(dirs, path, "/flycast/"); - addDirectoriesFromPath(dirs, path, "/reicast/"); } else { #ifdef FLYCAST_DATADIR - const std::string data_dir (FLYCAST_DATADIR); + const std::string data_dir(FLYCAST_DATADIR); dirs.push_back(data_dir); #endif dirs.push_back("/usr/local/share/flycast/"); dirs.push_back("/usr/share/flycast/"); - dirs.push_back("/usr/local/share/reicast/"); - dirs.push_back("/usr/share/reicast/"); } - if (nowide::getenv("FLYCAST_BIOS_PATH") != NULL) + if (nowide::getenv("FLYCAST_BIOS_PATH") != nullptr) { std::string 
path = (std::string)nowide::getenv("FLYCAST_BIOS_PATH"); addDirectoriesFromPath(dirs, path, "/"); @@ -377,7 +309,7 @@ int main(int argc, char* argv[]) #endif #if defined(USE_BREAKPAD) google_breakpad::MinidumpDescriptor descriptor("/tmp"); - google_breakpad::ExceptionHandler eh(descriptor, NULL, dumpCallback, NULL, true, -1); + google_breakpad::ExceptionHandler eh(descriptor, nullptr, dumpCallback, nullptr, true, -1); #endif LogManager::Init(); diff --git a/shell/apple/emulator-osx/emulator-osx/osx-main.mm b/shell/apple/emulator-osx/emulator-osx/osx-main.mm index c277c63ad..d9f5a8725 100644 --- a/shell/apple/emulator-osx/emulator-osx/osx-main.mm +++ b/shell/apple/emulator-osx/emulator-osx/osx-main.mm @@ -101,9 +101,7 @@ extern "C" int SDL_main(int argc, char *argv[]) char *home = getenv("HOME"); if (home != NULL) { - std::string config_dir = std::string(home) + "/.reicast/"; - if (!file_exists(config_dir)) - config_dir = std::string(home) + "/.flycast/"; + std::string config_dir = std::string(home) + "/.flycast/"; if (!file_exists(config_dir)) config_dir = std::string(home) + "/Library/Application Support/Flycast/"; From 976fb99aa3a03694281b0422e7ecff633059f999 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 6 Dec 2022 19:50:34 +0100 Subject: [PATCH 20/34] pvr: naomi mvsc2 needs more rendering cycles Regression introduced by 0b0e64304e5e011eb891286c606d6b9ac33c6429 --- core/hw/pvr/spg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index 406818071..55d7c06c4 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -283,7 +283,7 @@ void scheduleRenderDone(TA_context *cntx) int size = 0; for (TA_context *c = cntx; c != nullptr; c = c->nextContext) size += c->tad.thd_data - c->tad.thd_root; - cycles = std::min(100000 + size * 2, 1500000); + cycles = std::min(200000 + size * 3, 1500000); } sh4_sched_request(render_end_schid, cycles); } From f5fa1f62106a00382aa4889ab8af79c2bf71e3ea Mon Sep 17 
00:00:00 2001 From: Flyinghead Date: Tue, 6 Dec 2022 20:42:00 +0100 Subject: [PATCH 21/34] holly, sh4: reg access handlers don't need size. SB_FFST constant 0 --- core/hw/aica/aica_if.cpp | 6 +- core/hw/gdrom/gdromv3.cpp | 26 +++--- core/hw/holly/sb.cpp | 81 ++++++----------- core/hw/holly/sb.h | 6 +- core/hw/holly/sb_mem.cpp | 4 +- core/hw/hwreg.h | 13 +-- core/hw/naomi/naomi.cpp | 21 ++--- core/hw/pvr/elan.cpp | 4 +- core/hw/sh4/sh4_mmr.h | 182 +++++++++++++++++++------------------- 9 files changed, 155 insertions(+), 188 deletions(-) diff --git a/core/hw/aica/aica_if.cpp b/core/hw/aica/aica_if.cpp index 4d7a35d16..397fe8764 100644 --- a/core/hw/aica/aica_if.cpp +++ b/core/hw/aica/aica_if.cpp @@ -393,7 +393,7 @@ static void Write_SB_ADST(u32 addr, u32 data) SB_ADSUSP &= ~0x10; // Schedule the end of DMA transfer interrupt - int cycles = len * (SH4_MAIN_CLOCK / 2 / 25000000); // 16 bits @ 25 MHz + int cycles = len * (SH4_MAIN_CLOCK / 2 / G2_BUS_CLOCK); // 16 bits @ 25 MHz if (cycles < 4096) dma_end_sched(0, 0, 0); else @@ -492,8 +492,10 @@ void aica_sb_Init() void aica_sb_Reset(bool hard) { - if (hard) + if (hard) { SB_ADST = 0; + SB_G2APRO = 0x7f00; + } } void aica_sb_Term() diff --git a/core/hw/gdrom/gdromv3.cpp b/core/hw/gdrom/gdromv3.cpp index 60934d572..82d43e081 100644 --- a/core/hw/gdrom/gdromv3.cpp +++ b/core/hw/gdrom/gdromv3.cpp @@ -1250,20 +1250,20 @@ static int GDRomschd(int i, int c, int j) } //DMA Start -void GDROM_DmaStart(u32 addr, u32 data) +static void GDROM_DmaStart(u32 addr, u32 data) { - if (SB_GDEN==0) - { - INFO_LOG(GDROM, "Invalid GD-DMA start, SB_GDEN=0. Ignoring it."); - return; - } - SB_GDST|=data&1; + SB_GDST |= data & 1; - if (SB_GDST==1) + if (SB_GDST == 1) { - SB_GDSTARD=SB_GDSTAR; - SB_GDLEND=0; - DEBUG_LOG(GDROM, "GDROM-DMA start addr %08X len %d", SB_GDSTAR, SB_GDLEN); + if (SB_GDEN == 0) + { + INFO_LOG(GDROM, "Invalid GD-DMA start, SB_GDEN=0. 
Ignoring it."); + return; + } + SB_GDSTARD = SB_GDSTAR; + SB_GDLEND = 0; + DEBUG_LOG(GDROM, "GDROM-DMA start addr %08X len %d fad %x", SB_GDSTAR, SB_GDLEN, read_params.start_sector); int ticks = getGDROMTicks(); if (ticks < SH4_TIMESLICE) @@ -1276,9 +1276,9 @@ void GDROM_DmaStart(u32 addr, u32 data) } } -void GDROM_DmaEnable(u32 addr, u32 data) +static void GDROM_DmaEnable(u32 addr, u32 data) { - SB_GDEN = (data & 1); + SB_GDEN = data & 1; if (SB_GDEN == 0 && SB_GDST == 1) { printf_spi("GD-DMA aborted"); diff --git a/core/hw/holly/sb.cpp b/core/hw/holly/sb.cpp index 300696162..683967d01 100644 --- a/core/hw/holly/sb.cpp +++ b/core/hw/holly/sb.cpp @@ -173,57 +173,34 @@ static const char *regName(u32 addr) return it->second; } -u32 sb_ReadMem(u32 addr,u32 sz) +u32 sb_ReadMem(u32 addr) { u32 offset = ((addr - SB_BASE) >> 2) & 0x1fff; u32 rv; - if (!(sb_regs[offset].flags & (REG_RF|REG_WO))) - { - if (sz==4) - rv = sb_regs[offset].data32; - else if (sz==2) - rv = sb_regs[offset].data16; - else - rv = sb_regs[offset].data8; - } + if (!(sb_regs[offset].flags & REG_RF)) + rv = sb_regs[offset].data32; else - { - if ((sb_regs[offset].flags & REG_WO) || sb_regs[offset].readFunctionAddr == NULL) - { - INFO_LOG(HOLLY, "sb_ReadMem write-only reg %08x %d", addr, sz); - rv = 0; - } - else - rv = sb_regs[offset].readFunctionAddr(addr); - } + rv = sb_regs[offset].readFunctionAddr(addr); + if ((addr & 0xffffff) != 0x5f6c18) // SB_MDST - DEBUG_LOG(HOLLY, "read(%d) %s.%c == %x", sz, regName(addr), + DEBUG_LOG(HOLLY, "read %s.%c == %x", regName(addr), ((addr >> 26) & 7) == 2 ? 'b' : (addr & 0x2000000) ? '1' : '0', rv); return rv; } -void sb_WriteMem(u32 addr,u32 data,u32 sz) +void sb_WriteMem(u32 addr, u32 data) { - DEBUG_LOG(HOLLY, "write(%d) %s.%c = %x", sz, regName(addr), + DEBUG_LOG(HOLLY, "write %s.%c = %x", regName(addr), ((addr >> 26) & 7) == 2 ? 'b' : (addr & 0x2000000) ? 
'1' : '0', data); u32 offset = ((addr - SB_BASE) >> 2) & 0x1fff; if (!(sb_regs[offset].flags & REG_WF)) - { - if (sz==4) - sb_regs[offset].data32=data; - else if (sz==2) - sb_regs[offset].data16=(u16)data; - else - sb_regs[offset].data8=(u8)data; - } + sb_regs[offset].data32 = data; else - { - sb_regs[offset].writeFunctionAddr(addr,data); - } + sb_regs[offset].writeFunctionAddr(addr, data); } static u32 sbio_read_noacc(u32 addr) @@ -253,7 +230,7 @@ static void sb_write_gdrom_unlock(u32 addr, u32 data) void sb_rio_register(u32 reg_addr, RegIO flags, RegReadAddrFP* rf, RegWriteAddrFP* wf) { - u32 idx=(reg_addr-SB_BASE)/4; + u32 idx = (reg_addr - SB_BASE) / 4; verify(idx < sb_regs.size()); @@ -261,22 +238,23 @@ void sb_rio_register(u32 reg_addr, RegIO flags, RegReadAddrFP* rf, RegWriteAddrF if (flags == RIO_NO_ACCESS) { - sb_regs[idx].readFunctionAddr=&sbio_read_noacc; - sb_regs[idx].writeFunctionAddr=&sbio_write_noacc; + sb_regs[idx].readFunctionAddr = sbio_read_noacc; + sb_regs[idx].writeFunctionAddr = sbio_write_noacc; } - else if (flags == RIO_CONST) + else if (flags == RIO_RO) { - sb_regs[idx].writeFunctionAddr=&sbio_write_const; + sb_regs[idx].writeFunctionAddr = sbio_write_const; + sb_regs[idx].data32 = 0; } else { - sb_regs[idx].data32=0; - if (flags & REG_RF) - sb_regs[idx].readFunctionAddr=rf; + sb_regs[idx].readFunctionAddr = rf; + else + sb_regs[idx].data32 = 0; if (flags & REG_WF) - sb_regs[idx].writeFunctionAddr=wf==0?&sbio_write_noacc:wf; + sb_regs[idx].writeFunctionAddr = wf == nullptr ? sbio_write_noacc : wf; } } @@ -288,14 +266,9 @@ void sb_write_reg(u32 addr, u32 data) u32 SB_FFST_rc; u32 SB_FFST; -static u32 sb_read_SB_FFST(u32 addr) +static u32 read_SB_FFST(u32 addr) { - SB_FFST_rc++; - if (SB_FFST_rc & 0x8) - { - SB_FFST^=31; - } - return 0; // does the fifo status has really to be faked ? 
+ return 0; } static void sb_write_SB_SFRES(u32 addr, u32 data) @@ -367,13 +340,13 @@ void sb_Init() sb_rio_register(SB_LMMODE1_addr, RIO_WF, 0, sb_write_reg); //0x005F688C SB_FFST R FIFO status - sb_rio_register(SB_FFST_addr, RIO_RO_FUNC, sb_read_SB_FFST); + sb_rio_register(SB_FFST_addr, RIO_RO_FUNC, read_SB_FFST); //0x005F6890 SB_SFRES W System reset sb_rio_register(SB_SFRES_addr, RIO_WO_FUNC, 0, sb_write_SB_SFRES); //0x005F689C SB_SBREV R System bus revision number - sb_rio_register(SB_SBREV_addr,RIO_CONST); + sb_rio_register(SB_SBREV_addr, RIO_RO); //0x005F68A0 SB_RBSPLT RW SH4 Root Bus split enable sb_rio_register(SB_RBSPLT_addr, RIO_WF, 0, sb_write_reg); @@ -620,7 +593,7 @@ void sb_Init() sb_rio_register(SB_DDSUSP_addr, RIO_WF, 0, sb_write_SUSP); //0x005F7880 SB_G2ID R G2 bus version - sb_rio_register(SB_G2ID_addr,RIO_CONST); + sb_rio_register(SB_G2ID_addr, RIO_RO); //0x005F7890 SB_G2DSTO RW G2/DS timeout sb_rio_register(SB_G2DSTO_addr,RIO_DATA); @@ -747,11 +720,11 @@ void sb_Reset(bool hard) { for (auto& reg : sb_regs) reg.reset(); + SB_PDAPRO = 0x7f00; + SB_GDAPRO = 0x7f00; } SB_ISTNRM = 0; SB_ISTNRM1 = 0; - SB_FFST_rc = 0; - SB_FFST = 0; bba_Reset(hard); ModemReset(); diff --git a/core/hw/holly/sb.h b/core/hw/holly/sb.h index 5fe9f0dcd..137fd0a8e 100644 --- a/core/hw/holly/sb.h +++ b/core/hw/holly/sb.h @@ -7,8 +7,8 @@ #include "hw/hwreg.h" #include -u32 sb_ReadMem(u32 addr,u32 sz); -void sb_WriteMem(u32 addr,u32 data,u32 sz); +u32 sb_ReadMem(u32 addr); +void sb_WriteMem(u32 addr, u32 data); void sb_Init(); void sb_Reset(bool hard); void sb_Term(); @@ -355,7 +355,7 @@ extern std::array sb_regs; //0x005F6888 SB_LMMODE1 RW Via TA texture memory bus select 1 #define SB_LMMODE1 SB_REG_32(LMMODE1) //0x005F688C SB_FFST R FIFO status -extern u32 SB_FFST; +//#define SB_FFST SB_REG_32(FFST) //0x005F6890 SB_SFRES W System reset #define SB_SFRES SB_REG_32(SFRES) diff --git a/core/hw/holly/sb_mem.cpp b/core/hw/holly/sb_mem.cpp index a92be8bad..619095f4e 100644 --- 
a/core/hw/holly/sb_mem.cpp +++ b/core/hw/holly/sb_mem.cpp @@ -325,7 +325,7 @@ T DYNACALL ReadMem_area0(u32 paddr) } // All SB registers if (addr >= 0x005F6800 && addr <= 0x005F7CFF) - return (T)sb_ReadMem(paddr, sz); + return (T)sb_ReadMem(paddr); // TA / PVR core registers if (addr >= 0x005F8000 && addr <= 0x005F9FFF) { @@ -435,7 +435,7 @@ void DYNACALL WriteMem_area0(u32 paddr, T data) // All SB registers if (addr >= 0x005F6800 && addr <= 0x005F7CFF) { - sb_WriteMem(paddr, data, sz); + sb_WriteMem(paddr, data); return; } // TA / PVR core registers diff --git a/core/hw/hwreg.h b/core/hw/hwreg.h index e2f46fedc..58bbc8e01 100644 --- a/core/hw/hwreg.h +++ b/core/hw/hwreg.h @@ -11,7 +11,6 @@ typedef void RegWriteAddrFP(u32 addr, u32 data); F F N -> RF|WF -> RIO_FUNC D X N -> RO|WF -> RIO_RO F X N -> RF|WF|RO -> RIO_RO_FUNC - D X Y -> CONST|RO|WF-> RIO_CONST X F N -> RF|WF|WO -> RIO_WO_FUNC */ enum RegStructFlags @@ -30,7 +29,6 @@ enum RegIO RIO_FUNC = REG_WF | REG_RF, RIO_RO = REG_RO | REG_WF, RIO_RO_FUNC = REG_RO | REG_RF | REG_WF, - RIO_CONST = REG_RO | REG_WF, RIO_WO_FUNC = REG_WF | REG_RF | REG_WO, RIO_NO_ACCESS = REG_WF | REG_RF | REG_NO_ACCESS }; @@ -39,16 +37,13 @@ struct RegisterStruct { union { - u32 data32; //stores data of reg variable [if used] 32b - u16 data16; //stores data of reg variable [if used] 16b - u8 data8; //stores data of reg variable [if used] 8b - - RegReadAddrFP* readFunctionAddr; //stored pointer to reg read function + u32 data32; // Register value + RegReadAddrFP* readFunctionAddr; // Register read handler }; - RegWriteAddrFP* writeFunctionAddr; //stored pointer to reg write function + RegWriteAddrFP* writeFunctionAddr; // Register write handler - u32 flags; //Access flags ! 
+ u32 flags; // Access flags void reset() { diff --git a/core/hw/naomi/naomi.cpp b/core/hw/naomi/naomi.cpp index 3183d069c..a203de6e7 100644 --- a/core/hw/naomi/naomi.cpp +++ b/core/hw/naomi/naomi.cpp @@ -422,19 +422,16 @@ void WriteMem_naomi(u32 address, u32 data, u32 size) } //Dma Start -void Naomi_DmaStart(u32 addr, u32 data) +static void Naomi_DmaStart(u32 addr, u32 data) { - if (SB_GDEN==0) + if ((data & 1) == 0) + return; + if (SB_GDEN == 0) { INFO_LOG(NAOMI, "Invalid (NAOMI)GD-DMA start, SB_GDEN=0. Ignoring it."); return; } - SB_GDST |= data & 1; - - if (SB_GDST == 0) - return; - if (!m3comm.DmaStart(addr, data) && CurrentCartridge != NULL) { DEBUG_LOG(NAOMI, "NAOMI-DMA start addr %08X len %d", SB_GDSTAR, SB_GDLEN); @@ -464,20 +461,20 @@ void Naomi_DmaStart(u32 addr, u32 data) SB_GDSTARD = SB_GDSTAR + SB_GDLEN; SB_GDLEND = SB_GDLEN; } - SB_GDST = 0; asic_RaiseInterrupt(holly_GDROM_DMA); } -void Naomi_DmaEnable(u32 addr, u32 data) +static void Naomi_DmaEnable(u32 addr, u32 data) { - SB_GDEN=data&1; - if (SB_GDEN==0 && SB_GDST==1) + SB_GDEN = data & 1; + if (SB_GDEN == 0 && SB_GDST == 1) { INFO_LOG(NAOMI, "(NAOMI)GD-DMA aborted"); - SB_GDST=0; + SB_GDST = 0; } } + void naomi_reg_Init() { #ifdef NAOMI_COMM diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index 70c29e381..bdd40e08f 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -88,7 +88,7 @@ static u32 DYNACALL read_elanreg(u32 paddr) { case 0x5F: if (addr >= 0x005F6800 && addr <= 0x005F7CFF) - return sb_ReadMem(paddr, sizeof(u32)); + return sb_ReadMem(paddr); if (addr >= 0x005F8000 && addr <= 0x005F9FFF) return pvr_ReadReg(paddr); @@ -154,7 +154,7 @@ static void DYNACALL write_elanreg(u32 paddr, u32 data) { case 0x5F: if (addr>= 0x005F6800 && addr <= 0x005F7CFF) - sb_WriteMem(paddr, data, sizeof(u32)); + sb_WriteMem(paddr, data); else if (addr >= 0x005F8000 && addr <= 0x005F9FFF) pvr_WriteReg(paddr, data); else diff --git a/core/hw/sh4/sh4_mmr.h b/core/hw/sh4/sh4_mmr.h index 
fac9cee81..f7effda10 100644 --- a/core/hw/sh4/sh4_mmr.h +++ b/core/hw/sh4/sh4_mmr.h @@ -30,17 +30,17 @@ void sh4_mmr_term(); void sh4_rio_reg(RegisterStruct *arr, u32 addr, RegIO flags, RegReadAddrFP *rf = nullptr, RegWriteAddrFP *wf = nullptr); -#define SH4IO_REGN(mod, addr, size) ((mod)[((addr) & 255) / 4].data##size) -#define SH4IO_REG(mod, name, size) SH4IO_REGN(mod, mod##_##name##_addr, size) -#define SH4IO_REG_T(mod, name, size) ((mod##_##name##_type&)SH4IO_REG(mod, name, size)) +#define SH4IO_REGN(mod, addr) ((mod)[((addr) & 255) / 4].data32) +#define SH4IO_REG(mod, name) SH4IO_REGN(mod, mod##_##name##_addr) +#define SH4IO_REG_T(mod, name) ((mod##_##name##_type&)SH4IO_REG(mod, name)) -#define SH4IO_REG_OFS(mod, name, o, s, size) SH4IO_REGN(mod, mod##_##name##0_addr + (o) * (s), size) -#define SH4IO_REG_T_OFS(mod, name, o, s, size) ((mod##_##name##_type&)SH4IO_REG_OFS(mod, name, o, s, size)) +#define SH4IO_REG_OFS(mod, name, o, s) SH4IO_REGN(mod, mod##_##name##0_addr + (o) * (s)) +#define SH4IO_REG_T_OFS(mod, name, o, s) ((mod##_##name##_type&)SH4IO_REG_OFS(mod, name, o, s)) template void sh4_write_reg(u32 addr, u32 data) { - SH4IO_REGN(Module, Addr, 32) = (data & Mask) | OrMask; + SH4IO_REGN(Module, Addr) = (data & Mask) | OrMask; } template @@ -500,7 +500,7 @@ union BSC_BCR1_type }; -#define BSC_BCR1 SH4IO_REG_T(BSC,BCR1,32) +#define BSC_BCR1 SH4IO_REG_T(BSC, BCR1) //extern BCR1_type BSC_BCR1; //16 bit @@ -531,7 +531,7 @@ union BSC_BCR2_type u16 full; }; -#define BSC_BCR2 SH4IO_REG_T(BSC,BCR2,16) +#define BSC_BCR2 SH4IO_REG_T(BSC, BCR2) //32 bits union BSC_WCR1_type @@ -577,7 +577,7 @@ union BSC_WCR1_type u32 full; }; -#define BSC_WCR1 SH4IO_REG_T(BSC,WCR1,32) +#define BSC_WCR1 SH4IO_REG_T(BSC, WCR1) //32 bits union BSC_WCR2_type @@ -624,7 +624,7 @@ union BSC_WCR2_type u32 full; }; -#define BSC_WCR2 SH4IO_REG_T(BSC,WCR2,32) +#define BSC_WCR2 SH4IO_REG_T(BSC, WCR2) //32 bits union BSC_WCR3_type @@ -672,7 +672,7 @@ union BSC_WCR3_type }; -#define BSC_WCR3 
SH4IO_REG_T(BSC,WCR3,32) +#define BSC_WCR3 SH4IO_REG_T(BSC, WCR3) //32 bits union BSC_MCR_type @@ -720,7 +720,7 @@ union BSC_MCR_type }; -#define BSC_MCR SH4IO_REG_T(BSC,MCR,32) +#define BSC_MCR SH4IO_REG_T(BSC, MCR) //16 bits union BSC_PCR_type @@ -749,7 +749,7 @@ union BSC_PCR_type u16 full; }; -#define BSC_PCR SH4IO_REG_T(BSC,PCR,16) +#define BSC_PCR SH4IO_REG_T(BSC, PCR) //16 bits -> misstype on manual ? RTSCR vs RTCSR... union BSC_RTCSR_type @@ -778,7 +778,7 @@ union BSC_RTCSR_type u16 full; }; -#define BSC_RTCSR SH4IO_REG_T(BSC,RTCSR,16) +#define BSC_RTCSR SH4IO_REG_T(BSC, RTCSR) //16 bits union BSC_RTCNT_type @@ -800,7 +800,7 @@ union BSC_RTCNT_type u16 full; }; -#define BSC_RTCNT SH4IO_REG_T(BSC,RTCNT,16) +#define BSC_RTCNT SH4IO_REG_T(BSC, RTCNT) //16 bits union BSC_RTCOR_type @@ -823,7 +823,7 @@ union BSC_RTCOR_type }; -#define BSC_RTCOR SH4IO_REG_T(BSC,RTCOR,16) +#define BSC_RTCOR SH4IO_REG_T(BSC, RTCOR) //16 bits union BSC_RFCR_type @@ -843,7 +843,7 @@ union BSC_RFCR_type u16 full; }; -#define BSC_RFCR SH4IO_REG_T(BSC,RFCR,16) +#define BSC_RFCR SH4IO_REG_T(BSC, RFCR) //32 bits union BSC_PCTRA_type @@ -890,7 +890,7 @@ union BSC_PCTRA_type u32 full; }; -#define BSC_PCTRA SH4IO_REG_T(BSC,PCTRA,32) +#define BSC_PCTRA SH4IO_REG_T(BSC, PCTRA) //16 bits union BSC_PDTRA_type @@ -966,7 +966,7 @@ union BSC_PCTRB_type u32 full; }; -#define BSC_PCTRB SH4IO_REG_T(BSC,PCTRB,32) +#define BSC_PCTRB SH4IO_REG_T(BSC, PCTRB) //16 bits union BSC_PDTRB_type @@ -995,7 +995,7 @@ union BSC_PDTRB_type u16 full; }; -#define BSC_PDTRB SH4IO_REG_T(BSC,PDTRB,16) +#define BSC_PDTRB SH4IO_REG_T(BSC, PDTRB) //16 bits union BSC_GPIOIC_type @@ -1024,7 +1024,7 @@ union BSC_GPIOIC_type u16 full; }; -#define BSC_GPIOIC SH4IO_REG_T(BSC,GPIOIC,16) +#define BSC_GPIOIC SH4IO_REG_T(BSC, GPIOIC) @@ -1123,27 +1123,27 @@ union CCN_QACR_type //Types -#define CCN_PTEH SH4IO_REG_T(CCN,PTEH,32) -#define CCN_PTEL SH4IO_REG_T(CCN,PTEL,32) -#define CCN_TTB SH4IO_REG(CCN,TTB,32) -#define CCN_TEA 
SH4IO_REG(CCN,TEA,32) -#define CCN_MMUCR SH4IO_REG_T(CCN,MMUCR,32) -#define CCN_BASRA SH4IO_REG(CCN,BASRA,8) -#define CCN_BASRB SH4IO_REG(CCN,BASRB,8) -#define CCN_CCR SH4IO_REG_T(CCN,CCR,32) -#define CCN_TRA SH4IO_REG(CCN,TRA,32) -#define CCN_EXPEVT SH4IO_REG(CCN,EXPEVT,32) -#define CCN_INTEVT SH4IO_REG(CCN,INTEVT,32) -#define CCN_PTEA SH4IO_REG_T(CCN,PTEA,32) +#define CCN_PTEH SH4IO_REG_T(CCN, PTEH) +#define CCN_PTEL SH4IO_REG_T(CCN, PTEL) +#define CCN_TTB SH4IO_REG(CCN, TTB) +#define CCN_TEA SH4IO_REG(CCN, TEA) +#define CCN_MMUCR SH4IO_REG_T(CCN, MMUCR) +#define CCN_BASRA SH4IO_REG(CCN, BASRA) +#define CCN_BASRB SH4IO_REG(CCN, BASRB) +#define CCN_CCR SH4IO_REG_T(CCN, CCR) +#define CCN_TRA SH4IO_REG(CCN, TRA) +#define CCN_EXPEVT SH4IO_REG(CCN, EXPEVT) +#define CCN_INTEVT SH4IO_REG(CCN, INTEVT) +#define CCN_PTEA SH4IO_REG_T(CCN, PTEA) -#define CCN_QACR0 ((CCN_QACR_type&)SH4IO_REG(CCN, QACR0, 32)) -#define CCN_QACR1 ((CCN_QACR_type&)SH4IO_REG(CCN, QACR1, 32)) +#define CCN_QACR0 ((CCN_QACR_type&)SH4IO_REG(CCN, QACR0)) +#define CCN_QACR1 ((CCN_QACR_type&)SH4IO_REG(CCN, QACR1)) -#define CPG_FRQCR SH4IO_REG(CPG,FRQCR,16) -#define CPG_STBCR SH4IO_REG(CPG,STBCR,8) -#define CPG_WTCNT SH4IO_REG(CPG,WTCNT,16) -#define CPG_WTCSR SH4IO_REG(CPG,WTCSR,16) -#define CPG_STBCR2 SH4IO_REG(CPG,STBCR2,8) +#define CPG_FRQCR SH4IO_REG(CPG, FRQCR) +#define CPG_STBCR SH4IO_REG(CPG, STBCR) +#define CPG_WTCNT SH4IO_REG(CPG, WTCNT) +#define CPG_WTCSR SH4IO_REG(CPG, WTCSR) +#define CPG_STBCR2 SH4IO_REG(CPG, STBCR2) @@ -1222,41 +1222,41 @@ extern u32 DMAC_DMATCR[4];//only 24 bits valid extern DMAC_CHCR_type DMAC_CHCR[4]; */ -#define DMAC_SAR(x) SH4IO_REG_OFS(DMAC,SAR,x,0x10,32) -#define DMAC_DAR(x) SH4IO_REG_OFS(DMAC,DAR,x,0x10,32) -#define DMAC_DMATCR(x) SH4IO_REG_OFS(DMAC,DMATCR,x,0x10,32) -#define DMAC_CHCR(x) SH4IO_REG_T_OFS(DMAC,CHCR,x,0x10,32) +#define DMAC_SAR(x) SH4IO_REG_OFS(DMAC, SAR, x, 0x10) +#define DMAC_DAR(x) SH4IO_REG_OFS(DMAC, DAR, x, 0x10) +#define DMAC_DMATCR(x) 
SH4IO_REG_OFS(DMAC, DMATCR, x, 0x10) +#define DMAC_CHCR(x) SH4IO_REG_T_OFS(DMAC, CHCR, x, 0x10) -#define DMAC_DMAOR SH4IO_REG_T(DMAC,DMAOR,32) +#define DMAC_DMAOR SH4IO_REG_T(DMAC, DMAOR) //UBC BARA 0xFF200000 0x1F200000 32 Undefined Held Held Held Iclk -#define UBC_BARA SH4IO_REG(UBC,BARA,32) +#define UBC_BARA SH4IO_REG(UBC, BARA) //UBC BAMRA 0xFF200004 0x1F200004 8 Undefined Held Held Held Iclk -#define UBC_BAMRA SH4IO_REG(UBC,BAMRA,8) +#define UBC_BAMRA SH4IO_REG(UBC, BAMRA) //UBC BBRA 0xFF200008 0x1F200008 16 0x0000 Held Held Held Iclk -#define UBC_BBRA SH4IO_REG(UBC,BBRA,16) +#define UBC_BBRA SH4IO_REG(UBC, BBRA) //UBC BARB 0xFF20000C 0x1F20000C 32 Undefined Held Held Held Iclk -#define UBC_BARB SH4IO_REG(UBC,BARB,32) +#define UBC_BARB SH4IO_REG(UBC, BARB) //UBC BAMRB 0xFF200010 0x1F200010 8 Undefined Held Held Held Iclk -#define UBC_BAMRB SH4IO_REG(UBC,BAMRB,8) +#define UBC_BAMRB SH4IO_REG(UBC, BAMRB) //UBC BBRB 0xFF200014 0x1F200014 16 0x0000 Held Held Held Iclk -#define UBC_BBRB SH4IO_REG(UBC,BBRB,16) +#define UBC_BBRB SH4IO_REG(UBC, BBRB) //UBC BDRB 0xFF200018 0x1F200018 32 Undefined Held Held Held Iclk -#define UBC_BDRB SH4IO_REG(UBC,BDRB,32) +#define UBC_BDRB SH4IO_REG(UBC, BDRB) //UBC BDMRB 0xFF20001C 0x1F20001C 32 Undefined Held Held Held Iclk -#define UBC_BDMRB SH4IO_REG(UBC,BDMRB,32) +#define UBC_BDMRB SH4IO_REG(UBC, BDMRB) //UBC BRCR 0xFF200020 0x1F200020 16 0x0000 Held Held Held Iclk -#define UBC_BRCR SH4IO_REG(UBC,BRCR,16) +#define UBC_BRCR SH4IO_REG(UBC, BRCR) //TCNT exists only as cached state //#define TMU_TCNT(x) SH4IO_REG_OFS(TMU,TCNT,x,12,32) -#define TMU_TCOR(x) SH4IO_REG_OFS(TMU,TCOR,x,12,32) -#define TMU_TCR(x) SH4IO_REG_OFS(TMU,TCR,x,12,16) +#define TMU_TCOR(x) SH4IO_REG_OFS(TMU, TCOR, x, 12) +#define TMU_TCR(x) SH4IO_REG_OFS(TMU, TCR, x, 12) -#define TMU_TOCR SH4IO_REG(TMU,TOCR,8) -#define TMU_TSTR SH4IO_REG(TMU,TSTR,8) +#define TMU_TOCR SH4IO_REG(TMU, TOCR) +#define TMU_TSTR SH4IO_REG(TMU, TSTR) @@ -1287,10 +1287,10 @@ union 
SCIF_SCSMR2_type u16 full; }; -#define SCIF_SCSMR2 SH4IO_REG_T(SCIF,SCSMR2,16) +#define SCIF_SCSMR2 SH4IO_REG_T(SCIF, SCSMR2) //SCIF SCBRR2 0xFFE80004 0x1FE80004 8 0xFF 0xFF Held Held Pclk -#define SCIF_SCBRR2 SH4IO_REG(SCIF,SCBRR2,8) +#define SCIF_SCBRR2 SH4IO_REG(SCIF, SCBRR2) //SCIF SCSCR2 0xFFE80008 0x1FE80008 16 0x0000 0x0000 Held Held Pclk union SCIF_SCSCR2_type @@ -1321,7 +1321,7 @@ union SCIF_SCSCR2_type extern SCIF_SCSCR2_type SCIF_SCSCR2; //SCIF SCFTDR2 0xFFE8000C 0x1FE8000C 8 Undefined Undefined Held Held Pclk -#define SCIF_SCFTDR2 SH4IO_REG(SCIF,SCFTDR2,8) +#define SCIF_SCFTDR2 SH4IO_REG(SCIF, SCFTDR2) //SCIF SCFSR2 0xFFE80010 0x1FE80010 16 0x0060 0x0060 Held Held Pclk union SCIF_SCFSR2_type @@ -1381,7 +1381,7 @@ union SCIF_SCFCR2_type }; u16 full; }; -#define SCIF_SCFCR2 SH4IO_REG_T(SCIF,SCFCR2,16) +#define SCIF_SCFCR2 SH4IO_REG_T(SCIF, SCFCR2) //Read OLNY //SCIF SCFDR2 0xFFE8001C 0x1FE8001C 16 0x0000 0x0000 Held Held Pclk @@ -1425,7 +1425,7 @@ union SCIF_SCSPTR2_type }; u16 full; }; -#define SCIF_SCSPTR2 SH4IO_REG_T(SCIF,SCSPTR2,16) +#define SCIF_SCSPTR2 SH4IO_REG_T(SCIF, SCSPTR2) //SCIF SCLSR2 0xFFE80024 0x1FE80024 16 0x0000 0x0000 Held Held Pclk union SCIF_SCLSR2_type @@ -1440,26 +1440,26 @@ union SCIF_SCLSR2_type }; u16 full; }; -#define SCIF_SCLSR2 SH4IO_REG_T(SCIF,SCLSR2,16) +#define SCIF_SCLSR2 SH4IO_REG_T(SCIF, SCLSR2) -#define RTC_R64CNT SH4IO_REG(RTC,R64CNT,8) -#define RTC_RSECCNT SH4IO_REG(RTC,RSECCNT,8) -#define RTC_RMINCNT SH4IO_REG(RTC,RMINCNT,8) -#define RTC_RHRCNT SH4IO_REG(RTC,RHRCNT,8) -#define RTC_RWKCNT SH4IO_REG(RTC,RWKCNT,8) -#define RTC_RDAYCNT SH4IO_REG(RTC,RDAYCNT,8) -#define RTC_RMONCNT SH4IO_REG(RTC,RMONCNT,8) -#define RTC_RYRCNT SH4IO_REG(RTC,RYRCNT,16) +#define RTC_R64CNT SH4IO_REG(RTC, R64CNT) +#define RTC_RSECCNT SH4IO_REG(RTC, RSECCNT) +#define RTC_RMINCNT SH4IO_REG(RTC, RMINCNT) +#define RTC_RHRCNT SH4IO_REG(RTC, RHRCNT) +#define RTC_RWKCNT SH4IO_REG(RTC, RWKCNT) +#define RTC_RDAYCNT SH4IO_REG(RTC, RDAYCNT) +#define 
RTC_RMONCNT SH4IO_REG(RTC, RMONCNT) +#define RTC_RYRCNT SH4IO_REG(RTC, RYRCNT) -#define RTC_RSECAR SH4IO_REG(RTC,RSECAR,8) -#define RTC_RMINAR SH4IO_REG(RTC,RMINAR,8) -#define RTC_RHRAR SH4IO_REG(RTC,RHRAR,8) -#define RTC_RWKAR SH4IO_REG(RTC,RWKAR,8) -#define RTC_RDAYAR SH4IO_REG(RTC,RDAYAR,8) -#define RTC_RMONAR SH4IO_REG(RTC,RMONAR,8) -#define RTC_RCR1 SH4IO_REG(RTC,RCR1,8) -#define RTC_RCR2 SH4IO_REG(RTC,RCR2,8) +#define RTC_RSECAR SH4IO_REG(RTC, RSECAR) +#define RTC_RMINAR SH4IO_REG(RTC, RMINAR) +#define RTC_RHRAR SH4IO_REG(RTC, RHRAR) +#define RTC_RWKAR SH4IO_REG(RTC, RWKAR) +#define RTC_RDAYAR SH4IO_REG(RTC, RDAYAR) +#define RTC_RMONAR SH4IO_REG(RTC, RMONAR) +#define RTC_RCR1 SH4IO_REG(RTC, RCR1) +#define RTC_RCR2 SH4IO_REG(RTC, RCR2) @@ -1514,16 +1514,16 @@ union INTC_IPRC_type }; }; -#define INTC_ICR SH4IO_REG_T(INTC,ICR,16) +#define INTC_ICR SH4IO_REG_T(INTC, ICR) -#define INTC_IPRA SH4IO_REG_T(INTC,IPRA,16) -#define INTC_IPRB SH4IO_REG_T(INTC,IPRB,16) -#define INTC_IPRC SH4IO_REG_T(INTC,IPRC,16) +#define INTC_IPRA SH4IO_REG_T(INTC, IPRA) +#define INTC_IPRB SH4IO_REG_T(INTC, IPRB) +#define INTC_IPRC SH4IO_REG_T(INTC, IPRC) -#define SCI_SCSMR1 SH4IO_REG(SCI, SCSMR1, 8) -#define SCI_SCBRR1 SH4IO_REG(SCI, SCBRR1, 8) -#define SCI_SCSCR1 SH4IO_REG(SCI, SCSCR1, 8) -#define SCI_SCTDR1 SH4IO_REG(SCI, SCTDR1, 8) -#define SCI_SCSSR1 SH4IO_REG(SCI, SCSSR1, 8) -#define SCI_SCRDR1 SH4IO_REG(SCI, SCRDR1, 8) -#define SCI_SCSPTR1 SH4IO_REG(SCI, SCSPTR1, 8) +#define SCI_SCSMR1 SH4IO_REG(SCI, SCSMR1) +#define SCI_SCBRR1 SH4IO_REG(SCI, SCBRR1) +#define SCI_SCSCR1 SH4IO_REG(SCI, SCSCR1) +#define SCI_SCTDR1 SH4IO_REG(SCI, SCTDR1) +#define SCI_SCSSR1 SH4IO_REG(SCI, SCSSR1) +#define SCI_SCRDR1 SH4IO_REG(SCI, SCRDR1) +#define SCI_SCSPTR1 SH4IO_REG(SCI, SCSPTR1) From 446619ce854d466524d615ec1168ebe7e8e3de17 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 6 Dec 2022 20:51:45 +0100 Subject: [PATCH 22/34] atomiswave only has 2 MB of aica ram new savestate version build.h clean 
up --- core/build.h | 29 ++++++++++------------------- core/emulator.cpp | 2 +- core/hw/arm7/arm7.h | 2 +- core/hw/holly/sb.cpp | 2 -- core/serialize.cpp | 23 +++++++++++++---------- core/serialize.h | 3 ++- 6 files changed, 27 insertions(+), 34 deletions(-) diff --git a/core/build.h b/core/build.h index 1214440fd..9e2bd19df 100755 --- a/core/build.h +++ b/core/build.h @@ -121,14 +121,11 @@ #define USE_WINCE_HACK #endif -#define DC_PLATFORM_MASK 7 -#define DC_PLATFORM_DREAMCAST 0 /* Works, for the most part */ -#define DC_PLATFORM_DEV_UNIT 1 /* This is missing hardware */ -#define DC_PLATFORM_NAOMI 2 /* Works, for the most part */ -#define DC_PLATFORM_NAOMI2 3 /* Needs to be done, 2xsh4 + 2xpvr + custom TNL */ -#define DC_PLATFORM_ATOMISWAVE 4 /* Works, for the most part */ -#define DC_PLATFORM_HIKARU 5 /* Needs to be done, 2xsh4, 2x aica , custom vpu */ -#define DC_PLATFORM_AURORA 6 /* Needs to be done, Uses newer 300 mhz sh4 + 150 mhz pvr mbx SoC */ +#define DC_PLATFORM_DREAMCAST 0 +#define DC_PLATFORM_DEV_UNIT 1 +#define DC_PLATFORM_NAOMI 2 +#define DC_PLATFORM_NAOMI2 3 +#define DC_PLATFORM_ATOMISWAVE 4 //HOST_CPU #define CPU_X86 0x20000001 @@ -139,14 +136,12 @@ #define CPU_PPC 0x20000006 #define CPU_PPC64 0x20000007 #define CPU_ARM64 0x20000008 -#define CPU_MIPS64 0x20000009 //FEAT_SHREC, FEAT_AREC, FEAT_DSPREC #define DYNAREC_NONE 0x40000001 #define DYNAREC_JIT 0x40000002 #define DYNAREC_CPP 0x40000003 - //automatic #if defined(__x86_64__) || defined(_M_X64) @@ -235,15 +230,11 @@ #define VRAM_SIZE_MAX (16*1024*1024) #define ARAM_SIZE_MAX (8*1024*1024) -#define GD_CLOCK 33868800 //GDROM XTAL -- 768fs - -#define AICA_CORE_CLOCK (GD_CLOCK*4/3) //[45158400] GD->PLL 3:4 -> AICA CORE -- 1024fs -#define ADAC_CLOCK (AICA_CORE_CLOCK/4) //[11289600] 44100*256, AICA CORE -> PLL 4:1 -> ADAC -- 256fs -#define AICA_ARM_CLOCK (AICA_CORE_CLOCK/2) //[22579200] AICA CORE -> PLL 2:1 -> ARM -#define AICA_SDRAM_CLOCK (GD_CLOCK*2) //[67737600] GD-> PLL 2 -> SDRAM -#define 
SH4_MAIN_CLOCK (200*1000*1000) //[200000000] XTal(13.5) -> PLL (33.3) -> PLL 1:6 (200) -#define SH4_RAM_CLOCK (100*1000*1000) //[100000000] XTal(13.5) -> PLL (33.3) -> PLL 1:3 (100) , also suplied to HOLLY chip -#define G2_BUS_CLOCK (25*1000*1000) //[25000000] from Holly, from SH4_RAM_CLOCK w/ 2 2:1 plls +#define GD_CLOCK 33868800 //GDROM XTAL -- 768fs +#define AICA_CORE_CLOCK (GD_CLOCK * 4 / 3) //[45158400] GD->PLL 3:4 -> AICA CORE -- 1024fs +#define AICA_ARM_CLOCK (AICA_CORE_CLOCK / 2) //[22579200] AICA CORE -> PLL 2:1 -> ARM +#define SH4_MAIN_CLOCK (200 * 1000 * 1000) //[200000000] XTal(13.5) -> PLL (33.3) -> PLL 1:6 (200) +#define G2_BUS_CLOCK (25 * 1000 * 1000) //[25000000] from Holly, from SH4_RAM_CLOCK w/ 2 2:1 plls #if defined(GLES) && !defined(GLES3) && !defined(GLES2) // Only use GL ES 2.0 API functions diff --git a/core/emulator.cpp b/core/emulator.cpp index 0bf549bf6..11e134e03 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -394,7 +394,7 @@ static void setPlatform(int platform) case DC_PLATFORM_ATOMISWAVE: settings.platform.ram_size = 16 * 1024 * 1024; settings.platform.vram_size = 8 * 1024 * 1024; - settings.platform.aram_size = 8 * 1024 * 1024; + settings.platform.aram_size = 2 * 1024 * 1024; settings.platform.bios_size = 128 * 1024; settings.platform.flash_size = 128 * 1024; // sram break; diff --git a/core/hw/arm7/arm7.h b/core/hw/arm7/arm7.h index cced9e27d..e98d84a72 100644 --- a/core/hw/arm7/arm7.h +++ b/core/hw/arm7/arm7.h @@ -99,7 +99,7 @@ typedef union alignas(8) extern reg_pair arm_Reg[RN_ARM_REG_COUNT]; // AICA ARM cpu clock: 22.5792 MHz -#define ARM_CYCLES_PER_SAMPLE 512 +#define ARM_CYCLES_PER_SAMPLE (AICA_ARM_CLOCK / 44100) extern int arm7ClockTicks; void CPUFiq(); diff --git a/core/hw/holly/sb.cpp b/core/hw/holly/sb.cpp index 683967d01..5fb8f6151 100644 --- a/core/hw/holly/sb.cpp +++ b/core/hw/holly/sb.cpp @@ -264,8 +264,6 @@ void sb_write_reg(u32 addr, u32 data) SB_REGN_32(reg_addr) = (data & mask) | or_mask; } -u32 
SB_FFST_rc; -u32 SB_FFST; static u32 read_SB_FFST(u32 addr) { return 0; diff --git a/core/serialize.cpp b/core/serialize.cpp index d8329c5c4..650e551fc 100644 --- a/core/serialize.cpp +++ b/core/serialize.cpp @@ -52,10 +52,6 @@ extern u32 SB_ADST; //./core/hw/aica/aica_mem.o extern u8 aica_reg[0x8000]; -//./core/hw/holly/sb.o -extern u32 SB_FFST_rc; -extern u32 SB_FFST; - //./core/hw/holly/sb_mem.o extern MemChip *sys_rom; extern WritableChip *sys_nvmem; @@ -190,8 +186,6 @@ void dc_serialize(Serializer& ser) register_serialize(sb_regs, ser); ser << SB_ISTNRM; ser << SB_ISTNRM1; - ser << SB_FFST_rc; - ser << SB_FFST; ser << SB_ADST; sys_rom->Serialize(ser); @@ -339,6 +333,8 @@ static void dc_deserialize_libretro(Deserializer& deser) } deser.deserialize(aica_ram.data, aica_ram.size); + if (settings.platform.isAtomiswave()) + deser.skip(6 * 1024 * 1024); deser >> VREG; deser >> ARMRST; deser >> rtc_EN; @@ -349,8 +345,8 @@ static void dc_deserialize_libretro(Deserializer& deser) register_deserialize(sb_regs, deser); deser >> SB_ISTNRM; - deser >> SB_FFST_rc; - deser >> SB_FFST; + deser.skip(); // SB_FFST_rc; + deser.skip(); // SB_FFST; SB_ADST = 0; deser.skip(); // sys_nvmem->size @@ -624,7 +620,11 @@ void dc_deserialize(Deserializer& deser) } if (!deser.rollback()) + { deser.deserialize(aica_ram.data, aica_ram.size); + if (settings.platform.isAtomiswave()) + deser.skip(6 * 1024 * 1024, Deserializer::V30); + } deser >> VREG; deser >> ARMRST; deser >> rtc_EN; @@ -641,8 +641,11 @@ void dc_deserialize(Deserializer& deser) deser >> SB_ISTNRM1; else SB_ISTNRM1 = 0; - deser >> SB_FFST_rc; - deser >> SB_FFST; + if (deser.version() < Deserializer::V30) + { + deser.skip(); // SB_FFST_rc; + deser.skip(); // SB_FFST; + } if (deser.version() >= Deserializer::V15) deser >> SB_ADST; else diff --git a/core/serialize.h b/core/serialize.h index 70421e2ac..fbed29f6a 100644 --- a/core/serialize.h +++ b/core/serialize.h @@ -65,7 +65,8 @@ public: V27, V28, V29, - Current = V29, + V30, + 
Current = V30, Next = Current + 1, }; From 648f33473fd57bf5ef8e8ddb9ab07aeb5ae2e8ad Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 6 Dec 2022 21:12:41 +0100 Subject: [PATCH 23/34] test fix --- tests/src/serialize_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/serialize_test.cpp b/tests/src/serialize_test.cpp index 93314d135..833fe2690 100644 --- a/tests/src/serialize_test.cpp +++ b/tests/src/serialize_test.cpp @@ -31,7 +31,7 @@ TEST_F(SerializeTest, SizeTest) std::vector data(30000000); Serializer ser(data.data(), data.size()); dc_serialize(ser); - ASSERT_EQ(28191603u, ser.size()); + ASSERT_EQ(28191595u, ser.size()); } From 9069a49145ec829d43b0969e372110199eed8b4b Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 6 Dec 2022 22:04:37 +0100 Subject: [PATCH 24/34] pvr: take jitter into account in spg scheduler. fix scheduler edge case Take jitter into account when calculating current scanline Scheduler would miss the next int if it's on the next scanline Support for Hblank interrupt mode 2 (every scanline) --- core/hw/pvr/spg.cpp | 55 +++++++++++++++++------------------- tests/src/serialize_test.cpp | 2 +- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index e4bcea723..e477db2e1 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -11,7 +11,6 @@ //SPG emulation; Scanline/Raster beam registers & interrupts -static u32 in_vblank; static u32 clc_pvr_scanline; static u32 pvr_numscanlines = 512; static u32 prv_cur_scanline = -1; @@ -21,8 +20,8 @@ static u32 vblk_cnt; static float last_fps; #endif -//54 mhz pixel clock -#define PIXEL_CLOCK (54*1000*1000/2) +// 27 mhz pixel clock +constexpr int PIXEL_CLOCK = 27 * 1000 * 1000; static u32 Line_Cycles; static u32 Frame_Cycles; int render_end_schid; @@ -59,25 +58,28 @@ void CalculateSync() static int getNextSpgInterrupt() { + if (SPG_HBLANK_INT.hblank_int_mode == 2) + return Line_Cycles; + u32 min_scanline = 
prv_cur_scanline + 1; u32 min_active = pvr_numscanlines; - if (min_scanline < SPG_VBLANK_INT.vblank_in_interrupt_line_number) + if (min_scanline <= SPG_VBLANK_INT.vblank_in_interrupt_line_number) min_active = std::min(min_active, SPG_VBLANK_INT.vblank_in_interrupt_line_number); - if (min_scanline < SPG_VBLANK_INT.vblank_out_interrupt_line_number) + if (min_scanline <= SPG_VBLANK_INT.vblank_out_interrupt_line_number) min_active = std::min(min_active, SPG_VBLANK_INT.vblank_out_interrupt_line_number); - if (min_scanline < SPG_VBLANK.vstart) + if (min_scanline <= SPG_VBLANK.vstart) min_active = std::min(min_active, SPG_VBLANK.vstart); - if (min_scanline < SPG_VBLANK.vbend) + if (min_scanline <= SPG_VBLANK.vbend) min_active = std::min(min_active, SPG_VBLANK.vbend); - if (lightgun_line != 0xffff && min_scanline < lightgun_line) + if (lightgun_line != 0xffff && min_scanline <= lightgun_line) min_active = std::min(min_active, lightgun_line); - if (SPG_HBLANK_INT.hblank_int_mode == 0 && min_scanline < SPG_HBLANK_INT.line_comp_val) + if (SPG_HBLANK_INT.hblank_int_mode == 0 && min_scanline <= SPG_HBLANK_INT.line_comp_val) min_active = std::min(min_active, SPG_HBLANK_INT.line_comp_val); min_active = std::max(min_active, min_scanline); @@ -90,18 +92,14 @@ void rescheduleSPG() sh4_sched_request(vblank_schid, getNextSpgInterrupt()); } -//called from sh4 context , should update pvr/ta state and everything else -static int spg_line_sched(int tag, int cycl, int jit) +static int spg_line_sched(int tag, int cycles, int jitter) { - clc_pvr_scanline += cycl; + clc_pvr_scanline += cycles + jitter; - while (clc_pvr_scanline >= Line_Cycles)//60 ~hertz = 200 mhz / 60=3333333.333 cycles per screen refresh + while (clc_pvr_scanline >= Line_Cycles) { - //ok .. 
here , after much effort , we did one line - //now , we must check for raster beam interrupts and vblank - prv_cur_scanline=(prv_cur_scanline+1)%pvr_numscanlines; + prv_cur_scanline = (prv_cur_scanline + 1) % pvr_numscanlines; clc_pvr_scanline -= Line_Cycles; - //Check for scanline interrupts -- really need to test the scanline values if (SPG_VBLANK_INT.vblank_in_interrupt_line_number == prv_cur_scanline) { @@ -120,21 +118,20 @@ static int spg_line_sched(int tag, int cycl, int jit) } if (SPG_VBLANK.vstart == prv_cur_scanline) - in_vblank=1; + SPG_STATUS.vsync = 1; if (SPG_VBLANK.vbend == prv_cur_scanline) - in_vblank=0; + SPG_STATUS.vsync = 0; - SPG_STATUS.vsync=in_vblank; - SPG_STATUS.scanline=prv_cur_scanline; + SPG_STATUS.scanline = prv_cur_scanline; switch (SPG_HBLANK_INT.hblank_int_mode) { - case 0x0: + case 0: if (prv_cur_scanline == SPG_HBLANK_INT.line_comp_val) asic_RaiseInterrupt(holly_HBLank); break; - case 0x2: + case 2: asic_RaiseInterrupt(holly_HBLank); break; default: @@ -142,13 +139,13 @@ static int spg_line_sched(int tag, int cycl, int jit) break; } - //Vblank start - if (prv_cur_scanline==0) + // Vblank + if (prv_cur_scanline == 0) { if (SPG_CONTROL.interlace) - SPG_STATUS.fieldnum=~SPG_STATUS.fieldnum; + SPG_STATUS.fieldnum = ~SPG_STATUS.fieldnum; else - SPG_STATUS.fieldnum=0; + SPG_STATUS.fieldnum = 0; rend_vblank(); @@ -295,7 +292,6 @@ void scheduleRenderDone(TA_context *cntx) void spg_Serialize(Serializer& ser) { - ser << in_vblank; ser << clc_pvr_scanline; ser << maple_int_pending; ser << pvr_numscanlines; @@ -307,7 +303,8 @@ void spg_Serialize(Serializer& ser) } void spg_Deserialize(Deserializer& deser) { - deser >> in_vblank; + if (deser.version() < Deserializer::V30) + deser.skip(); // in_vblank deser >> clc_pvr_scanline; if (deser.version() < Deserializer::V9_LIBRETRO) { diff --git a/tests/src/serialize_test.cpp b/tests/src/serialize_test.cpp index 833fe2690..9d5e893d9 100644 --- a/tests/src/serialize_test.cpp +++ 
b/tests/src/serialize_test.cpp @@ -31,7 +31,7 @@ TEST_F(SerializeTest, SizeTest) std::vector data(30000000); Serializer ser(data.data(), data.size()); dc_serialize(ser); - ASSERT_EQ(28191595u, ser.size()); + ASSERT_EQ(28191591u, ser.size()); } From 32e3e7d9c5088a5b9da99d8db79f17b2ce8cc835 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 9 Dec 2022 17:37:49 +0100 Subject: [PATCH 25/34] vmem: no more 4GB vmem mode. only map elan RAM in naomi2 mode Do not attempt to reserve 4GB of virtual space on 64-bit hosts. Use 512MB everywhere. Don't map elan RAM if not needed and don't protect/unprotect it in memwatch. --- core/emulator.cpp | 2 + core/hw/mem/_vmem.cpp | 194 +++++------------------------- core/hw/mem/_vmem.h | 14 +-- core/hw/mem/mem_watch.cpp | 119 +++--------------- core/hw/pvr/elan.cpp | 13 +- core/hw/pvr/elan.h | 4 +- core/hw/sh4/dyna/blockmanager.cpp | 40 +----- core/hw/sh4/dyna/driver.cpp | 11 +- core/linux/libnx_vmem.cpp | 17 ++- core/linux/posix_vmem.cpp | 27 ++--- core/rec-ARM64/rec_arm64.cpp | 28 ++--- core/rec-x64/rec_x64.cpp | 27 ++--- core/windows/win_vmem.cpp | 7 +- 13 files changed, 99 insertions(+), 404 deletions(-) diff --git a/core/emulator.cpp b/core/emulator.cpp index e46ec6b1b..3f0aa823d 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -373,6 +373,7 @@ static void setPlatform(int platform) { if (VRAM_SIZE != 0) _vmem_unprotect_vram(0, VRAM_SIZE); + elan::ERAM_SIZE = 0; switch (platform) { case DC_PLATFORM_DREAMCAST: @@ -395,6 +396,7 @@ static void setPlatform(int platform) settings.platform.aram_size = 8 * 1024 * 1024; settings.platform.bios_size = 2 * 1024 * 1024; settings.platform.flash_size = 32 * 1024; // battery-backed ram + elan::ERAM_SIZE = 32 * 1024 * 1024; break; case DC_PLATFORM_ATOMISWAVE: settings.platform.ram_size = 16 * 1024 * 1024; diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp index c5da88c0c..c8153f5ff 100644 --- a/core/hw/mem/_vmem.cpp +++ b/core/hw/mem/_vmem.cpp @@ -361,8 +361,7 @@ void _vmem_term() } 
u8* virt_ram_base; -bool vmem_4gb_space; -static VMemType vmemstatus = MemTypeError; +static bool vmemAvailable = false; static void *malloc_pages(size_t size) { @@ -410,45 +409,24 @@ bool BM_LockedWrite(u8* address) { } #endif -static void _vmem_set_p0_mappings() -{ - const vmem_mapping mem_mappings[] = { - // P0/U0 - {0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused - {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica - {0x01000000, 0x02800000, 0, 0, false}, // unused - {0x02800000, 0x03000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica mirror - {0x03000000, 0x04000000, 0, 0, false}, // unused - {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) - {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) - {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror - {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror - {0x08000000, 0x0A000000, 0, 0, false}, // Area 2 - {0x0A000000, 0x0C000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) - {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) - {0x10000000, 0x80000000, 0, 0, false}, // Area 4-7 (unused) - }; - vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings)); -} - bool _vmem_reserve() { static_assert((sizeof(Sh4RCB) % PAGE_SIZE) == 0, "sizeof(Sh4RCB) not multiple of PAGE_SIZE"); - if (vmemstatus != MemTypeError) + if (vmemAvailable) return true; // Use vmem only if settings mandate so, and if we have proper exception handlers. 
#if !defined(TARGET_NO_EXCEPTIONS) if (!settings.dynarec.disable_nvmem) - vmemstatus = vmem_platform_init((void**)&virt_ram_base, (void**)&p_sh4rcb, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX + elan::ELAN_RAM_SIZE); + vmemAvailable = vmem_platform_init((void**)&virt_ram_base, (void**)&p_sh4rcb, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX + elan::ERAM_SIZE_MAX); #endif return true; } static void _vmem_term_mappings() { - if (vmemstatus == MemTypeError) + if (!vmemAvailable) { free_pages(p_sh4rcb); p_sh4rcb = nullptr; @@ -467,7 +445,7 @@ void _vmem_init_mappings() { _vmem_term_mappings(); // Fallback to statically allocated buffers, this results in slow-ops being generated. - if (vmemstatus == MemTypeError) + if (!vmemAvailable) { WARN_LOG(VMEM, "Warning! nvmem is DISABLED (due to failure or not being built-in"); virt_ram_base = nullptr; @@ -488,94 +466,34 @@ void _vmem_init_mappings() aica_ram.size = ARAM_SIZE; aica_ram.data = (u8*)malloc_pages(ARAM_SIZE); - elan::RAM = (u8*)malloc_pages(elan::ELAN_RAM_SIZE); + elan::RAM = (u8*)malloc_pages(elan::ERAM_SIZE); } else { - NOTICE_LOG(VMEM, "Info: nvmem is enabled, with addr space of size %s", vmemstatus == MemType4GB ? "4GB" : "512MB"); + NOTICE_LOG(VMEM, "Info: nvmem is enabled"); INFO_LOG(VMEM, "Info: p_sh4rcb: %p virt_ram_base: %p", p_sh4rcb, virt_ram_base); // Map the different parts of the memory file into the new memory range we got. 
- if (vmemstatus == MemType512MB) - { - const vmem_mapping mem_mappings[] = { - {0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused - {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica - {0x01000000, 0x04000000, 0, 0, false}, // More unused - {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) - {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) - {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror - {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror - {0x08000000, 0x0A000000, 0, 0, false}, // Area 2 - {0x0A000000, 0x0C000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) - {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) - {0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused) - // This is outside of the 512MB addr space. We map 8MB in all cases to help some games read past the end of aica ram - {0x20000000, 0x20800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // writable aica ram - }; - vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings)); + const vmem_mapping mem_mappings[] = { + {0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused + {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica + {0x01000000, 0x04000000, 0, 0, false}, // More unused + {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) + {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) + {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror + {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror + {0x08000000, 0x0A000000, 0, 0, false}, // Area 2 + {0x0A000000, 0x0C000000, MAP_ERAM_START_OFFSET, elan::ERAM_SIZE, true}, // Area 2 (Elan RAM) + {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 
(main RAM + 3 mirrors) + {0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused) + // This is outside of the 512MB addr space. We map 8MB in all cases to help some games read past the end of aica ram + {0x20000000, 0x20800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // writable aica ram + }; + vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings)); - // Point buffers to actual data pointers - aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writable AICA addrspace - vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writable and lockable) - mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror - elan::RAM = &virt_ram_base[0x0A000000]; - } - else - { - _vmem_set_p0_mappings(); - const vmem_mapping mem_mappings[] = { - // P1 - {0x80000000, 0x80800000, 0, 0, false}, // Area 0 -> unused - {0x80800000, 0x81000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica - {0x81000000, 0x82800000, 0, 0, false}, // unused - {0x82800000, 0x83000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica mirror - {0x83000000, 0x84000000, 0, 0, false}, // unused - {0x84000000, 0x85000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) - {0x85000000, 0x86000000, 0, 0, false}, // 32 bit path (unused) - {0x86000000, 0x87000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror - {0x87000000, 0x88000000, 0, 0, false}, // 32 bit path (unused) mirror - {0x88000000, 0x8A000000, 0, 0, false}, // Area 2 - {0x8A000000, 0x8C000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) - {0x8C000000, 0x90000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) - {0x90000000, 0xA0000000, 0, 0, false}, // Area 4-7 (unused) - // P2 - {0xA0000000, 0xA0800000, 0, 0, false}, // Area 0 -> unused - {0xA0800000, 0xA1000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica - {0xA1000000, 0xA2800000, 0, 0, false}, // unused - {0xA2800000, 0xA3000000, 
MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica mirror - {0xA3000000, 0xA4000000, 0, 0, false}, // unused - {0xA4000000, 0xA5000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) - {0xA5000000, 0xA6000000, 0, 0, false}, // 32 bit path (unused) - {0xA6000000, 0xA7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror - {0xA7000000, 0xA8000000, 0, 0, false}, // 32 bit path (unused) mirror - {0xA8000000, 0xAA000000, 0, 0, false}, // Area 2 - {0xAA000000, 0xAC000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) - {0xAC000000, 0xB0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) - {0xB0000000, 0xC0000000, 0, 0, false}, // Area 4-7 (unused) - // P3 - {0xC0000000, 0xC0800000, 0, 0, false}, // Area 0 -> unused - {0xC0800000, 0xC1000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica - {0xC1000000, 0xC2800000, 0, 0, false}, // unused - {0xC2800000, 0xC3000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica mirror - {0xC3000000, 0xC4000000, 0, 0, false}, // unused - {0xC4000000, 0xC5000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) - {0xC5000000, 0xC6000000, 0, 0, false}, // 32 bit path (unused) - {0xC6000000, 0xC7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror - {0xC7000000, 0xC8000000, 0, 0, false}, // 32 bit path (unused) mirror - {0xC8000000, 0xCA000000, 0, 0, false}, // Area 2 - {0xCA000000, 0xCC000000, MAP_ERAM_START_OFFSET, elan::ELAN_RAM_SIZE, true}, // Area 2 (Elan RAM) - {0xCC000000, 0xD0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) - {0xD0000000, 0x100000000L, 0, 0, false}, // Area 4-7 (unused) - }; - vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings)); - - // Point buffers to actual data pointers - aica_ram.data = &virt_ram_base[0x80800000]; // Points to the first AICA addrspace in P1 - vram.data = &virt_ram_base[0x84000000]; // Points 
to first vram mirror (writable and lockable) in P1 - mem_b.data = &virt_ram_base[0x8C000000]; // Main memory, first mirror in P1 - elan::RAM = &virt_ram_base[0x8A000000]; - - vmem_4gb_space = true; - } + // Point buffers to actual data pointers + aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writable AICA addrspace + vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writable and lockable) + mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror + elan::RAM = &virt_ram_base[0x0A000000]; aica_ram.size = ARAM_SIZE; vram.size = VRAM_SIZE; @@ -605,7 +523,7 @@ void _vmem_release() _vmem_unprotect_vram(0, VRAM_SIZE); _vmem_term_mappings(); } - vmemstatus = MemTypeError; + vmemAvailable = false; } void _vmem_protect_vram(u32 addr, u32 size) @@ -621,25 +539,6 @@ void _vmem_protect_vram(u32 addr, u32 size) mem_region_lock(virt_ram_base + 0x04000000 + addr + VRAM_SIZE, size); // P0 wrap //mem_region_lock(virt_ram_base + 0x06000000 + addr + VRAM_SIZE, size); // P0 mirror wrap } - if (_nvmem_4gb_space()) - { - mem_region_lock(virt_ram_base + 0x84000000 + addr, size); // P1 - //mem_region_lock(virt_ram_base + 0x86000000 + addr, size); // P1 - mirror - mem_region_lock(virt_ram_base + 0xA4000000 + addr, size); // P2 - //mem_region_lock(virt_ram_base + 0xA6000000 + addr, size); // P2 - mirror - // We should also lock P3 and its mirrors, but it doesn't seem to be used... 
- //mem_region_lock(virt_ram_base + 0xC4000000 + addr, size); // P3 - //mem_region_lock(virt_ram_base + 0xC6000000 + addr, size); // P3 - mirror - if (VRAM_SIZE == 0x800000) - { - mem_region_lock(virt_ram_base + 0x84000000 + addr + VRAM_SIZE, size); // P1 wrap - //mem_region_lock(virt_ram_base + 0x86000000 + addr + VRAM_SIZE, size); // P1 - mirror wrap - mem_region_lock(virt_ram_base + 0xA4000000 + addr + VRAM_SIZE, size); // P2 wrap - //mem_region_lock(virt_ram_base + 0xA6000000 + addr + VRAM_SIZE, size); // P2 - mirror wrap - //mem_region_lock(virt_ram_base + 0xC4000000 + addr + VRAM_SIZE, size); // P3 wrap - //mem_region_lock(virt_ram_base + 0xC6000000 + addr + VRAM_SIZE, size); // P3 - mirror wrap - } - } } else { @@ -660,25 +559,6 @@ void _vmem_unprotect_vram(u32 addr, u32 size) mem_region_unlock(virt_ram_base + 0x04000000 + addr + VRAM_SIZE, size); // P0 wrap //mem_region_unlock(virt_ram_base + 0x06000000 + addr + VRAM_SIZE, size); // P0 mirror wrap } - if (_nvmem_4gb_space()) - { - mem_region_unlock(virt_ram_base + 0x84000000 + addr, size); // P1 - //mem_region_unlock(virt_ram_base + 0x86000000 + addr, size); // P1 - mirror - mem_region_unlock(virt_ram_base + 0xA4000000 + addr, size); // P2 - //mem_region_unlock(virt_ram_base + 0xA6000000 + addr, size); // P2 - mirror - // We should also lock P3 and its mirrors, but it doesn't seem to be used... 
- //mem_region_unlock(virt_ram_base + 0xC4000000 + addr, size); // P3 - //mem_region_unlock(virt_ram_base + 0xC6000000 + addr, size); // P3 - mirror - if (VRAM_SIZE == 0x800000) - { - mem_region_unlock(virt_ram_base + 0x84000000 + addr + VRAM_SIZE, size); // P1 wrap - //mem_region_unlock(virt_ram_base + 0x86000000 + addr + VRAM_SIZE, size); // P1 - mirror wrap - mem_region_unlock(virt_ram_base + 0xA4000000 + addr + VRAM_SIZE, size); // P2 wrap - //mem_region_unlock(virt_ram_base + 0xA6000000 + addr + VRAM_SIZE, size); // P2 - mirror wrap - //mem_region_unlock(virt_ram_base + 0xC4000000 + addr + VRAM_SIZE, size); // P3 wrap - //mem_region_unlock(virt_ram_base + 0xC6000000 + addr + VRAM_SIZE, size); // P3 - mirror wrap - } - } } else { @@ -691,21 +571,9 @@ u32 _vmem_get_vram_offset(void *addr) if (_nvmem_enabled()) { ptrdiff_t offset = (u8*)addr - virt_ram_base; - if (_nvmem_4gb_space()) - { - if (offset < 0 || offset >= 0xE0000000) - return -1; - offset &= 0x1FFFFFFF; - } - else - { - if (offset < 0 || offset >= 0x20000000) - return -1; - } - if ((offset >> 24) != 4) + if (offset < 0 || offset >= 0x20000000) return -1; - if ((((u8*)addr - virt_ram_base) >> 29) != 0 && (((u8*)addr - virt_ram_base) >> 29) != 4 && (((u8*)addr - virt_ram_base) >> 29) != 5) - // other areas aren't mapped atm + if ((offset >> 24) != 4) return -1; return offset & VRAM_MASK; diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h index bccb03f9f..996710cdc 100644 --- a/core/hw/mem/_vmem.h +++ b/core/hw/mem/_vmem.h @@ -1,12 +1,6 @@ #pragma once #include "types.h" -enum VMemType { - MemType4GB, - MemType512MB, - MemTypeError -}; - struct vmem_mapping { u64 start_address, end_address; u64 memoffset, memsize; @@ -15,7 +9,7 @@ struct vmem_mapping { // Platform specific vmemory API // To initialize (maybe) the vmem subsystem -VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize); +bool vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize); 
// To reset the on-demand allocated pages. void vmem_platform_reset_mem(void *ptr, unsigned size_bytes); // To handle a fault&allocate an ondemand page. @@ -36,7 +30,7 @@ void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcach void vmem_platform_jit_set_exec(void* code, size_t size, bool enable); // Note: if you want to disable vmem magic in any given platform, implement the -// above functions as empty functions and make vmem_platform_init return MemTypeError. +// above functions as empty functions and make vmem_platform_init return false. //Typedef's //ReadMem @@ -99,14 +93,10 @@ void* _vmem_read_const(u32 addr,bool& ismem,u32 sz); void* _vmem_write_const(u32 addr,bool& ismem,u32 sz); extern u8* virt_ram_base; -extern bool vmem_4gb_space; static inline bool _nvmem_enabled() { return virt_ram_base != 0; } -static inline bool _nvmem_4gb_space() { - return vmem_4gb_space; -} void _vmem_bm_reset(); void _vmem_protect_vram(u32 addr, u32 size); diff --git a/core/hw/mem/mem_watch.cpp b/core/hw/mem/mem_watch.cpp index d76396a78..06c732740 100644 --- a/core/hw/mem/mem_watch.cpp +++ b/core/hw/mem/mem_watch.cpp @@ -29,98 +29,30 @@ ElanRamWatcher elanWatcher; void AicaRamWatcher::protectMem(u32 addr, u32 size) { size = std::min(ARAM_SIZE - addr, size) & ~PAGE_MASK; - if (_nvmem_enabled() && _nvmem_4gb_space()) { - mem_region_lock(virt_ram_base + 0x00800000 + addr, size); // P0 - mem_region_lock(virt_ram_base + 0x02800000 + addr, size);// P0 - mirror - mem_region_lock(virt_ram_base + 0x80800000 + addr, size); // P1 - //mem_region_lock(virt_ram_base + 0x82800000 + addr, size); // P1 - mirror - mem_region_lock(virt_ram_base + 0xA0800000 + addr, size); // P2 - //mem_region_lock(virt_ram_base + 0xA2800000 + addr, size); // P2 - mirror - if (ARAM_SIZE == 2 * 1024 * 1024) { - mem_region_lock(virt_ram_base + 0x00A00000 + addr, size); // P0 - mem_region_lock(virt_ram_base + 0x00C00000 + addr, size); // P0 - mem_region_lock(virt_ram_base + 0x00E00000 + addr, 
size); // P0 - mem_region_lock(virt_ram_base + 0x02A00000 + addr, size);// P0 - mirror - mem_region_lock(virt_ram_base + 0x02C00000 + addr, size);// P0 - mirror - mem_region_lock(virt_ram_base + 0x02E00000 + addr, size);// P0 - mirror - mem_region_lock(virt_ram_base + 0x80A00000 + addr, size); // P1 - mem_region_lock(virt_ram_base + 0x80C00000 + addr, size); // P1 - mem_region_lock(virt_ram_base + 0x80E00000 + addr, size); // P1 - mem_region_lock(virt_ram_base + 0xA0A00000 + addr, size); // P2 - mem_region_lock(virt_ram_base + 0xA0C00000 + addr, size); // P2 - mem_region_lock(virt_ram_base + 0xA0E00000 + addr, size); // P2 - } - } else { - mem_region_lock(aica_ram.data + addr, - std::min(aica_ram.size - addr, size)); - } + mem_region_lock(aica_ram.data + addr, + std::min(aica_ram.size - addr, size)); } void AicaRamWatcher::unprotectMem(u32 addr, u32 size) { size = std::min(ARAM_SIZE - addr, size) & ~PAGE_MASK; - if (_nvmem_enabled() && _nvmem_4gb_space()) { - mem_region_unlock(virt_ram_base + 0x00800000 + addr, size); // P0 - mem_region_unlock(virt_ram_base + 0x02800000 + addr, size); // P0 - mirror - mem_region_unlock(virt_ram_base + 0x80800000 + addr, size); // P1 - //mem_region_unlock(virt_ram_base + 0x82800000 + addr, size); // P1 - mirror - mem_region_unlock(virt_ram_base + 0xA0800000 + addr, size); // P2 - //mem_region_unlock(virt_ram_base + 0xA2800000 + addr, size); // P2 - mirror - if (ARAM_SIZE == 2 * 1024 * 1024) { - mem_region_unlock(virt_ram_base + 0x00A00000 + addr, size); // P0 - mem_region_unlock(virt_ram_base + 0x00C00000 + addr, size); // P0 - mem_region_unlock(virt_ram_base + 0x00E00000 + addr, size); // P0 - mem_region_unlock(virt_ram_base + 0x02A00000 + addr, size); // P0 - mirror - mem_region_unlock(virt_ram_base + 0x02C00000 + addr, size); // P0 - mirror - mem_region_unlock(virt_ram_base + 0x02E00000 + addr, size); // P0 - mirror - mem_region_unlock(virt_ram_base + 0x80A00000 + addr, size); // P1 - mem_region_unlock(virt_ram_base + 0x80C00000 
+ addr, size); // P1 - mem_region_unlock(virt_ram_base + 0x80E00000 + addr, size); // P1 - mem_region_unlock(virt_ram_base + 0xA0A00000 + addr, size); // P2 - mem_region_unlock(virt_ram_base + 0xA0C00000 + addr, size); // P2 - mem_region_unlock(virt_ram_base + 0xA0E00000 + addr, size); // P2 - } - } else { - mem_region_unlock(aica_ram.data + addr, - std::min(aica_ram.size - addr, size)); - } + mem_region_unlock(aica_ram.data + addr, + std::min(aica_ram.size - addr, size)); } u32 AicaRamWatcher::getMemOffset(void *p) { - u32 addr; - if (_nvmem_enabled() && _nvmem_4gb_space()) { - if ((u8*) p < virt_ram_base || (u8*) p >= virt_ram_base + 0x100000000L) - return -1; - addr = (u32) ((u8*) p - virt_ram_base); - u32 area = (addr >> 29) & 7; - if (area != 0 && area != 4 && area != 5) - return -1; - addr &= 0x1fffffff & ~0x02000000; - if (addr < 0x00800000 || addr >= 0x01000000) - return -1; - addr &= ARAM_MASK; - } else { - if ((u8*) p < &aica_ram[0] || (u8*) p >= &aica_ram[ARAM_SIZE]) - return -1; - addr = (u32) ((u8*) p - &aica_ram[0]); - } - return addr; + if ((u8 *)p < &aica_ram[0] || (u8 *)p >= &aica_ram[ARAM_SIZE]) + return -1; + return (u32)((u8 *)p - &aica_ram[0]); } void ElanRamWatcher::protectMem(u32 addr, u32 size) { using namespace elan; - size = std::min(ELAN_RAM_SIZE - addr, size) & ~PAGE_MASK; - if (_nvmem_enabled()) + if (ERAM_SIZE != 0) { - mem_region_lock(virt_ram_base + 0x0a000000 + addr, size); // P0 - if (_nvmem_4gb_space()) - { - mem_region_lock(virt_ram_base + 0x8a000000 + addr, size); // P1 - mem_region_lock(virt_ram_base + 0xaa000000 + addr, size); // P2 - } - } else { + size = std::min(ERAM_SIZE - addr, size) & ~PAGE_MASK; mem_region_lock(RAM + addr, size); } } @@ -128,16 +60,9 @@ void ElanRamWatcher::protectMem(u32 addr, u32 size) void ElanRamWatcher::unprotectMem(u32 addr, u32 size) { using namespace elan; - size = std::min(ELAN_RAM_SIZE - addr, size) & ~PAGE_MASK; - if (_nvmem_enabled()) + if (ERAM_SIZE != 0) { - mem_region_unlock(virt_ram_base 
+ 0x0a000000 + addr, size); // P0 - if (_nvmem_4gb_space()) - { - mem_region_unlock(virt_ram_base + 0x8a000000 + addr, size); // P1 - mem_region_unlock(virt_ram_base + 0xaa000000 + addr, size); // P2 - } - } else { + size = std::min(ERAM_SIZE - addr, size) & ~PAGE_MASK; mem_region_unlock(RAM + addr, size); } } @@ -145,25 +70,9 @@ void ElanRamWatcher::unprotectMem(u32 addr, u32 size) u32 ElanRamWatcher::getMemOffset(void *p) { using namespace elan; - u32 addr; - if (_nvmem_enabled()) - { - if ((u8 *)p < virt_ram_base || (u8 *)p >= virt_ram_base + 0x100000000L) - return -1; - addr = (u32)((u8 *)p - virt_ram_base); - u32 area = (addr >> 29) & 7; - if (area != 0 && area != 4 && area != 5) // P0, P1 or P2 only - return -1; - addr &= 0x1fffffff; - if (addr < 0x0a000000 || addr >= 0x0a000000 + ELAN_RAM_SIZE) - return -1; - addr &= ELAN_RAM_MASK; - } else { - if ((u8 *)p < RAM || (u8 *)p >= &RAM[ELAN_RAM_SIZE]) - return -1; - addr = (u32)((u8 *)p - RAM); - } - return addr; + if ((u8 *)p < RAM || (u8 *)p >= &RAM[ERAM_SIZE]) + return -1; + return (u32)((u8 *)p - RAM); } } diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index 83596c4f9..038896dc2 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -69,11 +69,14 @@ namespace elan { +constexpr u32 ELAN_RAM_MASK = ERAM_SIZE_MAX - 1; + static _vmem_handler elanRegHandler; static _vmem_handler elanCmdHandler; static _vmem_handler elanRamHandler; u8 *RAM; +u32 ERAM_SIZE; static u32 reg10; static u32 reg74; @@ -478,7 +481,7 @@ struct State static u32 elanRamAddress(void *p) { - if ((u8 *)p < RAM || (u8 *)p >= RAM + ELAN_RAM_SIZE) + if ((u8 *)p < RAM || (u8 *)p >= RAM + ERAM_SIZE) return Null; else return (u32)((u8 *)p - RAM); @@ -1441,7 +1444,7 @@ template static void executeCommand(u8 *data, int size) { // verify(size >= 0); -// verify(size < (int)ELAN_RAM_SIZE); +// verify(size < (int)ERAM_SIZE); // if (0x2b00 == (u32)(data - RAM)) // for (int i = 0; i < size; i += 4) // DEBUG_LOG(PVR, "Elan Parse %08x: %08x", 
(u32)(&data[i] - RAM), *(u32 *)&data[i]); @@ -1748,7 +1751,7 @@ void reset(bool hard) { if (hard) { - memset(RAM, 0, ELAN_RAM_SIZE); + memset(RAM, 0, ERAM_SIZE); state.reset(); } } @@ -1780,7 +1783,7 @@ void serialize(Serializer& ser) ser << reg74; ser << elanCmd; if (!ser.rollback()) - ser.serialize(RAM, ELAN_RAM_SIZE); + ser.serialize(RAM, ERAM_SIZE); state.serialize(ser); } @@ -1792,7 +1795,7 @@ void deserialize(Deserializer& deser) deser >> reg74; deser >> elanCmd; if (!deser.rollback()) - deser.deserialize(RAM, ELAN_RAM_SIZE); + deser.deserialize(RAM, ERAM_SIZE); state.deserialize(deser); } diff --git a/core/hw/pvr/elan.h b/core/hw/pvr/elan.h index 3c28b3287..b1dea22f6 100644 --- a/core/hw/pvr/elan.h +++ b/core/hw/pvr/elan.h @@ -32,6 +32,6 @@ void serialize(Serializer& ser); void deserialize(Deserializer& deser); extern u8 *RAM; -constexpr u32 ELAN_RAM_SIZE = 32 * 1024 * 1024; -constexpr u32 ELAN_RAM_MASK = ELAN_RAM_SIZE - 1; +extern u32 ERAM_SIZE; +constexpr u32 ERAM_SIZE_MAX = 32 * 1024 * 1024; } diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp index e88fbaa05..d9f9779c1 100644 --- a/core/hw/sh4/dyna/blockmanager.cpp +++ b/core/hw/sh4/dyna/blockmanager.cpp @@ -235,7 +235,6 @@ void bm_Reset() { // Windows cannot lock/unlock a region spanning more than one VirtualAlloc or MapViewOfFile // so we have to unlock each region individually - // No need for this mess in 4GB mode since windows doesn't use it if (settings.platform.ram_size == 16 * 1024 * 1024) { mem_region_unlock(virt_ram_base + 0x0C000000, RAM_SIZE); @@ -248,11 +247,6 @@ void bm_Reset() mem_region_unlock(virt_ram_base + 0x0C000000, RAM_SIZE); mem_region_unlock(virt_ram_base + 0x0E000000, RAM_SIZE); } - if (_nvmem_4gb_space()) - { - mem_region_unlock(virt_ram_base + 0x8C000000u, 0x90000000u - 0x8C000000u); - mem_region_unlock(virt_ram_base + 0xAC000000u, 0xB0000000u - 0xAC000000u); - } } else { @@ -264,38 +258,18 @@ void bm_LockPage(u32 addr, u32 size) { addr = addr & 
(RAM_MASK - PAGE_MASK); if (_nvmem_enabled()) - { mem_region_lock(virt_ram_base + 0x0C000000 + addr, size); - if (_nvmem_4gb_space()) - { - mem_region_lock(virt_ram_base + 0x8C000000 + addr, size); - mem_region_lock(virt_ram_base + 0xAC000000 + addr, size); - // TODO wraps - } - } else - { mem_region_lock(&mem_b[addr], size); - } } void bm_UnlockPage(u32 addr, u32 size) { addr = addr & (RAM_MASK - PAGE_MASK); if (_nvmem_enabled()) - { mem_region_unlock(virt_ram_base + 0x0C000000 + addr, size); - if (_nvmem_4gb_space()) - { - mem_region_unlock(virt_ram_base + 0x8C000000 + addr, size); - mem_region_unlock(virt_ram_base + 0xAC000000 + addr, size); - // TODO wraps - } - } else - { mem_region_unlock(&mem_b[addr], size); - } } void bm_ResetCache() @@ -618,18 +592,10 @@ u32 bm_getRamOffset(void *p) { if (_nvmem_enabled()) { - if (_nvmem_4gb_space()) - { - if ((u8 *)p < virt_ram_base || (u8 *)p >= virt_ram_base + 0x100000000L) - return -1; - } - else - { - if ((u8 *)p < virt_ram_base || (u8 *)p >= virt_ram_base + 0x20000000) - return -1; - } + if ((u8 *)p < virt_ram_base || (u8 *)p >= virt_ram_base + 0x20000000) + return -1; u32 addr = (u8*)p - virt_ram_base; - if (!IsOnRam(addr) || ((addr >> 29) > 0 && (addr >> 29) < 4)) // system RAM is not mapped to 20, 40 and 60 because of laziness + if (!IsOnRam(addr)) return -1; return addr & RAM_MASK; } diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index 0d67bf1e3..b78c9dda3 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -385,16 +385,7 @@ static void recSh4_Init() if (_nvmem_enabled()) - { - if (!_nvmem_4gb_space()) - { - verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000)); - } - else - { - verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x8C000000)); - } - } + verify(mem_b.data == ((u8*)p_sh4rcb->sq_buffer + 512 + 0x0C000000)); // Prepare some pointer to the pre-allocated code cache: void *candidate_ptr = (void*)(((unat)SH4_TCB + 4095) & ~4095); diff --git 
a/core/linux/libnx_vmem.cpp b/core/linux/libnx_vmem.cpp index 1545f2265..45af8458e 100644 --- a/core/linux/libnx_vmem.cpp +++ b/core/linux/libnx_vmem.cpp @@ -118,19 +118,19 @@ static mem_handle_t allocate_shared_filemem(unsigned size) */ // Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc. -// The function supports allocating 512MB or 4GB addr spaces. -// vmem_base_addr points to an address space of 512MB (or 4GB) that can be used for fast memory ops. + +// vmem_base_addr points to an address space of 512MB that can be used for fast memory ops. // In negative offsets of the pointer (up to FPCB size, usually 65/129MB) the context and jump table // can be found. If the platform init returns error, the user is responsible for initializing the // memory using a fallback (that is, regular mallocs and falling back to slow memory JIT). -VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) +bool vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) { - return MemTypeError; + return false; #if 0 const unsigned size_aligned = ((RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX + PAGE_SIZE) & (~(PAGE_SIZE-1))); vmem_fd_page = allocate_shared_filemem(size_aligned); if (vmem_fd_page < 0) - return MemTypeError; + return false; vmem_fd_codememory = (uintptr_t)virtmemReserve(size_aligned); @@ -141,15 +141,12 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ra WARN_LOG(VMEM, "Failed to set perms (platform_int)..."); // Now try to allocate a contiguous piece of memory. 
- VMemType rv; if (reserved_base == NULL) { reserved_size = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000; reserved_base = mem_region_reserve(NULL, reserved_size); if (!reserved_base) - return MemTypeError; - - rv = MemType512MB; + return false; } *sh4rcb_addr = reserved_base; @@ -160,7 +157,7 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ra // Now map the memory for the SH4 context, do not include FPCB on purpose (paged on demand). mem_region_unlock(sh4rcb_base_ptr, sizeof(Sh4RCB) - fpcb_size); - return rv; + return true; #endif } diff --git a/core/linux/posix_vmem.cpp b/core/linux/posix_vmem.cpp index a2ab3c718..3f95379f7 100644 --- a/core/linux/posix_vmem.cpp +++ b/core/linux/posix_vmem.cpp @@ -148,39 +148,28 @@ static int allocate_shared_filemem(unsigned size) { } // Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc. -// The function supports allocating 512MB or 4GB addr spaces. int vmem_fd = -1; static int shmem_fd2 = -1; static void *reserved_base; static size_t reserved_size; -// vmem_base_addr points to an address space of 512MB (or 4GB) that can be used for fast memory ops. +// vmem_base_addr points to an address space of 512MB that can be used for fast memory ops. // In negative offsets of the pointer (up to FPCB size, usually 65/129MB) the context and jump table // can be found. If the platform init returns error, the user is responsible for initializing the // memory using a fallback (that is, regular mallocs and falling back to slow memory JIT). -VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) { +bool vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) { // Firt let's try to allocate the shm-backed memory vmem_fd = allocate_shared_filemem(ramSize); if (vmem_fd < 0) - return MemTypeError; + return false; // Now try to allocate a contiguous piece of memory. 
- VMemType rv; -#if HOST_CPU == CPU_X64 || HOST_CPU == CPU_ARM64 - reserved_size = 0x100000000L + sizeof(Sh4RCB) + 0x10000; // 4GB + context size + 64K padding + reserved_size = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000; reserved_base = mem_region_reserve(NULL, reserved_size); - rv = MemType4GB; -#endif - if (reserved_base == NULL) - { - reserved_size = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000; - reserved_base = mem_region_reserve(NULL, reserved_size); - if (!reserved_base) { - close(vmem_fd); - return MemTypeError; - } - rv = MemType512MB; + if (!reserved_base) { + close(vmem_fd); + return false; } // Align pointer to 64KB too, some Linaro bug (no idea but let's just be safe I guess). @@ -194,7 +183,7 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ra // Now map the memory for the SH4 context, do not include FPCB on purpose (paged on demand). mem_region_unlock(sh4rcb_base_ptr, sizeof(Sh4RCB) - fpcb_size); - return rv; + return true; } // Just tries to wipe as much as possible in the relevant area. 
diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp index f96a506ed..acd75f74e 100644 --- a/core/rec-ARM64/rec_arm64.cpp +++ b/core/rec-ARM64/rec_arm64.cpp @@ -1825,17 +1825,10 @@ private: Instruction *start_instruction = GetCursorAddress(); - // WARNING: the rewrite code relies on having 1 or 2 ops before the memory access + // WARNING: the rewrite code relies on having 2 ops before the memory access // Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code - if (!_nvmem_4gb_space()) - { - Ubfx(x1, x0, 0, 29); - Add(x1, x1, sizeof(Sh4Context), LeaveFlags); - } - else - { - Add(x1, x0, sizeof(Sh4Context), LeaveFlags); - } + Ubfx(x1, x0, 0, 29); + Add(x1, x1, sizeof(Sh4Context), LeaveFlags); u32 size = op.flags & 0x7f; switch(size) @@ -1998,17 +1991,10 @@ private: Instruction *start_instruction = GetCursorAddress(); - // WARNING: the rewrite code relies on having 1 or 2 ops before the memory access + // WARNING: the rewrite code relies on having 2 ops before the memory access // Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code - if (!_nvmem_4gb_space()) - { - Ubfx(x7, x0, 0, 29); - Add(x7, x7, sizeof(Sh4Context), LeaveFlags); - } - else - { - Add(x7, x0, sizeof(Sh4Context), LeaveFlags); - } + Ubfx(x7, x0, 0, 29); + Add(x7, x7, sizeof(Sh4Context), LeaveFlags); u32 size = op.flags & 0x7f; switch(size) @@ -2279,7 +2265,7 @@ bool ngen_Rewrite(host_context_t &context, void *faultAddress) verify(found); // Skip the preceding ops (add, ubfx) - u32 *code_rewrite = code_ptr - 1 - (!_nvmem_4gb_space() ? 1 : 0); + u32 *code_rewrite = code_ptr - 2; Arm64Assembler *assembler = new Arm64Assembler(code_rewrite); if (is_read) assembler->GenReadMemorySlow(size); diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp index 00c3dc789..7ba4dfce8 100644 --- a/core/rec-x64/rec_x64.cpp +++ b/core/rec-x64/rec_x64.cpp @@ -741,13 +741,7 @@ public: //found ! 
const u8 *start = getCurr(); - u32 memAddress = _nvmem_4gb_space() ? -#ifdef _WIN32 - context.rcx -#else - context.rdi -#endif - : context.r9; + u32 memAddress = context.r9; if (op == MemOp::W && size >= MemSize::S32 && (memAddress >> 26) == 0x38) call(MemHandlers[MemType::StoreQueue][size][MemOp::W]); else @@ -759,12 +753,11 @@ public: context.pc = (uintptr_t)(retAddr - 5); // remove the call from the stack context.rsp += 8; - if (!_nvmem_4gb_space()) - //restore the addr from r9 to arg0 (rcx or rdi) so it's valid again + //restore the addr from r9 to arg0 (rcx or rdi) so it's valid again #ifdef _WIN32 - context.rcx = memAddress; + context.rcx = memAddress; #else - context.rdi = memAddress; + context.rdi = memAddress; #endif return true; @@ -781,6 +774,7 @@ private: { if (mmu_enabled()) { +#ifdef FAST_MMU Xbyak::Label inCache; Xbyak::Label done; @@ -797,15 +791,18 @@ private: } test(eax, eax); jne(inCache); +#endif mov(call_regs[1], write); mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc GenCall(mmuDynarecLookup); mov(call_regs[0], eax); +#ifdef FAST_MMU jmp(done); L(inCache); and_(call_regs[0], 0xFFF); or_(call_regs[0], eax); L(done); +#endif } } bool GenReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block) @@ -1125,11 +1122,9 @@ private: if (type == MemType::Fast && _nvmem_enabled()) { mov(rax, (uintptr_t)virt_ram_base); - if (!_nvmem_4gb_space()) - { - mov(r9, call_regs64[0]); - and_(call_regs[0], 0x1FFFFFFF); - } + mov(r9, call_regs64[0]); + and_(call_regs[0], 0x1FFFFFFF); + switch (size) { case MemSize::S8: diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp index 5fe995a25..dfc953842 100644 --- a/core/windows/win_vmem.cpp +++ b/core/windows/win_vmem.cpp @@ -45,14 +45,13 @@ static std::vector unmapped_regions; static std::vector mapped_regions; // Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc. -// The function supports allocating 512MB or 4GB addr spaces. 
// Please read the POSIX implementation for more information. On Windows this is // rather straightforward. -VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) +bool vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSize) { #ifdef TARGET_UWP - return MemTypeError; + return false; #endif unmapped_regions.reserve(32); mapped_regions.reserve(32); @@ -81,7 +80,7 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ra verify(ptr == *vmem_base_addr); unmapped_regions.push_back(ptr); - return MemType512MB; + return true; } // Just tries to wipe as much as possible in the relevant area. From 1ab4eb00c06545e0e466de7686fcae638b82888e Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 9 Dec 2022 17:49:32 +0100 Subject: [PATCH 26/34] tex cache: minor key mask fix. egl: no need for depth/stencil surface Part of PalSelect was used as cache key for palette textures PrintTexture fix egl,wgl,xgl: No need for depth/stencil surface gl: non-functional refactoring --- core/rend/TexCache.cpp | 10 +-- core/rend/TexCache.h | 21 +++-- core/rend/gl4/gles.cpp | 157 +++++++++++++++++++------------------- core/rend/gles/gldraw.cpp | 4 +- core/rend/gles/gles.cpp | 22 ++---- core/rend/gles/gles.h | 16 ++-- core/rend/gles/gltex.cpp | 2 +- core/wsi/egl.cpp | 2 - core/wsi/wgl.cpp | 4 +- core/wsi/xgl.cpp | 4 - 10 files changed, 115 insertions(+), 127 deletions(-) diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index 1eea3efc1..8ecc916ea 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -175,7 +175,7 @@ static std::vector VramLocks[VRAM_SIZE_MAX / PAGE_SIZE]; //List functions // -void vramlock_list_remove(vram_block* block) +static void vramlock_list_remove(vram_block* block) { u32 base = block->start / PAGE_SIZE; u32 end = block->end / PAGE_SIZE; @@ -191,7 +191,7 @@ void vramlock_list_remove(vram_block* block) } } -void vramlock_list_add(vram_block* block) +static void 
vramlock_list_add(vram_block* block) { u32 base = block->start / PAGE_SIZE; u32 end = block->end / PAGE_SIZE; @@ -210,7 +210,7 @@ void vramlock_list_add(vram_block* block) } } -std::mutex vramlist_lock; +static std::mutex vramlist_lock; bool VramLockedWriteOffset(size_t offset) { @@ -379,9 +379,9 @@ void BaseTextureCacheData::PrintTextureName() if (tcw.VQ_Comp) strcat(str, " VQ"); - else if (tcw.ScanOrder == 0) + else if (tcw.ScanOrder == 0 || IsPaletted()) strcat(str, " TW"); - else if (tcw.StrideSel) + else if (tcw.StrideSel == 1 && !IsPaletted()) strcat(str, " Stride"); if (tcw.ScanOrder == 0 && tcw.MipMapped) diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index e142cdb20..5fe03f002 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -705,13 +705,18 @@ public: Texture *getTextureCacheData(TSP tsp, TCW tcw) { u64 key = tsp.full & TSPTextureCacheMask.full; - if ((tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) - && !BaseTextureCacheData::IsGpuHandledPaletted(tsp, tcw)) - // Paletted textures have a palette selection that must be part of the key - // We also add the palette type to the key to avoid thrashing the cache - // when the palette type is changed. If the palette type is changed back in the future, - // this texture will stil be available. - key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6) | ((tsp.FilterMode != 0) << 8); + if (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) + { + if (BaseTextureCacheData::IsGpuHandledPaletted(tsp, tcw)) + // texaddr, pixelfmt, VQ, MipMap + key |= (u64)(tcw.full & TCWPalTextureCacheMask.full) << 32; + else + // Paletted textures have a palette selection that must be part of the key + // We also add the palette type to the key to avoid thrashing the cache + // when the palette type is changed. If the palette type is changed back in the future, + // this texture will still be available. 
+ key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6) | ((tsp.FilterMode != 0) << 8); + } else key |= (u64)(tcw.full & TCWTextureCacheMask.full) << 32; @@ -795,6 +800,8 @@ protected: const TSP TSPTextureCacheMask = { { 7, 7 } }; // TexAddr : 0x1FFFFF, Reserved : 0, StrideSel : 0, ScanOrder : 1, PixelFmt : 7, VQ_Comp : 1, MipMapped : 1 const TCW TCWTextureCacheMask = { { 0x1FFFFF, 0, 0, 1, 7, 1, 1 } }; + // TexAddr : 0x1FFFFF, PalSelect : 0, PixelFmt : 7, VQ_Comp : 1, MipMapped : 1 + const TCW TCWPalTextureCacheMask = { { 0x1FFFFF, 0, 0, 0, 7, 1, 1 } }; }; template diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 27e10388c..724155913 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -663,11 +663,86 @@ static bool gl_create_resources() return true; } +struct OpenGL4Renderer : OpenGLRenderer +{ + bool Init() override; + + void Term() override + { + termABuffer(); + glcache.DeleteTextures(1, &stencilTexId); + stencilTexId = 0; + glcache.DeleteTextures(1, &depthTexId); + depthTexId = 0; + glcache.DeleteTextures(1, &opaqueTexId); + opaqueTexId = 0; + glcache.DeleteTextures(1, &depthSaveTexId); + depthSaveTexId = 0; + glDeleteFramebuffers(1, &geom_fbo); + geom_fbo = 0; + glDeleteSamplers(2, texSamplers); + texSamplers[0] = texSamplers[1] = 0; + glDeleteFramebuffers(1, &depth_fbo); + depth_fbo = 0; + + TexCache.Clear(); + termGLCommon(); + gl4_term(); + } + + bool Render() override + { + saveCurrentFramebuffer(); + renderFrame(pvrrc.framebufferWidth, pvrrc.framebufferHeight); + if (pvrrc.isRTT) { + restoreCurrentFramebuffer(); + return false; + } + + if (!config::EmulateFramebuffer) + { + DrawOSD(false); + gl.ofbo2.ready = false; + frameRendered = true; + } + restoreCurrentFramebuffer(); + + return true; + } + + GLenum getFogTextureSlot() const override { + return GL_TEXTURE5; + } + GLenum getPaletteTextureSlot() const override { + return GL_TEXTURE6; + } + + bool renderFrame(int width, int height); + +#ifdef LIBRETRO + void 
DrawOSD(bool clearScreen) override + { + void gl4DrawVmuTexture(u8 vmu_screen_number); + void gl4DrawGunCrosshair(u8 port); + + if (settings.platform.isConsole()) + { + for (int vmu_screen_number = 0 ; vmu_screen_number < 4 ; vmu_screen_number++) + if (vmu_lcd_status[vmu_screen_number * 2]) + gl4DrawVmuTexture(vmu_screen_number); + } + + for (int lightgun_port = 0 ; lightgun_port < 4 ; lightgun_port++) + gl4DrawGunCrosshair(lightgun_port); + } +#endif +}; + //setup void gl_DebugOutput(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *userParam); -static bool gl4_init() +bool OpenGL4Renderer::Init() { findGLVersion(); if (gl.gl_major < 4 || (gl.gl_major == 4 && gl.gl_minor < 3)) @@ -737,7 +812,7 @@ static void resize(int w, int h) } } -static bool RenderFrame(int width, int height) +bool OpenGL4Renderer::renderFrame(int width, int height) { const bool is_rtt = pvrrc.isRTT; @@ -942,7 +1017,7 @@ static bool RenderFrame(int width, int height) #ifndef LIBRETRO else { gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); - render_output_framebuffer(); + RenderLastFrame(); } #endif glBindVertexArray(0); @@ -950,82 +1025,6 @@ static bool RenderFrame(int width, int height) return !is_rtt; } -struct OpenGL4Renderer : OpenGLRenderer -{ - bool Init() override - { - return gl4_init(); - } - - void Term() override - { - termABuffer(); - glcache.DeleteTextures(1, &stencilTexId); - stencilTexId = 0; - glcache.DeleteTextures(1, &depthTexId); - depthTexId = 0; - glcache.DeleteTextures(1, &opaqueTexId); - opaqueTexId = 0; - glcache.DeleteTextures(1, &depthSaveTexId); - depthSaveTexId = 0; - glDeleteFramebuffers(1, &geom_fbo); - geom_fbo = 0; - glDeleteSamplers(2, texSamplers); - texSamplers[0] = texSamplers[1] = 0; - glDeleteFramebuffers(1, &depth_fbo); - depth_fbo = 0; - - TexCache.Clear(); - termGLCommon(); - gl4_term(); - } - - bool Render() override - { - saveCurrentFramebuffer(); - RenderFrame(pvrrc.framebufferWidth, 
pvrrc.framebufferHeight); - if (pvrrc.isRTT) { - restoreCurrentFramebuffer(); - return false; - } - - if (!config::EmulateFramebuffer) - { - DrawOSD(false); - gl.ofbo2.ready = false; - frameRendered = true; - } - restoreCurrentFramebuffer(); - - return true; - } - - GLenum getFogTextureSlot() const override { - return GL_TEXTURE5; - } - GLenum getPaletteTextureSlot() const override { - return GL_TEXTURE6; - } - -#ifdef LIBRETRO - void DrawOSD(bool clearScreen) override - { - void gl4DrawVmuTexture(u8 vmu_screen_number); - void gl4DrawGunCrosshair(u8 port); - - if (settings.platform.isConsole()) - { - for (int vmu_screen_number = 0 ; vmu_screen_number < 4 ; vmu_screen_number++) - if (vmu_lcd_status[vmu_screen_number * 2]) - gl4DrawVmuTexture(vmu_screen_number); - } - - for (int lightgun_port = 0 ; lightgun_port < 4 ; lightgun_port++) - gl4DrawGunCrosshair(lightgun_port); - } -#endif -}; - Renderer* rend_GL4() { return new OpenGL4Renderer(); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 47084ae26..c269ee5a1 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -729,7 +729,7 @@ void OpenGLRenderer::RenderFramebuffer(const FramebufferInfo& info) drawQuad(gl.dcfb.tex, false, true); } #ifndef LIBRETRO - render_output_framebuffer(); + RenderLastFrame(); #endif DrawOSD(false); @@ -797,7 +797,7 @@ void writeFramebufferToVRAM() glCheck(); } -bool render_output_framebuffer() +bool OpenGLRenderer::RenderLastFrame() { GlFramebuffer *framebuffer = gl.ofbo2.ready ? 
gl.ofbo2.framebuffer.get() : gl.ofbo.framebuffer.get(); if (framebuffer == nullptr) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 0458e9357..7c321fec0 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -964,7 +964,7 @@ void gl_DebugOutput(GLenum source, } #endif -bool gles_init() +bool OpenGLRenderer::Init() { glcache.EnableCache(); @@ -1050,7 +1050,7 @@ static void updatePaletteTexture(GLenum texture_slot) glActiveTexture(GL_TEXTURE0); } -void OSD_DRAW(bool clear_screen) +void OpenGLRenderer::DrawOSD(bool clear_screen) { #ifdef LIBRETRO void DrawVmuTexture(u8 vmu_screen_number); @@ -1080,7 +1080,7 @@ void OSD_DRAW(bool clear_screen) { glcache.ClearColor(0.7f, 0.7f, 0.7f, 1.f); glClear(GL_COLOR_BUFFER_BIT); - render_output_framebuffer(); + RenderLastFrame(); glViewport(0, 0, settings.display.width, settings.display.height); } @@ -1172,7 +1172,7 @@ static void upload_vertex_indices() glCheck(); } -bool RenderFrame(int width, int height) +bool OpenGLRenderer::renderFrame(int width, int height) { bool is_rtt = pvrrc.isRTT; @@ -1383,7 +1383,7 @@ bool RenderFrame(int width, int height) #ifndef LIBRETRO else { gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(); - render_output_framebuffer(); + RenderLastFrame(); } #endif bindVertexArray(0); @@ -1391,11 +1391,6 @@ bool RenderFrame(int width, int height) return !is_rtt; } -bool OpenGLRenderer::Init() -{ - return gles_init(); -} - void OpenGLRenderer::Term() { TexCache.Clear(); @@ -1405,7 +1400,7 @@ void OpenGLRenderer::Term() bool OpenGLRenderer::Render() { saveCurrentFramebuffer(); - RenderFrame(pvrrc.framebufferWidth, pvrrc.framebufferHeight); + renderFrame(pvrrc.framebufferWidth, pvrrc.framebufferHeight); if (pvrrc.isRTT) { restoreCurrentFramebuffer(); return false; @@ -1422,11 +1417,6 @@ bool OpenGLRenderer::Render() return true; } -bool OpenGLRenderer::RenderLastFrame() -{ - return render_output_framebuffer(); -} - Renderer* rend_GLES2() { return new OpenGLRenderer(); 
diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 23c60d952..fb636a18d 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -287,8 +287,6 @@ struct gl_ctx extern gl_ctx gl; -BaseTextureCacheData *gl_GetTexture(TSP tsp, TCW tcw); - enum ModifierVolumeMode { Xor, Or, Inclusion, Exclusion, ModeCount }; void termGLCommon(); @@ -301,10 +299,8 @@ GLuint BindRTT(bool withDepthBuffer = true); void ReadRTTBuffer(); void glReadFramebuffer(const FramebufferInfo& info); GLuint init_output_framebuffer(int width, int height); -bool render_output_framebuffer(); void writeFramebufferToVRAM(); -void OSD_DRAW(bool clear_screen); PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear, @@ -416,12 +412,9 @@ struct OpenGLRenderer : Renderer bool RenderLastFrame() override; - void DrawOSD(bool clear_screen) override { OSD_DRAW(clear_screen); } + void DrawOSD(bool clear_screen) override; - BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override - { - return gl_GetTexture(tsp, tcw); - } + BaseTextureCacheData *GetTexture(TSP tsp, TCW tcw) override; bool Present() override { @@ -434,6 +427,7 @@ struct OpenGLRenderer : Renderer return true; } +protected: virtual GLenum getFogTextureSlot() const { return GL_TEXTURE1; } @@ -453,6 +447,10 @@ struct OpenGLRenderer : Renderer glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo); } +private: + bool renderFrame(int width, int height); + +protected: bool frameRendered = false; }; diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index a516271cf..e48aa89ac 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -327,7 +327,7 @@ static int TexCacheLookups; static int TexCacheHits; //static float LastTexCacheStats; -BaseTextureCacheData *gl_GetTexture(TSP tsp, TCW tcw) +BaseTextureCacheData 
*OpenGLRenderer::GetTexture(TSP tsp, TCW tcw) { TexCacheLookups++; diff --git a/core/wsi/egl.cpp b/core/wsi/egl.cpp index 36ede455b..6a6bd7909 100644 --- a/core/wsi/egl.cpp +++ b/core/wsi/egl.cpp @@ -69,8 +69,6 @@ bool EGLGraphicsContext::init() EGLint pi32ConfigAttribs[] = { EGL_SURFACE_TYPE, EGL_WINDOW_BIT, EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, - EGL_DEPTH_SIZE, 24, - EGL_STENCIL_SIZE, 8, EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, EGL_BLUE_SIZE, 8, diff --git a/core/wsi/wgl.cpp b/core/wsi/wgl.cpp index 2ef204128..97bcd5b2b 100644 --- a/core/wsi/wgl.cpp +++ b/core/wsi/wgl.cpp @@ -50,8 +50,8 @@ bool WGLGraphicsContext::init() 0, 0, 0, 0, 0, 0, - 24, //Number of bits for the depthbuffer - 8, //Number of bits for the stencilbuffer + 0, //Number of bits for the depthbuffer + 0, //Number of bits for the stencilbuffer 0, //Number of Aux buffers in the framebuffer. PFD_MAIN_PLANE, 0, diff --git a/core/wsi/xgl.cpp b/core/wsi/xgl.cpp index a87514b97..186d7a49f 100644 --- a/core/wsi/xgl.cpp +++ b/core/wsi/xgl.cpp @@ -113,11 +113,7 @@ bool XGLGraphicsContext::ChooseVisual(Display* x11Display, XVisualInfo** visual, GLX_GREEN_SIZE , 8, GLX_BLUE_SIZE , 8, GLX_ALPHA_SIZE , 8, - GLX_DEPTH_SIZE , 24, - GLX_STENCIL_SIZE , 8, GLX_DOUBLEBUFFER , True, - //GLX_SAMPLE_BUFFERS , 1, - //GLX_SAMPLES , 4, None }; From d76dff8594415aaaa1b8bb617e9dd29453f9920f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 10 Dec 2022 13:06:31 +0100 Subject: [PATCH 27/34] ggpo: don't call exit on assert. clean up logging Don't call exit when an assertion fails, throw an exception instead. Log ggpo API errors. Deleted most ggpo logging methods and use flycast logging. 
--- CMakeLists.txt | 2 - core/deps/ggpo/include/ggponet.h | 20 ----- core/deps/ggpo/lib/ggpo/backends/backend.h | 1 - core/deps/ggpo/lib/ggpo/backends/p2p.cpp | 38 ++++---- .../deps/ggpo/lib/ggpo/backends/spectator.cpp | 2 +- core/deps/ggpo/lib/ggpo/backends/synctest.cpp | 42 +-------- core/deps/ggpo/lib/ggpo/backends/synctest.h | 3 - core/deps/ggpo/lib/ggpo/game_input.cpp | 17 ++-- core/deps/ggpo/lib/ggpo/ggpo_types.h | 15 ++-- core/deps/ggpo/lib/ggpo/input_queue.cpp | 52 ++++------- core/deps/ggpo/lib/ggpo/input_queue.h | 1 - core/deps/ggpo/lib/ggpo/log.cpp | 31 ------- core/deps/ggpo/lib/ggpo/log.h | 14 --- core/deps/ggpo/lib/ggpo/main.cpp | 42 +++------ core/deps/ggpo/lib/ggpo/network/udp.cpp | 29 ++---- core/deps/ggpo/lib/ggpo/network/udp.h | 5 -- core/deps/ggpo/lib/ggpo/network/udp_proto.cpp | 89 ++++++++----------- core/deps/ggpo/lib/ggpo/network/udp_proto.h | 1 - core/deps/ggpo/lib/ggpo/sync.cpp | 18 ++-- core/deps/ggpo/lib/ggpo/timesync.cpp | 4 +- 20 files changed, 119 insertions(+), 307 deletions(-) delete mode 100644 core/deps/ggpo/lib/ggpo/log.cpp delete mode 100644 core/deps/ggpo/lib/ggpo/log.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d9a0065f0..417e8ff6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1358,8 +1358,6 @@ if(NOT LIBRETRO) core/deps/ggpo/lib/ggpo/game_input.h core/deps/ggpo/lib/ggpo/input_queue.cpp core/deps/ggpo/lib/ggpo/input_queue.h - core/deps/ggpo/lib/ggpo/log.cpp - core/deps/ggpo/lib/ggpo/log.h core/deps/ggpo/lib/ggpo/main.cpp core/deps/ggpo/lib/ggpo/platform_linux.cpp core/deps/ggpo/lib/ggpo/platform_linux.h diff --git a/core/deps/ggpo/include/ggponet.h b/core/deps/ggpo/include/ggponet.h index 29eba49c1..9bf8552ec 100644 --- a/core/deps/ggpo/include/ggponet.h +++ b/core/deps/ggpo/include/ggponet.h @@ -579,26 +579,6 @@ GGPO_API GGPOErrorCode __cdecl ggpo_set_disconnect_timeout(GGPOSession *, GGPO_API GGPOErrorCode __cdecl ggpo_set_disconnect_notify_start(GGPOSession *, int timeout); -/* - * ggpo_log -- - * - * Used to 
write to the ggpo.net log. In the current versions of the - * SDK, a log file is only generated if the "quark.log" environment - * variable is set to 1. This will change in future versions of the - * SDK. - */ -GGPO_API void __cdecl ggpo_log(GGPOSession *, - const char *fmt, ...); -/* - * ggpo_logv -- - * - * A varargs compatible version of ggpo_log. See ggpo_log for - * more details. - */ -GGPO_API void __cdecl ggpo_logv(GGPOSession *, - const char *fmt, - va_list args); - /* * ggpo_send_message -- * diff --git a/core/deps/ggpo/lib/ggpo/backends/backend.h b/core/deps/ggpo/lib/ggpo/backends/backend.h index 39a176fbd..b8b0a3402 100644 --- a/core/deps/ggpo/lib/ggpo/backends/backend.h +++ b/core/deps/ggpo/lib/ggpo/backends/backend.h @@ -20,7 +20,6 @@ struct GGPOSession { virtual GGPOErrorCode IncrementFrame(void) { return GGPO_OK; } virtual GGPOErrorCode DisconnectPlayer(GGPOPlayerHandle handle) { return GGPO_OK; } virtual GGPOErrorCode GetNetworkStats(GGPONetworkStats *stats, GGPOPlayerHandle handle) { return GGPO_OK; } - virtual GGPOErrorCode Logv(const char *fmt, va_list list) { ::Logv(fmt, list); return GGPO_OK; } virtual GGPOErrorCode SetFrameDelay(GGPOPlayerHandle player, int delay) { return GGPO_ERRORCODE_UNSUPPORTED; } virtual GGPOErrorCode SetDisconnectTimeout(int timeout) { return GGPO_ERRORCODE_UNSUPPORTED; } diff --git a/core/deps/ggpo/lib/ggpo/backends/p2p.cpp b/core/deps/ggpo/lib/ggpo/backends/p2p.cpp index 57628da26..b47b131a5 100644 --- a/core/deps/ggpo/lib/ggpo/backends/p2p.cpp +++ b/core/deps/ggpo/lib/ggpo/backends/p2p.cpp @@ -130,12 +130,12 @@ Peer2PeerBackend::DoPoll(int timeout) total_min_confirmed = PollNPlayers(current_frame); } - Log("last confirmed frame in p2p backend is %d.\n", total_min_confirmed); + Log("last confirmed frame in p2p backend is %d.", total_min_confirmed); if (total_min_confirmed >= 0) { ASSERT(total_min_confirmed != INT_MAX); if (_num_spectators > 0) { while (_next_spectator_frame <= total_min_confirmed) { - Log("pushing 
frame %d to spectators.\n", _next_spectator_frame); + Log("pushing frame %d to spectators.", _next_spectator_frame); GameInput input; input.frame = _next_spectator_frame; @@ -147,7 +147,7 @@ Peer2PeerBackend::DoPoll(int timeout) _next_spectator_frame++; } } - Log("setting confirmed frame in sync to %d.\n", total_min_confirmed); + Log("setting confirmed frame in sync to %d.", total_min_confirmed); _sync.SetLastConfirmedFrame(total_min_confirmed); } @@ -189,12 +189,12 @@ int Peer2PeerBackend::Poll2Players(int current_frame) if (!_local_connect_status[i].disconnected) { total_min_confirmed = MIN(_local_connect_status[i].last_frame, total_min_confirmed); } - Log(" local endp: connected = %d, last_received = %d, total_min_confirmed = %d.\n", !_local_connect_status[i].disconnected, _local_connect_status[i].last_frame, total_min_confirmed); + Log(" local endp: connected = %d, last_received = %d, total_min_confirmed = %d.", !_local_connect_status[i].disconnected, _local_connect_status[i].last_frame, total_min_confirmed); if (!queue_connected && !_local_connect_status[i].disconnected) { - Log("disconnecting i %d by remote request.\n", i); + Log("disconnecting i %d by remote request.", i); DisconnectPlayerQueue(i, total_min_confirmed); } - Log(" total_min_confirmed = %d.\n", total_min_confirmed); + Log(" total_min_confirmed = %d.", total_min_confirmed); } return total_min_confirmed; } @@ -208,7 +208,7 @@ int Peer2PeerBackend::PollNPlayers(int current_frame) for (queue = 0; queue < _num_players; queue++) { bool queue_connected = true; int queue_min_confirmed = MAX_INT; - Log("considering queue %d.\n", queue); + Log("considering queue %d.", queue); for (i = 0; i < _num_players; i++) { // we're going to do a lot of logic here in consideration of endpoint i. 
// keep accumulating the minimum confirmed point for all n*n packets and @@ -218,16 +218,16 @@ int Peer2PeerBackend::PollNPlayers(int current_frame) queue_connected = queue_connected && connected; queue_min_confirmed = MIN(last_received, queue_min_confirmed); - Log(" endpoint %d: connected = %d, last_received = %d, queue_min_confirmed = %d.\n", i, connected, last_received, queue_min_confirmed); + Log(" endpoint %d: connected = %d, last_received = %d, queue_min_confirmed = %d.", i, connected, last_received, queue_min_confirmed); } else { - Log(" endpoint %d: ignoring... not running.\n", i); + Log(" endpoint %d: ignoring... not running.", i); } } // merge in our local status only if we're still connected! if (!_local_connect_status[queue].disconnected) { queue_min_confirmed = MIN(_local_connect_status[queue].last_frame, queue_min_confirmed); } - Log(" local endp: connected = %d, last_received = %d, queue_min_confirmed = %d.\n", !_local_connect_status[queue].disconnected, _local_connect_status[queue].last_frame, queue_min_confirmed); + Log(" local endp: connected = %d, last_received = %d, queue_min_confirmed = %d.", !_local_connect_status[queue].disconnected, _local_connect_status[queue].last_frame, queue_min_confirmed); if (queue_connected) { total_min_confirmed = MIN(queue_min_confirmed, total_min_confirmed); @@ -236,11 +236,11 @@ int Peer2PeerBackend::PollNPlayers(int current_frame) // so, we need to re-adjust. This can happen when we detect our own disconnect at frame n // and later receive a disconnect notification for frame n-1. 
if (!_local_connect_status[queue].disconnected || _local_connect_status[queue].last_frame > queue_min_confirmed) { - Log("disconnecting queue %d by remote request.\n", queue); + Log("disconnecting queue %d by remote request.", queue); DisconnectPlayerQueue(queue, queue_min_confirmed); } } - Log(" total_min_confirmed = %d.\n", total_min_confirmed); + Log(" total_min_confirmed = %d.", total_min_confirmed); } return total_min_confirmed; } @@ -335,7 +335,7 @@ Peer2PeerBackend::SyncInput(void *values, GGPOErrorCode Peer2PeerBackend::IncrementFrame(void) { - Log("End of frame (%d)...\n", _sync.GetFrameCount()); + Log("End of frame (%d)...", _sync.GetFrameCount()); _sync.IncrementFrame(); DoPoll(0); PollSyncEvents(); @@ -383,7 +383,7 @@ Peer2PeerBackend::OnUdpProtocolPeerEvent(UdpProtocol::Event &evt, int queue) _sync.AddRemoteInput(queue, evt.u.input.input); // Notify the other endpoints which frame we received from a peer - Log("setting remote connect status for queue %d to %d\n", queue, evt.u.input.input.frame); + Log("setting remote connect status for queue %d to %d", queue, evt.u.input.input.frame); _local_connect_status[queue].last_frame = evt.u.input.input.frame; } break; @@ -493,14 +493,14 @@ Peer2PeerBackend::DisconnectPlayer(GGPOPlayerHandle player) int current_frame = _sync.GetFrameCount(); // xxx: we should be tracking who the local player is, but for now assume // that if the endpoint is not initalized, this must be the local player. 
- Log("Disconnecting local player %d at frame %d by user request.\n", queue, _local_connect_status[queue].last_frame); + Log("Disconnecting local player %d at frame %d by user request.", queue, _local_connect_status[queue].last_frame); for (int i = 0; i < _num_players; i++) { if (_endpoints[i].IsInitialized()) { DisconnectPlayerQueue(i, current_frame); } } } else { - Log("Disconnecting queue %d at frame %d by user request.\n", queue, _local_connect_status[queue].last_frame); + Log("Disconnecting queue %d at frame %d by user request.", queue, _local_connect_status[queue].last_frame); DisconnectPlayerQueue(queue, _local_connect_status[queue].last_frame); } return GGPO_OK; @@ -514,16 +514,16 @@ Peer2PeerBackend::DisconnectPlayerQueue(int queue, int syncto) _endpoints[queue].Disconnect(); - Log("Changing queue %d local connect status for last frame from %d to %d on disconnect request (current: %d).\n", + Log("Changing queue %d local connect status for last frame from %d to %d on disconnect request (current: %d).", queue, _local_connect_status[queue].last_frame, syncto, framecount); _local_connect_status[queue].disconnected = 1; _local_connect_status[queue].last_frame = syncto; if (syncto != GameInput::NullFrame && syncto < framecount) { - Log("adjusting simulation to account for the fact that %d disconnected @ %d.\n", queue, syncto); + Log("adjusting simulation to account for the fact that %d disconnected @ %d.", queue, syncto); _sync.AdjustSimulation(syncto); - Log("finished adjusting simulation.\n"); + Log("finished adjusting simulation."); } info.code = GGPO_EVENTCODE_DISCONNECTED_FROM_PEER; diff --git a/core/deps/ggpo/lib/ggpo/backends/spectator.cpp b/core/deps/ggpo/lib/ggpo/backends/spectator.cpp index df65dd410..1e820de57 100644 --- a/core/deps/ggpo/lib/ggpo/backends/spectator.cpp +++ b/core/deps/ggpo/lib/ggpo/backends/spectator.cpp @@ -92,7 +92,7 @@ SpectatorBackend::SyncInput(void *values, GGPOErrorCode SpectatorBackend::IncrementFrame(void) { - Log("End of 
frame (%d)...\n", _next_input_to_send - 1); + Log("End of frame (%d)...", _next_input_to_send - 1); DoPoll(0); PollUdpProtocolEvents(); diff --git a/core/deps/ggpo/lib/ggpo/backends/synctest.cpp b/core/deps/ggpo/lib/ggpo/backends/synctest.cpp index 10e5b3147..370266525 100644 --- a/core/deps/ggpo/lib/ggpo/backends/synctest.cpp +++ b/core/deps/ggpo/lib/ggpo/backends/synctest.cpp @@ -83,7 +83,6 @@ SyncTestBackend::SyncInput(void *values, int size, int *disconnect_flags) { - BeginLog(false); if (_rollingback) { _last_input = _saved_frames.front().input; } else { @@ -105,8 +104,7 @@ SyncTestBackend::IncrementFrame(void) _sync.IncrementFrame(); _current_input.erase(); - Log("End of frame(%d)...\n", _sync.GetFrameCount()); - EndLog(); + DEBUG_LOG(NETWORK, "End of frame(%d)...", _sync.GetFrameCount()); if (_rollingback) { return GGPO_OK; @@ -171,47 +169,9 @@ SyncTestBackend::RaiseSyncError(const char *fmt, ...) #ifdef _WIN32 OutputDebugStringA(buf); #endif - EndLog(); // DebugBreak(); } -GGPOErrorCode -SyncTestBackend::Logv(char *fmt, va_list list) -{ - if (_logfp) { - vfprintf(_logfp, fmt, list); - } - return GGPO_OK; -} - -void -SyncTestBackend::BeginLog(int saving) -{ - EndLog(); - - char filename[MAX_PATH]; -#ifdef _WIN32 - CreateDirectoryA("synclogs", NULL); -#else - mkdir("synclogs", 0755); -#endif - snprintf(filename, ARRAY_SIZE(filename), "synclogs/%s-%04d-%s.log", - saving ? "state" : "log", - _sync.GetFrameCount(), - _rollingback ? 
"replay" : "original"); - - _logfp = fopen(filename, "w"); -} - -void -SyncTestBackend::EndLog() -{ - if (_logfp) { - fprintf(_logfp, "Closing log file.\n"); - fclose(_logfp); - _logfp = NULL; - } -} void SyncTestBackend::LogSaveStates(SavedInfo &info) { diff --git a/core/deps/ggpo/lib/ggpo/backends/synctest.h b/core/deps/ggpo/lib/ggpo/backends/synctest.h index bd947517e..209a8d4ef 100644 --- a/core/deps/ggpo/lib/ggpo/backends/synctest.h +++ b/core/deps/ggpo/lib/ggpo/backends/synctest.h @@ -24,7 +24,6 @@ public: virtual GGPOErrorCode AddLocalInput(GGPOPlayerHandle player, void *values, int size); virtual GGPOErrorCode SyncInput(void *values, int size, int *disconnect_flags); virtual GGPOErrorCode IncrementFrame(void); - virtual GGPOErrorCode Logv(char *fmt, va_list list); protected: struct SavedInfo { @@ -36,8 +35,6 @@ protected: }; void RaiseSyncError(const char *fmt, ...); - void BeginLog(int saving); - void EndLog(); void LogSaveStates(SavedInfo &info); protected: diff --git a/core/deps/ggpo/lib/ggpo/game_input.cpp b/core/deps/ggpo/lib/ggpo/game_input.cpp index a1d62dc37..7a380a6cb 100644 --- a/core/deps/ggpo/lib/ggpo/game_input.cpp +++ b/core/deps/ggpo/lib/ggpo/game_input.cpp @@ -8,7 +8,6 @@ #include "game_input.h" #include "ggpo_types.h" -#include "log.h" void GameInput::init(int iframe, char *ibits, int isize, int offset) @@ -65,25 +64,23 @@ GameInput::log(char *prefix, bool show_frame) const size_t c = strlen(prefix); strcpy(buf, prefix); desc(buf + c, ARRAY_SIZE(buf) - c, show_frame); - strcat(buf, "\n"); - Log(buf); + Log("%s", buf); } bool GameInput::equal(GameInput &other, bool bitsonly) { if (!bitsonly && frame != other.frame) { - Log("frames don't match: %d, %d\n", frame, other.frame); + Log("frames don't match: %d, %d", frame, other.frame); } if (size != other.size) { - Log("sizes don't match: %d, %d\n", size, other.size); - } - if (memcmp(bits, other.bits, size)) { - Log("bits don't match\n"); + Log("sizes don't match: %d, %d", size, other.size); } + 
bool match = memcmp(bits, other.bits, size) == 0; + if (!match) + Log("bits don't match"); ASSERT(size && other.size); return (bitsonly || frame == other.frame) && - size == other.size && - memcmp(bits, other.bits, size) == 0; + size == other.size && match; } diff --git a/core/deps/ggpo/lib/ggpo/ggpo_types.h b/core/deps/ggpo/lib/ggpo/ggpo_types.h index 54abecdf5..44106ab13 100644 --- a/core/deps/ggpo/lib/ggpo/ggpo_types.h +++ b/core/deps/ggpo/lib/ggpo/ggpo_types.h @@ -40,6 +40,7 @@ typedef short int16; typedef int int32; #include "ggponet.h" +#include "log/Log.h" #include class GGPOException : public std::runtime_error { @@ -61,8 +62,6 @@ public: # error Unsupported platform #endif -#include "log.h" - /* * Macros */ @@ -70,16 +69,14 @@ public: do { \ if (!(x)) { \ char assert_buf[1024]; \ - snprintf(assert_buf, sizeof(assert_buf) - 1, "Assertion: %s @ %s:%d (pid:%ld)", #x, __FILE__, __LINE__, (long)GGPOPlatform::GetProcessID()); \ - Log("%s\n", assert_buf); \ - Log("\n"); \ - Log("\n"); \ - Log("\n"); \ - GGPOPlatform::AssertFailed(assert_buf); \ - exit(0); \ + snprintf(assert_buf, sizeof(assert_buf) - 1, "Assertion: %s @ %s:%d", #x, __FILE__, __LINE__); \ + GGPOPlatform::AssertFailed(assert_buf); \ + throw GGPOException(assert_buf, GGPO_ERRORCODE_GENERAL_FAILURE); \ } \ } while (false) +#define Log(...) 
DEBUG_LOG(NETWORK, __VA_ARGS__) + #ifndef ARRAY_SIZE # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) #endif diff --git a/core/deps/ggpo/lib/ggpo/input_queue.cpp b/core/deps/ggpo/lib/ggpo/input_queue.cpp index 3441ef70c..051c47e7d 100644 --- a/core/deps/ggpo/lib/ggpo/input_queue.cpp +++ b/core/deps/ggpo/lib/ggpo/input_queue.cpp @@ -48,7 +48,7 @@ InputQueue::Init(int id, int input_size) int InputQueue::GetLastConfirmedFrame() { - Log("returning last confirmed frame %d.\n", _last_added_frame); + Log("input q%d | returning last confirmed frame %d.", _id, _last_added_frame); return _last_added_frame; } @@ -67,21 +67,21 @@ InputQueue::DiscardConfirmedFrames(int frame) frame = MIN(frame, _last_frame_requested); } - Log("discarding confirmed frames up to %d (last_added:%d length:%d [head:%d tail:%d]).\n", + Log("input q%d | discarding confirmed frames up to %d (last_added:%d length:%d [head:%d tail:%d]).", _id, frame, _last_added_frame, _length, _head, _tail); if (frame >= _last_added_frame) { _tail = _head; } else { int offset = frame - _inputs[_tail].frame + 1; - Log("difference of %d frames.\n", offset); + Log("input q%d | difference of %d frames.", _id, offset); ASSERT(offset >= 0); _tail = (_tail + offset) % INPUT_QUEUE_LENGTH; _length -= offset; } - Log("after discarding, new tail is %d (frame:%d).\n", _tail, _inputs[_tail].frame); + Log("input q%d | after discarding, new tail is %d (frame:%d).", _id, _tail, _inputs[_tail].frame); ASSERT(_length >= 0); } @@ -90,7 +90,7 @@ InputQueue::ResetPrediction(int frame) { ASSERT(_first_incorrect_frame == GameInput::NullFrame || frame <= _first_incorrect_frame); - Log("resetting all prediction errors back to frame %d.\n", frame); + Log("input q%d | resetting all prediction errors back to frame %d.", _id, frame); /* * There's nothing really to do other than reset our prediction @@ -116,7 +116,7 @@ InputQueue::GetConfirmedInput(int requested_frame, GameInput *input) bool InputQueue::GetInput(int requested_frame, GameInput 
*input) { - Log("requesting input frame %d.\n", requested_frame); + Log("input q%d | requesting input frame %d.", _id, requested_frame); /* * No one should ever try to grab any input when we have a prediction @@ -144,7 +144,7 @@ InputQueue::GetInput(int requested_frame, GameInput *input) offset = (offset + _tail) % INPUT_QUEUE_LENGTH; ASSERT(_inputs[offset].frame == requested_frame); *input = _inputs[offset]; - Log("returning confirmed frame number %d.\n", input->frame); + Log("input q%d | returning confirmed frame number %d.", _id, input->frame); return true; } @@ -154,13 +154,13 @@ InputQueue::GetInput(int requested_frame, GameInput *input) * same thing they did last time. */ if (requested_frame == 0) { - Log("basing new prediction frame from nothing, you're client wants frame 0.\n"); + Log("input q%d | basing new prediction frame from nothing, you're client wants frame 0.", _id); _prediction.erase(); } else if (_last_added_frame == GameInput::NullFrame) { - Log("basing new prediction frame from nothing, since we have no frames yet.\n"); + Log("input q%d | basing new prediction frame from nothing, since we have no frames yet.", _id); _prediction.erase(); } else { - Log("basing new prediction frame from previously added frame (queue entry:%d, frame:%d).\n", + Log("input q%d | basing new prediction frame from previously added frame (queue entry:%d, frame:%d).", _id, PREVIOUS_FRAME(_head), _inputs[PREVIOUS_FRAME(_head)].frame); _prediction = _inputs[PREVIOUS_FRAME(_head)]; } @@ -176,7 +176,7 @@ InputQueue::GetInput(int requested_frame, GameInput *input) */ *input = _prediction; input->frame = requested_frame; - Log("returning prediction frame number %d (%d).\n", input->frame, _prediction.frame); + Log("input q%d | returning prediction frame number %d (%d).", _id, input->frame, _prediction.frame); return false; } @@ -186,7 +186,7 @@ InputQueue::AddInput(GameInput &input) { int new_frame; - Log("adding input frame number %d to queue.\n", input.frame); + Log("input q%d 
| adding input frame number %d to queue.", _id, input.frame); /* * These next two lines simply verify that inputs are passed in @@ -216,7 +216,7 @@ InputQueue::AddInput(GameInput &input) void InputQueue::AddDelayedInputToQueue(GameInput &input, int frame_number) { - Log("adding delayed input frame number %d to queue.\n", frame_number); + Log("input q%d | adding delayed input frame number %d to queue.", _id, frame_number); if (input.size != _prediction.size) throw GGPOException("Input size differs from peer", GGPO_ERRORCODE_INPUT_SIZE_DIFF); @@ -246,7 +246,7 @@ InputQueue::AddDelayedInputToQueue(GameInput &input, int frame_number) * in GetFirstIncorrectFrame() */ if (_first_incorrect_frame == GameInput::NullFrame && !_prediction.equal(input, true)) { - Log("frame %d does not match prediction. marking error.\n", frame_number); + Log("input q%d | frame %d does not match prediction. marking error.", _id, frame_number); _first_incorrect_frame = frame_number; } @@ -257,7 +257,7 @@ InputQueue::AddDelayedInputToQueue(GameInput &input, int frame_number) * count up. */ if (_prediction.frame == _last_frame_requested && _first_incorrect_frame == GameInput::NullFrame) { - Log("prediction is correct! dumping out of prediction mode.\n"); + Log("input q%d | prediction is correct! dumping out of prediction mode.", _id); _prediction.frame = GameInput::NullFrame; } else { _prediction.frame++; @@ -269,7 +269,7 @@ InputQueue::AddDelayedInputToQueue(GameInput &input, int frame_number) int InputQueue::AdvanceQueueHead(int frame) { - Log("advancing queue head to frame %d.\n", frame); + Log("input q%d | advancing queue head to frame %d.", _id, frame); int expected_frame = _first_frame ? 0 : _inputs[PREVIOUS_FRAME(_head)].frame + 1; @@ -281,7 +281,7 @@ InputQueue::AdvanceQueueHead(int frame) * time we shoved a frame into the system. In this case, there's * no room on the queue. Toss it. 
*/ - Log("Dropping input frame %d (expected next frame to be %d).\n", + Log("input q%d | Dropping input frame %d (expected next frame to be %d).", _id, frame, expected_frame); return GameInput::NullFrame; } @@ -293,7 +293,7 @@ InputQueue::AdvanceQueueHead(int frame) * last frame in the queue several times in order to fill the space * left. */ - Log("Adding padding frame %d to account for change in frame delay.\n", + Log("input q%d | Adding padding frame %d to account for change in frame delay.", _id, expected_frame); GameInput &last_frame = _inputs[PREVIOUS_FRAME(_head)]; AddDelayedInputToQueue(last_frame, expected_frame); @@ -303,19 +303,3 @@ InputQueue::AdvanceQueueHead(int frame) ASSERT(frame == 0 || frame == _inputs[PREVIOUS_FRAME(_head)].frame + 1); return frame; } - - -void -InputQueue::Log(const char *fmt, ...) -{ - char buf[1024]; - size_t offset; - va_list args; - - offset = snprintf(buf, ARRAY_SIZE(buf), "input q%d | ", _id); - va_start(args, fmt); - vsnprintf(buf + offset, ARRAY_SIZE(buf) - offset - 1, fmt, args); - buf[ARRAY_SIZE(buf)-1] = '\0'; - ::Log(buf); - va_end(args); -} diff --git a/core/deps/ggpo/lib/ggpo/input_queue.h b/core/deps/ggpo/lib/ggpo/input_queue.h index 460a50168..62f86daf2 100644 --- a/core/deps/ggpo/lib/ggpo/input_queue.h +++ b/core/deps/ggpo/lib/ggpo/input_queue.h @@ -34,7 +34,6 @@ public: protected: int AdvanceQueueHead(int frame); void AddDelayedInputToQueue(GameInput &input, int i); - void Log(const char *fmt, ...); protected: int _id; diff --git a/core/deps/ggpo/lib/ggpo/log.cpp b/core/deps/ggpo/lib/ggpo/log.cpp deleted file mode 100644 index b816a05f2..000000000 --- a/core/deps/ggpo/lib/ggpo/log.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* ----------------------------------------------------------------------- - * GGPO.net (http://ggpo.net) - Copyright 2009 GroundStorm Studios, LLC. - * - * Use of this software is governed by the MIT license that can be found - * in the LICENSE file. 
- */ - -#include "ggpo_types.h" -#include "log/Log.h" -#include "log/LogManager.h" -#include - -void Log(const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - Logv(fmt, args); - va_end(args); -} - -void Logv(const char* fmt, va_list args) -{ - std::string copy; - if (fmt[strlen(fmt) - 1] == '\n') { - copy = fmt; - copy.pop_back(); - fmt = copy.c_str(); - } - if (LogManager::GetInstance()) - LogManager::GetInstance()->Log(LogTypes::LDEBUG, LogTypes::NETWORK, __FILE__, __LINE__, fmt, args); -} diff --git a/core/deps/ggpo/lib/ggpo/log.h b/core/deps/ggpo/lib/ggpo/log.h deleted file mode 100644 index 0096b2416..000000000 --- a/core/deps/ggpo/lib/ggpo/log.h +++ /dev/null @@ -1,14 +0,0 @@ -/* ----------------------------------------------------------------------- - * GGPO.net (http://ggpo.net) - Copyright 2009 GroundStorm Studios, LLC. - * - * Use of this software is governed by the MIT license that can be found - * in the LICENSE file. - */ - -#ifndef _LOG_H -#define _LOG_H - -extern void Log(const char *fmt, ...); -extern void Logv(const char *fmt, va_list list); - -#endif diff --git a/core/deps/ggpo/lib/ggpo/main.cpp b/core/deps/ggpo/lib/ggpo/main.cpp index 694555c44..258f5f5dd 100644 --- a/core/deps/ggpo/lib/ggpo/main.cpp +++ b/core/deps/ggpo/lib/ggpo/main.cpp @@ -23,22 +23,6 @@ struct Init }; static Init init; -void -ggpo_log(GGPOSession *ggpo, const char *fmt, ...) 
-{ - va_list args; - va_start(args, fmt); - ggpo_logv(ggpo, fmt, args); - va_end(args); -} - -void -ggpo_logv(GGPOSession *ggpo, const char *fmt, va_list args) -{ - if (ggpo) - ggpo->Logv(fmt, args); -} - GGPOErrorCode ggpo_start_session(GGPOSession **session, GGPOSessionCallbacks *cb, @@ -59,7 +43,7 @@ ggpo_start_session(GGPOSession **session, verification_size); return GGPO_OK; } catch (const GGPOException& e) { - Log("GGPOException in ggpo_start_session: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_start_session: %s", e.what()); return e.ggpoError; } } @@ -74,7 +58,7 @@ ggpo_add_player(GGPOSession *ggpo, try { return ggpo->AddPlayer(player, handle); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_add_player: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_add_player: %s", e.what()); return e.ggpoError; } } @@ -93,7 +77,7 @@ ggpo_start_synctest(GGPOSession **ggpo, *ggpo = (GGPOSession *)new SyncTestBackend(cb, game, frames, num_players); return GGPO_OK; } catch (const GGPOException& e) { - Log("GGPOException in ggpo_start_synctest: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_start_synctest: %s", e.what()); return e.ggpoError; } } @@ -108,7 +92,7 @@ ggpo_set_frame_delay(GGPOSession *ggpo, try { return ggpo->SetFrameDelay(player, frame_delay); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_set_frame_delay: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_set_frame_delay: %s", e.what()); return e.ggpoError; } } @@ -121,7 +105,7 @@ ggpo_idle(GGPOSession *ggpo, int timeout) try { return ggpo->DoPoll(timeout); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_idle: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_idle: %s", e.what()); return e.ggpoError; } } @@ -137,7 +121,7 @@ ggpo_add_local_input(GGPOSession *ggpo, try { return ggpo->AddLocalInput(player, values, size); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_add_local_input: %s", 
e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_add_local_input: %s", e.what()); return e.ggpoError; } } @@ -153,7 +137,7 @@ ggpo_synchronize_input(GGPOSession *ggpo, try { return ggpo->SyncInput(values, size, disconnect_flags); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_synchronize_input: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_synchronize_input: %s", e.what()); return e.ggpoError; } } @@ -166,7 +150,7 @@ GGPOErrorCode ggpo_disconnect_player(GGPOSession *ggpo, try { return ggpo->DisconnectPlayer(player); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_disconnect_player: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_disconnect_player: %s", e.what()); return e.ggpoError; } } @@ -179,7 +163,7 @@ ggpo_advance_frame(GGPOSession *ggpo) try { return ggpo->IncrementFrame(); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_advance_frame: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_advance_frame: %s", e.what()); return e.ggpoError; } } @@ -194,7 +178,7 @@ ggpo_get_network_stats(GGPOSession *ggpo, try { return ggpo->GetNetworkStats(stats, player); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_get_network_stats: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_get_network_stats: %s", e.what()); return e.ggpoError; } } @@ -218,7 +202,7 @@ ggpo_set_disconnect_timeout(GGPOSession *ggpo, int timeout) try { return ggpo->SetDisconnectTimeout(timeout); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_set_disconnect_timeout: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_set_disconnect_timeout: %s", e.what()); return e.ggpoError; } } @@ -231,7 +215,7 @@ ggpo_set_disconnect_notify_start(GGPOSession *ggpo, int timeout) try { return ggpo->SetDisconnectNotifyStart(timeout); } catch (const GGPOException& e) { - Log("GGPOException in ggpo_set_disconnect_notify_start: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in 
ggpo_set_disconnect_notify_start: %s", e.what()); return e.ggpoError; } } @@ -259,7 +243,7 @@ GGPOErrorCode ggpo_start_spectating(GGPOSession **session, verification_size); return GGPO_OK; } catch (const GGPOException& e) { - Log("GGPOException in ggpo_start_spectating: %s", e.what()); + ERROR_LOG(NETWORK, "GGPOException in ggpo_start_spectating: %s", e.what()); return e.ggpoError; } } diff --git a/core/deps/ggpo/lib/ggpo/network/udp.cpp b/core/deps/ggpo/lib/ggpo/network/udp.cpp index 5b3686937..4b0ea8080 100644 --- a/core/deps/ggpo/lib/ggpo/network/udp.cpp +++ b/core/deps/ggpo/lib/ggpo/network/udp.cpp @@ -34,7 +34,7 @@ CreateSocket(uint16 bind_port, int retries) for (port = bind_port; port <= bind_port + retries; port++) { sin.sin_port = htons(port); if (bind(s, (sockaddr *)&sin, sizeof sin) == 0) { - Log("Udp bound to port: %d.\n", port); + Log("Udp bound to port: %d.", port); return s; } } @@ -62,7 +62,7 @@ Udp::Init(uint16 port, Poll *poll, Callbacks *callbacks) _callbacks = callbacks; poll->RegisterLoop(this); - Log("binding udp socket to port %d.\n", port); + Log("binding udp socket to port %d.", port); _socket = CreateSocket(port, 0); if (_socket == INVALID_SOCKET) throw GGPOException("Socket creation or bind failed", GGPO_ERRORCODE_NETWORK_ERROR); @@ -76,11 +76,11 @@ Udp::SendTo(char *buffer, int len, int flags, struct sockaddr *dst, int destlen) int res = sendto(_socket, buffer, len, flags, dst, destlen); if (res == SOCKET_ERROR) { int err = WSAGetLastError(); - Log("unknown error in sendto (erro: %d wsaerr: %d).\n", res, err); + Log("unknown error in sendto (erro: %d wsaerr: %d).", res, err); ASSERT(false && "Unknown error in sendto"); } char dst_ip[1024]; - Log("sent packet length %d to %s:%d (ret:%d).\n", len, inet_ntop(AF_INET, (void *)&to->sin_addr, dst_ip, ARRAY_SIZE(dst_ip)), ntohs(to->sin_port), res); + Log("sent packet length %d to %s:%d (ret:%d).", len, inet_ntop(AF_INET, (void *)&to->sin_addr, dst_ip, ARRAY_SIZE(dst_ip)), ntohs(to->sin_port), 
res); } bool @@ -99,32 +99,15 @@ Udp::OnLoopPoll(void *cookie) if (len == -1) { int error = WSAGetLastError(); if (error != WSAEWOULDBLOCK) { - Log("recvfrom WSAGetLastError returned %d (%x).\n", error, error); + Log("recvfrom WSAGetLastError returned %d (%x).", error, error); } break; } else if (len > 0) { char src_ip[1024]; - Log("recvfrom returned (len:%d from:%s:%d).\n", len, inet_ntop(AF_INET, (void*)&recv_addr.sin_addr, src_ip, ARRAY_SIZE(src_ip)), ntohs(recv_addr.sin_port) ); + Log("recvfrom returned (len:%d from:%s:%d).", len, inet_ntop(AF_INET, (void*)&recv_addr.sin_addr, src_ip, ARRAY_SIZE(src_ip)), ntohs(recv_addr.sin_port) ); UdpMsg *msg = (UdpMsg *)recv_buf; _callbacks->OnMsg(recv_addr, msg, len); } } return true; } - - -void -Udp::Log(const char *fmt, ...) -{ - char buf[1024]; - size_t offset; - va_list args; - - strcpy(buf, "udp | "); - offset = strlen(buf); - va_start(args, fmt); - vsnprintf(buf + offset, ARRAY_SIZE(buf) - offset - 1, fmt, args); - buf[ARRAY_SIZE(buf)-1] = '\0'; - ::Log(buf); - va_end(args); -} diff --git a/core/deps/ggpo/lib/ggpo/network/udp.h b/core/deps/ggpo/lib/ggpo/network/udp.h index 7af07d8c0..ade94c586 100644 --- a/core/deps/ggpo/lib/ggpo/network/udp.h +++ b/core/deps/ggpo/lib/ggpo/network/udp.h @@ -31,11 +31,6 @@ public: virtual void OnMsg(sockaddr_in &from, UdpMsg *msg, int len) = 0; }; - -protected: - void Log(const char *fmt, ...); - -public: Udp(); void Init(uint16 port, Poll *p, Callbacks *callbacks); diff --git a/core/deps/ggpo/lib/ggpo/network/udp_proto.cpp b/core/deps/ggpo/lib/ggpo/network/udp_proto.cpp index 6f29dbb7e..2a9a5acec 100644 --- a/core/deps/ggpo/lib/ggpo/network/udp_proto.cpp +++ b/core/deps/ggpo/lib/ggpo/network/udp_proto.cpp @@ -197,7 +197,7 @@ UdpProtocol::OnLoopPoll(void *cookie) case Syncing: next_interval = (_state.sync.roundtrips_remaining == NUM_SYNC_PACKETS) ? 
SYNC_FIRST_RETRY_INTERVAL : SYNC_RETRY_INTERVAL; if (_last_send_time && _last_send_time + next_interval < now && _peer_addr.sin_addr.s_addr != 0) { - Log("No luck syncing after %d ms... Re-queueing sync packet.\n", next_interval); + Log("udpproto%d | No luck syncing after %d ms... Re-queueing sync packet.", _queue, next_interval); SendSyncRequest(); } break; @@ -205,7 +205,7 @@ UdpProtocol::OnLoopPoll(void *cookie) case Running: // xxx: rig all this up with a timer wrapper if (!_state.running.last_input_packet_recv_time || _state.running.last_input_packet_recv_time + RUNNING_RETRY_INTERVAL < now) { - Log("Haven't exchanged packets in a while (last received:%d last sent:%d). Resending.\n", _last_received_input.frame, _last_sent_input.frame); + Log("udpproto%d | Haven't exchanged packets in a while (last received:%d last sent:%d). Resending.", _queue, _last_received_input.frame, _last_sent_input.frame); SendPendingOutput(); _state.running.last_input_packet_recv_time = now; } @@ -224,13 +224,13 @@ UdpProtocol::OnLoopPoll(void *cookie) } if (_last_send_time && _last_send_time + KEEP_ALIVE_INTERVAL < now) { - Log("Sending keep alive packet\n"); + Log("udpproto%d | Sending keep alive packet", _queue); SendMsg(new UdpMsg(UdpMsg::KeepAlive)); } if (_disconnect_timeout && _disconnect_notify_start && !_disconnect_notify_sent && (_last_recv_time + _disconnect_notify_start < now)) { - Log("Endpoint has stopped receiving packets for %d ms. Sending notification.\n", _disconnect_notify_start); + Log("udpproto%d | Endpoint has stopped receiving packets for %d ms. Sending notification.", _queue, _disconnect_notify_start); Event e(Event::NetworkInterrupted); e.u.network_interrupted.disconnect_timeout = _disconnect_timeout - _disconnect_notify_start; QueueEvent(e); @@ -239,7 +239,7 @@ UdpProtocol::OnLoopPoll(void *cookie) if (_disconnect_timeout && (_last_recv_time + _disconnect_timeout < now)) { if (!_disconnect_event_sent) { - Log("Endpoint has stopped receiving packets for %d ms. 
Disconnecting.\n", _disconnect_timeout); + Log("udpproto%d | Endpoint has stopped receiving packets for %d ms. Disconnecting.", _queue, _disconnect_timeout); QueueEvent(Event(Event::Disconnected)); _disconnect_event_sent = true; } @@ -248,7 +248,7 @@ UdpProtocol::OnLoopPoll(void *cookie) case Disconnected: if (_shutdown_timeout < now) { - Log("Shutting down udp connection.\n"); + Log("udpproto%d | Shutting down udp connection.", _queue); _udp = NULL; _shutdown_timeout = 0; } @@ -345,9 +345,9 @@ UdpProtocol::OnMsg(UdpMsg *msg, int len) // filter out out-of-order packets uint16 skipped = (uint16)((int)seq - (int)_next_recv_seq); - // Log("checking sequence number -> next - seq : %d - %d = %d\n", seq, _next_recv_seq, skipped); + // Log("udpproto%d | checking sequence number -> next - seq : %d - %d = %d", _queue, seq, _next_recv_seq, skipped); if (skipped > MAX_SEQ_DISTANCE) { - Log("dropping out of order packet (seq: %d, last seq:%d)\n", seq, _next_recv_seq); + Log("udpproto%d | dropping out of order packet (seq: %d, last seq:%d)", _queue, seq, _next_recv_seq); return; } } @@ -384,9 +384,9 @@ UdpProtocol::UpdateNetworkStats(void) _kbps_sent = int(Bps / 1024); - Log("Network Stats -- Bandwidth: %.2f KBps Packets Sent: %5d (%.2f pps) " - "KB Sent: %.2f UDP Overhead: %.2f %%.\n", - _kbps_sent, + Log("udpproto%d | Network Stats -- Bandwidth: %.2f KBps Packets Sent: %5d (%.2f pps) " + "KB Sent: %.2f UDP Overhead: %.2f %%.", _queue, + (float)_kbps_sent, _packets_sent, (float)_packets_sent * 1000 / (now - _stats_start_time), total_bytes_sent / 1024.0, @@ -419,51 +419,35 @@ UdpProtocol::GetPeerConnectStatus(int id, int *frame) return !_peer_connect_status[id].disconnected; } -void -UdpProtocol::Log(const char *fmt, ...) 
-{ - char buf[1024]; - size_t offset; - va_list args; - - snprintf(buf, ARRAY_SIZE(buf), "udpproto%d | ", _queue); - offset = strlen(buf); - va_start(args, fmt); - vsnprintf(buf + offset, ARRAY_SIZE(buf) - offset - 1, fmt, args); - buf[ARRAY_SIZE(buf)-1] = '\0'; - ::Log(buf); - va_end(args); -} - void UdpProtocol::LogMsg(const char *prefix, UdpMsg *msg) { switch (msg->hdr.type) { case UdpMsg::SyncRequest: - Log("%s sync-request (%d).\n", prefix, + Log("udpproto%d | %s sync-request (%d).", _queue, prefix, msg->u.sync_request.random_request); break; case UdpMsg::SyncReply: - Log("%s sync-reply (%d).\n", prefix, + Log("udpproto%d | %s sync-reply (%d).", _queue, prefix, msg->u.sync_reply.random_reply); break; case UdpMsg::QualityReport: - Log("%s quality report.\n", prefix); + Log("udpproto%d | %s quality report.", _queue, prefix); break; case UdpMsg::QualityReply: - Log("%s quality reply.\n", prefix); + Log("udpproto%d | %s quality reply.", _queue, prefix); break; case UdpMsg::KeepAlive: - Log("%s keep alive.\n", prefix); + Log("udpproto%d | %s keep alive.", _queue, prefix); break; case UdpMsg::Input: - Log("%s game-compressed-input %d (+ %d bits).\n", prefix, msg->u.input.start_frame, msg->u.input.num_bits); + Log("udpproto%d | %s game-compressed-input %d (+ %d bits).", _queue, prefix, msg->u.input.start_frame, msg->u.input.num_bits); break; case UdpMsg::InputAck: - Log("%s input ack.\n", prefix); + Log("udpproto%d | %s input ack.", _queue, prefix); break; case UdpMsg::AppData: - Log("%s app data (%d bytes).\n", prefix, msg->u.app_data.size); + Log("udpproto%d | %s app data (%d bytes).", _queue, prefix, msg->u.app_data.size); break; default: ASSERT(false && "Unknown UdpMsg type."); @@ -474,7 +458,7 @@ void UdpProtocol::LogEvent(const char *prefix, const UdpProtocol::Event &evt) { if (evt.type == UdpProtocol::Event::Synchronzied) - Log("%s (event: Synchronzied).\n", prefix); + Log("udpproto%d | %s (event: Synchronized).", _queue, prefix); } bool @@ -488,7 +472,7 @@ 
bool UdpProtocol::OnSyncRequest(UdpMsg *msg, int len) { if (_remote_magic_number != 0 && msg->hdr.magic != _remote_magic_number) { - Log("Ignoring sync request from unknown endpoint (%d != %d).\n", + Log("udpproto%d | Ignoring sync request from unknown endpoint (%d != %d).", _queue, msg->hdr.magic, _remote_magic_number); return false; } @@ -500,14 +484,14 @@ UdpProtocol::OnSyncRequest(UdpMsg *msg, int len) if (msgVerifSize != (int)verification.size() || (msgVerifSize != 0 && memcmp(&msg->u.sync_request.verification[0], &verification[0], msgVerifSize))) { - Log("Verification mismatch: size received %d expected %d", msgVerifSize, (int)verification.size()); + Log("udpproto%d | Verification mismatch: size received %d expected %d", _queue, msgVerifSize, (int)verification.size()); reply->u.sync_reply.verification_failure = 1; SendMsg(reply); throw GGPOException("Verification mismatch", GGPO_ERRORCODE_VERIFICATION_ERROR); } // FIXME if (_state.sync.roundtrips_remaining == NUM_SYNC_PACKETS && msg->hdr.sequence_number == 0) { - Log("Sync request 0 received... Re-queueing sync packet.\n"); + Log("udpproto%d | Sync request 0 received... Re-queueing sync packet.", _queue); SendSyncRequest(); } @@ -521,12 +505,12 @@ bool UdpProtocol::OnSyncReply(UdpMsg *msg, int len) { if (_current_state != Syncing) { - Log("Ignoring SyncReply while not synching.\n"); + Log("udpproto%d | Ignoring SyncReply while not synching.", _queue); return msg->hdr.magic == _remote_magic_number; } if (msg->u.sync_reply.random_reply != _state.sync.random) { - Log("sync reply %d != %d. Keep looking...\n", + Log("udpproto%d | sync reply %d != %d. 
Keep looking...", _queue, msg->u.sync_reply.random_reply, _state.sync.random); return false; } @@ -538,9 +522,9 @@ UdpProtocol::OnSyncReply(UdpMsg *msg, int len) _connected = true; } - Log("Checking sync state (%d round trips remaining).\n", _state.sync.roundtrips_remaining); + Log("udpproto%d | Checking sync state (%d round trips remaining).", _queue, _state.sync.roundtrips_remaining); if (--_state.sync.roundtrips_remaining == 0) { - Log("Synchronized!\n"); + Log("udpproto%d | Synchronized!", _queue); QueueEvent(UdpProtocol::Event(UdpProtocol::Event::Synchronzied)); _current_state = Running; _last_received_input.frame = -1; @@ -564,7 +548,7 @@ UdpProtocol::OnInput(UdpMsg *msg, int len) bool disconnect_requested = msg->u.input.disconnect_requested; if (disconnect_requested) { if (_current_state != Disconnected && !_disconnect_event_sent) { - Log("Disconnecting endpoint on remote request.\n"); + Log("udpproto%d | Disconnecting endpoint on remote request.", _queue); QueueEvent(Event(Event::Disconnected)); _disconnect_event_sent = true; } @@ -629,20 +613,21 @@ UdpProtocol::OnInput(UdpMsg *msg, int len) _last_received_input.frame = currentFrame; /* - * Send the event to the emualtor + * Send the event to the emulator */ UdpProtocol::Event evt(UdpProtocol::Event::Input); evt.u.input.input = _last_received_input; - _last_received_input.desc(desc, ARRAY_SIZE(desc)); + if (LogTypes::LDEBUG <= MAX_LOGLEVEL) + _last_received_input.desc(desc, ARRAY_SIZE(desc)); _state.running.last_input_packet_recv_time = GGPOPlatform::GetCurrentTimeMS(); - Log("Sending frame %d to emu queue %d (%s).\n", _last_received_input.frame, _queue, desc); + Log("udpproto%d | Sending frame %d to emu queue %d (%s).", _queue, _last_received_input.frame, _queue, desc); QueueEvent(evt); } else { - Log("Skipping past frame:(%d) current is %d.\n", currentFrame, _last_received_input.frame); + Log("udpproto%d | Skipping past frame:(%d) current is %d.", _queue, currentFrame, _last_received_input.frame); } /* @@ 
-657,7 +642,7 @@ UdpProtocol::OnInput(UdpMsg *msg, int len) * Get rid of our buffered input */ while (_pending_output.size() && _pending_output.front().frame < msg->u.input.ack_frame) { - Log("Throwing away pending output frame %d\n", _pending_output.front().frame); + Log("udpproto%d | Throwing away pending output frame %d", _queue, _pending_output.front().frame); _last_acked_input = _pending_output.front(); _pending_output.pop(); } @@ -672,7 +657,7 @@ UdpProtocol::OnInputAck(UdpMsg *msg, int len) * Get rid of our buffered input */ while (_pending_output.size() && _pending_output.front().frame < msg->u.input_ack.ack_frame) { - Log("Throwing away pending output frame %d\n", _pending_output.front().frame); + Log("udpproto%d | Throwing away pending output frame %d", _queue, _pending_output.front().frame); _last_acked_input = _pending_output.front(); _pending_output.pop(); } @@ -770,7 +755,7 @@ UdpProtocol::PumpSendQueue() } if (_oop_percent && !_oo_packet.msg && ((rand() % 100) < _oop_percent)) { int delay = rand() % (_send_latency * 10 + 1000); - Log("creating rogue oop (seq: %d delay: %d)\n", entry.msg->hdr.sequence_number, delay); + Log("udpproto%d | creating rogue oop (seq: %d delay: %d)", _queue, entry.msg->hdr.sequence_number, delay); _oo_packet.send_time = GGPOPlatform::GetCurrentTimeMS() + delay; _oo_packet.msg = entry.msg; _oo_packet.dest_addr = entry.dest_addr; @@ -785,7 +770,7 @@ UdpProtocol::PumpSendQueue() _send_queue.pop(); } if (_oo_packet.msg && _oo_packet.send_time < (int)GGPOPlatform::GetCurrentTimeMS()) { - Log("sending rogue oop!"); + Log("udpproto%d | sending rogue oop!", _queue); _udp->SendTo((char *)_oo_packet.msg, _oo_packet.msg->PacketSize(), 0, (struct sockaddr *)&_oo_packet.dest_addr, sizeof _oo_packet.dest_addr); diff --git a/core/deps/ggpo/lib/ggpo/network/udp_proto.h b/core/deps/ggpo/lib/ggpo/network/udp_proto.h index 00af4f8b2..a19e9f701 100644 --- a/core/deps/ggpo/lib/ggpo/network/udp_proto.h +++ 
b/core/deps/ggpo/lib/ggpo/network/udp_proto.h @@ -119,7 +119,6 @@ protected: void UpdateNetworkStats(void); void QueueEvent(const UdpProtocol::Event &evt); void ClearSendQueue(void); - void Log(const char *fmt, ...); void LogMsg(const char *prefix, UdpMsg *msg); void LogEvent(const char *prefix, const UdpProtocol::Event &evt); void SendSyncRequest(); diff --git a/core/deps/ggpo/lib/ggpo/sync.cpp b/core/deps/ggpo/lib/ggpo/sync.cpp index c133735c3..8670a14d2 100644 --- a/core/deps/ggpo/lib/ggpo/sync.cpp +++ b/core/deps/ggpo/lib/ggpo/sync.cpp @@ -59,7 +59,7 @@ Sync::AddLocalInput(int queue, GameInput &input) { int frames_behind = _framecount - _last_confirmed_frame; if (_framecount >= _max_prediction_frames && frames_behind >= _max_prediction_frames) { - Log("Rejecting input from emulator: reached prediction barrier.\n"); + Log("Rejecting input from emulator: reached prediction barrier."); return false; } @@ -67,7 +67,7 @@ Sync::AddLocalInput(int queue, GameInput &input) SaveCurrentFrame(); } - Log("Sending undelayed local frame %d to queue %d.\n", _framecount, queue); + Log("Sending undelayed local frame %d to queue %d.", _framecount, queue); input.frame = _framecount; _input_queues[queue].AddInput(input); @@ -146,7 +146,7 @@ Sync::AdjustSimulation(int seek_to) int framecount = _framecount; int count = _framecount - seek_to; - Log("Catching up\n"); + Log("Catching up"); _rollingback = true; /* @@ -167,7 +167,7 @@ Sync::AdjustSimulation(int seek_to) _rollingback = false; - Log("---\n"); + Log("---"); } void @@ -175,7 +175,7 @@ Sync::LoadFrame(int frame) { // find the frame in question if (frame == _framecount) { - Log("Skipping NOP.\n"); + Log("Skipping NOP."); return; } @@ -183,7 +183,7 @@ Sync::LoadFrame(int frame) _savedstate.head = FindSavedFrameIndex(frame); SavedFrame *state = _savedstate.frames + _savedstate.head; - Log("=== Loading frame info %d (size: %d checksum: %08x).\n", + Log("=== Loading frame info %d (size: %d checksum: %08x).", state->frame, 
state->cbuf, state->checksum); ASSERT(state->buf && state->cbuf); @@ -210,7 +210,7 @@ Sync::SaveCurrentFrame() state->frame = _framecount; _callbacks.save_game_state(&state->buf, &state->cbuf, &state->checksum, state->frame); - Log("=== Saved frame info %d (size: %d checksum: %08x).\n", state->frame, state->cbuf, state->checksum); + Log("=== Saved frame info %d (size: %d checksum: %08x).", state->frame, state->cbuf, state->checksum); _savedstate.head = (_savedstate.head + 1) % ARRAY_SIZE(_savedstate.frames); } @@ -259,7 +259,7 @@ Sync::CheckSimulationConsistency(int *seekTo) int first_incorrect = GameInput::NullFrame; for (int i = 0; i < _config.num_players; i++) { int incorrect = _input_queues[i].GetFirstIncorrectFrame(); - Log("considering incorrect frame %d reported by queue %d.\n", incorrect, i); + Log("considering incorrect frame %d reported by queue %d.", incorrect, i); if (incorrect != GameInput::NullFrame && (first_incorrect == GameInput::NullFrame || incorrect < first_incorrect)) { first_incorrect = incorrect; @@ -267,7 +267,7 @@ Sync::CheckSimulationConsistency(int *seekTo) } if (first_incorrect == GameInput::NullFrame) { - Log("prediction ok. proceeding.\n"); + Log("prediction ok. proceeding."); return true; } *seekTo = first_incorrect; diff --git a/core/deps/ggpo/lib/ggpo/timesync.cpp b/core/deps/ggpo/lib/ggpo/timesync.cpp index 5fd7722f0..df9e2cfe7 100644 --- a/core/deps/ggpo/lib/ggpo/timesync.cpp +++ b/core/deps/ggpo/lib/ggpo/timesync.cpp @@ -59,7 +59,7 @@ TimeSync::recommend_frame_wait_duration(bool require_idle_input) // sleep for. int sleep_frames = (int)(((radvantage - advantage) / 2) + 0.5); - Log("iteration %d: sleep frames is %d\n", count, sleep_frames); + Log("iteration %d: sleep frames is %d", count, sleep_frames); // Some things just aren't worth correcting for. Make sure // the difference is relevant before proceeding. 
@@ -74,7 +74,7 @@ TimeSync::recommend_frame_wait_duration(bool require_idle_input) if (require_idle_input) { for (size_t i = 1; i < ARRAY_SIZE(_last_inputs); i++) { if (!_last_inputs[i].equal(_last_inputs[0], true)) { - Log("iteration %d: rejecting due to input stuff at position %d...!!!\n", count, i); + Log("iteration %d: rejecting due to input stuff at position %d...!!!", count, (int)i); return 0; } } From cdca559d99b45ae97ce9641da06054d51b7a97a0 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 10 Dec 2022 17:19:34 +0100 Subject: [PATCH 28/34] ggpo: optimize memwatch, save/load state. endOfFrame on vblank in ggpo: Optimize memwatch and load/save state. Unprotect memory before restoring a state. Smaller timesync wait. Missing error handling. pvr: call ggpo::endOfFrame() on vblank in only ssa: fix warning --- core/emulator.cpp | 4 +--- core/hw/mem/mem_watch.cpp | 6 ++---- core/hw/mem/mem_watch.h | 34 +++++++++++++++++++++++++++----- core/hw/pvr/Renderer_if.cpp | 11 +---------- core/hw/pvr/spg.cpp | 2 ++ core/hw/sh4/dyna/ssa.cpp | 2 +- core/network/ggpo.cpp | 39 ++++++++++++++++++++++--------------- 7 files changed, 59 insertions(+), 39 deletions(-) diff --git a/core/emulator.cpp b/core/emulator.cpp index 3f0aa823d..f72befdb5 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -870,9 +870,7 @@ void Emulator::vblank() if (sh4_sched_now64() - startTime <= 10000000) return; renderTimeout = true; - if (ggpo::active()) - ggpo::endOfFrame(); - else if (!config::ThreadedRendering) + if (!ggpo::active() && !config::ThreadedRendering) sh4_cpu.Stop(); } diff --git a/core/hw/mem/mem_watch.cpp b/core/hw/mem/mem_watch.cpp index 06c732740..c6870b633 100644 --- a/core/hw/mem/mem_watch.cpp +++ b/core/hw/mem/mem_watch.cpp @@ -29,15 +29,13 @@ ElanRamWatcher elanWatcher; void AicaRamWatcher::protectMem(u32 addr, u32 size) { size = std::min(ARAM_SIZE - addr, size) & ~PAGE_MASK; - mem_region_lock(aica_ram.data + addr, - std::min(aica_ram.size - addr, size)); + 
mem_region_lock(aica_ram.data + addr, size); } void AicaRamWatcher::unprotectMem(u32 addr, u32 size) { size = std::min(ARAM_SIZE - addr, size) & ~PAGE_MASK; - mem_region_unlock(aica_ram.data + addr, - std::min(aica_ram.size - addr, size)); + mem_region_unlock(aica_ram.data + addr, size); } u32 AicaRamWatcher::getMemOffset(void *p) diff --git a/core/hw/mem/mem_watch.h b/core/hw/mem/mem_watch.h index 1986d1939..67ce4acf8 100644 --- a/core/hw/mem/mem_watch.h +++ b/core/hw/mem/mem_watch.h @@ -30,7 +30,14 @@ namespace memwatch { -using PageMap = std::unordered_map>; +struct Page +{ + Page() { + // don't initialize data + } + u8 data[PAGE_SIZE]; +}; +using PageMap = std::unordered_map; template class Watcher @@ -51,7 +58,11 @@ public: for (const auto& pair : pages) static_cast(*this).protectMem(pair.first, PAGE_SIZE); } - pages.clear(); + } + + void unprotect() + { + static_cast(*this).unprotectMem(0, 0xffffffff); } void reset() @@ -69,13 +80,16 @@ public: if (pages.count(offset) > 0) // already saved return true; - memcpy(&pages[offset][0], static_cast(*this).getMemPage(offset), PAGE_SIZE); + Page& page = pages.emplace(offset, Page()).first->second; + memcpy(&page.data[0], static_cast(*this).getMemPage(offset), PAGE_SIZE); static_cast(*this).unprotectMem(offset, PAGE_SIZE); return true; } - const PageMap& getPages() { - return pages; + void getPages(PageMap& other) + { + std::swap(pages, other); + pages = PageMap(); } }; @@ -199,6 +213,16 @@ inline static void protect() elanWatcher.protect(); } +inline static void unprotect() +{ + if (!config::GGPOEnable) + return; + vramWatcher.unprotect(); + ramWatcher.unprotect(); + aramWatcher.unprotect(); + elanWatcher.unprotect(); +} + inline static void reset() { vramWatcher.reset(); diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index c85efd9d8..d01ac97a3 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -4,7 +4,6 @@ #include "rend/TexCache.h" #include "rend/transform_matrix.h" 
#include "cfg/option.h" -#include "network/ggpo.h" #include "emulator.h" #include "serialize.h" #include "hw/holly/holly_intc.h" @@ -368,15 +367,12 @@ void rend_start_render() ctx->rend.framebufferHeight = height; } - bool present = !config::DelayFrameSwapping && !ctx->rend.isRTT && !config::EmulateFramebuffer; - if (present) - ggpo::endOfFrame(); if (QueueRender(ctx)) { palette_update(); pend_rend = true; pvrQueue.enqueue(PvrMessageQueue::Render); - if (present) + if (!config::DelayFrameSwapping && !ctx->rend.isRTT && !config::EmulateFramebuffer) pvrQueue.enqueue(PvrMessageQueue::Present); } } @@ -410,7 +406,6 @@ void rend_vblank() fbInfo.update(); pvrQueue.enqueue(PvrMessageQueue::RenderFramebuffer, fbInfo); pvrQueue.enqueue(PvrMessageQueue::Present); - ggpo::endOfFrame(); if (!config::EmulateFramebuffer) DEBUG_LOG(PVR, "Direct framebuffer write detected"); fb_dirty = false; @@ -452,11 +447,7 @@ void rend_set_fb_write_addr(u32 fb_w_sof1) void rend_swap_frame(u32 fb_r_sof) { if (!config::EmulateFramebuffer && fb_r_sof == fb_w_cur) - { pvrQueue.enqueue(PvrMessageQueue::Present); - if (config::DelayFrameSwapping) - ggpo::endOfFrame(); - } } void rend_disable_rollback() diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index e477db2e1..47d565741 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -8,6 +8,7 @@ #include "rend/TexCache.h" #include "hw/maple/maple_if.h" #include "serialize.h" +#include "network/ggpo.h" //SPG emulation; Scanline/Raster beam registers & interrupts @@ -109,6 +110,7 @@ static int spg_line_sched(int tag, int cycles, int jitter) SB_MDST = 0; } asic_RaiseInterrupt(holly_SCANINT1); + ggpo::endOfFrame(); } if (SPG_VBLANK_INT.vblank_out_interrupt_line_number == prv_cur_scanline) diff --git a/core/hw/sh4/dyna/ssa.cpp b/core/hw/sh4/dyna/ssa.cpp index 9288e62ff..04624c980 100644 --- a/core/hw/sh4/dyna/ssa.cpp +++ b/core/hw/sh4/dyna/ssa.cpp @@ -51,7 +51,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op) u32 rs2 = op->rs2.is_imm() ? 
op->rs2.imm_value() : 0; u32 rs3 = op->rs3.is_imm() ? op->rs3.imm_value() : 0; u32 rd; - u32 rd2; + u32 rd2 = 0; switch (op->op) { diff --git a/core/network/ggpo.cpp b/core/network/ggpo.cpp index c1d2363b8..ae2b90a73 100644 --- a/core/network/ggpo.cpp +++ b/core/network/ggpo.cpp @@ -127,10 +127,10 @@ struct MemPages { void load() { - ram = memwatch::ramWatcher.getPages(); - vram = memwatch::vramWatcher.getPages(); - aram = memwatch::aramWatcher.getPages(); - elanram = memwatch::elanWatcher.getPages(); + memwatch::ramWatcher.getPages(ram); + memwatch::vramWatcher.getPages(vram); + memwatch::aramWatcher.getPages(aram); + memwatch::elanWatcher.getPages(elanram); } memwatch::PageMap ram; memwatch::PageMap vram; @@ -226,7 +226,7 @@ static bool on_event(GGPOEvent *info) case GGPO_EVENTCODE_TIMESYNC: INFO_LOG(NETWORK, "Timesync: %d frames ahead", info->u.timesync.frames_ahead); timesyncOccurred += 5; - std::this_thread::sleep_for(std::chrono::milliseconds(1000 * info->u.timesync.frames_ahead / (msPerFrameAvg >= 25 ? 30 : 60))); + std::this_thread::sleep_for(std::chrono::milliseconds(1000 / (msPerFrameAvg >= 25 ? 
30 : 60))); break; case GGPO_EVENTCODE_CONNECTION_INTERRUPTED: INFO_LOG(NETWORK, "Connection interrupted with player %d", info->u.connection_interrupted.player); @@ -283,17 +283,18 @@ static bool load_game_state(unsigned char *buffer, int len) Deserializer deser(buffer, len, true); int frame; deser >> frame; + memwatch::unprotect(); for (int f = lastSavedFrame - 1; f >= frame; f--) { const MemPages& pages = deltaStates[f]; for (const auto& pair : pages.ram) - memcpy(memwatch::ramWatcher.getMemPage(pair.first), &pair.second[0], PAGE_SIZE); + memcpy(memwatch::ramWatcher.getMemPage(pair.first), &pair.second.data[0], PAGE_SIZE); for (const auto& pair : pages.vram) - memcpy(memwatch::vramWatcher.getMemPage(pair.first), &pair.second[0], PAGE_SIZE); + memcpy(memwatch::vramWatcher.getMemPage(pair.first), &pair.second.data[0], PAGE_SIZE); for (const auto& pair : pages.aram) - memcpy(memwatch::aramWatcher.getMemPage(pair.first), &pair.second[0], PAGE_SIZE); + memcpy(memwatch::aramWatcher.getMemPage(pair.first), &pair.second.data[0], PAGE_SIZE); for (const auto& pair : pages.elanram) - memcpy(memwatch::elanWatcher.getMemPage(pair.first), &pair.second[0], PAGE_SIZE); + memcpy(memwatch::elanWatcher.getMemPage(pair.first), &pair.second.data[0], PAGE_SIZE); DEBUG_LOG(NETWORK, "Restored frame %d pages: %d ram, %d vram, %d eram, %d aica ram", f, (u32)pages.ram.size(), (u32)pages.vram.size(), (u32)pages.elanram.size(), (u32)pages.aram.size()); } @@ -319,6 +320,7 @@ static bool save_game_state(unsigned char **buffer, int *len, int *checksum, int { verify(!sh4_cpu.IsCpuRunning()); lastSavedFrame = frame; + // TODO this is way too much memory size_t allocSize = (settings.platform.isNaomi() ? 
20 : 10) * 1024 * 1024; *buffer = (unsigned char *)malloc(allocSize); if (*buffer == nullptr) @@ -335,6 +337,7 @@ static bool save_game_state(unsigned char **buffer, int *len, int *checksum, int #ifdef SYNC_TEST *checksum = XXH32(*buffer, usedSize, 7); #endif + memwatch::protect(); if (frame > 0) { #ifdef SYNC_TEST @@ -386,7 +389,6 @@ static bool save_game_state(unsigned char **buffer, int *len, int *checksum, int DEBUG_LOG(NETWORK, "Saved frame %d pages: %d ram, %d vram, %d eram, %d aica ram", frame - 1, (u32)deltaStates[frame - 1].ram.size(), (u32)deltaStates[frame - 1].vram.size(), (u32)deltaStates[frame - 1].elanram.size(), (u32)deltaStates[frame - 1].aram.size()); } - memwatch::protect(); return true; } @@ -679,7 +681,7 @@ bool nextFrame() stopSession(); if (error == GGPO_ERRORCODE_INPUT_SIZE_DIFF) throw FlycastException("GGPO analog settings are different from peer"); - else if (error != GGPO_OK) + else throw FlycastException("GGPO error"); } @@ -726,18 +728,23 @@ bool nextFrame() mo_y_delta[0] -= inputs.u.relPos.y; mo_wheel_delta[0] -= inputs.u.relPos.wheel; } - GGPOErrorCode result = ggpo_add_local_input(ggpoSession, localPlayer, &inputs, inputSize); - if (result == GGPO_OK) + error = ggpo_add_local_input(ggpoSession, localPlayer, &inputs, inputSize); + if (error == GGPO_OK) break; - if (result != GGPO_ERRORCODE_PREDICTION_THRESHOLD) + if (error != GGPO_ERRORCODE_PREDICTION_THRESHOLD) { - WARN_LOG(NETWORK, "ggpo_add_local_input failed %d", result); + WARN_LOG(NETWORK, "ggpo_add_local_input failed %d", error); stopSession(); throw FlycastException("GGPO error"); } DEBUG_LOG(NETWORK, "ggpo_add_local_input prediction barrier reached"); std::this_thread::sleep_for(std::chrono::milliseconds(5)); - ggpo_idle(ggpoSession, 0); + error = ggpo_idle(ggpoSession, 0); + if (error != GGPO_OK) + { + stopSession(); + throw FlycastException("GGPO error"); + } } while (active()); #ifdef SYNC_TEST u32 input = ~kcode[1 - localPlayerNum]; From 
a0a9b5bbc5eed06f6705fa4c58dcaa946a231fd3 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 12 Dec 2022 22:15:12 +0100 Subject: [PATCH 29/34] holly: crash when reading a write-only register --- core/hw/holly/sb.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/hw/holly/sb.cpp b/core/hw/holly/sb.cpp index 5fb8f6151..61f552354 100644 --- a/core/hw/holly/sb.cpp +++ b/core/hw/holly/sb.cpp @@ -180,6 +180,10 @@ u32 sb_ReadMem(u32 addr) if (!(sb_regs[offset].flags & REG_RF)) rv = sb_regs[offset].data32; + else if (sb_regs[offset].flags & REG_WO) { + INFO_LOG(HOLLY, "sb_ReadMem write-only reg %s", regName(addr)); + rv = 0; + } else rv = sb_regs[offset].readFunctionAddr(addr); @@ -248,7 +252,7 @@ void sb_rio_register(u32 reg_addr, RegIO flags, RegReadAddrFP* rf, RegWriteAddrF } else { - if (flags & REG_RF) + if ((flags & REG_RF) && !(flags & REG_WO)) sb_regs[idx].readFunctionAddr = rf; else sb_regs[idx].data32 = 0; From 9781d8971a4c9f13533ba197d032367d14a2beae Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 13 Dec 2022 20:56:19 +0100 Subject: [PATCH 30/34] gl: use static buffers for quad vertices --- core/rend/gles/quad.cpp | 67 +++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/core/rend/gles/quad.cpp b/core/rend/gles/quad.cpp index bf84035fc..002df970a 100644 --- a/core/rend/gles/quad.cpp +++ b/core/rend/gles/quad.cpp @@ -48,7 +48,9 @@ void main() static GLuint shader; static GLuint rot90shader; static GLuint quadVertexArray; +static GLuint quadVertexArraySwapY; static GLuint quadBuffer; +static GLuint quadBufferSwapY; static GLuint quadIndexBuffer; static void setupVertexAttribs() @@ -87,8 +89,13 @@ void initQuad() glUniform1i(tex, 0); // texture 0 } #ifndef GLES2 - if (quadVertexArray == 0 && gl.gl_major >= 3) - glGenVertexArrays(1, &quadVertexArray); + if (gl.gl_major >= 3) + { + if (quadVertexArray == 0) + glGenVertexArrays(1, &quadVertexArray); + if (quadVertexArraySwapY == 0) 
+ glGenVertexArrays(1, &quadVertexArraySwapY); + } #endif if (quadIndexBuffer == 0) { @@ -101,6 +108,14 @@ void initQuad() if (quadBuffer == 0) { glGenBuffers(1, &quadBuffer); + float vertices[4][5] = { + { -1.f, 1.f, 1.f, 0.f, 1.f }, + { -1.f, -1.f, 1.f, 0.f, 0.f }, + { 1.f, 1.f, 1.f, 1.f, 1.f }, + { 1.f, -1.f, 1.f, 1.f, 0.f }, + }; + glBindBuffer(GL_ARRAY_BUFFER, quadBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); #ifndef GLES2 if (gl.gl_major >= 3) { @@ -109,10 +124,34 @@ void initQuad() glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, quadIndexBuffer); setupVertexAttribs(); bindVertexArray(0); - glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } #endif + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + if (quadBufferSwapY == 0) + { + glGenBuffers(1, &quadBufferSwapY); + float vertices[4][5] = { + { -1.f, 1.f, 1.f, 0.f, 0.f }, + { -1.f, -1.f, 1.f, 0.f, 1.f }, + { 1.f, 1.f, 1.f, 1.f, 0.f }, + { 1.f, -1.f, 1.f, 1.f, 1.f }, + }; + glBindBuffer(GL_ARRAY_BUFFER, quadBufferSwapY); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); +#ifndef GLES2 + if (gl.gl_major >= 3) + { + bindVertexArray(quadVertexArraySwapY); + glBindBuffer(GL_ARRAY_BUFFER, quadBufferSwapY); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, quadIndexBuffer); + setupVertexAttribs(); + bindVertexArray(0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } +#endif + glBindBuffer(GL_ARRAY_BUFFER, 0); } glCheck(); } @@ -124,6 +163,11 @@ void termQuad() glDeleteBuffers(1, &quadBuffer); quadBuffer = 0; } + if (quadBufferSwapY != 0) + { + glDeleteBuffers(1, &quadBufferSwapY); + quadBufferSwapY = 0; + } if (quadIndexBuffer != 0) { glDeleteBuffers(1, &quadIndexBuffer); @@ -134,6 +178,11 @@ void termQuad() deleteVertexArray(quadVertexArray); quadVertexArray = 0; } + if (quadVertexArraySwapY != 0) + { + deleteVertexArray(quadVertexArraySwapY); + quadVertexArraySwapY = 0; + } if (shader != 0) { glcache.DeleteProgram(shader); @@ -145,13 +194,6 @@ void 
termQuad() void drawQuad(GLuint texId, bool rotate, bool swapY) { - float vertices[4][5] = { - { -1.f, 1.f, 1.f, 0.f, (float)!swapY }, - { -1.f, -1.f, 1.f, 0.f, (float)swapY }, - { 1.f, 1.f, 1.f, 1.f, (float)!swapY }, - { 1.f, -1.f, 1.f, 1.f, (float)swapY }, - }; - glcache.Disable(GL_SCISSOR_TEST); glcache.Disable(GL_DEPTH_TEST); glcache.Disable(GL_STENCIL_TEST); @@ -163,14 +205,13 @@ void drawQuad(GLuint texId, bool rotate, bool swapY) glActiveTexture(GL_TEXTURE0); glcache.BindTexture(GL_TEXTURE_2D, texId); - glBindBuffer(GL_ARRAY_BUFFER, quadBuffer); + glBindBuffer(GL_ARRAY_BUFFER, swapY ? quadBufferSwapY : quadBuffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, quadIndexBuffer); if (gl.gl_major < 3) setupVertexAttribs(); else - bindVertexArray(quadVertexArray); + bindVertexArray(swapY ? quadVertexArraySwapY : quadVertexArray); - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, (GLvoid *)0); bindVertexArray(0); glCheck(); From f9feaa313b37b4033651b6be434dd41432fd7806 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 13 Dec 2022 22:57:57 +0100 Subject: [PATCH 31/34] pvr: sort triangles during parse. Use primitive restart Sort triangles during ta_parse. Use the same index as other polys. Store results in rend_context. Use primitive restart if available to avoid too many degenerate triangles. Update renderers. 
--- CMakeLists.txt | 1 + core/hw/pvr/ta.h | 2 +- core/hw/pvr/ta_ctx.h | 16 + core/hw/pvr/ta_util.cpp | 541 ++++++++++++++++++++++++++ core/hw/pvr/ta_vtx.cpp | 385 +++--------------- core/rend/dx11/dx11_renderer.cpp | 56 +-- core/rend/dx11/dx11_renderer.h | 6 +- core/rend/dx9/d3d_renderer.cpp | 61 +-- core/rend/dx9/d3d_renderer.h | 7 +- core/rend/gl4/gles.cpp | 4 +- core/rend/gles/gldraw.cpp | 130 ++----- core/rend/gles/gles.cpp | 22 +- core/rend/gles/gles.h | 3 +- core/rend/sorter.cpp | 207 +--------- core/rend/sorter.h | 11 - core/rend/vulkan/drawer.cpp | 39 +- core/rend/vulkan/drawer.h | 7 +- core/rend/vulkan/oit/oit_pipeline.cpp | 2 +- core/rend/vulkan/pipeline.cpp | 11 +- core/rend/vulkan/vulkan_renderer.h | 2 +- 20 files changed, 738 insertions(+), 775 deletions(-) create mode 100644 core/hw/pvr/ta_util.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 417e8ff6c..a8ad2a3d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -813,6 +813,7 @@ target_sources(${PROJECT_NAME} PRIVATE core/hw/pvr/ta_ctx.h core/hw/pvr/ta.h core/hw/pvr/ta_structs.h + core/hw/pvr/ta_util.cpp core/hw/pvr/ta_vtx.cpp core/hw/sh4/dyna/blockmanager.cpp core/hw/sh4/dyna/blockmanager.h diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index 6aba412c9..a8b939cad 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -14,7 +14,7 @@ void ta_vtx_SoftReset(); void DYNACALL ta_vtx_data32(const SQBuffer *data); void ta_vtx_data(const SQBuffer *data, u32 size); -bool ta_parse(TA_context *ctx); +bool ta_parse(TA_context *ctx, bool primRestart); class TaTypeLut { diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 36da3e60b..97304f591 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -172,6 +172,7 @@ struct RenderPass { u32 pt_count; u32 tr_count; u32 mvo_tr_count; + u32 sorted_tr_count; }; struct N2Matrix @@ -216,6 +217,13 @@ struct N2LightModel int bumpId2; // Light index for vol1 bump mapping }; +struct SortedTriangle +{ + const PolyParam* ppid; + u32 first; + u32 
count; +}; + struct rend_context { u8* proc_start; @@ -250,6 +258,7 @@ struct rend_context List global_param_pt; List global_param_tr; List render_passes; + std::vector sortedTriangles; List matrices; List lightModels; @@ -265,6 +274,7 @@ struct rend_context global_param_mvo.Clear(); global_param_mvo_tr.Clear(); render_passes.Clear(); + sortedTriangles.clear(); // Reserve space for background poly global_param_op.Append()->init(); @@ -412,6 +422,12 @@ u32 ta_get_list_type(); void ta_set_list_type(u32 listType); void ta_parse_reset(); void getRegionTileAddrAndSize(u32& address, u32& size); +//void sortTriangles(rend_context& ctx, int pass); +void sortTriangles(rend_context& ctx, RenderPass& pass, const RenderPass& previousPass); +void sortPolyParams(List *polys, int first, int end, rend_context* ctx); +void fix_texture_bleeding(const List *list, rend_context& ctx); +void makeIndex(const List *polys, int first, int end, bool merge, rend_context* ctx); +void makePrimRestartIndex(const List *polys, int first, int end, bool merge, rend_context* ctx); class TAParserException : public FlycastException { diff --git a/core/hw/pvr/ta_util.cpp b/core/hw/pvr/ta_util.cpp new file mode 100644 index 000000000..68e1ae4ca --- /dev/null +++ b/core/hw/pvr/ta_util.cpp @@ -0,0 +1,541 @@ +/* + Copyright 2022 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+ */ +#include "ta_ctx.h" +#include "pvr_mem.h" +#include +#include +#include + +struct IndexTrig +{ + u32 id[3]; + u16 pid; + f32 z; +}; + +static float minZ(const Vertex *v, const u32 *mod) +{ + return std::min(std::min(v[mod[0]].z, v[mod[1]].z), v[mod[2]].z); +} + +static bool operator<(const IndexTrig& left, const IndexTrig& right) +{ + return left.z < right.z; +} + +static float getProjectedZ(const Vertex *v, const float *mat) +{ + // -1 / z + return -1 / (mat[2] * v->x + mat[1 * 4 + 2] * v->y + mat[2 * 4 + 2] * v->z + mat[3 * 4 + 2]); +} + +static void fill_id(u32 *d, const Vertex *v0, const Vertex *v1, const Vertex *v2, const Vertex *vb) +{ + d[0] = (u32)(v0 - vb); + d[1] = (u32)(v1 - vb); + d[2] = (u32)(v2 - vb); +} + +void sortTriangles(rend_context& ctx, RenderPass& pass, const RenderPass& previousPass) +{ + int first = previousPass.tr_count; + int count = pass.tr_count - first; + if (count == 0) + return; + + const Vertex * const vtx_base = ctx.verts.head(); + const PolyParam * const pp_base = &ctx.global_param_tr.head()[first]; + const PolyParam * const pp_end = pp_base + count; + + int vtx_count = ctx.verts.used() - pp_base->first; + if (vtx_count <= 0) + return; + + //make lists of all triangles, with their pid and vid + static std::vector lst; + + lst.resize(vtx_count * 4); + + int aused = 0; + + for (const PolyParam *pp = pp_base; pp != pp_end; pp++) + { + u32 ppid = (u32)(pp - pp_base); + + if (pp->count > 2) + { + u32 idx = pp->first; + u32 flip = 0; + float z0 = 0, z1 = 0; + + if (pp->isNaomi2()) + { + z0 = getProjectedZ(&vtx_base[idx], pp->mvMatrix); + z1 = getProjectedZ(&vtx_base[idx + 1], pp->mvMatrix); + } + for (u32 i = 0; i < pp->count - 2; i++, idx++) + { + const Vertex *v0, *v1; + if (flip) + { + v0 = &vtx_base[idx + 1]; + v1 = &vtx_base[idx]; + } + else + { + v0 = &vtx_base[idx]; + v1 = &vtx_base[idx + 1]; + } + const Vertex *v2 = &vtx_base[idx + 2]; + fill_id(lst[aused].id, v0, v1, v2, vtx_base); + lst[aused].pid = ppid; + if 
(pp->isNaomi2()) + { + float z2 = getProjectedZ(v2, pp->mvMatrix); + lst[aused].z = std::min(z0, std::min(z1, z2)); + z0 = z1; + z1 = z2; + } + else + { + lst[aused].z = minZ(vtx_base, lst[aused].id); + } + aused++; + + flip ^= 1; + } + } + } + + lst.resize(aused); + + //sort them + std::stable_sort(lst.begin(), lst.end()); + + //Merge pids/draw cmds if two different pids are actually equal + for (int k = 1; k < aused; k++) + if (lst[k].pid != lst[k - 1].pid) + { + const PolyParam& curPoly = pp_base[lst[k].pid]; + const PolyParam& prevPoly = pp_base[lst[k - 1].pid]; + if (curPoly.equivalentIgnoreCullingDirection(prevPoly) + && (curPoly.isp.CullMode < 2 || curPoly.isp.CullMode == prevPoly.isp.CullMode)) + lst[k].pid = lst[k - 1].pid; + } + + //re-assemble them into drawing commands + + int idx = -1; + int idxSize = ctx.idx.used(); + + for (int i = 0; i < aused; i++) + { + int pid = lst[i].pid; + u32* midx = lst[i].id; + + *ctx.idx.Append() = midx[0]; + *ctx.idx.Append() = midx[1]; + *ctx.idx.Append() = midx[2]; + + if (idx != pid) + { + SortedTriangle cur = { pp_base + pid, (u32)(idxSize + i * 3), 0 }; + + if (idx != -1) + { + SortedTriangle& last = ctx.sortedTriangles.back(); + last.count = cur.first - last.first; + } + + ctx.sortedTriangles.push_back(cur); + idx = pid; + } + } + + if (aused > 0) + { + SortedTriangle& last = ctx.sortedTriangles.back(); + last.count = idxSize + aused * 3 - last.first; + } + pass.sorted_tr_count = ctx.sortedTriangles.size(); + +#if PRINT_SORT_STATS + printf("Reassembled into %d from %d\n", (int)ctx.sortedTriangles.size(), pp_end - pp_base); +#endif +} + +static bool operator<(const PolyParam& left, const PolyParam& right) +{ + return left.zvZ < right.zvZ; +} + +void sortPolyParams(List *polys, int first, int end, rend_context* ctx) +{ + if (end - first <= 1) + return; + + Vertex *vtx_base = ctx->verts.head(); + + PolyParam * const pp_end = &polys->head()[end]; + + for (PolyParam *pp = &polys->head()[first]; pp != pp_end; pp++) + { + 
if (pp->count < 3) + { + pp->zvZ = 0; + } + else + { + Vertex *vtx = &vtx_base[pp->first]; + Vertex *vtx_end = &vtx_base[pp->first + pp->count]; + + if (pp->isNaomi2()) + { + glm::mat4 mvMat = pp->mvMatrix != nullptr ? glm::make_mat4(pp->mvMatrix) : glm::mat4(1); + glm::vec3 min{ 1e38f, 1e38f, 1e38f }; + glm::vec3 max{ -1e38f, -1e38f, -1e38f }; + while (vtx != vtx_end) + { + glm::vec3 pos{ vtx->x, vtx->y, vtx->z }; + min = glm::min(min, pos); + max = glm::max(max, pos); + vtx++; + } + glm::vec4 center((min + max) / 2.f, 1); + glm::vec4 extents(max - glm::vec3(center), 0); + // transform + center = mvMat * center; + glm::vec3 extentX = mvMat * glm::vec4(extents.x, 0, 0, 0); + glm::vec3 extentY = mvMat * glm::vec4(0, extents.y, 0, 0); + glm::vec3 extentZ = mvMat * glm::vec4(0, 0, extents.z, 0); + // new AA extents + glm::vec3 newExtent = glm::abs(extentX) + glm::abs(extentY) + glm::abs(extentZ); + + min = glm::vec3(center) - newExtent; + max = glm::vec3(center) + newExtent; + + // project + pp->zvZ = -1 / std::min(min.z, max.z); + } + else + { + u32 zv = 0xFFFFFFFF; + while (vtx != vtx_end) + { + zv = std::min(zv, (u32&)vtx->z); + vtx++; + } + + pp->zvZ = (f32&)zv; + } + } + } + + std::stable_sort(&polys->head()[first], pp_end); +} + +void getRegionTileAddrAndSize(u32& address, u32& size) +{ + address = REGION_BASE; + const bool type1_tile = ((FPU_PARAM_CFG >> 21) & 1) == 0; + size = (type1_tile ? 5 : 6) * 4; + bool empty_first_region = true; + for (int i = type1_tile ? 
4 : 5; i > 0; i--) + if ((pvr_read32p(address + i * 4) & 0x80000000) == 0) + { + empty_first_region = false; + break; + } + if (empty_first_region) + address += size; + RegionArrayTile tile; + tile.full = pvr_read32p(address); + if (tile.PreSort) + // Windows CE weirdness + size = 6 * 4; +} + +int getTAContextAddresses(u32 *addresses) +{ + u32 addr; + u32 tile_size; + getRegionTileAddrAndSize(addr, tile_size); + + RegionArrayTile tile; + tile.full = pvr_read32p(addr); + u32 x = tile.X; + u32 y = tile.Y; + u32 count = 0; + do { + tile.full = pvr_read32p(addr); + if (tile.X != x || tile.Y != y) + break; + // Try the opaque pointer + u32 opbAddr = pvr_read32p(addr + 4); + if (opbAddr & 0x80000000) + { + // Try the translucent pointer + opbAddr = pvr_read32p(addr + 12); + if (opbAddr & 0x80000000) + { + // Try the punch-through pointer + if (tile_size >= 24) + opbAddr = pvr_read32p(addr + 20); + if (opbAddr & 0x80000000) + { + INFO_LOG(PVR, "Can't find any non-null OPB for pass %d", count); + break; + } + } + } + addresses[count++] = pvr_read32p(opbAddr); + addr += tile_size; + } while (!tile.LastRegion && count < MAX_PASSES); + + return count; +} + +void fix_texture_bleeding(const List *list, rend_context& ctx) +{ + const PolyParam *pp_end = list->LastPtr(0); + const u32 *idx_base = ctx.idx.head(); + Vertex *vtx_base = ctx.verts.head(); + for (const PolyParam *pp = list->head(); pp != pp_end; pp++) + { + if (!pp->pcw.Texture || pp->count < 3) + continue; + // Find polygons that are facing the camera (constant z) + // and only use 0 and 1 for U and V (some tolerance around 1 for SA2) + // then apply a half-pixel correction on U and V. 
+ const u32 first = idx_base[pp->first]; + const u32 last = idx_base[pp->first + pp->count - 1]; + bool need_fixing = true; + float z = 0.f; + for (u32 idx = first; idx <= last && need_fixing; idx++) + { + Vertex& vtx = vtx_base[idx]; + + if (vtx.u != 0.f && (vtx.u <= 0.995f || vtx.u > 1.f)) + need_fixing = false; + else if (vtx.v != 0.f && (vtx.v <= 0.995f || vtx.v > 1.f)) + need_fixing = false; + else if (idx == first) + z = vtx.z; + else if (z != vtx.z) + need_fixing = false; + } + if (!need_fixing) + continue; + u32 tex_width = 8 << pp->tsp.TexU; + u32 tex_height = 8 << pp->tsp.TexV; + for (u32 idx = first; idx <= last; idx++) + { + Vertex& vtx = vtx_base[idx]; + if (vtx.u > 0.995f) + vtx.u = 1.f; + vtx.u = (0.5f + vtx.u * (tex_width - 1)) / tex_width; + if (vtx.v > 0.995f) + vtx.v = 1.f; + vtx.v = (0.5f + vtx.v * (tex_height - 1)) / tex_height; + } + } +} + +// +// Check if a vertex has huge x,y,z values or negative z +// +static bool is_vertex_inf(const Vertex& vtx) +{ + return std::isnan(vtx.x) || fabsf(vtx.x) > 3.4e37f + || std::isnan(vtx.y) || fabsf(vtx.y) > 3.4e37f + || std::isnan(vtx.z) || vtx.z < 0.f || vtx.z > 3.4e37f; +} + +// +// Create the vertex index, eliminating invalid vertices and merging strips when possible. +// Use primitive restart when merging strips. 
+// +void makePrimRestartIndex(const List *polys, int first, int end, bool merge, rend_context* ctx) +{ + const Vertex *vertices = ctx->verts.head(); + + PolyParam *last_poly = nullptr; + const PolyParam *end_poly = &polys->head()[end]; + for (PolyParam *poly = &polys->head()[first]; poly != end_poly; poly++) + { + int first_index; + bool dupe_next_vtx = false; + if (merge + && last_poly != nullptr + && last_poly->count != 0 + && poly->equivalentIgnoreCullingDirection(*last_poly)) + { + *ctx->idx.Append() = ~0; + dupe_next_vtx = poly->isp.CullMode >= 2 && poly->isp.CullMode != last_poly->isp.CullMode; + first_index = last_poly->first; + } + else + { + last_poly = poly; + first_index = ctx->idx.used(); + } + int last_good_vtx = -1; + for (u32 i = 0; i < poly->count; i++) + { + const Vertex& vtx = vertices[poly->first + i]; + if (!poly->isNaomi2() && is_vertex_inf(vtx)) + { + bool odd = false; + while (i < poly->count - 1) + { + odd = !odd; + const Vertex& next_vtx = vertices[poly->first + i + 1]; + if (!is_vertex_inf(next_vtx)) + { + if (poly->count - i - 1 >= 3) + { + if (last_good_vtx >= 0) + // reset the strip + *ctx->idx.Append() = ~0; + if (odd && poly->isp.CullMode >= 2) + // repeat next vertex to get culling right + dupe_next_vtx = true; + } + break; + } + i++; + } + } + else + { + last_good_vtx = poly->first + i; + if (dupe_next_vtx) + { + *ctx->idx.Append() = last_good_vtx; + dupe_next_vtx = false; + } + *ctx->idx.Append() = last_good_vtx; + } + } + if (last_poly == poly) + { + poly->first = first_index; + poly->count = ctx->idx.used() - first_index; + } + else + { + last_poly->count = ctx->idx.used() - last_poly->first; + poly->count = 0; + } + } +} + +// +// Create the vertex index, eliminating invalid vertices and merging strips when possible. +// Use degenerate triangles to link strips. 
+// +void makeIndex(const List *polys, int first, int end, bool merge, rend_context* ctx) +{ + const u32 *indices = ctx->idx.head(); + const Vertex *vertices = ctx->verts.head(); + + PolyParam *last_poly = nullptr; + const PolyParam *end_poly = &polys->head()[end]; + bool cullingReversed = false; + for (PolyParam *poly = &polys->head()[first]; poly != end_poly; poly++) + { + int first_index; + bool dupe_next_vtx = false; + if (merge + && last_poly != nullptr + && last_poly->count != 0 + && poly->equivalentIgnoreCullingDirection(*last_poly)) + { + const u32 last_vtx = indices[last_poly->first + last_poly->count - 1]; + *ctx->idx.Append() = last_vtx; + if (poly->isp.CullMode < 2 || poly->isp.CullMode == last_poly->isp.CullMode) + { + if (cullingReversed) + *ctx->idx.Append() = last_vtx; + cullingReversed = false; + } + else + { + if (!cullingReversed) + *ctx->idx.Append() = last_vtx; + cullingReversed = true; + } + dupe_next_vtx = true; + first_index = last_poly->first; + } + else + { + last_poly = poly; + first_index = ctx->idx.used(); + cullingReversed = false; + } + int last_good_vtx = -1; + for (u32 i = 0; i < poly->count; i++) + { + const Vertex& vtx = vertices[poly->first + i]; + if (!poly->isNaomi2() && is_vertex_inf(vtx)) + { + while (i < poly->count - 1) + { + const Vertex& next_vtx = vertices[poly->first + i + 1]; + if (!is_vertex_inf(next_vtx)) + { + // repeat last and next vertices to link strips + if (last_good_vtx >= 0) + { + verify(!dupe_next_vtx); + *ctx->idx.Append() = last_good_vtx; + dupe_next_vtx = true; + } + break; + } + i++; + } + } + else + { + last_good_vtx = poly->first + i; + if (dupe_next_vtx) + { + *ctx->idx.Append() = last_good_vtx; + dupe_next_vtx = false; + } + const u32 count = ctx->idx.used() - first_index; + if (((i ^ count) & 1) ^ cullingReversed) + *ctx->idx.Append() = last_good_vtx; + *ctx->idx.Append() = last_good_vtx; + } + } + if (last_poly == poly) + { + poly->first = first_index; + poly->count = ctx->idx.used() - 
first_index; + } + else + { + last_poly->count = ctx->idx.used() - last_poly->first; + poly->count = 0; + } + } +} diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index b38162ada..a8735b530 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -12,8 +12,6 @@ #include #include -#include -#include #define TACALL DYNACALL #ifdef NDEBUG @@ -1127,228 +1125,39 @@ private: static void getRegionTileClipping(u32& xmin, u32& xmax, u32& ymin, u32& ymax); static void getRegionSettings(int passNumber, RenderPass& pass); -// -// Check if a vertex has huge x,y,z values or negative z -// -static bool is_vertex_inf(const Vertex& vtx) +static void parseRenderPass(RenderPass& pass, const RenderPass& previousPass, rend_context& ctx, bool primRestart) { - return std::isnan(vtx.x) || fabsf(vtx.x) > 3.4e37f - || std::isnan(vtx.y) || fabsf(vtx.y) > 3.4e37f - || std::isnan(vtx.z) || vtx.z < 0.f || vtx.z > 3.4e37f; -} + const bool perPixel = config::RendererType == RenderType::OpenGL_OIT + || config::RendererType == RenderType::DirectX11_OIT + || config::RendererType == RenderType::Vulkan_OIT; + const bool mergeTranslucent = config::PerStripSorting || perPixel; -// -// Create the vertex index, eliminating invalid vertices and merging strips when possible. 
-// -static void make_index(const List *polys, int first, int end, bool merge, rend_context* ctx) -{ - const u32 *indices = ctx->idx.head(); - const Vertex *vertices = ctx->verts.head(); - - PolyParam *last_poly = nullptr; - const PolyParam *end_poly = &polys->head()[end]; - bool cullingReversed = false; - for (PolyParam *poly = &polys->head()[first]; poly != end_poly; poly++) + if (primRestart) + makePrimRestartIndex(&ctx.global_param_op, previousPass.op_count, pass.op_count, true, &ctx); + else + makeIndex(&ctx.global_param_op, previousPass.op_count, pass.op_count, true, &ctx); + if (primRestart) + makePrimRestartIndex(&ctx.global_param_pt, previousPass.pt_count, pass.pt_count, true, &ctx); + else + makeIndex(&ctx.global_param_pt, previousPass.pt_count, pass.pt_count, true, &ctx); + if (pass.autosort && !perPixel) { - int first_index; - bool dupe_next_vtx = false; - if (merge - && last_poly != nullptr - && last_poly->count != 0 - && poly->equivalentIgnoreCullingDirection(*last_poly)) - { - const u32 last_vtx = indices[last_poly->first + last_poly->count - 1]; - *ctx->idx.Append() = last_vtx; - if (poly->isp.CullMode < 2 || poly->isp.CullMode == last_poly->isp.CullMode) - { - if (cullingReversed) - *ctx->idx.Append() = last_vtx; - cullingReversed = false; - } - else - { - if (!cullingReversed) - *ctx->idx.Append() = last_vtx; - cullingReversed = true; - } - dupe_next_vtx = true; - first_index = last_poly->first; - } + if (config::PerStripSorting) + sortPolyParams(&ctx.global_param_tr, previousPass.tr_count, pass.tr_count, &ctx); else - { - last_poly = poly; - first_index = ctx->idx.used(); - cullingReversed = false; - } - int last_good_vtx = -1; - for (u32 i = 0; i < poly->count; i++) - { - const Vertex& vtx = vertices[poly->first + i]; - if (!poly->isNaomi2() && is_vertex_inf(vtx)) - { - while (i < poly->count - 1) - { - const Vertex& next_vtx = vertices[poly->first + i + 1]; - if (!is_vertex_inf(next_vtx)) - { - // repeat last and next vertices to link strips - 
if (last_good_vtx >= 0) - { - verify(!dupe_next_vtx); - *ctx->idx.Append() = last_good_vtx; - dupe_next_vtx = true; - } - break; - } - i++; - } - } - else - { - last_good_vtx = poly->first + i; - if (dupe_next_vtx) - { - *ctx->idx.Append() = last_good_vtx; - dupe_next_vtx = false; - } - const u32 count = ctx->idx.used() - first_index; - if (((i ^ count) & 1) ^ cullingReversed) - *ctx->idx.Append() = last_good_vtx; - *ctx->idx.Append() = last_good_vtx; - } - } - if (last_poly == poly) - { - poly->first = first_index; - poly->count = ctx->idx.used() - first_index; - } + sortTriangles(ctx, pass, previousPass); + } + // sortTriangles already created the index + if (!pass.autosort || perPixel || config::PerStripSorting) + { + if (primRestart) + makePrimRestartIndex(&ctx.global_param_tr, previousPass.tr_count, pass.tr_count, mergeTranslucent, &ctx); else - { - last_poly->count = ctx->idx.used() - last_poly->first; - poly->count = 0; - } + makeIndex(&ctx.global_param_tr, previousPass.tr_count, pass.tr_count, mergeTranslucent, &ctx); } } -static void fix_texture_bleeding(const List *list) -{ - const PolyParam *pp_end = list->LastPtr(0); - const u32 *idx_base = vd_rc.idx.head(); - Vertex *vtx_base = vd_rc.verts.head(); - for (const PolyParam *pp = list->head(); pp != pp_end; pp++) - { - if (!pp->pcw.Texture || pp->count < 3) - continue; - // Find polygons that are facing the camera (constant z) - // and only use 0 and 1 for U and V (some tolerance around 1 for SA2) - // then apply a half-pixel correction on U and V. 
- const u32 first = idx_base[pp->first]; - const u32 last = idx_base[pp->first + pp->count - 1]; - bool need_fixing = true; - float z = 0.f; - for (u32 idx = first; idx <= last && need_fixing; idx++) - { - Vertex& vtx = vtx_base[idx]; - - if (vtx.u != 0.f && (vtx.u <= 0.995f || vtx.u > 1.f)) - need_fixing = false; - else if (vtx.v != 0.f && (vtx.v <= 0.995f || vtx.v > 1.f)) - need_fixing = false; - else if (idx == first) - z = vtx.z; - else if (z != vtx.z) - need_fixing = false; - } - if (!need_fixing) - continue; - u32 tex_width = 8 << pp->tsp.TexU; - u32 tex_height = 8 << pp->tsp.TexV; - for (u32 idx = first; idx <= last; idx++) - { - Vertex& vtx = vtx_base[idx]; - if (vtx.u > 0.995f) - vtx.u = 1.f; - vtx.u = (0.5f + vtx.u * (tex_width - 1)) / tex_width; - if (vtx.v > 0.995f) - vtx.v = 1.f; - vtx.v = (0.5f + vtx.v * (tex_height - 1)) / tex_height; - } - } -} - -static bool operator<(const PolyParam& left, const PolyParam& right) -{ - return left.zvZ < right.zvZ; -} - -static void sortPolyParams(List *polys, int first, int end, rend_context* ctx) -{ - if (end - first <= 1) - return; - - Vertex *vtx_base = ctx->verts.head(); - - PolyParam *pp = &polys->head()[first]; - PolyParam *pp_end = &polys->head()[end]; - - while (pp != pp_end) - { - if (pp->count < 3) - { - pp->zvZ = 0; - } - else - { - Vertex *vtx = &vtx_base[pp->first]; - Vertex *vtx_end = &vtx_base[pp->first + pp->count]; - - if (pp->isNaomi2()) - { - glm::mat4 mvMat = pp->mvMatrix != nullptr ? 
glm::make_mat4(pp->mvMatrix) : glm::mat4(1); - glm::vec3 min{ 1e38f, 1e38f, 1e38f }; - glm::vec3 max{ -1e38f, -1e38f, -1e38f }; - while (vtx != vtx_end) - { - glm::vec3 pos{ vtx->x, vtx->y, vtx->z }; - min = glm::min(min, pos); - max = glm::max(max, pos); - vtx++; - } - glm::vec4 center((min + max) / 2.f, 1); - glm::vec4 extents(max - glm::vec3(center), 0); - // transform - center = mvMat * center; - glm::vec3 extentX = mvMat * glm::vec4(extents.x, 0, 0, 0); - glm::vec3 extentY = mvMat * glm::vec4(0, extents.y, 0, 0); - glm::vec3 extentZ = mvMat * glm::vec4(0, 0, extents.z, 0); - // new AA extents - glm::vec3 newExtent = glm::abs(extentX) + glm::abs(extentY) + glm::abs(extentZ); - - min = glm::vec3(center) - newExtent; - max = glm::vec3(center) + newExtent; - - // project - pp->zvZ = -1 / std::min(min.z, max.z); - } - else - { - u32 zv = 0xFFFFFFFF; - while (vtx != vtx_end) - { - zv = std::min(zv, (u32&)vtx->z); - vtx++; - } - - pp->zvZ = (f32&)zv; - } - } - pp++; - } - - std::stable_sort(&polys->head()[first], pp_end); -} - -static bool ta_parse_vdrc(TA_context* ctx) +static bool ta_parse_vdrc(TA_context* ctx, bool primRestart) { bool rv=false; verify(vd_ctx == nullptr); @@ -1357,9 +1166,6 @@ static bool ta_parse_vdrc(TA_context* ctx) ta_parse_reset(); bool empty_context = true; - int op_poly_count = 0; - int pt_poly_count = 0; - int tr_poly_count = 0; PolyParam *bgpp = vd_rc.global_param_op.head(); if (bgpp->pcw.Texture) @@ -1368,13 +1174,10 @@ static bool ta_parse_vdrc(TA_context* ctx) empty_context = false; } - const bool perPixel = config::RendererType == RenderType::OpenGL_OIT - || config::RendererType == RenderType::DirectX11_OIT - || config::RendererType == RenderType::Vulkan_OIT; - const bool mergeTranslucent = config::PerStripSorting || perPixel; - TA_context *childCtx = ctx; int pass = 0; + RenderPass previousPass{}; + while (childCtx != nullptr) { childCtx->MarkRend(); @@ -1401,25 +1204,17 @@ static bool ta_parse_vdrc(TA_context* ctx) if (pass == 0 || 
!empty_pass) { - RenderPass *render_pass = vd_rc.render_passes.Append(); - getRegionSettings(pass, *render_pass); - render_pass->op_count = vd_rc.global_param_op.used(); - make_index(&vd_rc.global_param_op, op_poly_count, - render_pass->op_count, true, &vd_rc); - op_poly_count = render_pass->op_count; - render_pass->mvo_count = vd_rc.global_param_mvo.used(); - render_pass->pt_count = vd_rc.global_param_pt.used(); - make_index(&vd_rc.global_param_pt, pt_poly_count, - render_pass->pt_count, true, &vd_rc); - pt_poly_count = render_pass->pt_count; - render_pass->tr_count = vd_rc.global_param_tr.used(); - if (render_pass->autosort && config::PerStripSorting && !perPixel) - sortPolyParams(&vd_rc.global_param_tr, tr_poly_count, - render_pass->tr_count, &vd_rc); - make_index(&vd_rc.global_param_tr, tr_poly_count, - render_pass->tr_count, mergeTranslucent, &vd_rc); - tr_poly_count = render_pass->tr_count; - render_pass->mvo_tr_count = vd_rc.global_param_mvo_tr.used(); + RenderPass& render_pass = *vd_rc.render_passes.Append(); + getRegionSettings(pass, render_pass); + render_pass.op_count = vd_rc.global_param_op.used(); + render_pass.pt_count = vd_rc.global_param_pt.used(); + render_pass.tr_count = vd_rc.global_param_tr.used(); + render_pass.sorted_tr_count = 0; + render_pass.mvo_count = vd_rc.global_param_mvo.used(); + render_pass.mvo_tr_count = vd_rc.global_param_mvo_tr.used(); + + parseRenderPass(render_pass, previousPass, vd_rc, primRestart); + previousPass = render_pass; } childCtx = childCtx->nextContext; pass++; @@ -1431,9 +1226,9 @@ static bool ta_parse_vdrc(TA_context* ctx) WARN_LOG(PVR, "ERROR: TA context overrun"); else if (config::RenderResolution > 480 && !config::EmulateFramebuffer) { - fix_texture_bleeding(&vd_rc.global_param_op); - fix_texture_bleeding(&vd_rc.global_param_pt); - fix_texture_bleeding(&vd_rc.global_param_tr); + fix_texture_bleeding(&vd_rc.global_param_op, vd_rc); + fix_texture_bleeding(&vd_rc.global_param_pt, vd_rc); + 
fix_texture_bleeding(&vd_rc.global_param_tr, vd_rc); } if (rv && !overrun) { @@ -1452,7 +1247,7 @@ static bool ta_parse_vdrc(TA_context* ctx) return rv && !overrun; } -static bool ta_parse_naomi2(TA_context* ctx) +static bool ta_parse_naomi2(TA_context* ctx, bool primRestart) { for (PolyParam& pp : ctx->rend.global_param_op) { @@ -1484,23 +1279,12 @@ static bool ta_parse_naomi2(TA_context* ctx) else { ctx->rend.newRenderPass(); - int op_count = 0; - int pt_count = 0; - int tr_count = 0; - const bool perPixel = config::RendererType == RenderType::OpenGL_OIT - || config::RendererType == RenderType::DirectX11_OIT - || config::RendererType == RenderType::Vulkan_OIT; - const bool mergeTranslucent = config::PerStripSorting || perPixel; - for (const RenderPass& pass : ctx->rend.render_passes) + RenderPass previousPass{}; + + for (RenderPass& pass : ctx->rend.render_passes) { - make_index(&ctx->rend.global_param_op, op_count, pass.op_count, true, &ctx->rend); - make_index(&ctx->rend.global_param_pt, pt_count, pass.pt_count, true, &ctx->rend); - if (pass.autosort && config::PerStripSorting && !perPixel) - sortPolyParams(&ctx->rend.global_param_tr, tr_count, pass.tr_count, &ctx->rend); - make_index(&ctx->rend.global_param_tr, tr_count, pass.tr_count, mergeTranslucent, &ctx->rend); - op_count = pass.op_count; - pt_count = pass.pt_count; - tr_count = pass.tr_count; + parseRenderPass(pass, previousPass, ctx->rend, primRestart); + previousPass = pass; } u32 xmin, xmax, ymin, ymax; @@ -1514,14 +1298,17 @@ static bool ta_parse_naomi2(TA_context* ctx) return !overrun; } -bool ta_parse(TA_context *ctx) +bool ta_parse(TA_context *ctx, bool primRestart) { if (settings.platform.isNaomi2()) - return ta_parse_naomi2(ctx); + return ta_parse_naomi2(ctx, primRestart); else - return ta_parse_vdrc(ctx); + return ta_parse_vdrc(ctx, primRestart); } +// +// Naomi 2 stuff +// static PolyParam *n2CurrentPP; static ModifierVolumeParam *n2CurrentMVP; @@ -1659,6 +1446,10 @@ void ta_set_list_type(u32 
listType) vd_ctx = nullptr; } +// +// end Naomi 2 +// + void ta_parse_reset() { using TAParser = TAParserTempl<>; @@ -1842,27 +1633,6 @@ void FillBGP(TA_context* ctx) cv[3].v = max_v; } -void getRegionTileAddrAndSize(u32& address, u32& size) -{ - address = REGION_BASE; - const bool type1_tile = ((FPU_PARAM_CFG >> 21) & 1) == 0; - size = (type1_tile ? 5 : 6) * 4; - bool empty_first_region = true; - for (int i = type1_tile ? 4 : 5; i > 0; i--) - if ((pvr_read32p(address + i * 4) & 0x80000000) == 0) - { - empty_first_region = false; - break; - } - if (empty_first_region) - address += size; - RegionArrayTile tile; - tile.full = pvr_read32p(address); - if (tile.PreSort) - // Windows CE weirdness - size = 6 * 4; -} - static void getRegionTileClipping(u32& xmin, u32& xmax, u32& ymin, u32& ymax) { xmin = 20; @@ -1890,46 +1660,6 @@ static void getRegionTileClipping(u32& xmin, u32& xmax, u32& ymin, u32& ymax) ymax *= 32; } -int getTAContextAddresses(u32 *addresses) -{ - u32 addr; - u32 tile_size; - getRegionTileAddrAndSize(addr, tile_size); - - RegionArrayTile tile; - tile.full = pvr_read32p(addr); - u32 x = tile.X; - u32 y = tile.Y; - u32 count = 0; - do { - tile.full = pvr_read32p(addr); - if (tile.X != x || tile.Y != y) - break; - // Try the opaque pointer - u32 opbAddr = pvr_read32p(addr + 4); - if (opbAddr & 0x80000000) - { - // Try the translucent pointer - opbAddr = pvr_read32p(addr + 12); - if (opbAddr & 0x80000000) - { - // Try the punch-through pointer - if (tile_size >= 24) - opbAddr = pvr_read32p(addr + 20); - if (opbAddr & 0x80000000) - { - INFO_LOG(PVR, "Can't find any non-null OPB for pass %d", count); - break; - } - } - } - addresses[count++] = pvr_read32p(opbAddr); - addr += tile_size; - } while (!tile.LastRegion && count < MAX_PASSES); - - return count; -} - static void getRegionSettings(int passNumber, RenderPass& pass) { u32 addr; @@ -1957,6 +1687,7 @@ void rend_context::newRenderPass() pass.pt_count = global_param_pt.used(); pass.mvo_count = 
global_param_mvo.used(); pass.mvo_tr_count = global_param_mvo_tr.used(); + pass.sorted_tr_count = 0; getRegionSettings(render_passes.used(), pass); *render_passes.Append() = pass; } diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp index 4c33c1f07..613795476 100644 --- a/core/rend/dx11/dx11_renderer.cpp +++ b/core/rend/dx11/dx11_renderer.cpp @@ -307,7 +307,7 @@ bool DX11Renderer::Process(TA_context* ctx) texCache.Clear(); texCache.Cleanup(); - return ta_parse(ctx); + return ta_parse(ctx, true); } void DX11Renderer::configVertexShader() @@ -674,41 +674,15 @@ void DX11Renderer::drawList(const List& gply, int first, int count) } } -void DX11Renderer::sortTriangles(int first, int count) +void DX11Renderer::drawSorted(int first, int count, bool multipass) { - std::vector vidx_sort; - GenSorted(first, count, pidx_sort, vidx_sort); - - //Upload to GPU if needed - if (pidx_sort.empty()) - return; - - const size_t bufSize = vidx_sort.size() * sizeof(u32); - // Upload sorted index buffer - ensureBufferSize(sortedTriIndexBuffer, D3D11_BIND_INDEX_BUFFER, sortedTriIndexBufferSize, (u32)bufSize); - D3D11_MAPPED_SUBRESOURCE mappedSubres; - deviceContext->Map(sortedTriIndexBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); - memcpy(mappedSubres.pData, &vidx_sort[0], bufSize); - deviceContext->Unmap(sortedTriIndexBuffer, 0); - deviceContext->IASetIndexBuffer(sortedTriIndexBuffer, DXGI_FORMAT_R32_UINT, 0); -} - -void DX11Renderer::drawSorted(bool multipass) -{ - if (pidx_sort.empty()) - return; - deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - size_t count = pidx_sort.size(); - - for (u32 p = 0; p < count; p++) + int end = first + count; + for (int p = first; p < end; p++) { - const PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count > 2) - { - setRenderState(params); - deviceContext->DrawIndexed(pidx_sort[p].count, pidx_sort[p].first, 0); - } + const PolyParam* params = pvrrc.sortedTriangles[p].ppid; + 
setRenderState(params); + deviceContext->DrawIndexed(pvrrc.sortedTriangles[p].count, pvrrc.sortedTriangles[p].first, 0); } if (multipass && config::TranslucentPolygonDepthMask) { @@ -738,17 +712,16 @@ void DX11Renderer::drawSorted(bool multipass) deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, true, 6, false), 0); deviceContext->RSSetScissorRects(1, &scissorRect); - for (u32 p = 0; p < count; p++) + for (int p = first; p < end; p++) { - const PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count > 2 && !params->isp.ZWriteDis) + const PolyParam* params = pvrrc.sortedTriangles[p].ppid; + if (!params->isp.ZWriteDis) { setCullMode(params->isp.CullMode); - deviceContext->DrawIndexed(pidx_sort[p].count, pidx_sort[p].first, 0); + deviceContext->DrawIndexed(pvrrc.sortedTriangles[p].count, pvrrc.sortedTriangles[p].first, 0); } } } - deviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0); } void DX11Renderer::drawModVols(int first, int count) @@ -855,14 +828,9 @@ void DX11Renderer::drawStrips() if (current_pass.autosort) { if (!config::PerStripSorting) - { - sortTriangles(previous_pass.tr_count, tr_count); - drawSorted(render_pass < pvrrc.render_passes.used() - 1); - } + drawSorted(previous_pass.sorted_tr_count, current_pass.sorted_tr_count - previous_pass.sorted_tr_count, render_pass < pvrrc.render_passes.used() - 1); else - { drawList(pvrrc.global_param_tr, previous_pass.tr_count, tr_count); - } } else { diff --git a/core/rend/dx11/dx11_renderer.h b/core/rend/dx11/dx11_renderer.h index 559c86173..372471ccf 100644 --- a/core/rend/dx11/dx11_renderer.h +++ b/core/rend/dx11/dx11_renderer.h @@ -133,15 +133,12 @@ private: void drawList(const List& gply, int first, int count); template void setRenderState(const PolyParam *gp); - void sortTriangles(int first, int count); - void drawSorted(bool multipass); + void drawSorted(int first, int count, bool multipass); void drawModVols(int first, int count); u32 vertexBufferSize = 0; u32 
modvolBufferSize = 0; u32 indexBufferSize = 0; - ComPtr sortedTriIndexBuffer; - u32 sortedTriIndexBufferSize = 0; ComPtr fbTex; ComPtr dcfbTexture; @@ -163,7 +160,6 @@ private: DX11TextureCache texCache; DX11Shaders *shaders; - std::vector pidx_sort; std::unique_ptr quad; ComPtr vtxConstants; ComPtr pxlConstants; diff --git a/core/rend/dx9/d3d_renderer.cpp b/core/rend/dx9/d3d_renderer.cpp index 0f8fd9dfa..b604f808a 100644 --- a/core/rend/dx9/d3d_renderer.cpp +++ b/core/rend/dx9/d3d_renderer.cpp @@ -21,6 +21,7 @@ #include "hw/pvr/pvr_mem.h" #include "rend/tileclip.h" #include "rend/gui.h" +#include "rend/sorter.h" #define verifyWin(x) verify(SUCCEEDED(x)) @@ -172,8 +173,6 @@ void D3DRenderer::preReset() mainVtxDecl.reset(); modvolBuffer.reset(); modvolBufferSize = 0; - sortedTriIndexBuffer.reset(); - sortedTriIndexBufferSize = 0; indexBuffer.reset(); indexBufferSize = 0; vertexBuffer.reset(); @@ -304,7 +303,7 @@ bool D3DRenderer::Process(TA_context* ctx) texCache.Clear(); texCache.Cleanup(); - return ta_parse(ctx); + return ta_parse(ctx, false); } inline void D3DRenderer::setTexMode(D3DSAMPLERSTATETYPE state, u32 clamp, u32 mirror) @@ -483,40 +482,14 @@ void D3DRenderer::drawList(const List& gply, int first, int count) } } -void D3DRenderer::sortTriangles(int first, int count) +void D3DRenderer::drawSorted(int first, int count, bool multipass) { - std::vector vidx_sort; - GenSorted(first, count, pidx_sort, vidx_sort); - - //Upload to GPU if needed - if (pidx_sort.empty()) - return; - - const size_t bufSize = vidx_sort.size() * sizeof(u32); - // Upload sorted index buffer - ensureIndexBufferSize(sortedTriIndexBuffer, sortedTriIndexBufferSize, (u32)bufSize); - void *ptr; - sortedTriIndexBuffer->Lock(0, (UINT)bufSize, &ptr, D3DLOCK_DISCARD); - memcpy(ptr, &vidx_sort[0], bufSize); - sortedTriIndexBuffer->Unlock(); - device->SetIndices(sortedTriIndexBuffer); -} - -void D3DRenderer::drawSorted(bool multipass) -{ - if (pidx_sort.empty()) - return; - - u32 count = 
(u32)pidx_sort.size(); - - for (u32 p = 0; p < count; p++) + int end = first + count; + for (int p = first; p < end; p++) { - const PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count > 2) - { - setGPState(params); - device->DrawIndexedPrimitive(D3DPT_TRIANGLELIST, 0, 0, pidx_sort[p].count, pidx_sort[p].first, pidx_sort[p].count / 3); - } + const PolyParam* params = pvrrc.sortedTriangles[p].ppid; + setGPState(params); + device->DrawIndexedPrimitive(D3DPT_TRIANGLELIST, 0, 0, pvrrc.sortedTriangles[p].count, pvrrc.sortedTriangles[p].first, pvrrc.sortedTriangles[p].count / 3); } if (multipass && config::TranslucentPolygonDepthMask) { @@ -533,20 +506,20 @@ void D3DRenderer::drawSorted(bool multipass) if (scissorEnable) device->SetScissorRect(&scissorRect); - for (u32 p = 0; p < count; p++) + for (int p = first; p < end; p++) { - const PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count > 2 && !params->isp.ZWriteDis) { + const PolyParam* params = pvrrc.sortedTriangles[p].ppid; + if (!params->isp.ZWriteDis) + { // FIXME no clipping in modvol shader //SetTileClip(gp->tileclip,true); devCache.SetRenderState(D3DRS_CULLMODE, CullMode[params->isp.CullMode]); - device->DrawIndexedPrimitive(D3DPT_TRIANGLELIST, 0, 0, pidx_sort[p].count, pidx_sort[p].first, pidx_sort[p].count / 3); + device->DrawIndexedPrimitive(D3DPT_TRIANGLELIST, 0, 0, pvrrc.sortedTriangles[p].count, pvrrc.sortedTriangles[p].first, pvrrc.sortedTriangles[p].count / 3); } } devCache.SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA | D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE); } - device->SetIndices(indexBuffer); } //All pixels are in area 0 by default. 
@@ -785,14 +758,10 @@ void D3DRenderer::drawStrips() if (current_pass.autosort) { if (!config::PerStripSorting) - { - sortTriangles(previous_pass.tr_count, tr_count); - drawSorted(render_pass < pvrrc.render_passes.used() - 1); - } + drawSorted(previous_pass.sorted_tr_count, current_pass.sorted_tr_count - previous_pass.sorted_tr_count, + render_pass < pvrrc.render_passes.used() - 1); else - { drawList(pvrrc.global_param_tr, previous_pass.tr_count, tr_count); - } } else { diff --git a/core/rend/dx9/d3d_renderer.h b/core/rend/dx9/d3d_renderer.h index e36f21881..77f22cd7e 100644 --- a/core/rend/dx9/d3d_renderer.h +++ b/core/rend/dx9/d3d_renderer.h @@ -25,7 +25,6 @@ #include "rend/transform_matrix.h" #include "d3d_texture.h" #include "d3d_shaders.h" -#include "rend/sorter.h" #include "rend/imgui_driver.h" class RenderStateCache @@ -132,8 +131,7 @@ private: void updatePaletteTexture(); void updateFogTexture(); void displayFramebuffer(); - void sortTriangles(int first, int count); - void drawSorted(bool multipass); + void drawSorted(int first, int count, bool multipass); void setMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc); void drawModVols(int first, int count); void setTexMode(D3DSAMPLERSTATETYPE state, u32 clamp, u32 mirror); @@ -150,8 +148,6 @@ private: u32 modvolBufferSize = 0; ComPtr indexBuffer; u32 indexBufferSize = 0; - ComPtr sortedTriIndexBuffer; - u32 sortedTriIndexBufferSize = 0; ComPtr mainVtxDecl; ComPtr modVolVtxDecl; @@ -172,7 +168,6 @@ private: u32 height = 0; TransformMatrix matrices; D3DTextureCache texCache; - std::vector pidx_sort; D3DShaders shaders; RECT scissorRect{}; bool scissorEnable = false; diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 724155913..da2330ecc 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -865,11 +865,11 @@ bool OpenGL4Renderer::renderFrame(int width, int height) } for (auto& it : gl4.shaders) resetN2UniformCache(&it.second); - 
gl4ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; - GLuint output_fbo; + glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); + GLuint output_fbo; //setup render target first if (is_rtt) output_fbo = BindRTT(false); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index c269ee5a1..c677a520c 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -283,109 +283,52 @@ void DrawList(const List& gply, int first, int count) } } -static std::vector pidx_sort; - -static void SortTriangles(int first, int count) +static void drawSorted(int first, int count, bool multipass) { - std::vector vidx_sort; - GenSorted(first, count, pidx_sort, vidx_sort); + glcache.Enable(GL_STENCIL_TEST); + glcache.StencilFunc(GL_ALWAYS,0,0); + glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); - //Upload to GPU if needed - if (!pidx_sort.empty()) + int end = first + count; + for (int p = first; p < end; p++) { - //Bind and upload sorted index buffer - if (gl.index_type == GL_UNSIGNED_SHORT) - { - static bool overrun; - static List short_vidx; - if (short_vidx.daty != NULL) - short_vidx.Free(); - short_vidx.Init(vidx_sort.size(), &overrun, NULL); - for (size_t i = 0; i < vidx_sort.size(); i++) - *(short_vidx.Append()) = vidx_sort[i]; - gl.vbo.idxs2->update(short_vidx.head(), short_vidx.bytes()); - } - else - gl.vbo.idxs2->update(&vidx_sort[0], vidx_sort.size() * sizeof(u32)); - glCheck(); + const PolyParam* params = pvrrc.sortedTriangles[p].ppid; + SetGPState(params); + glDrawElements(GL_TRIANGLES, pvrrc.sortedTriangles[p].count, gl.index_type, + (GLvoid*)(gl.get_index_size() * pvrrc.sortedTriangles[p].first)); } -} -void DrawSorted(bool multipass) -{ - //if any drawing commands, draw them - if (!pidx_sort.empty()) + if (multipass && config::TranslucentPolygonDepthMask) { - std::size_t count = pidx_sort.size(); + // Write to the depth buffer now. The next render pass might need it. 
(Cosmic Smash) + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glcache.Disable(GL_BLEND); + glcache.StencilMask(0); + + // We use the modifier volumes shader because it's fast. We don't need textures, etc. + glcache.UseProgram(gl.modvol_shader.program); + glUniform1f(gl.modvol_shader.sp_ShaderColor, 1.f); + + glcache.DepthFunc(GL_GEQUAL); + glcache.DepthMask(GL_TRUE); + + for (int p = first; p < end; p++) { - //set some 'global' modes for all primitives - - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_ALWAYS,0,0); - glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); - - for (std::size_t p = 0; p < count; p++) + const PolyParam* params = pvrrc.sortedTriangles[p].ppid; + if (!params->isp.ZWriteDis) { - const PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count>2) //this actually happens for some games. No idea why .. - { - SetGPState(params); - glDrawElements(GL_TRIANGLES, pidx_sort[p].count, gl.index_type, - (GLvoid*)(gl.get_index_size() * pidx_sort[p].first)); glCheck(); + // FIXME no clipping in modvol shader + //SetTileClip(gp->tileclip,true); -#if 0 - //Verify restriping -- only valid if no sort - int fs=pidx_sort[p].first; + SetCull(params->isp.CullMode ^ gcflip); - for (u32 j=0; j<(params->count-2); j++) - { - for (u32 k=0; k<3; k++) - { - verify(idx_base[params->first+j+k]==vidx_sort[fs++]); - } - } - - verify(fs==(pidx_sort[p].first+pidx_sort[p].count)); -#endif - } - params++; - } - - if (multipass && config::TranslucentPolygonDepthMask) - { - // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - glcache.Disable(GL_BLEND); - - glcache.StencilMask(0); - - // We use the modifier volumes shader because it's fast. We don't need textures, etc. 
- glcache.UseProgram(gl.modvol_shader.program); - glUniform1f(gl.modvol_shader.sp_ShaderColor, 1.f); - - glcache.DepthFunc(GL_GEQUAL); - glcache.DepthMask(GL_TRUE); - - for (std::size_t p = 0; p < count; p++) - { - const PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count > 2 && !params->isp.ZWriteDis) { - // FIXME no clipping in modvol shader - //SetTileClip(gp->tileclip,true); - - SetCull(params->isp.CullMode ^ gcflip); - - glDrawElements(GL_TRIANGLES, pidx_sort[p].count, gl.index_type, - (GLvoid*)(gl.get_index_size() * pidx_sort[p].first)); - } - } - glcache.StencilMask(0xFF); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDrawElements(GL_TRIANGLES, pvrrc.sortedTriangles[p].count, gl.index_type, + (GLvoid*)(gl.get_index_size() * pvrrc.sortedTriangles[p].first)); } } - // Re-bind the previous index buffer for subsequent render passes - gl.vbo.idxs->bind(); + glcache.StencilMask(0xFF); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } } @@ -681,14 +624,9 @@ void DrawStrips() if (current_pass.autosort) { if (!config::PerStripSorting) - { - SortTriangles(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - DrawSorted(render_pass < pvrrc.render_passes.used() - 1); - } + drawSorted(previous_pass.sorted_tr_count, current_pass.sorted_tr_count - previous_pass.sorted_tr_count, render_pass < pvrrc.render_passes.used() - 1); else - { DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - } } else DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 7c321fec0..199991fd8 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -456,7 +456,6 @@ static void gles_term() gl.vbo.geometry.reset(); gl.vbo.modvols.reset(); gl.vbo.idxs.reset(); - gl.vbo.idxs2.reset(); termGLCommon(); gl_delete_shaders(); @@ -477,12 +476,16 @@ void findGLVersion() 
gl.glsl_version_header = "#version 300 es"; if (gl.gl_major > 3 || gl.gl_minor >= 2) gl.border_clamp_supported = true; + gl.prim_restart_supported = false; + gl.prim_restart_fixed_supported = true; } else { gl.gl_version = "GLES2"; gl.glsl_version_header = ""; gl.index_type = GL_UNSIGNED_SHORT; + gl.prim_restart_supported = false; + gl.prim_restart_fixed_supported = false; } gl.single_channel_format = GL_ALPHA; const char *extensions = (const char *)glGetString(GL_EXTENSIONS); @@ -510,12 +513,17 @@ void findGLVersion() gl.glsl_version_header = "#version 130"; #endif gl.single_channel_format = GL_RED; + gl.prim_restart_supported = gl.gl_major > 3 || gl.gl_minor >= 1; // 3.1 min + gl.prim_restart_fixed_supported = gl.gl_major > 4 + || (gl.gl_major == 4 && gl.gl_minor >= 3); // 4.3 min } else { gl.gl_version = "GL2"; gl.glsl_version_header = "#version 120"; gl.single_channel_format = GL_ALPHA; + gl.prim_restart_supported = false; + gl.prim_restart_fixed_supported = false; } gl.highp_float_supported = true; gl.border_clamp_supported = true; @@ -925,7 +933,6 @@ static bool gl_create_resources() gl.vbo.geometry = std::unique_ptr(new GlBuffer(GL_ARRAY_BUFFER)); gl.vbo.modvols = std::unique_ptr(new GlBuffer(GL_ARRAY_BUFFER)); gl.vbo.idxs = std::unique_ptr(new GlBuffer(GL_ELEMENT_ARRAY_BUFFER)); - gl.vbo.idxs2 = std::unique_ptr(new GlBuffer(GL_ELEMENT_ARRAY_BUFFER)); initQuad(); @@ -1151,7 +1158,7 @@ bool OpenGLRenderer::Process(TA_context* ctx) updatePaletteTexture(getPaletteTextureSlot()); palette_updated = false; } - return ta_parse(ctx); + return ta_parse(ctx, gl.prim_restart_fixed_supported || gl.prim_restart_supported); } static void upload_vertex_indices() @@ -1237,7 +1244,14 @@ bool OpenGLRenderer::renderFrame(int width, int height) ShaderUniforms.Set(&it.second); resetN2UniformCache(&it.second); } - +#ifndef GLES2 + if (gl.prim_restart_fixed_supported) + glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); + else if (gl.prim_restart_supported) { + 
glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(-1); + } +#endif //setup render target first if (is_rtt) { diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index fb636a18d..26c566133 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -227,7 +227,6 @@ struct gl_ctx std::unique_ptr geometry; std::unique_ptr modvols; std::unique_ptr idxs; - std::unique_ptr idxs2; } vbo; struct @@ -281,6 +280,8 @@ struct gl_ctx float max_anisotropy; bool mesa_nouveau; bool border_clamp_supported; + bool prim_restart_supported; + bool prim_restart_fixed_supported; size_t get_index_size() { return index_type == GL_UNSIGNED_INT ? sizeof(u32) : sizeof(u16); } }; diff --git a/core/rend/sorter.cpp b/core/rend/sorter.cpp index d5b3e2675..0ebc60cb5 100644 --- a/core/rend/sorter.cpp +++ b/core/rend/sorter.cpp @@ -15,193 +15,6 @@ along with reicast. If not, see . */ #include "sorter.h" -#include "hw/pvr/Renderer_if.h" -#include - -struct IndexTrig -{ - u32 id[3]; - u16 pid; - f32 z; -}; - -static float minZ(const Vertex *v, const u32 *mod) -{ - return std::min(std::min(v[mod[0]].z, v[mod[1]].z), v[mod[2]].z); -} - -static bool operator<(const IndexTrig& left, const IndexTrig& right) -{ - return left.z < right.z; -} - -static float getProjectedZ(const Vertex *v, const float *mat) -{ - // -1 / z - return -1 / (mat[2] * v->x + mat[1 * 4 + 2] * v->y + mat[2 * 4 + 2] * v->z + mat[3 * 4 + 2]); -} - -const static Vertex *vtx_sort_base; - -static void fill_id(u32 *d, const Vertex *v0, const Vertex *v1, const Vertex *v2, const Vertex *vb) -{ - d[0] = (u32)(v0 - vb); - d[1] = (u32)(v1 - vb); - d[2] = (u32)(v2 - vb); -} - -void GenSorted(int first, int count, std::vector& pidx_sort, std::vector& vidx_sort) -{ - u32 tess_gen=0; - - pidx_sort.clear(); - - if (pvrrc.verts.used() == 0 || count == 0) - return; - - const Vertex * const vtx_base = pvrrc.verts.head(); - const u32 * const idx_base = pvrrc.idx.head(); - - const PolyParam * const pp_base = 
&pvrrc.global_param_tr.head()[first]; - const PolyParam *pp = pp_base; - const PolyParam * const pp_end = pp + count; - while (pp->count == 0 && pp < pp_end) - pp++; - if (pp == pp_end) - return; - - vtx_sort_base=vtx_base; - - static u32 vtx_cnt; - - int vtx_count = pvrrc.verts.used() - idx_base[pp->first]; - if ((u32)vtx_count > vtx_cnt) - vtx_cnt = vtx_count; - -#if PRINT_SORT_STATS - printf("TVTX: %d || %d\n",vtx_cnt,vtx_count); -#endif - - if (vtx_count<=0) - return; - - //make lists of all triangles, with their pid and vid - static std::vector lst; - - lst.resize(vtx_count*4); - - - int pfsti=0; - - while (pp != pp_end) - { - u32 ppid = (u32)(pp - pp_base); - - if (pp->count > 2) - { - const u32 *idx = idx_base + pp->first; - u32 flip = 0; - float z0 = 0, z1 = 0; - - if (pp->isNaomi2()) - { - z0 = getProjectedZ(vtx_base + idx[0], pp->mvMatrix); - z1 = getProjectedZ(vtx_base + idx[1], pp->mvMatrix); - } - for (u32 i = 0; i < pp->count - 2; i++) - { - const Vertex *v0, *v1; - if (flip) - { - v0 = vtx_base + idx[i + 1]; - v1 = vtx_base + idx[i]; - } - else - { - v0 = vtx_base + idx[i]; - v1 = vtx_base + idx[i + 1]; - } - const Vertex *v2 = vtx_base + idx[i + 2]; - fill_id(lst[pfsti].id, v0, v1, v2, vtx_base); - lst[pfsti].pid = ppid; - if (pp->isNaomi2()) - { - float z2 = getProjectedZ(v2, pp->mvMatrix); - lst[pfsti].z = std::min(z0, std::min(z1, z2)); - z0 = z1; - z1 = z2; - } - else - { - lst[pfsti].z = minZ(vtx_base, lst[pfsti].id); - } - pfsti++; - - flip ^= 1; - } - } - pp++; - } - - u32 aused=pfsti; - - lst.resize(aused); - - //sort them - std::stable_sort(lst.begin(),lst.end()); - - //Merge pids/draw cmds if two different pids are actually equal - for (u32 k = 1; k < aused; k++) - if (lst[k].pid != lst[k - 1].pid) - { - const PolyParam& curPoly = pp_base[lst[k].pid]; - const PolyParam& prevPoly = pp_base[lst[k - 1].pid]; - if (curPoly.equivalentIgnoreCullingDirection(prevPoly) - && (curPoly.isp.CullMode < 2 || curPoly.isp.CullMode == 
prevPoly.isp.CullMode)) - lst[k].pid = lst[k - 1].pid; - } - - //re-assemble them into drawing commands - vidx_sort.resize(aused*3); - - int idx=-1; - - for (u32 i=0; i - -struct SortTrigDrawParam -{ - const PolyParam* ppid; - u32 first; - u32 count; -}; - -// Sort based on min-z of each triangle -void GenSorted(int first, int count, std::vector& pidx_sort, std::vector& vidx_sort); // Use the first vertex as provoking vertex for flat-shaded triangles void setFirstProvokingVertex(rend_context& rendContext); diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index b92d805e7..97592fc6e 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -21,30 +21,6 @@ #include "drawer.h" #include "hw/pvr/pvr_mem.h" -void Drawer::SortTriangles() -{ - sortedPolys.resize(pvrrc.render_passes.used()); - sortedIndexes.resize(pvrrc.render_passes.used()); - sortedIndexCount = 0; - RenderPass previousPass = {}; - - for (int render_pass = 0; render_pass < pvrrc.render_passes.used(); render_pass++) - { - const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; - sortedIndexes[render_pass].clear(); - if (current_pass.autosort) - { - GenSorted(previousPass.tr_count, current_pass.tr_count - previousPass.tr_count, sortedPolys[render_pass], sortedIndexes[render_pass]); - for (auto& poly : sortedPolys[render_pass]) - poly.first += sortedIndexCount; - sortedIndexCount += sortedIndexes[render_pass].size(); - } - else - sortedPolys[render_pass].clear(); - previousPass = current_pass; - } -} - TileClipping BaseDrawer::SetTileClip(u32 val, vk::Rect2D& clipRect) { int rect[4] = {}; @@ -253,15 +229,16 @@ void Drawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sor cmdBuffer.drawIndexed(count, 1, first, 0, 0); } -void Drawer::DrawSorted(const vk::CommandBuffer& cmdBuffer, const std::vector& polys, bool multipass) +void Drawer::DrawSorted(const vk::CommandBuffer& cmdBuffer, const std::vector& polys, u32 first, u32 last, bool 
multipass) { - for (const SortTrigDrawParam& param : polys) - DrawPoly(cmdBuffer, ListType_Translucent, true, *param.ppid, pvrrc.idx.used() + param.first, param.count); + for (u32 idx = first; idx < last; idx++) + DrawPoly(cmdBuffer, ListType_Translucent, true, *polys[idx].ppid, polys[idx].first, polys[idx].count); if (multipass && config::TranslucentPolygonDepthMask) { // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) - for (const SortTrigDrawParam& param : polys) + for (u32 idx = first; idx < last; idx++) { + const SortedTriangle& param = polys[idx]; if (param.ppid->isp.ZWriteDis) continue; vk::Pipeline pipeline = pipelineManager->GetDepthPassPipeline(param.ppid->isp.CullMode, param.ppid->isNaomi2()); @@ -350,9 +327,6 @@ void Drawer::UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const offsets.modVolOffset = packer.add(pvrrc.modtrig.head(), pvrrc.modtrig.bytes()); // Index offsets.indexOffset = packer.add(pvrrc.idx.head(), pvrrc.idx.bytes()); - for (const std::vector& idx : sortedIndexes) - if (!idx.empty()) - packer.add(&idx[0], idx.size() * sizeof(u32)); // Uniform buffers offsets.vertexUniformOffset = packer.addUniform(&vertexUniforms, sizeof(vertexUniforms)); offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); @@ -372,7 +346,6 @@ bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) { FragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); - SortTriangles(); currentScissor = vk::Rect2D(); vk::CommandBuffer cmdBuffer = BeginRenderPass(); @@ -420,7 +393,7 @@ bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) if (current_pass.autosort) { if (!config::PerStripSorting) - DrawSorted(cmdBuffer, sortedPolys[render_pass], render_pass + 1 < pvrrc.render_passes.used()); + DrawSorted(cmdBuffer, pvrrc.sortedTriangles, previous_pass.sorted_tr_count, current_pass.sorted_tr_count, render_pass + 1 < pvrrc.render_passes.used()); else 
DrawList(cmdBuffer, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); } diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index cba0f6ad1..67c41c621 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -233,7 +233,7 @@ protected: private: void SortTriangles(); void DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, const PolyParam& poly, u32 first, u32 count); - void DrawSorted(const vk::CommandBuffer& cmdBuffer, const std::vector& polys, bool multipass); + void DrawSorted(const vk::CommandBuffer& cmdBuffer, const std::vector& polys, u32 first, u32 last, bool multipass); void DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, const List& polys, u32 first, u32 last); void DrawModVols(const vk::CommandBuffer& cmdBuffer, int first, int count); void UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const FragmentShaderUniforms& fragmentUniforms); @@ -255,11 +255,6 @@ private: DescriptorSets descriptorSets; std::vector> mainBuffers; PipelineManager *pipelineManager = nullptr; - - // Per-triangle sort results - std::vector> sortedPolys; - std::vector> sortedIndexes; - u32 sortedIndexCount = 0; bool perStripSorting = false; }; diff --git a/core/rend/vulkan/oit/oit_pipeline.cpp b/core/rend/vulkan/oit/oit_pipeline.cpp index e61a8d63a..18a5db05d 100644 --- a/core/rend/vulkan/oit/oit_pipeline.cpp +++ b/core/rend/vulkan/oit/oit_pipeline.cpp @@ -27,7 +27,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP // Input assembly state vk::PipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateCreateInfo(vk::PipelineInputAssemblyStateCreateFlags(), - vk::PrimitiveTopology::eTriangleStrip); + vk::PrimitiveTopology::eTriangleStrip, true); // Viewport and scissor states vk::PipelineViewportStateCreateInfo pipelineViewportStateCreateInfo(vk::PipelineViewportStateCreateFlags(), 1, nullptr, 1, 
nullptr); diff --git a/core/rend/vulkan/pipeline.cpp b/core/rend/vulkan/pipeline.cpp index 4f51feab2..b0c4ffca3 100644 --- a/core/rend/vulkan/pipeline.cpp +++ b/core/rend/vulkan/pipeline.cpp @@ -266,8 +266,15 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo = GetMainVertexInputStateCreateInfo(); // Input assembly state - vk::PipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateCreateInfo(vk::PipelineInputAssemblyStateCreateFlags(), - sortTriangles && !config::PerStripSorting ? vk::PrimitiveTopology::eTriangleList : vk::PrimitiveTopology::eTriangleStrip); + vk::PipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateCreateInfo; + if (sortTriangles && !config::PerStripSorting) { + pipelineInputAssemblyStateCreateInfo.topology = vk::PrimitiveTopology::eTriangleList; + } + else + { + pipelineInputAssemblyStateCreateInfo.topology = vk::PrimitiveTopology::eTriangleStrip; + pipelineInputAssemblyStateCreateInfo.primitiveRestartEnable = true; + } // Viewport and scissor states vk::PipelineViewportStateCreateInfo pipelineViewportStateCreateInfo(vk::PipelineViewportStateCreateFlags(), 1, nullptr, 1, nullptr); diff --git a/core/rend/vulkan/vulkan_renderer.h b/core/rend/vulkan/vulkan_renderer.h index 0dd97316d..a8b90f21e 100644 --- a/core/rend/vulkan/vulkan_renderer.h +++ b/core/rend/vulkan/vulkan_renderer.h @@ -134,7 +134,7 @@ public: texCommandBuffer = texCommandPool.Allocate(); texCommandBuffer.begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)); - bool result = ta_parse(ctx); + bool result = ta_parse(ctx, true); if (result) { From 1765e507030044754f1ba10881fddc154c1c1b7f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 13 Dec 2022 23:33:24 +0100 Subject: [PATCH 32/34] lr: missing symbols and functions --- core/deps/libretro-common/glsm/glsm.c | 12 ++++++++++++ core/deps/libretro-common/include/glsm/glsm.h | 2 ++ 
core/deps/libretro-common/include/glsm/glsmsym.h | 2 ++ 3 files changed, 16 insertions(+) diff --git a/core/deps/libretro-common/glsm/glsm.c b/core/deps/libretro-common/glsm/glsm.c index 413d49973..ab1d9ac6d 100644 --- a/core/deps/libretro-common/glsm/glsm.c +++ b/core/deps/libretro-common/glsm/glsm.c @@ -2725,6 +2725,16 @@ void rglGetShaderPrecisionFormat(GLenum shaderType, GLenum precisionType, GLint #endif } +void rglPrimitiveRestartIndex(GLuint index) +{ +#ifdef GLSM_DEBUG + log_cb(RETRO_LOG_INFO, "glPrimitiveRestartIndex.\n"); +#endif +#if defined(HAVE_OPENGL) + glPrimitiveRestartIndex(index); +#endif +} + /* GLSM-side */ static void glsm_state_setup(void) @@ -2744,6 +2754,8 @@ static void glsm_state_setup(void) gl_state.cap_translate[SGL_COLOR_LOGIC_OP] = GL_COLOR_LOGIC_OP; gl_state.cap_translate[SGL_CLIP_DISTANCE0] = GL_CLIP_DISTANCE0; gl_state.cap_translate[SGL_DEPTH_CLAMP] = GL_DEPTH_CLAMP; + gl_state.cap_translate[SGL_PRIMITIVE_RESTART] = GL_PRIMITIVE_RESTART; + gl_state.cap_translate[SGL_PRIMITIVE_RESTART_FIXED_INDEX] = GL_PRIMITIVE_RESTART_FIXED_INDEX; #endif for (i = 0; i < MAX_ATTRIB; i++) diff --git a/core/deps/libretro-common/include/glsm/glsm.h b/core/deps/libretro-common/include/glsm/glsm.h index 27dc7cbeb..01c76e75e 100644 --- a/core/deps/libretro-common/include/glsm/glsm.h +++ b/core/deps/libretro-common/include/glsm/glsm.h @@ -105,6 +105,8 @@ enum SGL_SAMPLE_COVERAGE, #ifndef HAVE_OPENGLES SGL_COLOR_LOGIC_OP, + SGL_PRIMITIVE_RESTART, + SGL_PRIMITIVE_RESTART_FIXED_INDEX, #endif SGL_CAP_MAX }; diff --git a/core/deps/libretro-common/include/glsm/glsmsym.h b/core/deps/libretro-common/include/glsm/glsmsym.h index 31c437d78..2609056c9 100644 --- a/core/deps/libretro-common/include/glsm/glsmsym.h +++ b/core/deps/libretro-common/include/glsm/glsmsym.h @@ -199,6 +199,7 @@ RETRO_BEGIN_DECLS #define glGetAttachedShaders rglGetAttachedShaders #define glGetShaderPrecisionFormat rglGetShaderPrecisionFormat #define glClearDepthf rglClearDepthf +#define 
glPrimitiveRestartIndex rglPrimitiveRestartIndex const GLubyte* rglGetStringi(GLenum name, GLuint index); void rglTexBuffer(GLenum target, GLenum internalFormat, GLuint buffer); @@ -506,6 +507,7 @@ const GLubyte* rglGetString(GLenum name); void rglGetAttachedShaders(GLuint program, GLsizei maxCount, GLsizei *count, GLuint *shaders); void rglGetShaderPrecisionFormat(GLenum shaderType, GLenum precisionType, GLint *range, GLint *precision); void rglClearDepthf(GLfloat depth); +void rglPrimitiveRestartIndex(GLuint index); RETRO_END_DECLS From 4d3de39c64ecfedcf8a1f87a0e668ba7c94807e5 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 13 Dec 2022 23:42:11 +0100 Subject: [PATCH 33/34] gl: no GL_PRIMITIVE_RESTART and glPrimitiveRestartIndex in GLES --- core/rend/gles/gles.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 199991fd8..91f160253 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -1247,10 +1247,12 @@ bool OpenGLRenderer::renderFrame(int width, int height) #ifndef GLES2 if (gl.prim_restart_fixed_supported) glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); +#ifndef GLES else if (gl.prim_restart_supported) { glEnable(GL_PRIMITIVE_RESTART); glPrimitiveRestartIndex(-1); } +#endif #endif //setup render target first if (is_rtt) From e3809ba058dd7d5fb6febec6b6a0d1bf2a99cbb5 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 14 Dec 2022 23:43:36 +0100 Subject: [PATCH 34/34] gl: macOS build fix --- core/rend/gles/gles.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 26c566133..c0e6f77a1 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -19,6 +19,9 @@ #ifndef GL_MAX_TEXTURE_MAX_ANISOTROPY #define GL_MAX_TEXTURE_MAX_ANISOTROPY 0x84FF #endif +#ifndef GL_PRIMITIVE_RESTART_FIXED_INDEX +#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 +#endif #define glCheck() do { if (unlikely(config::OpenGlChecks)) { 
verify(glGetError()==GL_NO_ERROR); } } while(0)