From 51aa5c5760bfd5f7a5479450b8a4420607b2cc15 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 6 Aug 2023 01:49:35 +1000 Subject: [PATCH] Misc Mac fixes --- src/common/align.h | 35 ++++++++++++ src/core/CMakeLists.txt | 76 +++++++++++++-------------- src/core/gpu.h | 3 +- src/core/gpu/gl/context_agl.h | 2 +- src/core/gpu/gl/context_agl.mm | 5 +- src/core/gpu/gpu_device.cpp | 53 ++++++++++++++++++- src/core/gpu/gpu_device.h | 9 ++-- src/core/gpu/gpu_shader_cache.h | 1 + src/core/gpu/gpu_texture.cpp | 6 ++- src/core/gpu/gpu_texture.h | 2 +- src/core/gpu/opengl_device.h | 2 +- src/core/gpu/opengl_stream_buffer.cpp | 8 +-- src/core/gpu/postprocessing_shader.h | 2 +- src/core/gpu_hw.cpp | 10 +++- src/core/gpu_hw.h | 4 +- src/core/shader_cache_version.h | 2 +- src/core/shadergen.h | 3 +- src/duckstation-qt/CMakeLists.txt | 2 +- src/duckstation-qt/qthost.cpp | 2 +- src/frontend-common/common_host.cpp | 48 ----------------- src/scmversion/gen_scmversion.sh | 2 +- 21 files changed, 162 insertions(+), 115 deletions(-) diff --git a/src/common/align.h b/src/common/align.h index 0aa5faf4b..831008061 100644 --- a/src/common/align.h +++ b/src/common/align.h @@ -3,6 +3,14 @@ #pragma once +#include "types.h" + +#include + +#ifdef _MSC_VER +#include +#endif + namespace Common { template constexpr bool IsAligned(T value, unsigned int alignment) @@ -52,4 +60,31 @@ constexpr T PreviousPow2(T value) value |= (value >> 16); return value - (value >> 1); } + + +ALWAYS_INLINE static void* AlignedMalloc(size_t size, size_t alignment) +{ +#ifdef _MSC_VER + return _aligned_malloc(size, alignment); +#else + // Unaligned sizes are slow on macOS. + #ifdef __APPLE__ + if (IsPow2(alignment)) + size = (size + alignment - 1) & ~(alignment - 1); + #endif + void* ret = nullptr; + posix_memalign(&ret, alignment, size); + return ret; +} + +ALWAYS_INLINE static void AlignedFree(void* ptr) +{ +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +#endif } // namespace Common diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e6c7e8cbe..9a88bfd4d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -123,8 +123,8 @@ target_link_libraries(core PRIVATE stb xxhash imgui rapidjson tinyxml2) target_sources(core PRIVATE gpu/gpu_device.cpp gpu/gpu_device.h - gpu/gpu_pipeline.h - gpu/gpu_shader.h + gpu/gpu_shader_cache.cpp + gpu/gpu_shader_cache.h gpu/gpu_texture.cpp gpu/gpu_texture.h gpu/postprocessing_chain.cpp @@ -196,37 +196,38 @@ if(USE_DRMKMS) endif() if(ENABLE_OPENGL) - target_sources(common PRIVATE + target_sources(core PRIVATE gpu/gl/context.cpp gpu/gl/context.h - gpu/gl/program.cpp - gpu/gl/program.h - gpu/gl/shader_cache.cpp - gpu/gl/shader_cache.h - gpu/gl/stream_buffer.cpp - gpu/gl/stream_buffer.h - gpu/gl/texture.cpp - gpu/gl/texture.h + gpu/opengl_device.cpp + gpu/opengl_device.h + gpu/opengl_loader.h + gpu/opengl_pipeline.cpp + gpu/opengl_pipeline.h + gpu/opengl_stream_buffer.cpp + gpu/opengl_stream_buffer.h + gpu/opengl_texture.cpp + gpu/opengl_texture.h ) - target_compile_definitions(common PUBLIC "WITH_OPENGL=1") - target_link_libraries(common PRIVATE glad) + target_compile_definitions(core PUBLIC "WITH_OPENGL=1") + target_link_libraries(core PRIVATE glad) if(WIN32) - target_sources(common PRIVATE + target_sources(core PRIVATE gl/context_wgl.cpp gl/context_wgl.h ) endif() if(USE_EGL) - target_sources(common PRIVATE + target_sources(core PRIVATE gl/context_egl.cpp gl/context_egl.h ) - target_compile_definitions(common PRIVATE "-DUSE_EGL=1") + target_compile_definitions(core PRIVATE "-DUSE_EGL=1") if(USE_X11) - target_sources(common PRIVATE + target_sources(core PRIVATE gl/context_egl_x11.cpp gl/context_egl_x11.h ) @@ -234,25 +235,25 @@ if(ENABLE_OPENGL) # We set EGL_NO_X11 because otherwise X comes in with its macros and breaks # a bunch of files from compiling, if we include the EGL headers. This just # makes the data types opaque, we can still use it with X11 if needed. - target_compile_definitions(common PRIVATE "-DEGL_NO_X11=1") + target_compile_definitions(core PRIVATE "-DEGL_NO_X11=1") endif() if(ANDROID AND USE_EGL) - target_sources(common PRIVATE + target_sources(core PRIVATE gl/context_egl_android.cpp gl/context_egl_android.h ) endif() if(USE_DRMKMS) - target_compile_definitions(common PRIVATE "-DUSE_GBM=1") - target_sources(common PRIVATE + target_compile_definitions(core PRIVATE "-DUSE_GBM=1") + target_sources(core PRIVATE gl/context_egl_gbm.cpp gl/context_egl_gbm.h ) - target_link_libraries(common PUBLIC GBM::GBM) + target_link_libraries(core PUBLIC GBM::GBM) endif() if(USE_FBDEV) - target_compile_definitions(common PRIVATE "-DUSE_FBDEV=1") - target_sources(common PRIVATE + target_compile_definitions(core PRIVATE "-DUSE_FBDEV=1") + target_sources(core PRIVATE gl/context_egl_fbdev.cpp gl/context_egl_fbdev.h ) @@ -260,37 +261,30 @@ if(ENABLE_OPENGL) endif() if(USE_X11) - target_sources(common PRIVATE + target_sources(core PRIVATE gl/context_glx.cpp gl/context_glx.h ) - target_compile_definitions(common PRIVATE "-DUSE_GLX=1") + target_compile_definitions(core PRIVATE "-DUSE_GLX=1") endif() if(USE_WAYLAND) - target_sources(common PRIVATE + target_sources(core PRIVATE gl/context_egl_wayland.cpp gl/context_egl_wayland.h ) endif() if(APPLE) - target_sources(common PRIVATE + target_sources(core PRIVATE gpu/gl/context_agl.mm gpu/gl/context_agl.h ) endif() endif() -if(ENABLE_OPENGL) - target_sources(core PRIVATE - gpu_hw_opengl.cpp - gpu_hw_opengl.h - ) - target_link_libraries(core PRIVATE glad) -endif() if(ENABLE_VULKAN) - target_sources(common PRIVATE + target_sources(core PRIVATE gpu/vulkan/builders.cpp gpu/vulkan/builders.h gpu/vulkan/context.cpp @@ -309,13 +303,17 @@ if(ENABLE_VULKAN) gpu/vulkan/texture.h gpu/vulkan/util.cpp gpu/vulkan/util.h + gpu/vulkan_gpu_device.cpp + gpu/vulkan_gpu_device.h + gpu/imgui_impl_vulkan.cpp + gpu/imgui_impl_vulkan.h ) - target_compile_definitions(common PUBLIC "WITH_VULKAN=1") - target_link_libraries(common PRIVATE glslang) + target_compile_definitions(core PUBLIC "WITH_VULKAN=1") + target_link_libraries(core PRIVATE glslang) if(APPLE) # Needed for Vulkan Swap Chain. - target_link_libraries(common PRIVATE "objc") + target_link_libraries(core PRIVATE "objc") endif() endif() if(ENABLE_VULKAN) diff --git a/src/core/gpu.h b/src/core/gpu.h index 0b5fe10f9..ccc251b72 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -22,8 +22,7 @@ class GPUTexture; class TimingEvent; -namespace Threading -{ +namespace Threading { class Thread; } diff --git a/src/core/gpu/gl/context_agl.h b/src/core/gpu/gl/context_agl.h index f26547e33..526ec878e 100644 --- a/src/core/gpu/gl/context_agl.h +++ b/src/core/gpu/gl/context_agl.h @@ -3,7 +3,7 @@ #pragma once #include "context.h" -#include "loader.h" +#include "../opengl_loader.h" #if defined(__APPLE__) && defined(__OBJC__) #import diff --git a/src/core/gpu/gl/context_agl.mm b/src/core/gpu/gl/context_agl.mm index 96ab9b135..4cd76a711 100644 --- a/src/core/gpu/gl/context_agl.mm +++ b/src/core/gpu/gl/context_agl.mm @@ -2,9 +2,8 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "context_agl.h" -#include "../assert.h" -#include "../log.h" -#include "loader.h" +#include "common/assert.h" +#include "common/log.h" #include Log_SetChannel(GL::ContextAGL); diff --git a/src/core/gpu/gpu_device.cpp b/src/core/gpu/gpu_device.cpp index 96ec55886..0308562c4 100644 --- a/src/core/gpu/gpu_device.cpp +++ b/src/core/gpu/gpu_device.cpp @@ -28,8 +28,19 @@ Log_SetChannel(GPUDevice); -// FIXME +#ifdef _WIN32 #include "common/windows_headers.h" +#include "d3d11_device.h" +#include "d3d12_gpu_device.h" +#endif + +#ifdef WITH_OPENGL +#include "opengl_device.h" +#endif + +#ifdef WITH_VULKAN +#include "vulkan_gpu_device.h" +#endif // TODO: default sampler mode, create a persistent descriptor set in Vulkan for textures // TODO: input layout => VAO in GL, buffer might change @@ -1513,3 +1524,43 @@ bool GPUDevice::WriteScreenshotToFile(std::string filename, bool internal_resolu compress_thread.detach(); return true; } + +std::unique_ptr Host::CreateDisplayForAPI(RenderAPI api) +{ + switch (api) + { +#ifdef WITH_VULKAN + case RenderAPI::Vulkan: + return std::make_unique(); +#endif + +#ifdef WITH_OPENGL + case RenderAPI::OpenGL: + case RenderAPI::OpenGLES: + return std::make_unique(); +#endif + +#ifdef _WIN32 + case RenderAPI::D3D12: + return std::make_unique(); + + case RenderAPI::D3D11: + return std::make_unique(); +#endif + + default: +#if defined(_WIN32) && defined(_M_ARM64) + return std::make_unique(); +#elif defined(_WIN32) + return std::make_unique(); +#elif defined(__APPLE__) + return WrapNewMetalDevice(); +#elif defined(WITH_OPENGL) + return std::make_unique(); +#elif defined(WITH_VULKAN) + return std::make_unique(); +#else + return {}; +#endif + } +} diff --git a/src/core/gpu/gpu_device.h b/src/core/gpu/gpu_device.h index b7c1d2eab..56057cb62 100644 --- a/src/core/gpu/gpu_device.h +++ b/src/core/gpu/gpu_device.h @@ -188,18 +188,19 @@ public: static constexpr VertexAttribute Make(u8 index, Type type, u8 components, u8 offset) { - VertexAttribute ret = {}; #if 0 + VertexAttribute ret = {}; ret.index = index; ret.type = type; ret.components = components; ret.offset = offset; + return ret; #else // Nasty :/ can't access an inactive element of a union here.. - ret.key = (static_cast(index) & 0xf) | ((static_cast(type) & 0xf) << 4) | - ((static_cast(components) & 0x7) << 8) | ((static_cast(offset) & 0xffff) << 16); + return VertexAttribute{{(static_cast(index) & 0xf) | ((static_cast(type) & 0xf) << 4) | + ((static_cast(components) & 0x7) << 8) | + ((static_cast(offset) & 0xffff) << 16)}}; #endif - return ret; } }; diff --git a/src/core/gpu/gpu_shader_cache.h b/src/core/gpu/gpu_shader_cache.h index 6e1df57df..5bcc6a9af 100644 --- a/src/core/gpu/gpu_shader_cache.h +++ b/src/core/gpu/gpu_shader_cache.h @@ -6,6 +6,7 @@ #include "common/hash_combine.h" #include "common/types.h" +#include #include #include #include diff --git a/src/core/gpu/gpu_texture.cpp b/src/core/gpu/gpu_texture.cpp index 180747797..12686c1bb 100644 --- a/src/core/gpu/gpu_texture.cpp +++ b/src/core/gpu/gpu_texture.cpp @@ -8,8 +8,9 @@ Log_SetChannel(GPUTexture); GPUTexture::GPUTexture() = default; -GPUTexture::GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, GPUTexture::Format format) - : m_width(width), m_height(height), m_layers(layers), m_levels(levels), m_samples(samples), m_format(format) +GPUTexture::GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format) + : m_width(width), m_height(height), m_layers(layers), m_levels(levels), m_samples(samples), m_type(type), + m_format(format) { } @@ -22,6 +23,7 @@ void GPUTexture::ClearBaseProperties() m_layers = 0; m_levels = 0; m_samples = 0; + m_type = GPUTexture::Type::Unknown; m_format = GPUTexture::Format::Unknown; m_state = State::Dirty; } diff --git a/src/core/gpu/gpu_texture.h b/src/core/gpu/gpu_texture.h index 8ac673237..9ef003bf8 100644 --- a/src/core/gpu/gpu_texture.h +++ b/src/core/gpu/gpu_texture.h @@ -128,7 +128,7 @@ public: protected: GPUTexture(); - GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Format format); + GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format); void ClearBaseProperties(); diff --git a/src/core/gpu/opengl_device.h b/src/core/gpu/opengl_device.h index edfdd7a9a..2658b358a 100644 --- a/src/core/gpu/opengl_device.h +++ b/src/core/gpu/opengl_device.h @@ -93,7 +93,7 @@ public: void UnmapIndexBuffer(u32 used_index_count) override; void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; - void UnmapUniformBuffer(u32 size); + void UnmapUniformBuffer(u32 size) override; void SetFramebuffer(GPUFramebuffer* fb) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; diff --git a/src/core/gpu/opengl_stream_buffer.cpp b/src/core/gpu/opengl_stream_buffer.cpp index 8175a7e81..27f2b9051 100644 --- a/src/core/gpu/opengl_stream_buffer.cpp +++ b/src/core/gpu/opengl_stream_buffer.cpp @@ -46,7 +46,7 @@ namespace { class BufferSubDataStreamBuffer final : public OpenGLStreamBuffer { public: - ~BufferSubDataStreamBuffer() override { _aligned_free(m_cpu_buffer); } + ~BufferSubDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); } MappingResult Map(u32 alignment, u32 min_size) override { @@ -88,7 +88,7 @@ public: private: BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { - m_cpu_buffer = static_cast(_aligned_malloc(size, 32)); + m_cpu_buffer = static_cast(Common::AlignedMalloc(size, 32)); if (!m_cpu_buffer) Panic("Failed to allocate CPU storage for GL buffer"); } @@ -100,7 +100,7 @@ private: class BufferDataStreamBuffer final : public OpenGLStreamBuffer { public: - ~BufferDataStreamBuffer() override { _aligned_free(m_cpu_buffer); } + ~BufferDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); } MappingResult Map(u32 alignment, u32 min_size) override { @@ -142,7 +142,7 @@ public: private: BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { - m_cpu_buffer = static_cast(_aligned_malloc(size, 32)); + m_cpu_buffer = static_cast(Common::AlignedMalloc(size, 32)); if (!m_cpu_buffer) Panic("Failed to allocate CPU storage for GL buffer"); } diff --git a/src/core/gpu/postprocessing_shader.h b/src/core/gpu/postprocessing_shader.h index dc2a70bb1..a64253e6f 100644 --- a/src/core/gpu/postprocessing_shader.h +++ b/src/core/gpu/postprocessing_shader.h @@ -4,7 +4,7 @@ #pragma once #include "common/rectangle.h" -#include "core/types.h" +#include "common/types.h" #include "gpu_device.h" #include diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index cd45424c4..fea6bdf80 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -545,6 +545,7 @@ void GPU_HW::DestroyBuffers() bool GPU_HW::CompilePipelines() { + const GPUDevice::Features features = g_host_display->GetFeatures(); GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); @@ -778,8 +779,9 @@ bool GPU_HW::CompilePipelines() // VRAM write // TODO: SSBO path here... { + const bool use_ssbo = features.texture_buffers_emulated_with_ssbo; std::unique_ptr fs = g_host_display->CreateShader( - GPUShaderStage::Fragment, shadergen.GenerateVRAMWriteFragmentShader(false /*m_use_ssbos_for_vram_writes*/)); + GPUShaderStage::Fragment, shadergen.GenerateVRAMWriteFragmentShader(use_ssbo)); if (!fs) return false; @@ -2631,6 +2633,10 @@ void GPU_HW::ShaderCompileProgressTracker::Increment() } } +// TODO: Combine all these.. + +#ifdef _WIN32 + std::unique_ptr GPU::CreateHardwareD3D11Renderer() { if (!Host::AcquireHostDisplay(RenderAPI::D3D11)) @@ -2646,6 +2652,8 @@ std::unique_ptr GPU::CreateHardwareD3D11Renderer() return gpu; } +#endif + std::unique_ptr GPU::CreateHardwareOpenGLRenderer() { diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index b825c9423..a8f9d1489 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -225,8 +225,8 @@ protected: void MapBatchVertexPointer(u32 required_vertices); void UnmapBatchVertexPointer(u32 used_vertices); void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices); - void ClearDisplay(); - void UpdateDisplay(); + void ClearDisplay() override; + void UpdateDisplay() override; u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 5b7dbb113..07b317b2f 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -2,6 +2,6 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "types.h" +#include "common/types.h" static constexpr u32 SHADER_CACHE_VERSION = 8; \ No newline at end of file diff --git a/src/core/shadergen.h b/src/core/shadergen.h index a23bd869e..11c015b8b 100644 --- a/src/core/shadergen.h +++ b/src/core/shadergen.h @@ -2,8 +2,9 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + #include "gpu/gpu_device.h" -#include "gpu_hw.h" + #include #include diff --git a/src/duckstation-qt/CMakeLists.txt b/src/duckstation-qt/CMakeLists.txt index dd568a82a..4b236189c 100644 --- a/src/duckstation-qt/CMakeLists.txt +++ b/src/duckstation-qt/CMakeLists.txt @@ -197,7 +197,7 @@ if(WIN32) ) endif() -if(APPLE) +if(APPLE AND NOT CMAKE_GENERATOR MATCHES "Xcode") set(BUNDLE_PATH ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DuckStation.app) # Ask for an application bundle. diff --git a/src/duckstation-qt/qthost.cpp b/src/duckstation-qt/qthost.cpp index 213aed329..80475a480 100644 --- a/src/duckstation-qt/qthost.cpp +++ b/src/duckstation-qt/qthost.cpp @@ -1635,7 +1635,7 @@ void EmuThread::updatePerformanceCounters() if (g_gpu) { // TODO: Fix renderer type - renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::HardwareD3D11 : GPURenderer::Software; + renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::HardwareOpenGL : GPURenderer::Software; std::tie(render_width, render_height) = g_gpu->GetEffectiveDisplayResolution(); } diff --git a/src/frontend-common/common_host.cpp b/src/frontend-common/common_host.cpp index 48aea3444..a6ded29ea 100644 --- a/src/frontend-common/common_host.cpp +++ b/src/frontend-common/common_host.cpp @@ -55,21 +55,11 @@ #ifdef _WIN32 #include "common/windows_headers.h" -#include "core/gpu/d3d11_device.h" -#include "core/gpu/d3d12_gpu_device.h" #include #include #include #endif -#ifdef WITH_OPENGL -#include "core/gpu/opengl_device.h" -#endif - -#ifdef WITH_VULKAN -#include "core/gpu/vulkan_gpu_device.h" -#endif - Log_SetChannel(CommonHostInterface); namespace CommonHost { @@ -139,44 +129,6 @@ void CommonHost::PumpMessagesOnCPUThread() #endif } -std::unique_ptr Host::CreateDisplayForAPI(RenderAPI api) -{ - switch (api) - { -#ifdef WITH_VULKAN - case RenderAPI::Vulkan: - return std::make_unique(); -#endif - -#ifdef WITH_OPENGL - case RenderAPI::OpenGL: - case RenderAPI::OpenGLES: - return std::make_unique(); -#endif - -#ifdef _WIN32 - case RenderAPI::D3D12: - return std::make_unique(); - - case RenderAPI::D3D11: - return std::make_unique(); -#endif - - default: -#if defined(_WIN32) && defined(_M_ARM64) - return std::make_unique(); -#elif defined(_WIN32) - return std::make_unique(); -#elif defined(WITH_OPENGL) - return std::make_unique(); -#elif defined(WITH_VULKAN) - return std::make_unique(); -#else - return {}; -#endif - } -} - bool CommonHost::CreateHostDisplayResources() { return true; diff --git a/src/scmversion/gen_scmversion.sh b/src/scmversion/gen_scmversion.sh index 3d25e5ede..9c1dacaba 100755 --- a/src/scmversion/gen_scmversion.sh +++ b/src/scmversion/gen_scmversion.sh @@ -4,7 +4,7 @@ VERSION_FILE="scmversion.cpp" CURDIR=$(pwd) if [ "$(uname -s)" = "Darwin" ]; then - cd "$(dirname $(python -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$0"))" + cd "$(dirname $(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$0"))" else cd $(dirname $(readlink -f $0)) fi