From acbb1cb8856e7f2ddde8a3df54f4d8f878b6974e Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Wed, 17 Feb 2021 00:14:20 +1000 Subject: [PATCH] WIP --- src/common/vulkan/context.cpp | 12 ++ src/common/vulkan/context.h | 1 + src/core/gpu_hw.cpp | 19 ++- src/core/gpu_hw.h | 8 +- src/core/gpu_hw_d3d11.cpp | 122 +++++++++++---- src/core/gpu_hw_opengl.cpp | 281 ++++++++++++++++++++++++---------- src/core/gpu_hw_shadergen.cpp | 12 +- src/core/gpu_hw_shadergen.h | 5 +- src/core/gpu_hw_vulkan.cpp | 161 ++++++++++++++----- src/core/settings.h | 1 + 10 files changed, 447 insertions(+), 175 deletions(-) diff --git a/src/common/vulkan/context.cpp b/src/common/vulkan/context.cpp index 884c4bbfd..2ad534b1e 100644 --- a/src/common/vulkan/context.cpp +++ b/src/common/vulkan/context.cpp @@ -1160,6 +1160,18 @@ void Context::DisableDebugReports() } } +bool Context::SupportsTextureFormat(VkFormat format, bool as_render_target /*= false*/) const +{ + VkImageFormatProperties props; + VkResult res = vkGetPhysicalDeviceImageFormatProperties( + m_physical_device, format, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_SAMPLED_BIT | (as_render_target ? 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : 0), 0, &props); + if (res != VK_SUCCESS) + return false; + + return true; +} + bool Context::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index) { for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++) diff --git a/src/common/vulkan/context.h b/src/common/vulkan/context.h index e08363676..91825e3b9 100644 --- a/src/common/vulkan/context.h +++ b/src/common/vulkan/context.h @@ -89,6 +89,7 @@ public: // Support bits ALWAYS_INLINE bool SupportsGeometryShaders() const { return m_device_features.geometryShader == VK_TRUE; } ALWAYS_INLINE bool SupportsDualSourceBlend() const { return m_device_features.dualSrcBlend == VK_TRUE; } + bool SupportsTextureFormat(VkFormat format, bool as_render_target = false) const; // Helpers for getting constants ALWAYS_INLINE VkDeviceSize GetUniformBufferAlignment() const diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 2f6a46f4f..36febce9e 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -48,6 +48,8 @@ bool GPU_HW::Initialize(HostDisplay* host_display) m_resolution_scale = CalculateResolutionScale(); m_multisamples = std::min(g_settings.gpu_multisamples, m_max_multisamples); + m_use_rgb5a1_framebuffer = + g_settings.gpu_use_rgb5a1_framebuffer && !g_settings.gpu_true_color && m_supports_rgb5a1_framebuffer; m_render_api = host_display->GetRenderAPI(); m_per_sample_shading = g_settings.gpu_per_sample_shading && m_supports_per_sample_shading; m_true_color = g_settings.gpu_true_color; @@ -128,17 +130,20 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) const u32 resolution_scale = CalculateResolutionScale(); const u32 multisamples = std::min(m_max_multisamples, g_settings.gpu_multisamples); const bool per_sample_shading = g_settings.gpu_per_sample_shading && m_supports_per_sample_shading; + const bool rgb5a1_framebuffer = + g_settings.gpu_use_rgb5a1_framebuffer && !g_settings.gpu_true_color && m_supports_rgb5a1_framebuffer; const 
GPUDownsampleMode downsample_mode = GetDownsampleMode(resolution_scale); const bool use_uv_limits = ShouldUseUVLimits(); - *framebuffer_changed = - (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_downsample_mode != downsample_mode); + *framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || + m_downsample_mode != downsample_mode || rgb5a1_framebuffer != m_use_rgb5a1_framebuffer); *shaders_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || - m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading || - m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter || - m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing || - m_downsample_mode != downsample_mode || m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); + rgb5a1_framebuffer != m_use_rgb5a1_framebuffer || m_true_color != g_settings.gpu_true_color || + m_per_sample_shading != per_sample_shading || m_scaled_dithering != g_settings.gpu_scaled_dithering || + m_texture_filtering != g_settings.gpu_texture_filter || m_using_uv_limits != use_uv_limits || + m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing || m_downsample_mode != downsample_mode || + m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); if (m_resolution_scale != resolution_scale) { @@ -167,6 +172,7 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) m_resolution_scale = resolution_scale; m_multisamples = multisamples; m_per_sample_shading = per_sample_shading; + m_use_rgb5a1_framebuffer = rgb5a1_framebuffer; m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; m_texture_filtering = g_settings.gpu_texture_filter; @@ -246,6 +252,7 @@ void GPU_HW::PrintSettingsToLog() Log_InfoPrintf("Resolution Scale: %u (%ux%u), 
maximum %u", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, VRAM_HEIGHT * m_resolution_scale, m_max_resolution_scale); Log_InfoPrintf("Multisampling: %ux%s", m_multisamples, m_per_sample_shading ? " (per sample shading)" : ""); + Log_InfoPrintf("Framebuffer format: %s", m_use_rgb5a1_framebuffer ? "RGB5A1" : "RGBA8"); Log_InfoPrintf("Dithering: %s%s", m_true_color ? "Disabled" : "Enabled", (!m_true_color && m_scaled_dithering) ? " (Scaled)" : ""); Log_InfoPrintf("Texture Filtering: %s", Settings::GetTextureFilterDisplayName(m_texture_filtering)); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index f28e2df47..7b970ad17 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -329,9 +329,11 @@ protected: BitField m_supports_per_sample_shading; BitField m_supports_dual_source_blend; BitField m_supports_adaptive_downsampling; - BitField m_per_sample_shading; - BitField m_scaled_dithering; - BitField m_chroma_smoothing; + BitField m_supports_rgb5a1_framebuffer; + BitField m_per_sample_shading; + BitField m_scaled_dithering; + BitField m_chroma_smoothing; + BitField m_use_rgb5a1_framebuffer; u8 bits = 0; }; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index da0e8b2fe..128897598 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -237,6 +237,12 @@ void GPU_HW_D3D11::SetCapabilities() m_max_multisamples = multisamples; } } + + const UINT required_support = D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_TEXTURE2D; + UINT bgr5a1_support = 0; + m_supports_rgb5a1_framebuffer = + (SUCCEEDED(m_device->CheckFormatSupport(DXGI_FORMAT_B5G5R5A1_UNORM, &bgr5a1_support)) && + ((bgr5a1_support & required_support) == required_support)); } bool GPU_HW_D3D11::CreateFramebuffer() @@ -247,21 +253,25 @@ bool GPU_HW_D3D11::CreateFramebuffer() const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const u16 samples = static_cast(m_multisamples); - const DXGI_FORMAT 
texture_format = DXGI_FORMAT_R8G8B8A8_UNORM; + const DXGI_FORMAT render_texture_format = + m_use_rgb5a1_framebuffer ? DXGI_FORMAT_B5G5R5A1_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM; + const DXGI_FORMAT display_texture_format = + m_use_rgb5a1_framebuffer ? DXGI_FORMAT_B5G5R5A1_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM; const DXGI_FORMAT depth_format = DXGI_FORMAT_D16_UNORM; - if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, 1, samples, texture_format, + if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, 1, samples, render_texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || !m_vram_depth_texture.Create(m_device.Get(), texture_width, texture_height, 1, samples, depth_format, D3D11_BIND_DEPTH_STENCIL) || - !m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, 1, 1, texture_format, + !m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, 1, 1, render_texture_format, D3D11_BIND_SHADER_RESOURCE) || !m_display_texture.Create(m_device.Get(), GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, - GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, texture_format, + GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, display_texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || - !m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || - !m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false)) + ((!m_use_rgb5a1_framebuffer || m_resolution_scale > 1) && + !m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, render_texture_format, + D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)) || + !m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, render_texture_format, false)) { return false; } @@ -278,7 +288,7 @@ bool GPU_HW_D3D11::CreateFramebuffer() const u32 levels = 
GetAdaptiveDownsamplingMipLevels(); if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, static_cast(levels), 1, - texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || + render_texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || !m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, DXGI_FORMAT_R8_UNORM, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)) @@ -290,9 +300,9 @@ bool GPU_HW_D3D11::CreateFramebuffer() for (u32 i = 0; i < levels; i++) { const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D, - texture_format, i, 1); - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D, texture_format, - i, 1); + render_texture_format, i, 1); + const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D, + render_texture_format, i, 1); hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc, m_downsample_mip_views[i].first.GetAddressOf()); @@ -307,7 +317,7 @@ bool GPU_HW_D3D11::CreateFramebuffer() } else if (m_downsample_mode == GPUDownsampleMode::Box) { - if (!m_downsample_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, + if (!m_downsample_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, render_texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)) { return false; @@ -497,8 +507,8 @@ bool GPU_HW_D3D11::CompileShaders() g_settings.gpu_use_debug_device); GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_supports_dual_source_blend); + m_use_rgb5a1_framebuffer, m_true_color, m_scaled_dithering, m_texture_filtering, + m_using_uv_limits, m_pgxp_depth_buffer, 
m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3) + 1; @@ -937,31 +947,77 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { // Get bounds with wrap-around handled. const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); // Encode the 24-bit texture as 16-bit. - const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); - m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); - SetViewportAndScissor(0, 0, encoded_width, encoded_height); - DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms)); - - // Stage the readback. - m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_encoding_texture.GetD3DTexture(), 0, 0, 0, 0, 0, - encoded_width, encoded_height); - // And copy it into our shadow buffer. 
- if (m_vram_readback_texture.Map(m_context.Get(), false)) + if (!m_use_rgb5a1_framebuffer || m_resolution_scale > 1) { - m_vram_readback_texture.ReadPixels(0, 0, encoded_width * 2, encoded_height, VRAM_WIDTH, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); - m_vram_readback_texture.Unmap(m_context.Get()); + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); + const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); + m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr); + m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); + m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); + SetViewportAndScissor(0, 0, encoded_width, encoded_height); + DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms)); + + // Stage the readback. + m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_encoding_texture.GetD3DTexture(), 0, 0, 0, 0, 0, + encoded_width, encoded_height); + + // And copy it into our shadow buffer. + if (m_vram_readback_texture.Map(m_context.Get(), false)) + { + m_vram_readback_texture.ReadPixels(0, 0, encoded_width * 2, encoded_height, VRAM_WIDTH, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + m_vram_readback_texture.Unmap(m_context.Get()); + } + else + { + Log_ErrorPrintf("Failed to map VRAM readback texture"); + } } else { - Log_ErrorPrintf("Failed to map VRAM readback texture"); + const u32 copy_width = copy_rect.GetWidth(); + const u32 copy_height = copy_rect.GetHeight(); + + m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_texture.GetD3DTexture(), 0, copy_rect.left, + copy_rect.top, copy_rect.left, copy_rect.top, copy_width, copy_height); + + // And copy it into our shadow buffer. 
+ if (m_vram_readback_texture.Map(m_context.Get(), false)) + { + // Needs to be swapped to PSX order. + // TODO: vector-ify. + const u32 src_pitch = m_vram_readback_texture.GetMappedSubresource().RowPitch; + const u32 dst_pitch = sizeof(u16) * VRAM_WIDTH; + const u32 copy_size = sizeof(u16) * copy_width; + const u8* src_ptr = reinterpret_cast(m_vram_readback_texture.GetMappedSubresource().pData) + + copy_rect.top * src_pitch + copy_rect.left * sizeof(u16); + u8* dst_ptr = reinterpret_cast(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + for (u32 row = 0; row < copy_height; row++) + { + const u8* src_row_ptr = src_ptr; + u8* dst_row_ptr = dst_ptr; + for (u32 col = 0; col < copy_width; col++) + { + u16 pix; + std::memcpy(&pix, src_row_ptr, sizeof(pix)); + src_row_ptr += sizeof(u16); + pix = (pix & 0x83E0) | ((pix >> 10) & 0x1F) | ((pix & 0x1F) << 10); + std::memcpy(dst_row_ptr, &pix, sizeof(pix)); + dst_row_ptr += sizeof(u16); + } + src_ptr += src_pitch; + dst_ptr += dst_pitch; + } + m_vram_readback_texture.Unmap(m_context.Get()); + } + else + { + Log_ErrorPrintf("Failed to map VRAM readback texture"); + } } RestoreGraphicsAPIState(); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 7502f2cfe..e168c4d3f 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -378,6 +378,9 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) // adaptive smoothing would require texture views, which aren't in GLES. m_supports_adaptive_downsampling = false; + + // GLES mandates support for RGB5A1, desktop ??? + m_supports_rgb5a1_framebuffer = true; } bool GPU_HW_OpenGL::CreateFramebuffer() @@ -386,19 +389,23 @@ bool GPU_HW_OpenGL::CreateFramebuffer() const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const u32 multisamples = m_multisamples; + const GLenum render_texture_format = m_use_rgb5a1_framebuffer ? 
GL_RGB5_A1 : GL_RGBA8; + const GLenum render_texture_type = m_use_rgb5a1_framebuffer ? GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE; + const GLenum display_texture_format = m_use_rgb5a1_framebuffer ? GL_RGB5_A1 : GL_RGBA8; + const GLenum display_texture_type = m_use_rgb5a1_framebuffer ? GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE; - if (!m_vram_texture.Create(texture_width, texture_height, multisamples, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, - false, true) || + if (!m_vram_texture.Create(texture_width, texture_height, multisamples, render_texture_format, GL_RGBA, + render_texture_type, nullptr, false, true) || !m_vram_depth_texture.Create(texture_width, texture_height, multisamples, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, nullptr, false) || - !m_vram_read_texture.Create(texture_width, texture_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, - true) || + !m_vram_read_texture.Create(texture_width, texture_height, 1, render_texture_format, GL_RGBA, render_texture_type, + nullptr, false, true) || !m_vram_read_texture.CreateFramebuffer() || - !m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, - false) || + (!m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, render_texture_format, GL_RGBA, render_texture_type, + nullptr, false)) || !m_vram_encoding_texture.CreateFramebuffer() || !m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, - 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) || + 1, display_texture_format, GL_RGBA, display_texture_type, nullptr, false) || !m_display_texture.CreateFramebuffer()) { return false; @@ -416,7 +423,8 @@ bool GPU_HW_OpenGL::CreateFramebuffer() if (m_downsample_mode == GPUDownsampleMode::Box) { - if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE) || + if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, display_texture_format, 
GL_RGBA, + display_texture_type) || !m_downsample_texture.CreateFramebuffer()) { return false; @@ -509,8 +517,8 @@ bool GPU_HW_OpenGL::CompilePrograms() const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout(); GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_supports_dual_source_blend); + m_use_rgb5a1_framebuffer, m_true_color, m_scaled_dithering, m_texture_filtering, + m_using_uv_limits, m_pgxp_depth_buffer, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 6; @@ -862,18 +870,20 @@ void GPU_HW_OpenGL::UpdateDisplay() { UpdateVRAMReadTexture(); - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_read_texture.GetGLId())), - HostDisplayPixelFormat::RGBA8, m_vram_read_texture.GetWidth(), - static_cast(m_vram_read_texture.GetHeight()), 0, - m_vram_read_texture.GetHeight(), m_vram_read_texture.GetWidth(), - -static_cast(m_vram_read_texture.GetHeight())); + m_host_display->SetDisplayTexture( + reinterpret_cast(static_cast(m_vram_read_texture.GetGLId())), + m_use_rgb5a1_framebuffer ? HostDisplayPixelFormat::RGBA5551 : HostDisplayPixelFormat::RGBA8, + m_vram_read_texture.GetWidth(), static_cast(m_vram_read_texture.GetHeight()), 0, + m_vram_read_texture.GetHeight(), m_vram_read_texture.GetWidth(), + -static_cast(m_vram_read_texture.GetHeight())); } else { - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), - HostDisplayPixelFormat::RGBA8, m_vram_texture.GetWidth(), - static_cast(m_vram_texture.GetHeight()), 0, m_vram_texture.GetHeight(), - m_vram_texture.GetWidth(), -static_cast(m_vram_texture.GetHeight())); + m_host_display->SetDisplayTexture( + reinterpret_cast(static_cast(m_vram_texture.GetGLId())), + m_use_rgb5a1_framebuffer ? 
HostDisplayPixelFormat::RGBA5551 : HostDisplayPixelFormat::RGBA8, + m_vram_texture.GetWidth(), static_cast(m_vram_texture.GetHeight()), 0, m_vram_texture.GetHeight(), + m_vram_texture.GetWidth(), -static_cast(m_vram_texture.GetHeight())); } m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); @@ -907,8 +917,9 @@ void GPU_HW_OpenGL::UpdateDisplay() else { m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), - HostDisplayPixelFormat::RGBA8, m_vram_texture.GetWidth(), - m_vram_texture.GetHeight(), scaled_vram_offset_x, + m_use_rgb5a1_framebuffer ? HostDisplayPixelFormat::RGBA5551 : + HostDisplayPixelFormat::RGBA8, + m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x, m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width, -static_cast(scaled_display_height)); } @@ -947,10 +958,11 @@ void GPU_HW_OpenGL::UpdateDisplay() } else { - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_display_texture.GetGLId())), - HostDisplayPixelFormat::RGBA8, m_display_texture.GetWidth(), - m_display_texture.GetHeight(), 0, scaled_display_height, scaled_display_width, - -static_cast(scaled_display_height)); + m_host_display->SetDisplayTexture( + reinterpret_cast(static_cast(m_display_texture.GetGLId())), + m_use_rgb5a1_framebuffer ? 
HostDisplayPixelFormat::RGBA5551 : HostDisplayPixelFormat::RGBA8, + m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, scaled_display_height, scaled_display_width, + -static_cast(scaled_display_height)); } // restore state @@ -971,35 +983,95 @@ void GPU_HW_OpenGL::UpdateDisplay() } } +static u16 SwapGL16ToPSX16(u16 val) +{ + // RRRRRGGGGGBBBBBA -> ABBBBBGGGGGRRRRR + return ((val << 15) | // A + (val >> 11) | // R + ((val >> 1) & 0x3E0) | // G + ((val << 9) & 0x7C00)); // B +} + +static u16 SwapPSX16ToGL16(u16 val) +{ + // ABBBBBGGGGGRRRRR -> // RRRRRGGGGGBBBBBA + return ((val >> 15) | // A + ((val << 11) & 0xF800) | // R + ((val << 1) & 0x7C0) | // G + ((val >> 9) & 0x3E)); // B +} + void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { // Get bounds with wrap-around handled. const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); + if (!m_use_rgb5a1_framebuffer) + { + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); - // Encode the 24-bit texture as 16-bit. - const u32 uniforms[4] = {copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight(), copy_rect.GetWidth(), - copy_rect.GetHeight()}; - m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); - m_vram_texture.Bind(); - m_vram_read_program.Bind(); - UploadUniformBuffer(uniforms, sizeof(uniforms)); - glDisable(GL_BLEND); - glDisable(GL_SCISSOR_TEST); - glViewport(0, 0, encoded_width, encoded_height); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); + // Encode the 24-bit texture as 16-bit. 
+ const u32 uniforms[4] = {copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight(), copy_rect.GetWidth(), + copy_rect.GetHeight()}; + m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_texture.Bind(); + m_vram_read_program.Bind(); + UploadUniformBuffer(uniforms, sizeof(uniforms)); + glDisable(GL_BLEND); + glDisable(GL_SCISSOR_TEST); + glViewport(0, 0, encoded_width, encoded_height); + glBindVertexArray(m_attributeless_vao_id); + glDrawArrays(GL_TRIANGLES, 0, 3); - // Readback encoded texture. - m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); - glPixelStorei(GL_PACK_ALIGNMENT, 2); - glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); - glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); - glPixelStorei(GL_PACK_ALIGNMENT, 4); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - RestoreGraphicsAPIState(); + // Readback encoded texture. + m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); + glPixelStorei(GL_PACK_ALIGNMENT, 2); + glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); + glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + RestoreGraphicsAPIState(); + } + else + { + const u32 copy_width = copy_rect.GetWidth(); + const u32 copy_height = copy_rect.GetHeight(); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id); + glPixelStorei(GL_PACK_ALIGNMENT, 2); + glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH); + glReadPixels(copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_height, copy_width, copy_height, GL_RGBA, + GL_UNSIGNED_SHORT_5_5_5_1, &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + + // flip and convert colours + const u32 rows_to_flip = copy_height / 2; + u16* 
base_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]; + for (u32 row = 0; row < rows_to_flip; row++) + { + const u32 row1 = row; + const u32 row2 = copy_height - row - 1; + + u16* row1_ptr = base_ptr + row1 * VRAM_WIDTH; + u16* row2_ptr = base_ptr + row2 * VRAM_WIDTH; + + for (u32 col = 0; col < copy_width; col++) + { + const u16 temp = SwapGL16ToPSX16(row1_ptr[col]); + row1_ptr[col] = SwapGL16ToPSX16(row2_ptr[col]); + row2_ptr[col] = temp; + } + } + if (copy_height & 1u) + { + const u32 row = rows_to_flip; + u16* row_ptr = base_ptr + row * VRAM_WIDTH; + for (u32 col = 0; col < copy_width; col++) + row_ptr[col] = SwapGL16ToPSX16(row_ptr[col]); + } + } } void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) @@ -1108,44 +1180,87 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* GPU_HW::UpdateVRAM(x, y, width, height, data, set_mask, check_mask); - const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); - - // reverse copy the rows so it matches opengl's lower-left origin - const u32 source_stride = width * sizeof(u16); - const u8* source_ptr = static_cast(data) + (source_stride * (height - 1)); - u32* dest_ptr = static_cast(map_result.pointer); - for (u32 row = 0; row < height; row++) - { - const u8* source_row_ptr = source_ptr; - - for (u32 col = 0; col < width; col++) - { - u16 src_col; - std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); - source_row_ptr += sizeof(src_col); - - *(dest_ptr++) = RGBA5551ToRGBA8888(src_col); - } - - source_ptr -= source_stride; - } - - m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32)); - m_texture_stream_buffer->Bind(); - - // have to write to the 1x texture first - if (m_resolution_scale > 1) - m_vram_encoding_texture.Bind(); - else - m_vram_texture.Bind(); - // lower-left origin flip happens here const u32 flipped_y = VRAM_HEIGHT - y - height; - // update texture data - 
glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, - reinterpret_cast(static_cast(map_result.buffer_offset))); - m_texture_stream_buffer->Unbind(); + if (!m_use_rgb5a1_framebuffer) + { + const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); + + // reverse copy the rows so it matches opengl's lower-left origin + const u32 source_stride = width * sizeof(u16); + const u8* source_ptr = static_cast(data) + (source_stride * (height - 1)); + u32* dest_ptr = static_cast(map_result.pointer); + for (u32 row = 0; row < height; row++) + { + const u8* source_row_ptr = source_ptr; + + for (u32 col = 0; col < width; col++) + { + u16 src_col; + std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); + source_row_ptr += sizeof(src_col); + + *(dest_ptr++) = RGBA5551ToRGBA8888(src_col); + } + + source_ptr -= source_stride; + } + + m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32)); + m_texture_stream_buffer->Bind(); + + // have to write to the 1x texture first + if (m_resolution_scale > 1) + m_vram_encoding_texture.Bind(); + else + m_vram_texture.Bind(); + + // update texture data + glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, + reinterpret_cast(static_cast(map_result.buffer_offset))); + m_texture_stream_buffer->Unbind(); + } + else + { + const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); + + // reverse copy the rows so it matches opengl's lower-left origin + const u32 source_stride = width * sizeof(u16); + const u8* source_ptr = static_cast(data) + (source_stride * (height - 1)); + u16* dest_ptr = static_cast(map_result.pointer); + for (u32 row = 0; row < height; row++) + { + const u8* source_row_ptr = source_ptr; + + for (u32 col = 0; col < width; col++) + { + // TODO: Byte flip? 
+ u16 src_col; + std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); + source_row_ptr += sizeof(src_col); + *(dest_ptr++) = SwapPSX16ToGL16(src_col); + } + + source_ptr -= source_stride; + } + + m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32)); + m_texture_stream_buffer->Bind(); + + // have to write to the 1x texture first + if (m_resolution_scale > 1) + m_vram_encoding_texture.Bind(); + else + m_vram_texture.Bind(); + + // update texture data + glPixelStorei(GL_UNPACK_ALIGNMENT, 2); + glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, + reinterpret_cast(static_cast(map_result.buffer_offset))); + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + m_texture_stream_buffer->Unbind(); + } if (m_resolution_scale > 1) { diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 59907a778..fa68ca033 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -4,13 +4,13 @@ #include GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, - bool per_sample_shading, bool true_color, bool scaled_dithering, - GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth, - bool supports_dual_source_blend) + bool per_sample_shading, bool rgb5a1_framebuffer, bool true_color, + bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, + bool pgxp_depth, bool supports_dual_source_blend) : ShaderGen(render_api, supports_dual_source_blend), m_resolution_scale(resolution_scale), - m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_true_color(true_color), - m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits), - m_pgxp_depth(pgxp_depth) + m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_rgb5a1_framebuffer(rgb5a1_framebuffer), + m_true_color(true_color), m_scaled_dithering(scaled_dithering), 
m_texture_filter(texture_filtering), + m_uv_limits(uv_limits), m_pgxp_depth(pgxp_depth) { } diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index dff617e97..6c770d21e 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -6,8 +6,8 @@ class GPU_HW_ShaderGen : public ShaderGen { public: GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, - bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, - bool pgxp_depth, bool supports_dual_source_blend); + bool rgb5a1_framebuffer, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, + bool uv_limits, bool pgxp_depth, bool supports_dual_source_blend); ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); @@ -37,6 +37,7 @@ private: u32 m_resolution_scale; u32 m_multisamples; bool m_per_sample_shading; + bool m_rgb5a1_framebuffer; bool m_true_color; bool m_scaled_dithering; GPUTextureFilter m_texture_filter; diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index d58b5d845..1d4497bc5 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -320,9 +320,12 @@ void GPU_HW_Vulkan::SetCapabilities() m_supports_dual_source_blend = g_vulkan_context->GetDeviceFeatures().dualSrcBlend; m_supports_per_sample_shading = g_vulkan_context->GetDeviceFeatures().sampleRateShading; m_supports_adaptive_downsampling = true; + m_supports_rgb5a1_framebuffer = g_vulkan_context->SupportsTextureFormat(VK_FORMAT_R5G5B5A1_UNORM_PACK16) || + g_vulkan_context->SupportsTextureFormat(VK_FORMAT_B5G5R5A1_UNORM_PACK16); Log_InfoPrintf("Dual-source blend: %s", m_supports_dual_source_blend ? "supported" : "not supported"); Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? "supported" : "not supported"); + Log_InfoPrintf("RGB5A1 framebuffer: %s", m_supports_rgb5a1_framebuffer ? 
"supported" : "not supported"); Log_InfoPrintf("Max multisamples: %u", m_max_multisamples); #ifdef __APPLE__ @@ -509,36 +512,59 @@ bool GPU_HW_Vulkan::CreateFramebuffer() // scale vram size to internal resolution const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; - const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM; + u32 readback_texture_width = VRAM_WIDTH / 2; + u32 readback_texture_height = VRAM_HEIGHT; + + // figure out which 16-bit format to use + VkFormat render_format = VK_FORMAT_R8G8B8A8_UNORM; + VkFormat display_format = VK_FORMAT_R8G8B8A8_UNORM; + if (m_use_rgb5a1_framebuffer) + { + if (g_vulkan_context->SupportsTextureFormat(VK_FORMAT_R5G5B5A1_UNORM_PACK16)) + { + render_format = VK_FORMAT_R5G5B5A1_UNORM_PACK16; + display_format = VK_FORMAT_R5G5B5A1_UNORM_PACK16; + readback_texture_width = VRAM_WIDTH; + readback_texture_height = VRAM_HEIGHT; + } + else if (g_vulkan_context->SupportsTextureFormat(VK_FORMAT_B5G5R5A1_UNORM_PACK16)) + { + render_format = VK_FORMAT_B5G5R5A1_UNORM_PACK16; + display_format = VK_FORMAT_B5G5R5A1_UNORM_PACK16; + readback_texture_width = VRAM_WIDTH; + readback_texture_height = VRAM_HEIGHT; + } + } + const VkFormat depth_format = VK_FORMAT_D16_UNORM; const VkSampleCountFlagBits samples = static_cast(m_multisamples); - if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, + if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, render_format, samples, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || !m_vram_depth_texture.Create(texture_width, texture_height, 1, 1, depth_format, samples, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_vram_read_texture.Create(texture_width, 
texture_height, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, + !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, render_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || !m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, - 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, + 1, 1, display_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + !m_vram_readback_texture.Create(readback_texture_width, readback_texture_height, 1, 1, render_format, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) || - !m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2, - VRAM_HEIGHT)) + !m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, render_format, + readback_texture_width, readback_texture_height)) { return false; } m_vram_render_pass = - g_vulkan_context->GetRenderPass(texture_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); + g_vulkan_context->GetRenderPass(render_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); m_vram_update_depth_render_pass = g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE); m_display_render_pass = g_vulkan_context->GetRenderPass(m_display_texture.GetFormat(), VK_FORMAT_UNDEFINED, @@ -605,7 +631,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer() { const u32 levels = GetAdaptiveDownsamplingMipLevels(); - if 
(!m_downsample_texture.Create(texture_width, texture_height, levels, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, + if (!m_downsample_texture.Create(texture_width, texture_height, levels, 1, render_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || @@ -683,7 +709,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer() } else if (m_downsample_mode == GPUDownsampleMode::Box) { - if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, + if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, render_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) @@ -818,8 +844,8 @@ bool GPU_HW_Vulkan::CompilePipelines() VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache(); GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_supports_dual_source_blend); + m_use_rgb5a1_framebuffer, m_true_color, m_scaled_dithering, m_texture_filtering, + m_using_uv_limits, m_pgxp_depth_buffer, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3) + 1; @@ -1421,43 +1447,94 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { // Get bounds with wrap-around handled. 
const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); + if (!m_use_rgb5a1_framebuffer || m_resolution_scale > 1) + { + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); - EndRenderPass(); + EndRenderPass(); - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - // Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use - // the actual size we're rendering to... - BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(), - m_vram_readback_texture.GetHeight()); + // Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we + // use the actual size we're rendering to... + BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(), + m_vram_readback_texture.GetHeight()); - // Encode the 24-bit texture as 16-bit. 
- const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline); - vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), - uniforms); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, - &m_vram_read_descriptor_set, 0, nullptr); - Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); + // Encode the 24-bit texture as 16-bit. + const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline); + vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), + uniforms); + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, + &m_vram_read_descriptor_set, 0, nullptr); + Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height); + vkCmdDraw(cmdbuf, 3, 1, 0, 0); - EndRenderPass(); + EndRenderPass(); - m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - // Stage the readback. - m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width, - encoded_height); + // Stage the readback. + m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width, + encoded_height); - // And copy it into our shadow buffer (will execute command buffer and stall). 
- m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], - VRAM_WIDTH * sizeof(u16)); + // And copy it into our shadow buffer (will execute command buffer and stall). + m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], + VRAM_WIDTH * sizeof(u16)); + } + else // 16-bit framebuffer at 1x scale: copy texels straight from the VRAM texture, no encode pass. + { + const u32 copy_width = copy_rect.GetWidth(); + const u32 copy_height = copy_rect.GetHeight(); + + EndRenderPass(); + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_vram_readback_staging_texture.CopyFromTexture(m_vram_texture, copy_rect.left, copy_rect.top, 0, 0, 0, 0, + copy_width, copy_height); + m_vram_readback_staging_texture.Flush(); + + const u32 src_stride = m_vram_readback_staging_texture.GetMappedStride(); + const u32 dst_stride = VRAM_WIDTH * sizeof(u16); + const u32 copy_size = copy_width * sizeof(u16); + + const u8* src_ptr = reinterpret_cast<const u8*>(m_vram_readback_staging_texture.GetMappedPointer()); + u8* dst_ptr = reinterpret_cast<u8*>(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + + if (m_vram_texture.GetFormat() == VK_FORMAT_R5G5B5A1_UNORM_PACK16) + { + for (u32 row = 0; row < copy_height; row++) + { + std::memcpy(dst_ptr, src_ptr, copy_size); + src_ptr += src_stride; + dst_ptr += dst_stride; + } + } + else + { + for (u32 row = 0; row < copy_height; row++) + { + const u8* src_row_ptr = src_ptr; + u8* dst_row_ptr = dst_ptr; + for (u32 col = 0; col < copy_width; col++) + { + u16 pix; + std::memcpy(&pix, src_row_ptr, sizeof(pix)); + src_row_ptr += sizeof(u16); + pix = (pix & 0x83E0) | ((pix >> 10) & 0x1F) | ((pix & 0x1F) << 10); + std::memcpy(dst_row_ptr, &pix, sizeof(pix)); + dst_row_ptr += sizeof(u16); + } + src_ptr += src_stride; + dst_ptr += dst_stride; + } + } + } 
RestoreGraphicsAPIState(); } diff --git a/src/core/settings.h b/src/core/settings.h index 5b4b2b42a..bdf40eb1f 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -116,6 +116,7 @@ struct Settings bool gpu_disable_interlacing = false; bool gpu_force_ntsc_timings = false; bool gpu_widescreen_hack = false; + bool gpu_use_rgb5a1_framebuffer = true; bool gpu_pgxp_enable = false; bool gpu_pgxp_culling = true; bool gpu_pgxp_texture_correction = true;