This commit is contained in:
Connor McLaughlin 2021-02-17 00:14:20 +10:00
parent 65daf1d6a0
commit acbb1cb885
10 changed files with 447 additions and 175 deletions

View File

@ -1160,6 +1160,18 @@ void Context::DisableDebugReports()
}
}
// Queries the physical device for whether `format` can be used for an optimally-tiled 2D image that is
// sampled and, when `as_render_target` is set, additionally bound as a colour attachment.
// Returns true only when the image-format-properties query reports VK_SUCCESS.
bool Context::SupportsTextureFormat(VkFormat format, bool as_render_target /*= false*/) const
{
  // Sampling is always required; colour attachment usage is optional.
  const auto usage_bits = VK_IMAGE_USAGE_SAMPLED_BIT | (as_render_target ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : 0);

  // The returned limits are not inspected, only the result code matters here.
  VkImageFormatProperties format_props;
  const VkResult query_result = vkGetPhysicalDeviceImageFormatProperties(
    m_physical_device, format, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, usage_bits, 0, &format_props);
  return (query_result == VK_SUCCESS);
}
bool Context::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index)
{
for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++)

View File

@ -89,6 +89,7 @@ public:
// Support bits
ALWAYS_INLINE bool SupportsGeometryShaders() const { return m_device_features.geometryShader == VK_TRUE; }
ALWAYS_INLINE bool SupportsDualSourceBlend() const { return m_device_features.dualSrcBlend == VK_TRUE; }
bool SupportsTextureFormat(VkFormat format, bool as_render_target = false) const;
// Helpers for getting constants
ALWAYS_INLINE VkDeviceSize GetUniformBufferAlignment() const

View File

@ -48,6 +48,8 @@ bool GPU_HW::Initialize(HostDisplay* host_display)
m_resolution_scale = CalculateResolutionScale();
m_multisamples = std::min(g_settings.gpu_multisamples, m_max_multisamples);
m_use_rgb5a1_framebuffer =
g_settings.gpu_use_rgb5a1_framebuffer && !g_settings.gpu_true_color && m_supports_rgb5a1_framebuffer;
m_render_api = host_display->GetRenderAPI();
m_per_sample_shading = g_settings.gpu_per_sample_shading && m_supports_per_sample_shading;
m_true_color = g_settings.gpu_true_color;
@ -128,17 +130,20 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
const u32 resolution_scale = CalculateResolutionScale();
const u32 multisamples = std::min(m_max_multisamples, g_settings.gpu_multisamples);
const bool per_sample_shading = g_settings.gpu_per_sample_shading && m_supports_per_sample_shading;
const bool rgb5a1_framebuffer =
g_settings.gpu_use_rgb5a1_framebuffer && !g_settings.gpu_true_color && m_supports_rgb5a1_framebuffer;
const GPUDownsampleMode downsample_mode = GetDownsampleMode(resolution_scale);
const bool use_uv_limits = ShouldUseUVLimits();
*framebuffer_changed =
(m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_downsample_mode != downsample_mode);
*framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
m_downsample_mode != downsample_mode || rgb5a1_framebuffer != m_use_rgb5a1_framebuffer);
*shaders_changed =
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading ||
m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter ||
m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing ||
m_downsample_mode != downsample_mode || m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer());
// Compare the cached true-colour state with the new setting (a setting compared with itself never differs).
rgb5a1_framebuffer != m_use_rgb5a1_framebuffer || m_true_color != g_settings.gpu_true_color ||
m_per_sample_shading != per_sample_shading || m_scaled_dithering != g_settings.gpu_scaled_dithering ||
m_texture_filtering != g_settings.gpu_texture_filter || m_using_uv_limits != use_uv_limits ||
m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing || m_downsample_mode != downsample_mode ||
m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer());
if (m_resolution_scale != resolution_scale)
{
@ -167,6 +172,7 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
m_resolution_scale = resolution_scale;
m_multisamples = multisamples;
m_per_sample_shading = per_sample_shading;
m_use_rgb5a1_framebuffer = rgb5a1_framebuffer;
m_true_color = g_settings.gpu_true_color;
m_scaled_dithering = g_settings.gpu_scaled_dithering;
m_texture_filtering = g_settings.gpu_texture_filter;
@ -246,6 +252,7 @@ void GPU_HW::PrintSettingsToLog()
Log_InfoPrintf("Resolution Scale: %u (%ux%u), maximum %u", m_resolution_scale, VRAM_WIDTH * m_resolution_scale,
VRAM_HEIGHT * m_resolution_scale, m_max_resolution_scale);
Log_InfoPrintf("Multisampling: %ux%s", m_multisamples, m_per_sample_shading ? " (per sample shading)" : "");
Log_InfoPrintf("Framebuffer format: %s", m_use_rgb5a1_framebuffer ? "RGB5A1" : "RGBA8");
Log_InfoPrintf("Dithering: %s%s", m_true_color ? "Disabled" : "Enabled",
(!m_true_color && m_scaled_dithering) ? " (Scaled)" : "");
Log_InfoPrintf("Texture Filtering: %s", Settings::GetTextureFilterDisplayName(m_texture_filtering));

View File

@ -329,9 +329,11 @@ protected:
BitField<u8, bool, 0, 1> m_supports_per_sample_shading;
BitField<u8, bool, 1, 1> m_supports_dual_source_blend;
BitField<u8, bool, 2, 1> m_supports_adaptive_downsampling;
BitField<u8, bool, 3, 1> m_per_sample_shading;
BitField<u8, bool, 4, 1> m_scaled_dithering;
BitField<u8, bool, 5, 1> m_chroma_smoothing;
BitField<u8, bool, 3, 1> m_supports_rgb5a1_framebuffer;
BitField<u8, bool, 4, 1> m_per_sample_shading;
BitField<u8, bool, 5, 1> m_scaled_dithering;
BitField<u8, bool, 6, 1> m_chroma_smoothing;
BitField<u8, bool, 7, 1> m_use_rgb5a1_framebuffer;
u8 bits = 0;
};

View File

@ -237,6 +237,12 @@ void GPU_HW_D3D11::SetCapabilities()
m_max_multisamples = multisamples;
}
}
const UINT required_support = D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_TEXTURE2D;
UINT bgr5a1_support = 0;
m_supports_rgb5a1_framebuffer =
(SUCCEEDED(m_device->CheckFormatSupport(DXGI_FORMAT_B5G5R5A1_UNORM, &bgr5a1_support)) &&
((bgr5a1_support & required_support) == required_support));
}
bool GPU_HW_D3D11::CreateFramebuffer()
@ -247,21 +253,25 @@ bool GPU_HW_D3D11::CreateFramebuffer()
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
const u16 samples = static_cast<u16>(m_multisamples);
const DXGI_FORMAT texture_format = DXGI_FORMAT_R8G8B8A8_UNORM;
const DXGI_FORMAT render_texture_format =
m_use_rgb5a1_framebuffer ? DXGI_FORMAT_B5G5R5A1_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
const DXGI_FORMAT display_texture_format =
m_use_rgb5a1_framebuffer ? DXGI_FORMAT_B5G5R5A1_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
const DXGI_FORMAT depth_format = DXGI_FORMAT_D16_UNORM;
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, 1, samples, texture_format,
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, 1, samples, render_texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
!m_vram_depth_texture.Create(m_device.Get(), texture_width, texture_height, 1, samples, depth_format,
D3D11_BIND_DEPTH_STENCIL) ||
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, 1, 1, texture_format,
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, 1, 1, render_texture_format,
D3D11_BIND_SHADER_RESOURCE) ||
!m_display_texture.Create(m_device.Get(), GPU_MAX_DISPLAY_WIDTH * m_resolution_scale,
GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, texture_format,
GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, display_texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
!m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false))
((!m_use_rgb5a1_framebuffer || m_resolution_scale > 1) &&
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, render_texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)) ||
!m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, render_texture_format, false))
{
return false;
}
@ -278,7 +288,7 @@ bool GPU_HW_D3D11::CreateFramebuffer()
const u32 levels = GetAdaptiveDownsamplingMipLevels();
if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, static_cast<u16>(levels), 1,
texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
render_texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
!m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1),
texture_height >> (levels - 1), 1, 1, DXGI_FORMAT_R8_UNORM,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET))
@ -290,9 +300,9 @@ bool GPU_HW_D3D11::CreateFramebuffer()
for (u32 i = 0; i < levels; i++)
{
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D,
texture_format, i, 1);
const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D, texture_format,
i, 1);
render_texture_format, i, 1);
const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D,
render_texture_format, i, 1);
hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc,
m_downsample_mip_views[i].first.GetAddressOf());
@ -307,7 +317,7 @@ bool GPU_HW_D3D11::CreateFramebuffer()
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
if (!m_downsample_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format,
if (!m_downsample_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, render_texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET))
{
return false;
@ -497,8 +507,8 @@ bool GPU_HW_D3D11::CompileShaders()
g_settings.gpu_use_debug_device);
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_supports_dual_source_blend);
m_use_rgb5a1_framebuffer, m_true_color, m_scaled_dithering, m_texture_filtering,
m_using_uv_limits, m_pgxp_depth_buffer, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3) + 1;
@ -937,31 +947,77 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
// Get bounds with wrap-around handled.
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get());
m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr);
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
SetViewportAndScissor(0, 0, encoded_width, encoded_height);
DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms));
// Stage the readback.
m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_encoding_texture.GetD3DTexture(), 0, 0, 0, 0, 0,
encoded_width, encoded_height);
// And copy it into our shadow buffer.
if (m_vram_readback_texture.Map(m_context.Get(), false))
if (!m_use_rgb5a1_framebuffer || m_resolution_scale > 1)
{
m_vram_readback_texture.ReadPixels(0, 0, encoded_width * 2, encoded_height, VRAM_WIDTH,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
m_vram_readback_texture.Unmap(m_context.Get());
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get());
m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr);
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
SetViewportAndScissor(0, 0, encoded_width, encoded_height);
DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms));
// Stage the readback.
m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_encoding_texture.GetD3DTexture(), 0, 0, 0, 0, 0,
encoded_width, encoded_height);
// And copy it into our shadow buffer.
if (m_vram_readback_texture.Map(m_context.Get(), false))
{
m_vram_readback_texture.ReadPixels(0, 0, encoded_width * 2, encoded_height, VRAM_WIDTH,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
m_vram_readback_texture.Unmap(m_context.Get());
}
else
{
Log_ErrorPrintf("Failed to map VRAM readback texture");
}
}
else
{
Log_ErrorPrintf("Failed to map VRAM readback texture");
const u32 copy_width = copy_rect.GetWidth();
const u32 copy_height = copy_rect.GetHeight();
m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_texture.GetD3DTexture(), 0, copy_rect.left,
copy_rect.top, copy_rect.left, copy_rect.top, copy_width, copy_height);
// And copy it into our shadow buffer.
if (m_vram_readback_texture.Map(m_context.Get(), false))
{
// Needs to be swapped to PSX order.
// TODO: vector-ify.
const u32 src_pitch = m_vram_readback_texture.GetMappedSubresource().RowPitch;
const u32 dst_pitch = sizeof(u16) * VRAM_WIDTH;
const u32 copy_size = sizeof(u16) * copy_width;
const u8* src_ptr = reinterpret_cast<const u8*>(m_vram_readback_texture.GetMappedSubresource().pData) +
copy_rect.top * src_pitch + copy_rect.left * sizeof(u16);
u8* dst_ptr = reinterpret_cast<u8*>(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
for (u32 row = 0; row < copy_height; row++)
{
const u8* src_row_ptr = src_ptr;
u8* dst_row_ptr = dst_ptr;
for (u32 col = 0; col < copy_width; col++)
{
u16 pix;
std::memcpy(&pix, src_row_ptr, sizeof(pix));
src_row_ptr += sizeof(u16);
pix = (pix & 0x83E0) | ((pix >> 10) & 0x1F) | ((pix & 0x1F) << 10);
std::memcpy(dst_row_ptr, &pix, sizeof(pix));
dst_row_ptr += sizeof(u16);
}
src_ptr += src_pitch;
dst_ptr += dst_pitch;
}
m_vram_readback_texture.Unmap(m_context.Get());
}
else
{
Log_ErrorPrintf("Failed to map VRAM readback texture");
}
}
RestoreGraphicsAPIState();

View File

@ -378,6 +378,9 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
// adaptive smoothing would require texture views, which aren't in GLES.
m_supports_adaptive_downsampling = false;
// GLES mandates support for RGB5A1; desktop GL support is assumed here rather than queried (TODO: confirm).
m_supports_rgb5a1_framebuffer = true;
}
bool GPU_HW_OpenGL::CreateFramebuffer()
@ -386,19 +389,23 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
const u32 multisamples = m_multisamples;
// Sized internal formats and the matching client pixel data types for the VRAM and display textures.
// The data type must be a pixel-type enum (GL_UNSIGNED_BYTE for the RGBA8 case), not a sized internal format.
const GLenum render_texture_format = m_use_rgb5a1_framebuffer ? GL_RGB5_A1 : GL_RGBA8;
const GLenum render_texture_type = m_use_rgb5a1_framebuffer ? GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE;
const GLenum display_texture_format = m_use_rgb5a1_framebuffer ? GL_RGB5_A1 : GL_RGBA8;
const GLenum display_texture_type = m_use_rgb5a1_framebuffer ? GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE;
if (!m_vram_texture.Create(texture_width, texture_height, multisamples, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr,
false, true) ||
if (!m_vram_texture.Create(texture_width, texture_height, multisamples, render_texture_format, GL_RGBA,
render_texture_type, nullptr, false, true) ||
!m_vram_depth_texture.Create(texture_width, texture_height, multisamples, GL_DEPTH_COMPONENT16,
GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, nullptr, false) ||
!m_vram_read_texture.Create(texture_width, texture_height, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false,
true) ||
!m_vram_read_texture.Create(texture_width, texture_height, 1, render_texture_format, GL_RGBA, render_texture_type,
nullptr, false, true) ||
!m_vram_read_texture.CreateFramebuffer() ||
!m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr,
false) ||
(!m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, render_texture_format, GL_RGBA, render_texture_type,
nullptr, false)) ||
!m_vram_encoding_texture.CreateFramebuffer() ||
!m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale,
1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
1, display_texture_format, GL_RGBA, display_texture_type, nullptr, false) ||
!m_display_texture.CreateFramebuffer())
{
return false;
@ -416,7 +423,8 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
if (m_downsample_mode == GPUDownsampleMode::Box)
{
if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE) ||
if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, display_texture_format, GL_RGBA,
display_texture_type) ||
!m_downsample_texture.CreateFramebuffer())
{
return false;
@ -509,8 +517,8 @@ bool GPU_HW_OpenGL::CompilePrograms()
const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_supports_dual_source_blend);
m_use_rgb5a1_framebuffer, m_true_color, m_scaled_dithering, m_texture_filtering,
m_using_uv_limits, m_pgxp_depth_buffer, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 6;
@ -862,18 +870,20 @@ void GPU_HW_OpenGL::UpdateDisplay()
{
UpdateVRAMReadTexture();
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_read_texture.GetGLId())),
HostDisplayPixelFormat::RGBA8, m_vram_read_texture.GetWidth(),
static_cast<s32>(m_vram_read_texture.GetHeight()), 0,
m_vram_read_texture.GetHeight(), m_vram_read_texture.GetWidth(),
-static_cast<s32>(m_vram_read_texture.GetHeight()));
m_host_display->SetDisplayTexture(
reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_read_texture.GetGLId())),
m_use_rgb5a1_framebuffer ? HostDisplayPixelFormat::RGBA5551 : HostDisplayPixelFormat::RGBA8,
m_vram_read_texture.GetWidth(), static_cast<s32>(m_vram_read_texture.GetHeight()), 0,
m_vram_read_texture.GetHeight(), m_vram_read_texture.GetWidth(),
-static_cast<s32>(m_vram_read_texture.GetHeight()));
}
else
{
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
HostDisplayPixelFormat::RGBA8, m_vram_texture.GetWidth(),
static_cast<s32>(m_vram_texture.GetHeight()), 0, m_vram_texture.GetHeight(),
m_vram_texture.GetWidth(), -static_cast<s32>(m_vram_texture.GetHeight()));
m_host_display->SetDisplayTexture(
reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_use_rgb5a1_framebuffer ? HostDisplayPixelFormat::RGBA5551 : HostDisplayPixelFormat::RGBA8,
m_vram_texture.GetWidth(), static_cast<s32>(m_vram_texture.GetHeight()), 0, m_vram_texture.GetHeight(),
m_vram_texture.GetWidth(), -static_cast<s32>(m_vram_texture.GetHeight()));
}
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
@ -907,8 +917,9 @@ void GPU_HW_OpenGL::UpdateDisplay()
else
{
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
HostDisplayPixelFormat::RGBA8, m_vram_texture.GetWidth(),
m_vram_texture.GetHeight(), scaled_vram_offset_x,
m_use_rgb5a1_framebuffer ? HostDisplayPixelFormat::RGBA5551 :
HostDisplayPixelFormat::RGBA8,
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x,
m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width,
-static_cast<s32>(scaled_display_height));
}
@ -947,10 +958,11 @@ void GPU_HW_OpenGL::UpdateDisplay()
}
else
{
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
HostDisplayPixelFormat::RGBA8, m_display_texture.GetWidth(),
m_display_texture.GetHeight(), 0, scaled_display_height, scaled_display_width,
-static_cast<s32>(scaled_display_height));
m_host_display->SetDisplayTexture(
reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
m_use_rgb5a1_framebuffer ? HostDisplayPixelFormat::RGBA5551 : HostDisplayPixelFormat::RGBA8,
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, scaled_display_height, scaled_display_width,
-static_cast<s32>(scaled_display_height));
}
// restore state
@ -971,35 +983,95 @@ void GPU_HW_OpenGL::UpdateDisplay()
}
}
// Repacks a 16-bit pixel from the GL 5-5-5-1 bit order (RRRRRGGGGGBBBBBA, MSB to LSB)
// into the PSX bit order (ABBBBBGGGGGRRRRR).
static u16 SwapGL16ToPSX16(u16 val)
{
  const unsigned gl_pixel = val;
  const unsigned alpha = (gl_pixel & 0x0001u) << 15; // bit 0      -> bit 15
  const unsigned red = (gl_pixel >> 11) & 0x001Fu;   // bits 11-15 -> bits 0-4
  const unsigned green = (gl_pixel >> 1) & 0x03E0u;  // bits 6-10  -> bits 5-9
  const unsigned blue = (gl_pixel << 9) & 0x7C00u;   // bits 1-5   -> bits 10-14
  return static_cast<u16>(alpha | red | green | blue);
}
// Repacks a 16-bit pixel from the PSX bit order (ABBBBBGGGGGRRRRR, MSB to LSB)
// into the GL 5-5-5-1 bit order (RRRRRGGGGGBBBBBA).
static u16 SwapPSX16ToGL16(u16 val)
{
  const unsigned psx_pixel = val;
  const unsigned alpha = psx_pixel >> 15;             // bit 15     -> bit 0
  const unsigned red = (psx_pixel & 0x001Fu) << 11;   // bits 0-4   -> bits 11-15
  const unsigned green = (psx_pixel << 1) & 0x07C0u;  // bits 5-9   -> bits 6-10
  const unsigned blue = (psx_pixel >> 9) & 0x003Eu;   // bits 10-14 -> bits 1-5
  return static_cast<u16>(alpha | red | green | blue);
}
void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
// Get bounds with wrap-around handled.
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
if (!m_use_rgb5a1_framebuffer)
{
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight(), copy_rect.GetWidth(),
copy_rect.GetHeight()};
m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture.Bind();
m_vram_read_program.Bind();
UploadUniformBuffer(uniforms, sizeof(uniforms));
glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST);
glViewport(0, 0, encoded_width, encoded_height);
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight(), copy_rect.GetWidth(),
copy_rect.GetHeight()};
m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture.Bind();
m_vram_read_program.Bind();
UploadUniformBuffer(uniforms, sizeof(uniforms));
glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST);
glViewport(0, 0, encoded_width, encoded_height);
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
// Readback encoded texture.
m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glPixelStorei(GL_PACK_ALIGNMENT, 2);
glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2);
glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
RestoreGraphicsAPIState();
// Readback encoded texture.
m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glPixelStorei(GL_PACK_ALIGNMENT, 2);
glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2);
glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
RestoreGraphicsAPIState();
}
else
{
const u32 copy_width = copy_rect.GetWidth();
const u32 copy_height = copy_rect.GetHeight();
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id);
glPixelStorei(GL_PACK_ALIGNMENT, 2);
glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH);
glReadPixels(copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_height, copy_width, copy_height, GL_RGBA,
GL_UNSIGNED_SHORT_5_5_5_1, &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
// flip and convert colours
const u32 rows_to_flip = copy_height / 2;
u16* base_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left];
for (u32 row = 0; row < rows_to_flip; row++)
{
const u32 row1 = row;
const u32 row2 = copy_height - row - 1;
u16* row1_ptr = base_ptr + row1 * VRAM_WIDTH;
u16* row2_ptr = base_ptr + row2 * VRAM_WIDTH;
for (u32 col = 0; col < copy_width; col++)
{
const u16 temp = SwapGL16ToPSX16(row1_ptr[col]);
row1_ptr[col] = SwapGL16ToPSX16(row2_ptr[col]);
row2_ptr[col] = temp;
}
}
if (copy_height & 1u)
{
const u32 row = rows_to_flip;
u16* row_ptr = base_ptr + row * VRAM_WIDTH;
for (u32 col = 0; col < copy_width; col++)
row_ptr[col] = SwapGL16ToPSX16(row_ptr[col]);
}
}
}
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
@ -1108,44 +1180,87 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
GPU_HW::UpdateVRAM(x, y, width, height, data, set_mask, check_mask);
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
// reverse copy the rows so it matches opengl's lower-left origin
const u32 source_stride = width * sizeof(u16);
const u8* source_ptr = static_cast<const u8*>(data) + (source_stride * (height - 1));
u32* dest_ptr = static_cast<u32*>(map_result.pointer);
for (u32 row = 0; row < height; row++)
{
const u8* source_row_ptr = source_ptr;
for (u32 col = 0; col < width; col++)
{
u16 src_col;
std::memcpy(&src_col, source_row_ptr, sizeof(src_col));
source_row_ptr += sizeof(src_col);
*(dest_ptr++) = RGBA5551ToRGBA8888(src_col);
}
source_ptr -= source_stride;
}
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32));
m_texture_stream_buffer->Bind();
// have to write to the 1x texture first
if (m_resolution_scale > 1)
m_vram_encoding_texture.Bind();
else
m_vram_texture.Bind();
// lower-left origin flip happens here
const u32 flipped_y = VRAM_HEIGHT - y - height;
// update texture data
glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
reinterpret_cast<void*>(static_cast<uintptr_t>(map_result.buffer_offset)));
m_texture_stream_buffer->Unbind();
if (!m_use_rgb5a1_framebuffer)
{
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
// reverse copy the rows so it matches opengl's lower-left origin
const u32 source_stride = width * sizeof(u16);
const u8* source_ptr = static_cast<const u8*>(data) + (source_stride * (height - 1));
u32* dest_ptr = static_cast<u32*>(map_result.pointer);
for (u32 row = 0; row < height; row++)
{
const u8* source_row_ptr = source_ptr;
for (u32 col = 0; col < width; col++)
{
u16 src_col;
std::memcpy(&src_col, source_row_ptr, sizeof(src_col));
source_row_ptr += sizeof(src_col);
*(dest_ptr++) = RGBA5551ToRGBA8888(src_col);
}
source_ptr -= source_stride;
}
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32));
m_texture_stream_buffer->Bind();
// have to write to the 1x texture first
if (m_resolution_scale > 1)
m_vram_encoding_texture.Bind();
else
m_vram_texture.Bind();
// update texture data
glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
reinterpret_cast<void*>(static_cast<uintptr_t>(map_result.buffer_offset)));
m_texture_stream_buffer->Unbind();
}
else
{
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
// reverse copy the rows so it matches opengl's lower-left origin
const u32 source_stride = width * sizeof(u16);
const u8* source_ptr = static_cast<const u8*>(data) + (source_stride * (height - 1));
u16* dest_ptr = static_cast<u16*>(map_result.pointer);
for (u32 row = 0; row < height; row++)
{
const u8* source_row_ptr = source_ptr;
for (u32 col = 0; col < width; col++)
{
// TODO: Byte flip?
u16 src_col;
std::memcpy(&src_col, source_row_ptr, sizeof(src_col));
source_row_ptr += sizeof(src_col);
*(dest_ptr++) = SwapPSX16ToGL16(src_col);
}
source_ptr -= source_stride;
}
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32));
m_texture_stream_buffer->Bind();
// have to write to the 1x texture first
if (m_resolution_scale > 1)
m_vram_encoding_texture.Bind();
else
m_vram_texture.Bind();
// update texture data
glPixelStorei(GL_UNPACK_ALIGNMENT, 2);
glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1,
reinterpret_cast<void*>(static_cast<uintptr_t>(map_result.buffer_offset)));
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
m_texture_stream_buffer->Unbind();
}
if (m_resolution_scale > 1)
{

View File

@ -4,13 +4,13 @@
#include <glad.h>
GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples,
bool per_sample_shading, bool true_color, bool scaled_dithering,
GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth,
bool supports_dual_source_blend)
bool per_sample_shading, bool rgb5a1_framebuffer, bool true_color,
bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits,
bool pgxp_depth, bool supports_dual_source_blend)
: ShaderGen(render_api, supports_dual_source_blend), m_resolution_scale(resolution_scale),
m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_true_color(true_color),
m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits),
m_pgxp_depth(pgxp_depth)
m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_rgb5a1_framebuffer(rgb5a1_framebuffer),
m_true_color(true_color), m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering),
m_uv_limits(uv_limits), m_pgxp_depth(pgxp_depth)
{
}

View File

@ -6,8 +6,8 @@ class GPU_HW_ShaderGen : public ShaderGen
{
public:
GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading,
bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits,
bool pgxp_depth, bool supports_dual_source_blend);
bool rgb5a1_framebuffer, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering,
bool uv_limits, bool pgxp_depth, bool supports_dual_source_blend);
~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured);
@ -37,6 +37,7 @@ private:
u32 m_resolution_scale;
u32 m_multisamples;
bool m_per_sample_shading;
bool m_rgb5a1_framebuffer;
bool m_true_color;
bool m_scaled_dithering;
GPUTextureFilter m_texture_filter;

View File

@ -320,9 +320,12 @@ void GPU_HW_Vulkan::SetCapabilities()
m_supports_dual_source_blend = g_vulkan_context->GetDeviceFeatures().dualSrcBlend;
m_supports_per_sample_shading = g_vulkan_context->GetDeviceFeatures().sampleRateShading;
m_supports_adaptive_downsampling = true;
// The 16-bit format backs textures created with colour attachment usage, so query it as a render target
// rather than as a sampled-only texture.
m_supports_rgb5a1_framebuffer = g_vulkan_context->SupportsTextureFormat(VK_FORMAT_R5G5B5A1_UNORM_PACK16, true) ||
g_vulkan_context->SupportsTextureFormat(VK_FORMAT_B5G5R5A1_UNORM_PACK16, true);
Log_InfoPrintf("Dual-source blend: %s", m_supports_dual_source_blend ? "supported" : "not supported");
Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? "supported" : "not supported");
Log_InfoPrintf("RGB5A1 framebuffer: %s", m_supports_rgb5a1_framebuffer ? "supported" : "not supported");
Log_InfoPrintf("Max multisamples: %u", m_max_multisamples);
#ifdef __APPLE__
@ -509,36 +512,59 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
// scale vram size to internal resolution
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM;
u32 readback_texture_width = VRAM_WIDTH / 2;
u32 readback_texture_height = VRAM_HEIGHT;
// figure out which 16-bit format to use
VkFormat render_format = VK_FORMAT_R8G8B8A8_UNORM;
VkFormat display_format = VK_FORMAT_R8G8B8A8_UNORM;
if (m_use_rgb5a1_framebuffer)
{
// Prefer the RGB ordering and fall back to BGR. The chosen format is used for colour attachments, so it
// has to be supported as a render target, not just for sampling. If neither is usable the RGBA8 defaults
// (and the half-width readback texture) are kept.
if (g_vulkan_context->SupportsTextureFormat(VK_FORMAT_R5G5B5A1_UNORM_PACK16, true))
{
render_format = VK_FORMAT_R5G5B5A1_UNORM_PACK16;
display_format = VK_FORMAT_R5G5B5A1_UNORM_PACK16;
readback_texture_width = VRAM_WIDTH;
readback_texture_height = VRAM_HEIGHT;
}
else if (g_vulkan_context->SupportsTextureFormat(VK_FORMAT_B5G5R5A1_UNORM_PACK16, true))
{
render_format = VK_FORMAT_B5G5R5A1_UNORM_PACK16;
display_format = VK_FORMAT_B5G5R5A1_UNORM_PACK16;
readback_texture_width = VRAM_WIDTH;
readback_texture_height = VRAM_HEIGHT;
}
}
const VkFormat depth_format = VK_FORMAT_D16_UNORM;
const VkSampleCountFlagBits samples = static_cast<VkSampleCountFlagBits>(m_multisamples);
if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D,
if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, render_format, samples, VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
!m_vram_depth_texture.Create(texture_width, texture_height, 1, 1, depth_format, samples, VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
!m_vram_read_texture.Create(texture_width, texture_height, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
!m_vram_read_texture.Create(texture_width, texture_height, 1, 1, render_format, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
!m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale,
1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D,
1, 1, display_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
!m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
!m_vram_readback_texture.Create(readback_texture_width, readback_texture_height, 1, 1, render_format,
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ||
!m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2,
VRAM_HEIGHT))
!m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, render_format,
readback_texture_width, readback_texture_height))
{
return false;
}
m_vram_render_pass =
g_vulkan_context->GetRenderPass(texture_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD);
g_vulkan_context->GetRenderPass(render_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD);
m_vram_update_depth_render_pass =
g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
m_display_render_pass = g_vulkan_context->GetRenderPass(m_display_texture.GetFormat(), VK_FORMAT_UNDEFINED,
@ -605,7 +631,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
{
const u32 levels = GetAdaptiveDownsamplingMipLevels();
if (!m_downsample_texture.Create(texture_width, texture_height, levels, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
if (!m_downsample_texture.Create(texture_width, texture_height, levels, 1, render_format, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
@ -683,7 +709,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, render_format, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT))
@ -818,8 +844,8 @@ bool GPU_HW_Vulkan::CompilePipelines()
VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_supports_dual_source_blend);
m_use_rgb5a1_framebuffer, m_true_color, m_scaled_dithering, m_texture_filtering,
m_using_uv_limits, m_pgxp_depth_buffer, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3) + 1;
@ -1421,43 +1447,94 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
// Reads the VRAM rectangle described by (x, y, width, height) back from the GPU and stores it into the
// m_vram_shadow buffer, then restores the graphics state via RestoreGraphicsAPIState().
//
// Two readback strategies are visible below:
//  - an "encode" path, which draws into m_vram_readback_texture using m_vram_readback_pipeline and then stages
//    and reads back that texture, and
//  - a "direct copy" path, which copies straight into the staging texture and converts rows on the CPU.
//
// NOTE(review): this span appears to be a diff hunk whose +/- markers were stripped, so the pre-change and
// post-change versions of many statements both appear, one after the other. The duplicated statements are not
// meant to execute twice - confirm against the real source file before editing any code here.
// Get bounds with wrap-around handled.
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
// The encode path is taken unless the RGB5A1 framebuffer is in use and the resolution scale is not above 1.
if (!m_use_rgb5a1_framebuffer || m_resolution_scale > 1)
{
// The encoded output is half the copy width, rounded up, and the full copy height.
// Presumably two 16-bit pixels are packed into each output texel - TODO confirm against the readback shader.
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
EndRenderPass();
EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// The VRAM texture is sampled by the encode draw, and the readback texture is its colour attachment.
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use
// the actual size we're rendering to...
BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(),
m_vram_readback_texture.GetHeight());
// Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we
// use the actual size we're rendering to...
BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(),
m_vram_readback_texture.GetHeight());
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
uniforms);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
&m_vram_read_descriptor_set, 0, nullptr);
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height);
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
// Encode the 24-bit texture as 16-bit.
// The source rectangle (left, top, width, height) is passed to the fragment stage as push constants; the draw
// itself is three vertices with the viewport and scissor limited to the encoded size.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
uniforms);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
&m_vram_read_descriptor_set, 0, nullptr);
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height);
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
EndRenderPass();
EndRenderPass();
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// The readback texture becomes the copy source, and the VRAM texture is returned to its colour-attachment layout.
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// Stage the readback.
m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width,
encoded_height);
// Stage the readback.
m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width,
encoded_height);
// And copy it into our shadow buffer (will execute command buffer and stall).
m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left],
VRAM_WIDTH * sizeof(u16));
// And copy it into our shadow buffer (will execute command buffer and stall).
// The destination starts at the rectangle's top-left pixel in the shadow buffer, with a row pitch of
// VRAM_WIDTH 16-bit pixels.
m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left],
VRAM_WIDTH * sizeof(u16));
}
else
{
// Direct copy path: reached only when m_use_rgb5a1_framebuffer is set and m_resolution_scale is not above 1.
// No encode draw is issued here; rows are copied (and, for one format, channel-swapped) on the CPU.
const u32 copy_width = copy_rect.GetWidth();
const u32 copy_height = copy_rect.GetHeight();
EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
// NOTE(review): the texture transitioned to TRANSFER_SRC above is m_vram_texture, but the copy below names
// m_vram_readback_texture as its source, and nothing in this branch renders into the readback texture.
// This looks like it should copy from m_vram_texture - confirm.
// NOTE(review): unlike the encode path, this branch has no visible transition of m_vram_texture back to
// COLOR_ATTACHMENT_OPTIMAL - verify whether CopyFromTexture or RestoreGraphicsAPIState takes care of it.
// NOTE(review): the branch condition does not look at the sample count; confirm that a multisampled VRAM
// texture cannot reach this copy.
m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, copy_rect.left, copy_rect.top, 0, 0, 0, 0,
copy_width, copy_height);
m_vram_readback_staging_texture.Flush();
// Source rows advance by the staging texture's mapped stride, destination rows by one full VRAM row of 16-bit
// pixels; only copy_width pixels are transferred per row.
const u32 src_stride = m_vram_readback_staging_texture.GetMappedStride();
const u32 dst_stride = VRAM_WIDTH * sizeof(u16);
const u32 copy_size = copy_width * sizeof(u16);
const u8* src_ptr = reinterpret_cast<const u8*>(m_vram_readback_staging_texture.GetMappedPointer());
u8* dst_ptr = reinterpret_cast<u8*>(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
if (m_vram_texture.GetFormat() == VK_FORMAT_R5G5B5A1_UNORM_PACK16)
{
// Pixels are copied unmodified, one row at a time.
// NOTE(review): this assumes the R5G5B5A1 packing matches the shadow buffer's 16-bit layout bit for bit -
// confirm.
for (u32 row = 0; row < copy_height; row++)
{
std::memcpy(dst_ptr, src_ptr, copy_size);
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
else
{
// Any other format gets a per-pixel conversion. Presumably this is the B5G5R5A1 fallback chosen when the
// framebuffer was created - TODO confirm no other 16-bit format can reach here.
for (u32 row = 0; row < copy_height; row++)
{
const u8* src_row_ptr = src_ptr;
u8* dst_row_ptr = dst_ptr;
for (u32 col = 0; col < copy_width; col++)
{
// Each 16-bit pixel is loaded and stored through memcpy on the byte pointers.
u16 pix;
std::memcpy(&pix, src_row_ptr, sizeof(pix));
src_row_ptr += sizeof(u16);
// Keep bit 15 and bits 5-9 (mask 0x83E0) in place and exchange the 5-bit fields at bits 0-4 and
// bits 10-14.
pix = (pix & 0x83E0) | ((pix >> 10) & 0x1F) | ((pix & 0x1F) << 10);
std::memcpy(dst_row_ptr, &pix, sizeof(pix));
dst_row_ptr += sizeof(u16);
}
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
}
RestoreGraphicsAPIState();
}

View File

@ -116,6 +116,7 @@ struct Settings
bool gpu_disable_interlacing = false;
bool gpu_force_ntsc_timings = false;
bool gpu_widescreen_hack = false;
bool gpu_use_rgb5a1_framebuffer = true;
bool gpu_pgxp_enable = false;
bool gpu_pgxp_culling = true;
bool gpu_pgxp_texture_correction = true;