From 26d8c85cd94ca92d6c3183c6c1ca03f5fb347b8b Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Tue, 18 May 2021 14:18:27 +1000 Subject: [PATCH] wip --- src/core/gpu_hw_d3d11.cpp | 54 +++++++++++++++++++++++++++++++++-- src/core/gpu_hw_d3d11.h | 6 ++++ src/core/gpu_hw_shadergen.cpp | 34 ++++++++++++++++------ src/core/gpu_hw_shadergen.h | 1 + src/core/shadergen.cpp | 32 +++++++++++++++++---- src/core/shadergen.h | 1 + 6 files changed, 110 insertions(+), 18 deletions(-) diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 09ba7e78b..9469226ce 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -281,6 +281,26 @@ bool GPU_HW_D3D11::CreateFramebuffer() if (FAILED(hr)) return false; + if (!m_force_progressive_scan) + { + const CD3D11_TEXTURE2D_DESC texture_desc(texture_format, texture_width, texture_height, 2, 1, + D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); + const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_TEXTURE2DARRAY, texture_format); + const CD3D11_RENDER_TARGET_VIEW_DESC odd_rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2DARRAY, texture_format, 0, 0, 1); + const CD3D11_RENDER_TARGET_VIEW_DESC even_rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2DARRAY, texture_format, 0, 1, 1); + if (FAILED(hr = m_device->CreateTexture2D(&texture_desc, nullptr, m_field_texture.ReleaseAndGetAddressOf())) || + FAILED(hr = m_device->CreateShaderResourceView(m_field_texture.Get(), &srv_desc, + m_field_texture_srv.ReleaseAndGetAddressOf())) || + FAILED(hr = m_device->CreateRenderTargetView(m_field_texture.Get(), &odd_rtv_desc, + m_field_texture_odd_rtv.ReleaseAndGetAddressOf())) || + FAILED(hr = m_device->CreateRenderTargetView(m_field_texture.Get(), &even_rtv_desc, + m_field_texture_even_rtv.ReleaseAndGetAddressOf()))) + + { + return false; + } + } + if (m_downsample_mode == GPUDownsampleMode::Adaptive) { const u32 levels = GetAdaptiveDownsamplingMipLevels(); @@ -509,7 +529,7 @@ bool GPU_HW_D3D11::CompileShaders() m_pgxp_depth_buffer, m_supports_dual_source_blend); Common::Timer compile_time; - const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3) + 1; + const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3) + 1 + 1; int progress_value = 0; #define UPDATE_PROGRESS() \ do \ @@ -655,6 +675,11 @@ bool GPU_HW_D3D11::CompileShaders() } } + m_field_blend_pixel_shader = + shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateFieldBlendFragmentShader()); + if (!m_field_blend_pixel_shader) + return false; + UPDATE_PROGRESS(); if (m_downsample_mode == GPUDownsampleMode::Adaptive) @@ -695,6 +720,7 @@ void GPU_HW_D3D11::DestroyShaders() m_downsample_blur_pass_pixel_shader.Reset(); m_downsample_mid_pass_pixel_shader.Reset(); m_downsample_first_pass_pixel_shader.Reset(); + m_field_blend_pixel_shader.Reset(); m_display_pixel_shaders = {}; m_vram_update_depth_pixel_shader.Reset(); m_vram_copy_pixel_shader.Reset(); @@ -879,7 +905,7 @@ void GPU_HW_D3D11::UpdateDisplay() const u32 display_width = m_crtc_state.display_vram_width; const u32 display_height = m_crtc_state.display_vram_height; const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; + u32 scaled_display_height = display_height * resolution_scale; const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); if (IsDisplayDisabled()) @@ -905,8 +931,23 @@ void GPU_HW_D3D11::UpdateDisplay() } else { + if (interlaced != InterlacedRenderMode::None) + { + scaled_display_height /= 2; + + m_context->OMSetRenderTargets(1, + GetInterlacedDisplayField() ? m_field_texture_even_rtv.GetAddressOf() : + m_field_texture_odd_rtv.GetAddressOf(), + nullptr); + SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); + } + else + { + m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); + SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); + } + m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); - m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); @@ -935,6 +976,13 @@ void GPU_HW_D3D11::UpdateDisplay() scaled_display_width, scaled_display_height); } + if (interlaced != InterlacedRenderMode::None) + { + m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); + m_context->PSSetShaderResources(0, 1, m_field_texture_srv.GetAddressOf()); + DrawUtilityShader(m_field_blend_pixel_shader.Get(), nullptr, 0); + } + RestoreGraphicsAPIState(); } diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index eec2867b2..1fd5d7402 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -88,6 +88,11 @@ private: D3D11::Texture m_vram_read_texture; D3D11::Texture m_vram_encoding_texture; D3D11::Texture m_display_texture; + ComPtr m_field_texture; + ComPtr m_field_texture_srv; + ComPtr m_field_texture_odd_rtv; + ComPtr m_field_texture_even_rtv; + D3D11::StreamBuffer m_vertex_stream_buffer; @@ -130,6 +135,7 @@ private: ComPtr m_vram_copy_pixel_shader; ComPtr m_vram_update_depth_pixel_shader; std::array, 3>, 2> m_display_pixel_shaders; // [depth_24][interlaced] + ComPtr m_field_blend_pixel_shader; D3D11::Texture m_vram_replacement_texture; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 4484ab084..ec71bbb75 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1100,15 +1100,8 @@ float3 SampleVRAM24Smoothed(uint2 icoords) { uint2 icoords = uint2(v_pos.xy) + uint2(u_crop_left, 0u); - #if INTERLACED - if ((fixYCoord(icoords.y) & 1u) != u_field_offset) - discard; - - #if !INTERLEAVED - icoords.y /= 2u; - #else - icoords.y &= ~1u; - #endif + #if INTERLACED && INTERLEAVED + icoords.y *= 2u; #endif #if DEPTH_24BIT @@ -1126,6 +1119,29 @@ float3 SampleVRAM24Smoothed(uint2 icoords) return ss.str(); } +std::string GPU_HW_ShaderGen::GenerateFieldBlendFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + + WriteCommonFunctions(ss); + DeclareTextureArray(ss, "samp0", 0); + + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1); + ss << R"( +{ + int2 icoords = int2(v_pos.xy); + + float3 odd = LOAD_TEXTURE_ARRAY(samp0, icoords, 0, 0).rgb; + float3 even = LOAD_TEXTURE_ARRAY(samp0, icoords, 1, 0).rgb; + + o_col0 = float4((odd + even) / 2.0, 1.0); +} +)"; + + return ss.str(); +} + std::string GPU_HW_ShaderGen::GenerateVRAMReadFragmentShader() { std::stringstream ss; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index dff617e97..fff85fed6 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -16,6 +16,7 @@ public: std::string GenerateInterlacedFillFragmentShader(); std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode, bool smooth_chroma); + std::string GenerateFieldBlendFragmentShader(); std::string GenerateVRAMReadFragmentShader(); std::string GenerateVRAMWriteFragmentShader(bool use_ssbo); std::string GenerateVRAMCopyFragmentShader(); diff --git a/src/core/shadergen.cpp b/src/core/shadergen.cpp index 25748c47c..c58cbe3d9 100644 --- a/src/core/shadergen.cpp +++ b/src/core/shadergen.cpp @@ -178,6 +178,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLod(name, coords, level, offset)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; ss << "#define LOAD_TEXTURE_MS(name, coords, sample) texelFetch(name, coords, int(sample))\n"; + ss << "#define LOAD_TEXTURE_ARRAY(name, coords, layer, mip) texelFetch(name, int3((coords), (layer)), (mip))\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n"; ss << "#define BEGIN_ARRAY(type, size) type[size](\n"; @@ -220,6 +221,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) "offset)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n"; ss << "#define LOAD_TEXTURE_MS(name, coords, sample) name.Load(coords, sample)\n"; + ss << "#define LOAD_TEXTURE_ARRAY(name, coords, layer, mip) name.Load(int4((coords), (layer), (mip)))\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n"; ss << "#define BEGIN_ARRAY(type, size) {\n"; @@ -280,6 +282,24 @@ void ShaderGen::DeclareTexture(std::stringstream& ss, const char* name, u32 inde } } +void ShaderGen::DeclareTextureArray(std::stringstream& ss, const char* name, u32 index) +{ + if (m_glsl) + { + if (IsVulkan()) + ss << "layout(set = 0, binding = " << (index + 1u) << ") "; + else if (m_use_glsl_binding_layout) + ss << "layout(binding = " << index << ") "; + + ss << "uniform sampler2DArray " << name << ";\n"; + } + else + { + ss << "Texture2DArray " << name << " : register(t" << index << ");\n"; + ss << "SamplerState " << name << "_ss : register(s" << index << ");\n"; + } +} + void ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned) { if (m_glsl) @@ -349,7 +369,7 @@ void ShaderGen::DeclareVertexEntryPoint( for (u32 i = 0; i < num_texcoord_outputs; i++) ss << " " << qualifier << "float2 v_tex" << i << ";\n"; - for (const auto &[qualifiers, name] : additional_outputs) + for (const auto& [qualifiers, name] : additional_outputs) { const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; ss << " " << qualifier_to_use << " " << name << ";\n"; @@ -366,7 +386,7 @@ void ShaderGen::DeclareVertexEntryPoint( for (u32 i = 0; i < num_texcoord_outputs; i++) ss << qualifier << "out float2 v_tex" << i << ";\n"; - for (const auto &[qualifiers, name] : additional_outputs) + for (const auto& [qualifiers, name] : additional_outputs) { const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; ss << qualifier_to_use << " out " << name << ";\n"; @@ -408,7 +428,7 @@ void ShaderGen::DeclareVertexEntryPoint( ss << " " << qualifier << "out float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; u32 additional_counter = num_texcoord_outputs; - for (const auto &[qualifiers, name] : additional_outputs) + for (const auto& [qualifiers, name] : additional_outputs) { const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; ss << " " << qualifier_to_use << " out " << name << " : TEXCOORD" << additional_counter << ",\n"; @@ -442,7 +462,7 @@ void ShaderGen::DeclareFragmentEntryPoint( for (u32 i = 0; i < num_texcoord_inputs; i++) ss << " " << qualifier << "float2 v_tex" << i << ";\n"; - for (const auto &[qualifiers, name] : additional_inputs) + for (const auto& [qualifiers, name] : additional_inputs) { const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; ss << " " << qualifier_to_use << " " << name << ";\n"; @@ -459,7 +479,7 @@ void ShaderGen::DeclareFragmentEntryPoint( for (u32 i = 0; i < num_texcoord_inputs; i++) ss << qualifier << "in float2 v_tex" << i << ";\n"; - for (const auto &[qualifiers, name] : additional_inputs) + for (const auto& [qualifiers, name] : additional_inputs) { const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; ss << qualifier_to_use << " in " << name << ";\n"; @@ -512,7 +532,7 @@ void ShaderGen::DeclareFragmentEntryPoint( ss << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; u32 additional_counter = num_texcoord_inputs; - for (const auto &[qualifiers, name] : additional_inputs) + for (const auto& [qualifiers, name] : additional_inputs) { const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; ss << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter << ",\n"; diff --git a/src/core/shadergen.h b/src/core/shadergen.h index f3a9196c9..cc209e498 100644 --- a/src/core/shadergen.h +++ b/src/core/shadergen.h @@ -31,6 +31,7 @@ protected: void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members, bool push_constant_on_vulkan); void DeclareTexture(std::stringstream& ss, const char* name, u32 index, bool multisampled = false); + void DeclareTextureArray(std::stringstream& ss, const char* name, u32 index); void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned); void DeclareVertexEntryPoint(std::stringstream& ss, const std::initializer_list& attributes, u32 num_color_outputs, u32 num_texcoord_outputs,