From 9ef7e8c5d05cfc3addf224d447ad3ab30b64bb76 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 1 Jun 2024 17:04:35 +1000 Subject: [PATCH] GPU/HW: Use GSVector instead of Rectangle --- src/core/gpu.cpp | 78 +-- src/core/gpu.h | 29 +- src/core/gpu_commands.cpp | 2 + src/core/gpu_hw.cpp | 636 ++++++++++++------------ src/core/gpu_hw.h | 40 +- src/core/gpu_sw.cpp | 49 +- src/core/gpu_types.h | 21 +- src/core/system.cpp | 4 +- src/util/d3d11_device.cpp | 14 +- src/util/d3d11_device.h | 4 +- src/util/d3d12_device.cpp | 19 +- src/util/d3d12_device.h | 8 +- src/util/d3d_common.cpp | 11 +- src/util/gpu_device.cpp | 27 +- src/util/gpu_device.h | 11 +- src/util/gpu_texture.h | 7 +- src/util/metal_device.h | 9 +- src/util/opengl_device.cpp | 18 +- src/util/opengl_device.h | 10 +- src/util/postprocessing.cpp | 9 +- src/util/postprocessing.h | 4 +- src/util/postprocessing_shader.h | 8 +- src/util/postprocessing_shader_fx.cpp | 46 +- src/util/postprocessing_shader_fx.h | 6 +- src/util/postprocessing_shader_glsl.cpp | 11 +- src/util/postprocessing_shader_glsl.h | 6 +- src/util/vulkan_device.cpp | 29 +- src/util/vulkan_device.h | 8 +- 28 files changed, 586 insertions(+), 538 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 1e5b75d97..6bd04803e 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -21,6 +21,7 @@ #include "common/align.h" #include "common/error.h" #include "common/file_system.h" +#include "common/gsvector_formatter.h" #include "common/heap_array.h" #include "common/log.h" #include "common/path.h" @@ -351,6 +352,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ m_draw_mode.texture_page_changed = true; m_draw_mode.texture_window_changed = true; m_drawing_area_changed = true; + SetClampedDrawingArea(); UpdateDMARequest(); } @@ -1089,12 +1091,11 @@ void GPU::UpdateCommandTickEvent() void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, float* display_y) const { - 
const Common::Rectangle draw_rc = - CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()); + const GSVector4i draw_rc = CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight(), true); // convert coordinates to active display region, then to full display region - const float scaled_display_x = (window_x - static_cast(draw_rc.left)) / static_cast(draw_rc.GetWidth()); - const float scaled_display_y = (window_y - static_cast(draw_rc.top)) / static_cast(draw_rc.GetHeight()); + const float scaled_display_x = (window_x - static_cast(draw_rc.left)) / static_cast(draw_rc.width()); + const float scaled_display_y = (window_y - static_cast(draw_rc.top)) / static_cast(draw_rc.height()); // scale back to internal resolution *display_x = scaled_display_x * static_cast(m_crtc_state.display_width); @@ -1632,6 +1633,21 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he } } +void GPU::SetClampedDrawingArea() +{ + if (!IsDrawingAreaIsValid()) [[unlikely]] + { + m_clamped_drawing_area = GSVector4i::zero(); + return; + } + + const u32 right = std::min(m_drawing_area.right + 1, static_cast(VRAM_WIDTH)); + const u32 left = std::min(m_drawing_area.left, std::min(m_drawing_area.right, VRAM_WIDTH - 1)); + const u32 bottom = std::min(m_drawing_area.bottom + 1, static_cast(VRAM_HEIGHT)); + const u32 top = std::min(m_drawing_area.top, std::min(m_drawing_area.bottom, VRAM_HEIGHT - 1)); + m_clamped_drawing_area = GSVector4i(left, top, right, bottom); +} + void GPU::SetDrawMode(u16 value) { GPUDrawModeReg new_mode_reg{static_cast(value & GPUDrawModeReg::MASK)}; @@ -1917,16 +1933,15 @@ bool GPU::PresentDisplay() { FlushRender(); - const Common::Rectangle draw_rect = - m_display_texture ? CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()) : - Common::Rectangle(); + const GSVector4i draw_rect = m_display_texture ? 
+ CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()) : + GSVector4i::zero(); return RenderDisplay(nullptr, draw_rect, !g_settings.debugging.show_vram); } -bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_rect, bool postfx) +bool GPU::RenderDisplay(GPUTexture* target, const GSVector4i draw_rect, bool postfx) { - GL_SCOPE_FMT("RenderDisplay: {}x{} at {},{}", draw_rect.GetWidth(), draw_rect.GetHeight(), draw_rect.left, - draw_rect.top); + GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); if (m_display_texture) m_display_texture->MakeReadyForSampling(); @@ -1947,8 +1962,8 @@ bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_r // Now we can apply the post chain. GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); - if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, 0, 0, - display_texture_view_width, display_texture_view_height, + if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, + GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), display_texture_view_width, display_texture_view_height, m_crtc_state.display_width, m_crtc_state.display_height)) { @@ -1966,7 +1981,7 @@ bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_r (postfx && PostProcessing::DisplayChain.IsActive() && !g_gpu_device->GetWindowInfo().IsSurfaceless() && hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); - const Common::Rectangle real_draw_rect = + const GSVector4i real_draw_rect = g_gpu_device->UsesLowerLeftOrigin() ? 
GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect; if (really_postfx) { @@ -2009,11 +2024,9 @@ bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_r { texture_filter_linear = true; uniforms.params[0] = std::max( - std::floor(static_cast(draw_rect.GetWidth()) / static_cast(m_display_texture_view_width)), - 1.0f); + std::floor(static_cast(draw_rect.width()) / static_cast(m_display_texture_view_width)), 1.0f); uniforms.params[1] = std::max( - std::floor(static_cast(draw_rect.GetHeight()) / static_cast(m_display_texture_view_height)), - 1.0f); + std::floor(static_cast(draw_rect.height()) / static_cast(m_display_texture_view_height)), 1.0f); uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; } @@ -2048,8 +2061,7 @@ bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_r uniforms.src_size[3] = rcp_height; g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); - g_gpu_device->SetViewportAndScissor(real_draw_rect.left, real_draw_rect.top, real_draw_rect.GetWidth(), - real_draw_rect.GetHeight()); + g_gpu_device->SetViewportAndScissor(real_draw_rect); g_gpu_device->Draw(3, 0); } @@ -2066,14 +2078,11 @@ bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_r const s32 orig_height = static_cast(std::ceil(static_cast(m_crtc_state.display_height) * upscale_y)); return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, - real_draw_rect.left, real_draw_rect.top, real_draw_rect.GetWidth(), - real_draw_rect.GetHeight(), orig_width, orig_height, - m_crtc_state.display_width, m_crtc_state.display_height); + real_draw_rect, orig_width, orig_height, m_crtc_state.display_width, + m_crtc_state.display_height); } else - { return true; - } } void GPU::DestroyDeinterlaceTextures() @@ -2303,8 +2312,7 @@ bool GPU::ApplyChromaSmoothing() return true; } -Common::Rectangle GPU::CalculateDrawRect(s32 
window_width, s32 window_height, - bool apply_aspect_ratio /* = true */) const +GSVector4i GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_aspect_ratio /* = true */) const { const bool integer_scale = (g_settings.display_scaling == DisplayScalingMode::NearestInteger || g_settings.display_scaling == DisplayScalingMode::BlinearInteger); @@ -2401,9 +2409,11 @@ Common::Rectangle GPU::CalculateDrawRect(s32 window_width, s32 window_heigh } // TODO: This should be a float rectangle. But because GL is lame, it only has integer viewports... - return Common::Rectangle::FromExtents( - static_cast(active_left * scale + left_padding), static_cast(active_top * scale + top_padding), - static_cast(active_width * scale), static_cast(active_height * scale)); + const s32 left = static_cast(active_left * scale + left_padding); + const s32 top = static_cast(active_top * scale + top_padding); + const s32 right = left + static_cast(active_width * scale); + const s32 bottom = top + static_cast(active_height * scale); + return GSVector4i(left, top, right, bottom); } bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, @@ -2575,7 +2585,7 @@ bool GPU::WriteDisplayTextureToFile(std::string filename, bool compress_on_threa flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), false, compress_on_thread); } -bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangle& draw_rect, bool postfx, +bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i draw_rect, bool postfx, std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) { const GPUTexture::Format hdformat = @@ -2627,15 +2637,15 @@ bool GPU::RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mod { u32 width = g_gpu_device->GetWindowWidth(); u32 height = g_gpu_device->GetWindowHeight(); - Common::Rectangle draw_rect = CalculateDrawRect(width, height); + 
GSVector4i draw_rect = CalculateDrawRect(width, height, true); const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_settings.debugging.show_vram); if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) { if (mode == DisplayScreenshotMode::InternalResolution) { - const u32 draw_width = static_cast(draw_rect.GetWidth()); - const u32 draw_height = static_cast(draw_rect.GetHeight()); + const u32 draw_width = static_cast(draw_rect.width()); + const u32 draw_height = static_cast(draw_rect.height()); // If internal res, scale the computed draw rectangle to the internal res. // We re-use the draw rect because it's already been AR corrected. @@ -2679,7 +2689,7 @@ bool GPU::RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mod } // Remove padding, it's not part of the framebuffer. - draw_rect.Set(0, 0, static_cast(width), static_cast(height)); + draw_rect = GSVector4i(0, 0, static_cast(width), static_cast(height)); } if (width == 0 || height == 0) return false; diff --git a/src/core/gpu.h b/src/core/gpu.h index 1b86c6f05..3b8759aee 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -10,7 +10,6 @@ #include "common/bitfield.h" #include "common/fifo_queue.h" -#include "common/rectangle.h" #include "common/types.h" #include @@ -208,13 +207,13 @@ public: virtual void FlushRender() = 0; /// Helper function for computing the draw rectangle in a larger window. - Common::Rectangle CalculateDrawRect(s32 window_width, s32 window_height, bool apply_aspect_ratio = true) const; + GSVector4i CalculateDrawRect(s32 window_width, s32 window_height, bool apply_aspect_ratio = true) const; /// Helper function to save current display texture to PNG. bool WriteDisplayTextureToFile(std::string filename, bool compress_on_thread = false); /// Renders the display, optionally with postprocessing to the specified image. 
- bool RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangle& draw_rect, bool postfx, + bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i draw_rect, bool postfx, std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format); /// Helper function to save screenshot to PNG. @@ -276,6 +275,9 @@ protected: /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); } + /// Updates drawing area that's suitable for clamping. + void SetClampedDrawingArea(); + /// Sets/decodes GP0(E1h) (set draw mode). void SetDrawMode(u16 bits); @@ -323,23 +325,21 @@ protected: virtual void DrawRendererStats(); virtual void OnBufferSwapped(); - ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded, + ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(GSVector4i v1, GSVector4i v2, GSVector4i v3, bool shaded, bool textured, bool semitransparent) { // This will not produce the correct results for triangles which are partially outside the clip area. // However, usually it'll undershoot not overshoot. If we wanted to make this more accurate, we'd need to intersect // the edges with the clip rectangle. // TODO: Coordinates are exclusive, so off by one here... 
- const s32 clip_right = static_cast(m_drawing_area.right) + 1; - const s32 clip_bottom = static_cast(m_drawing_area.bottom) + 1; - x1 = std::clamp(x1, static_cast(m_drawing_area.left), clip_right); - x2 = std::clamp(x2, static_cast(m_drawing_area.left), clip_right); - x3 = std::clamp(x3, static_cast(m_drawing_area.left), clip_right); - y1 = std::clamp(y1, static_cast(m_drawing_area.top), clip_bottom); - y2 = std::clamp(y2, static_cast(m_drawing_area.top), clip_bottom); - y3 = std::clamp(y3, static_cast(m_drawing_area.top), clip_bottom); + const GSVector4i clamp_min = m_clamped_drawing_area; // would be xyxy(), but zw isn't used. + const GSVector4i clamp_max = m_clamped_drawing_area.zwzw(); + v1 = v1.sat_i32(clamp_min, clamp_max); + v2 = v2.sat_i32(clamp_min, clamp_max); + v3 = v3.sat_i32(clamp_min, clamp_max); - TickCount pixels = std::abs((x1 * y2 + x2 * y3 + x3 * y1 - x1 * y3 - x2 * y1 - x3 * y2) / 2); + TickCount pixels = + std::abs((v1.x * v2.y + v2.x * v3.y + v3.x * v1.y - v1.x * v3.y - v2.x * v1.y - v3.x * v2.y) / 2); if (textured) pixels += pixels; if (semitransparent || m_GPUSTAT.check_mask_before_draw) @@ -479,6 +479,7 @@ protected: GPUDrawingArea m_drawing_area = {}; GPUDrawingOffset m_drawing_offset = {}; + GSVector4i m_clamped_drawing_area = {}; bool m_console_is_pal = false; bool m_set_texture_disable_mask = false; @@ -609,7 +610,7 @@ protected: void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height); - bool RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_rect, bool postfx); + bool RenderDisplay(GPUTexture* target, const GSVector4i draw_rect, bool postfx); bool Deinterlace(u32 field, u32 line_skip); bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 06a18f6cb..24c9a927c 100644 --- a/src/core/gpu_commands.cpp +++ 
b/src/core/gpu_commands.cpp @@ -250,6 +250,7 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand() m_drawing_area.left = left; m_drawing_area.top = top; m_drawing_area_changed = true; + SetClampedDrawingArea(); } AddCommandTicks(1); @@ -271,6 +272,7 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand() m_drawing_area.right = right; m_drawing_area.bottom = bottom; m_drawing_area_changed = true; + SetClampedDrawingArea(); } AddCommandTicks(1); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index d00a8eb76..ccdb68049 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -24,6 +24,7 @@ #include "imgui.h" #include +#include #include #include @@ -49,6 +50,19 @@ ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) return std::tie(v1, v2); } +/// Returns the distance between two rectangles. +ALWAYS_INLINE static float RectDistance(const GSVector4i lhs, const GSVector4i rhs) +{ + const s32 lcx = (lhs.left + ((lhs.right - lhs.left) / 2)); + const s32 lcy = (lhs.top + ((lhs.bottom - lhs.top) / 2)); + const s32 rcx = (rhs.left + ((rhs.right - rhs.left) / 2)); + const s32 rcy = (rhs.top + ((rhs.bottom - rhs.top) / 2)); + const s32 dx = (lcx - rcx); + const s32 dy = (lcy - rcy); + const s32 distsq = (dx * dx) + (dy * dy); + return std::sqrt(static_cast(distsq)); +} + ALWAYS_INLINE static u32 GetMaxResolutionScale() { return g_gpu_device->GetMaxTextureSize() / VRAM_WIDTH; @@ -96,20 +110,24 @@ ALWAYS_INLINE static bool IsBlendedTextureFiltering(GPUTextureFilter filter) } /// Computes the area affected by a VRAM transfer, including wrap-around of X. 
-ALWAYS_INLINE_RELEASE static Common::Rectangle GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) +ALWAYS_INLINE_RELEASE static GSVector4i GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) { - Common::Rectangle out_rc = Common::Rectangle::FromExtents(x % VRAM_WIDTH, y % VRAM_HEIGHT, width, height); - if (out_rc.right > VRAM_WIDTH) + GSVector4i ret; + ret.left = x % VRAM_WIDTH; + ret.top = y % VRAM_HEIGHT; + ret.right = ret.left + width; + ret.bottom = ret.top + height; + if (ret.right > static_cast(VRAM_WIDTH)) { - out_rc.left = 0; - out_rc.right = VRAM_WIDTH; + ret.left = 0; + ret.right = static_cast(VRAM_WIDTH); } - if (out_rc.bottom > VRAM_HEIGHT) + if (ret.bottom > static_cast(VRAM_HEIGHT)) { - out_rc.top = 0; - out_rc.bottom = VRAM_HEIGHT; + ret.top = 0; + ret.bottom = static_cast(VRAM_HEIGHT); } - return out_rc; + return ret; } namespace { @@ -317,8 +335,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di else if (sw.IsReading()) { // Need to update the VRAM copy on the GPU with the state data. 
- UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, - Common::Rectangle(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); } // invalidate the whole VRAM read texture when loading state @@ -337,7 +354,7 @@ void GPU_HW::RestoreDeviceContext() { g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); SetVRAMRenderTarget(); - g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + g_gpu_device->SetViewport(m_vram_texture->GetRect()); SetScissor(); m_batch_ubo_dirty = true; } @@ -542,20 +559,6 @@ void GPU_HW::CheckSettings() } } -void GPU_HW::SetClampedDrawingArea() -{ - if (!IsDrawingAreaIsValid()) [[unlikely]] - { - m_clamped_drawing_area = {}; - return; - } - - m_clamped_drawing_area.right = std::min(m_drawing_area.right + 1, static_cast(VRAM_WIDTH)); - m_clamped_drawing_area.left = std::min(m_drawing_area.left, std::min(m_clamped_drawing_area.right, VRAM_WIDTH - 1)); - m_clamped_drawing_area.bottom = std::min(m_drawing_area.bottom + 1, static_cast(VRAM_HEIGHT)); - m_clamped_drawing_area.top = std::min(m_drawing_area.top, std::min(m_drawing_area.bottom, VRAM_HEIGHT - 1)); -} - u32 GPU_HW::CalculateResolutionScale() const { const u32 max_resolution_scale = GetMaxResolutionScale(); @@ -638,34 +641,19 @@ bool GPU_HW::IsUsingDownsampling() const void GPU_HW::SetFullVRAMDirtyRectangle() { - m_vram_dirty_draw_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + m_vram_dirty_draw_rect = VRAM_SIZE_RECT; m_draw_mode.SetTexturePageChanged(); } void GPU_HW::ClearVRAMDirtyRectangle() { - m_vram_dirty_draw_rect.SetInvalid(); - m_vram_dirty_write_rect.SetInvalid(); + m_vram_dirty_draw_rect = INVALID_RECT; + m_vram_dirty_write_rect = INVALID_RECT; } -void GPU_HW::IncludeDrawnDirtyRectangle(s32 min_x, s32 min_y, s32 max_x, s32 max_y) +void 
GPU_HW::IncludeDrawnDirtyRectangle(const GSVector4i rect) { - const u32 clamped_min_x = std::clamp(min_x, static_cast(m_clamped_drawing_area.left), - static_cast(m_clamped_drawing_area.right - 1)); - const u32 clamped_max_x = - std::clamp(max_x, static_cast(m_clamped_drawing_area.left), static_cast(m_clamped_drawing_area.right)); - m_vram_dirty_draw_rect.left = std::min(m_vram_dirty_draw_rect.left, clamped_min_x); - m_vram_dirty_draw_rect.right = std::max(m_vram_dirty_draw_rect.right, clamped_max_x); - - const u32 clamped_min_y = std::clamp(min_y, static_cast(m_clamped_drawing_area.top), - static_cast(m_clamped_drawing_area.bottom - 1)); - const u32 clamped_max_y = - std::clamp(max_y, static_cast(m_clamped_drawing_area.top), static_cast(m_clamped_drawing_area.bottom)); - m_vram_dirty_draw_rect.top = std::min(m_vram_dirty_draw_rect.top, clamped_min_y); - m_vram_dirty_draw_rect.bottom = std::max(m_vram_dirty_draw_rect.bottom, clamped_max_y); - - DebugAssert(m_vram_dirty_draw_rect.left < VRAM_WIDTH && m_vram_dirty_draw_rect.right <= VRAM_WIDTH); - DebugAssert(m_vram_dirty_draw_rect.top < VRAM_HEIGHT && m_vram_dirty_draw_rect.bottom <= VRAM_HEIGHT); + m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect.rintersect(m_clamped_drawing_area)); } std::tuple GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) @@ -1490,7 +1478,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) { GL_SCOPE("UpdateVRAMReadTexture()"); - const auto update = [this](Common::Rectangle& rect, u8 dbit) { + const auto update = [this](GSVector4i& rect, u8 dbit) { if (m_texpage_dirty & dbit) { m_texpage_dirty &= ~dbit; @@ -1498,14 +1486,14 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) GL_INS_FMT("{} texpage is no longer dirty", (dbit & TEXPAGE_DIRTY_DRAWN_RECT) ? 
"DRAW" : "WRITE"); } - const auto scaled_rect = rect * m_resolution_scale; + const GSVector4i scaled_rect = rect.mul32l(GSVector4i(m_resolution_scale)); if (m_vram_texture->IsMultisampled()) { if (g_gpu_device->GetFeatures().partial_msaa_resolve) { g_gpu_device->ResolveTextureRegion(m_vram_read_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0, - m_vram_texture.get(), scaled_rect.left, scaled_rect.top, - scaled_rect.GetWidth(), scaled_rect.GetHeight()); + m_vram_texture.get(), scaled_rect.left, scaled_rect.top, scaled_rect.width(), + scaled_rect.height()); } else { @@ -1517,29 +1505,29 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) { g_gpu_device->CopyTextureRegion(m_vram_read_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0, m_vram_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0, - scaled_rect.GetWidth(), scaled_rect.GetHeight()); + scaled_rect.width(), scaled_rect.height()); } // m_counters.num_read_texture_updates++; - rect.SetInvalid(); + rect = INVALID_RECT; }; if (drawn) { - DebugAssert(m_vram_dirty_draw_rect.Valid()); + DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT)); GL_INS_FMT("Updating draw rect {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.right, - m_vram_dirty_draw_rect.top, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.GetWidth(), - m_vram_dirty_draw_rect.GetHeight()); + m_vram_dirty_draw_rect.top, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.width(), + m_vram_dirty_draw_rect.height()); u8 dbits = TEXPAGE_DIRTY_DRAWN_RECT; - if (written && m_vram_dirty_draw_rect.Intersects(m_vram_dirty_write_rect)) + if (written && m_vram_dirty_draw_rect.rintersects(m_vram_dirty_write_rect)) { - DebugAssert(m_vram_dirty_write_rect.Valid()); + DebugAssert(!m_vram_dirty_write_rect.eq(INVALID_RECT)); GL_INS_FMT("Including write rect {},{} => {},{} ({}x{})", m_vram_dirty_write_rect.left, m_vram_dirty_write_rect.right, m_vram_dirty_write_rect.top, m_vram_dirty_write_rect.bottom, - 
m_vram_dirty_write_rect.GetWidth(), m_vram_dirty_write_rect.GetHeight()); - m_vram_dirty_draw_rect.Include(m_vram_dirty_write_rect); - m_vram_dirty_write_rect.SetInvalid(); + m_vram_dirty_write_rect.width(), m_vram_dirty_write_rect.height()); + m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(m_vram_dirty_write_rect); + m_vram_dirty_write_rect = INVALID_RECT; dbits = TEXPAGE_DIRTY_DRAWN_RECT | TEXPAGE_DIRTY_WRITTEN_RECT; written = false; } @@ -1550,7 +1538,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) { GL_INS_FMT("Updating write rect {},{} => {},{} ({}x{})", m_vram_dirty_write_rect.left, m_vram_dirty_write_rect.right, m_vram_dirty_write_rect.top, m_vram_dirty_write_rect.bottom, - m_vram_dirty_write_rect.GetWidth(), m_vram_dirty_write_rect.GetHeight()); + m_vram_dirty_write_rect.width(), m_vram_dirty_write_rect.height()); update(m_vram_dirty_write_rect, TEXPAGE_DIRTY_WRITTEN_RECT); } } @@ -1561,7 +1549,7 @@ void GPU_HW::UpdateDepthBufferFromMaskBit() return; // Viewport should already be set full, only need to fudge the scissor. 
- g_gpu_device->SetScissor(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + g_gpu_device->SetScissor(m_vram_texture->GetRect()); g_gpu_device->InvalidateRenderTarget(m_vram_depth_texture.get()); g_gpu_device->SetRenderTargets(nullptr, 0, m_vram_depth_texture.get()); g_gpu_device->SetPipeline(m_vram_update_depth_pipeline.get()); @@ -1614,12 +1602,7 @@ void GPU_HW::ClearDepthBuffer() void GPU_HW::SetScissor() { - const s32 left = m_drawing_area.left * m_resolution_scale; - const s32 right = std::max((m_drawing_area.right + 1) * m_resolution_scale, left + 1); - const s32 top = m_drawing_area.top * m_resolution_scale; - const s32 bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, top + 1); - - g_gpu_device->SetScissor(left, top, right - left, bottom - top); + g_gpu_device->SetScissor(m_clamped_drawing_area.mul32l(GSVector4i(m_resolution_scale))); } void GPU_HW::MapGPUBuffer(u32 required_vertices, u32 required_indices) @@ -1805,7 +1788,7 @@ bool GPU_HW::IsPossibleSpritePolygon(const BatchVertex* vertices) const return (zero_dudy && zero_dvdx); } -ALWAYS_INLINE_RELEASE void GPU_HW::ExpandLineTriangles(BatchVertex* vertices, u32 base_vertex) +ALWAYS_INLINE_RELEASE bool GPU_HW::ExpandLineTriangles(BatchVertex* vertices) { // Line expansion inspired by beetle-psx. BatchVertex *vshort, *vlong; @@ -1838,7 +1821,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::ExpandLineTriangles(BatchVertex* vertices, u3 } else { - return; + return false; } // Determine line direction. Vertical lines will have a width of 1, horizontal lines a height of 1. 
@@ -1850,7 +1833,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::ExpandLineTriangles(BatchVertex* vertices, u3 if (vshort->x == vlong->x) std::swap(vshort, vcorner); else if (vcorner->x != vlong->x) - return; + return false; GL_INS_FMT("Vertical line from Y={} to {}", vcorner->y, vlong->y); } @@ -1860,14 +1843,14 @@ ALWAYS_INLINE_RELEASE void GPU_HW::ExpandLineTriangles(BatchVertex* vertices, u3 if (vshort->y == vlong->y) std::swap(vshort, vcorner); else if (vcorner->y != vlong->y) - return; + return false; GL_INS_FMT("Horizontal line from X={} to {}", vcorner->x, vlong->x); } else { // Not a line-like triangle. - return; + return false; } // We could adjust the short texture coordinate to +1 from its original position, rather than leaving it the same. @@ -1900,14 +1883,14 @@ ALWAYS_INLINE_RELEASE void GPU_HW::ExpandLineTriangles(BatchVertex* vertices, u3 } else { - return; + return false; } // Determine line direction. Vertical lines will have a width of 1, horizontal lines a height of 1. vertical = (std::abs(va->x - vc->x) == 1.0f); horizontal = (std::abs(va->y - vb->y) == 1.0f); if (!vertical && !horizontal) - return; + return false; // Determine which vertex is the right angle, based on the vertical position. const BatchVertex* vcorner; @@ -1916,7 +1899,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::ExpandLineTriangles(BatchVertex* vertices, u3 else if (vb->y == vc->y) vcorner = vb; else - return; + return false; // Find short/long edge of the triangle. BatchVertex* vother = ((vcorner == va) ? vb : va); @@ -1927,52 +1910,64 @@ ALWAYS_INLINE_RELEASE void GPU_HW::ExpandLineTriangles(BatchVertex* vertices, u3 // Therefore the difference in V should be ignored. vshort->u = vcorner->u; vshort->v = vcorner->v; - - // We need to re-compute the UV limits, since we adjusted them above. 
- if (m_compute_uv_range) - ComputePolygonUVLimits(vertices[0].texpage, vertices, 3); - - // This is super jank, but because we rewrote the UVs on one of the vertices above, we need to rewrite it to GPU - // memory again. Has to be all of them as well, not just vshort, because the UV limits may have changed. - DebugAssert(m_batch_vertex_count >= 3); - std::memcpy(m_batch_vertex_ptr - 3, vertices, sizeof(BatchVertex) * 3); } - // Need to write the 4th vertex to the GPU. + // Need to write the 4th vertex. DebugAssert(m_batch_vertex_space >= 1); - BatchVertex* last = &(*(m_batch_vertex_ptr++) = *vlong); + BatchVertex* last = &(vertices[3] = *vlong); last->x = vertical ? vshort->x : vlong->x; last->y = horizontal ? vshort->y : vlong->y; - m_batch_vertex_count++; - m_batch_vertex_space--; - // Generate indices for second triangle. - DebugAssert(m_batch_index_space >= 3); + // Generate indices. + const u32 base_vertex = m_batch_vertex_count; + DebugAssert(m_batch_index_space >= 6); + *(m_batch_index_ptr++) = Truncate16(base_vertex); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); + *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); *(m_batch_index_ptr++) = Truncate16(base_vertex + (vshort - vertices)); *(m_batch_index_ptr++) = Truncate16(base_vertex + (vlong - vertices)); *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); - m_batch_index_count += 3; - m_batch_index_space -= 3; + m_batch_index_count += 6; + m_batch_index_space -= 6; + + // Upload vertices. 
+ DebugAssert(m_batch_vertex_space >= 4); + std::memcpy(m_batch_vertex_ptr, vertices, sizeof(BatchVertex) * 4); + m_batch_vertex_ptr += 4; + m_batch_vertex_count += 4; + m_batch_vertex_space -= 4; + return true; } -void GPU_HW::ComputePolygonUVLimits(u32 texpage, BatchVertex* vertices, u32 num_vertices) +void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) { - u32 min_u = vertices[0].u, max_u = vertices[0].u, min_v = vertices[0].v, max_v = vertices[0].v; - for (u32 i = 1; i < num_vertices; i++) + DebugAssert(num_vertices == 3 || num_vertices == 4); + + GSVector4i v0 = GSVector4i::load32(&vertices[0].u); + GSVector4i v1 = GSVector4i::load32(&vertices[1].u); + GSVector4i v2 = GSVector4i::load32(&vertices[2].u); + GSVector4i v3; + GSVector4i min = v0.min_u16(v1).min_u16(v2); + GSVector4i max = v0.max_u16(v1).max_u16(v2); + if (num_vertices == 4) { - min_u = std::min(min_u, vertices[i].u); - max_u = std::max(max_u, vertices[i].u); - min_v = std::min(min_v, vertices[i].v); - max_v = std::max(max_v, vertices[i].v); + v3 = GSVector4i::load32(&vertices[3].u); + min = min.min_u16(v3); + max = max.max_u16(v3); } + u32 min_u = min.extract16<0>(); + u32 min_v = min.extract16<1>(); + u32 max_u = max.extract16<0>(); + u32 max_v = max.extract16<1>(); max_u = (min_u != max_u) ? (max_u - 1) : max_u; max_v = (min_v != max_v) ? (max_v - 1) : max_v; - CheckForTexPageOverlap(texpage, min_u, min_v, max_u, max_v); - for (u32 i = 0; i < num_vertices; i++) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); + + if (m_texpage_dirty != 0) + CheckForTexPageOverlap(min.upl32(max).u16to32()); } void GPU_HW::SetBatchDepthBuffer(bool enabled) @@ -2137,10 +2132,13 @@ void GPU_HW::LoadVertices() const bool raw_texture = textured && rc.raw_texture_enable; const bool shaded = rc.shading_enable; const bool pgxp = g_settings.gpu_pgxp_enable; + + // TODO: Using 64-bit vectors instead of 32-bit could be advantageous here, particularly for small ARM cores and + // RISC-V. 
const u32 first_color = rc.color_for_first_vertex; - const u32 num_vertices = rc.quad_polygon ? 4 : 3; + u32 num_vertices = rc.quad_polygon ? 4 : 3; std::array<BatchVertex, 4> vertices; - std::array<std::array<s32, 2>, 4> native_vertex_positions; + std::array<GSVector2i, 4> native_vertex_positions; std::array<u16, 4> native_texcoords; bool valid_w = g_settings.gpu_pgxp_texture_correction; for (u32 i = 0; i < num_vertices; i++) @@ -2150,10 +2148,8 @@ void GPU_HW::LoadVertices() const u64 maddr_and_pos = m_fifo.Pop(); const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; const u16 texcoord = textured ? Truncate16(FifoPop()) : 0; - const s32 native_x = m_drawing_offset.x + vp.x; - const s32 native_y = m_drawing_offset.y + vp.y; - native_vertex_positions[i][0] = native_x; - native_vertex_positions[i][1] = native_y; + const s32 native_x = native_vertex_positions[i].x = m_drawing_offset.x + vp.x; + const s32 native_y = native_vertex_positions[i].y = m_drawing_offset.y + vp.y; native_texcoords[i] = texcoord; vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage, texcoord, 0xFFFF0000u); @@ -2187,14 +2183,114 @@ void GPU_HW::LoadVertices() else if (m_allow_sprite_mode) SetBatchSpriteMode((pgxp && !is_3d) || IsPossibleSpritePolygon(vertices.data())); - if (m_compute_uv_range && textured) - ComputePolygonUVLimits(texpage, vertices.data(), num_vertices); - if (!IsDrawingAreaIsValid()) [[unlikely]] return; - const u32 start_index = m_batch_vertex_count; + if (m_sw_renderer) + { + GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + + const u32 sw_num_vertices = rc.quad_polygon ? 
4 : 3; + for (u32 i = 0; i < sw_num_vertices; i++) + { + GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->x = native_vertex_positions[i].x; + vert->y = native_vertex_positions[i].y; + vert->texcoord = native_texcoords[i]; + vert->color = vertices[i].color; + } + + m_sw_renderer->PushCommand(cmd); + } + + // Cull polygons which are too large. + const GSVector4 v0f = GSVector4::loadl(&vertices[0].x); + const GSVector4 v1f = GSVector4::loadl(&vertices[1].x); + const GSVector4 v2f = GSVector4::loadl(&vertices[2].x); + const GSVector4 min_pos_12 = v1f.min(v2f); + const GSVector4 max_pos_12 = v1f.max(v2f); + const GSVector4i draw_rect_012 = + GSVector4i(min_pos_12.min(v0f).upld(max_pos_12.max(v0f))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !m_clamped_drawing_area.rintersects(draw_rect_012)); + if (first_tri_culled) + { + GL_INS_FMT("Culling off-screen/too-large polygon: {},{} {},{} {},{}", native_vertex_positions[0].x, + native_vertex_positions[0].y, native_vertex_positions[1].x, native_vertex_positions[1].y, + native_vertex_positions[2].x, native_vertex_positions[2].y); + + if (!rc.quad_polygon) + return; + } + else + { + if (textured && m_compute_uv_range) + ComputePolygonUVLimits(vertices.data(), num_vertices); + + IncludeDrawnDirtyRectangle(draw_rect_012); + AddDrawTriangleTicks(GSVector4i(native_vertex_positions[0]), GSVector4i(native_vertex_positions[1]), + GSVector4i(native_vertex_positions[2]), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + + // Expand lines to triangles (Doom, Soul Blade, etc.) 
+ if (!rc.quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && + ExpandLineTriangles(vertices.data())) + { + return; + } + + const u32 start_index = m_batch_vertex_count; + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index); + *(m_batch_index_ptr++) = Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + m_batch_index_count += 3; + m_batch_index_space -= 3; + } + + // quads if (rc.quad_polygon) + { + const GSVector4 v3f = GSVector4::loadl(&vertices[3].x); + const GSVector4i draw_rect_123 = + GSVector4i(min_pos_12.min(v3f).upld(max_pos_12.max(v3f))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + + // Cull polygons which are too large. + const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !m_clamped_drawing_area.rintersects(draw_rect_123)); + if (second_tri_culled) + { + GL_INS_FMT("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", + native_vertex_positions[2].x, native_vertex_positions[2].y, native_vertex_positions[1].x, + native_vertex_positions[1].y, native_vertex_positions[0].x, native_vertex_positions[0].y); + + if (first_tri_culled) + return; + } + else + { + if (first_tri_culled && textured && m_compute_uv_range) + ComputePolygonUVLimits(vertices.data(), num_vertices); + + IncludeDrawnDirtyRectangle(draw_rect_123); + AddDrawTriangleTicks(GSVector4i(native_vertex_positions[2]), GSVector4i(native_vertex_positions[1]), + GSVector4i(native_vertex_positions[3]), rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + + const u32 start_index = m_batch_vertex_count; + DebugAssert(m_batch_index_space >= 3); + *(m_batch_index_ptr++) = Truncate16(start_index + 2); + *(m_batch_index_ptr++) = Truncate16(start_index + 1); + *(m_batch_index_ptr++) = Truncate16(start_index + 3); + m_batch_index_count += 3; + m_batch_index_space -= 3; + } + } + + if (num_vertices == 
4) { DebugAssert(m_batch_vertex_space >= 4); std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); @@ -2210,96 +2306,6 @@ void GPU_HW::LoadVertices() m_batch_vertex_count += 3; m_batch_vertex_space -= 3; } - - // Cull polygons which are too large. - const auto [min_x_12, max_x_12] = MinMax(native_vertex_positions[1][0], native_vertex_positions[2][0]); - const auto [min_y_12, max_y_12] = MinMax(native_vertex_positions[1][1], native_vertex_positions[2][1]); - const s32 min_x = std::min(min_x_12, native_vertex_positions[0][0]); - const s32 max_x = std::max(max_x_12, native_vertex_positions[0][0]); - const s32 min_y = std::min(min_y_12, native_vertex_positions[0][1]); - const s32 max_y = std::max(max_y_12, native_vertex_positions[0][1]); - const bool first_tri_culled = ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT); - - if (first_tri_culled) - { - DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", native_vertex_positions[0][0], - native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1], - native_vertex_positions[2][0], native_vertex_positions[2][1]); - } - else - { - // TODO: Cull triangles that fall entirely off-screen. 
- IncludeDrawnDirtyRectangle(min_x, min_y, max_x, max_y); - - AddDrawTriangleTicks(native_vertex_positions[0][0], native_vertex_positions[0][1], - native_vertex_positions[1][0], native_vertex_positions[1][1], - native_vertex_positions[2][0], native_vertex_positions[2][1], rc.shading_enable, - rc.texture_enable, rc.transparency_enable); - - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - m_batch_index_count += 3; - m_batch_index_space -= 3; - } - - // quads - if (rc.quad_polygon) - { - const s32 min_x_123 = std::min(min_x_12, native_vertex_positions[3][0]); - const s32 max_x_123 = std::max(max_x_12, native_vertex_positions[3][0]); - const s32 min_y_123 = std::min(min_y_12, native_vertex_positions[3][1]); - const s32 max_y_123 = std::max(max_y_12, native_vertex_positions[3][1]); - - // Cull polygons which are too large. - if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT) - { - DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", native_vertex_positions[2][0], - native_vertex_positions[2][1], native_vertex_positions[1][0], native_vertex_positions[1][1], - native_vertex_positions[0][0], native_vertex_positions[0][1]); - } - else - { - IncludeDrawnDirtyRectangle(min_x_123, min_y_123, max_x_123, max_y_123); - - AddDrawTriangleTicks(native_vertex_positions[2][0], native_vertex_positions[2][1], - native_vertex_positions[1][0], native_vertex_positions[1][1], - native_vertex_positions[3][0], native_vertex_positions[3][1], rc.shading_enable, - rc.texture_enable, rc.transparency_enable); - - DebugAssert(m_batch_index_space >= 3); - *(m_batch_index_ptr++) = Truncate16(start_index + 2); - *(m_batch_index_ptr++) = Truncate16(start_index + 1); - *(m_batch_index_ptr++) = Truncate16(start_index + 3); - m_batch_index_count += 3; - m_batch_index_space -= 
3; - } - } - else - { - // Expand lines to triangles (Doom, Soul Blade, etc.) - if (m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && !first_tri_culled) - ExpandLineTriangles(vertices.data(), start_index); - } - - if (m_sw_renderer) - { - GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); - FillDrawCommand(cmd, rc); - - const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3; - for (u32 i = 0; i < sw_num_vertices; i++) - { - GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; - vert->x = native_vertex_positions[i][0]; - vert->y = native_vertex_positions[i][1]; - vert->texcoord = native_texcoords[i]; - vert->color = vertices[i].color; - } - - m_sw_renderer->PushCommand(cmd); - } } break; @@ -2311,10 +2317,10 @@ void GPU_HW::LoadVertices() const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0); - u16 orig_tex_left = ZeroExtend16(texcoord_x); - u16 orig_tex_top = ZeroExtend16(texcoord_y); - s32 rectangle_width; - s32 rectangle_height; + u32 orig_tex_left = ZeroExtend16(texcoord_x); + u32 orig_tex_top = ZeroExtend16(texcoord_y); + u32 rectangle_width; + u32 rectangle_height; switch (rc.rectangle_size) { case GPUDrawRectangleSize::R1x1: @@ -2332,14 +2338,8 @@ void GPU_HW::LoadVertices() default: { const u32 width_and_height = FifoPop(); - rectangle_width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK); - rectangle_height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK); - - if (rectangle_width >= MAX_PRIMITIVE_WIDTH || rectangle_height >= MAX_PRIMITIVE_HEIGHT) - { - DEBUG_LOG("Culling too-large rectangle: {},{} {}x{}", pos_x, pos_y, rectangle_width, rectangle_height); - return; - } + rectangle_width = (width_and_height & VRAM_WIDTH_MASK); + rectangle_height = ((width_and_height >> 16) & VRAM_HEIGHT_MASK); } break; } @@ -2354,34 +2354,42 @@ void GPU_HW::LoadVertices() 
m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. - u16 tex_top = orig_tex_top; - for (s32 y_offset = 0; y_offset < rectangle_height;) + u32 tex_top = orig_tex_top; + for (u32 y_offset = 0; y_offset < rectangle_height;) { - const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); - const float quad_start_y = static_cast(pos_y + y_offset); + const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); + const float quad_start_y = static_cast(pos_y + static_cast(y_offset)); const float quad_end_y = quad_start_y + static_cast(quad_height); - const u16 tex_bottom = tex_top + static_cast(quad_height); + const u32 tex_bottom = tex_top + quad_height; - u16 tex_left = orig_tex_left; - for (s32 x_offset = 0; x_offset < rectangle_width;) + u32 tex_left = orig_tex_left; + for (u32 x_offset = 0; x_offset < rectangle_width;) { - const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); - const float quad_start_x = static_cast(pos_x + x_offset); + const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); + const float quad_start_x = static_cast(pos_x + static_cast(x_offset)); const float quad_end_x = quad_start_x + static_cast(quad_width); - const u16 tex_right = tex_left + static_cast(quad_width); + const u32 tex_right = tex_left + quad_width; const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - CheckForTexPageOverlap(texpage, tex_left, tex_top, tex_right - 1, tex_bottom - 1); + if (rc.texture_enable && m_texpage_dirty != 0) + { + CheckForTexPageOverlap(GSVector4i(static_cast(tex_left), static_cast(tex_top), + static_cast(tex_right), static_cast(tex_bottom))); + } const u32 base_vertex = m_batch_vertex_count; (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, 
texpage, tex_left, tex_top, uv_limits); + ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), + uv_limits); (m_batch_vertex_ptr++) - ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits); + ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), + uv_limits); (m_batch_vertex_ptr++) - ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits); + ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), + uv_limits); (m_batch_vertex_ptr++) - ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits); + ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), + uv_limits); m_batch_vertex_count += 4; m_batch_vertex_space -= 4; @@ -2402,7 +2410,7 @@ void GPU_HW::LoadVertices() tex_top = 0; } - IncludeDrawnDirtyRectangle(pos_x, pos_y, pos_x + rectangle_width, pos_y + rectangle_height); + IncludeDrawnDirtyRectangle(GSVector4i(pos_x, pos_y, pos_x + rectangle_width, pos_y + rectangle_height)); AddDrawRectangleTicks(pos_x, pos_y, rectangle_width, rectangle_height, rc.texture_enable, rc.transparency_enable); if (m_sw_renderer) @@ -2459,7 +2467,7 @@ void GPU_HW::LoadVertices() return; } - IncludeDrawnDirtyRectangle(min_x, min_y, max_x + 1, max_y + 1); + IncludeDrawnDirtyRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. @@ -2519,7 +2527,7 @@ void GPU_HW::LoadVertices() } else { - IncludeDrawnDirtyRectangle(min_x, min_y, max_x + 1, max_y + 1); + IncludeDrawnDirtyRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. 
@@ -2588,52 +2596,57 @@ bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u3 return true; } -void GPU_HW::IncludeVRAMDirtyRectangle(Common::Rectangle& rect, const Common::Rectangle& new_rect) +void GPU_HW::IncludeVRAMDirtyRectangle(GSVector4i& rect, const GSVector4i new_rect) { - rect.Include(new_rect); + if (rect.rcontains(new_rect)) + return; + + rect = rect.runion(new_rect); // the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the // shadow texture is updated - if (!m_draw_mode.IsTexturePageChanged() && - (m_draw_mode.mode_reg.GetTexturePageRectangle().Intersects(new_rect) || + if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && + (m_draw_mode.mode_reg.GetTexturePageRectangle().rintersects(new_rect) || (m_draw_mode.mode_reg.IsUsingPalette() && - m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).Intersects(new_rect)))) + m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).rintersects(new_rect)))) { m_draw_mode.SetTexturePageChanged(); } } -ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v) +ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) { - if (!m_texpage_dirty) - return; + DebugAssert(m_texpage_dirty != 0 && m_batch.texture_mode != BatchTextureMode::Disabled); - static constexpr std::array, 4> uv_shifts_adds = {{{2, 3}, {1, 1}, {0, 0}, {0, 0}}}; - - const u32 xoffs = (texpage & 0xFu) * 64u; - const u32 yoffs = ((texpage >> 4) & 1u) * 256u; - const u32 xshift = uv_shifts_adds[(texpage >> 7) & 3][0]; - const u32 xadd = uv_shifts_adds[(texpage >> 7) & 3][1]; - - const u32 vram_min_u = - (((min_u & m_draw_mode.texture_window.and_x) | m_draw_mode.texture_window.or_x) >> xshift) + xoffs; - const u32 vram_max_u = - ((((max_u & m_draw_mode.texture_window.and_x) | m_draw_mode.texture_window.or_x) + xadd) 
>> xshift) + xoffs; - const u32 vram_min_v = ((min_v & m_draw_mode.texture_window.and_y) | m_draw_mode.texture_window.or_y) + yoffs; - const u32 vram_max_v = ((max_v & m_draw_mode.texture_window.and_y) | m_draw_mode.texture_window.or_y) + yoffs; - - // Log_InfoFmt("{}: {},{} => {},{}", s_draw_number, vram_min_u, vram_min_v, vram_max_u, vram_max_v); - - if (vram_min_u < m_current_uv_range.left || vram_min_v < m_current_uv_range.top || - vram_max_u >= m_current_uv_range.right || vram_max_v >= m_current_uv_range.bottom) + if (m_texture_window_active) { - m_current_uv_range.Include(vram_min_u, vram_max_u + 1, vram_min_v, vram_max_v + 1); + const GSVector4i twin = GSVector4i::load(m_batch_ubo_data.u_texture_window); + uv_rect = ((uv_rect & twin.xyxy()) | twin.zwzw()); + + // Min could be greater than max after applying window, correct for it. + uv_rect = uv_rect.min_i32(uv_rect.zwzw()).max_i32(uv_rect.xyxy()); + } + + const GPUTextureMode tmode = m_draw_mode.mode_reg.texture_mode; + const u32 xshift = (tmode >= GPUTextureMode::Direct16Bit) ? 
0 : (2 - static_cast(tmode)); + const GSVector4i page_offset = GSVector4i(m_current_texture_page_offset).xyxy(); + + uv_rect = uv_rect.blend32<5>(uv_rect.srl32(xshift)); // shift only goes on the x + uv_rect = uv_rect.add32(page_offset); // page offset + uv_rect = uv_rect.add32(GSVector4i::cxpr(0, 0, 1, 1)); // make exclusive + uv_rect = uv_rect.rintersect(VRAM_SIZE_RECT); // clamp to vram bounds + + const GSVector4i new_uv_rect = m_current_uv_range.runion(uv_rect); + + if (!m_current_uv_range.eq(new_uv_rect)) + { + m_current_uv_range = new_uv_rect; bool update_drawn = false, update_written = false; if (m_texpage_dirty & TEXPAGE_DIRTY_DRAWN_RECT) { - DebugAssert(m_vram_dirty_draw_rect.Valid()); - update_drawn = m_current_uv_range.Intersects(m_vram_dirty_draw_rect); + DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT)); + update_drawn = m_current_uv_range.rintersects(m_vram_dirty_draw_rect); if (update_drawn) { GL_INS_FMT("Updating VRAM cache due to UV {{{},{} => {},{}}} intersection with dirty DRAW {{{},{} => {},{}}}", @@ -2644,8 +2657,8 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u } if (m_texpage_dirty & TEXPAGE_DIRTY_WRITTEN_RECT) { - DebugAssert(m_vram_dirty_write_rect.Valid()); - update_written = m_current_uv_range.Intersects(m_vram_dirty_write_rect); + DebugAssert(!m_vram_dirty_write_rect.eq(INVALID_RECT)); + update_written = m_current_uv_range.rintersects(m_vram_dirty_write_rect); if (update_written) { GL_INS_FMT("Updating VRAM cache due to UV {{{},{} => {},{}}} intersection with dirty WRITE {{{},{} => {},{}}}", @@ -2838,24 +2851,22 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) } GL_INS_FMT("Dirty draw area before: {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.top, - m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.GetWidth(), - m_vram_dirty_draw_rect.GetHeight()); + m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, 
m_vram_dirty_draw_rect.width(), + m_vram_dirty_draw_rect.height()); - IncludeVRAMDirtyRectangle( - m_vram_dirty_draw_rect, - Common::Rectangle::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); + IncludeVRAMDirtyRectangle(m_vram_dirty_draw_rect, GSVector4i(x, y, x + width, y + height).rintersect(VRAM_SIZE_RECT)); GL_INS_FMT("Dirty draw area after: {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.top, - m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.GetWidth(), - m_vram_dirty_draw_rect.GetHeight()); + m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.width(), + m_vram_dirty_draw_rect.height()); const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)); g_gpu_device->SetPipeline( m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(IsInterlacedRenderingEnabled())].get()); - const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); - g_gpu_device->SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, - bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); + const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); + const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale)); + g_gpu_device->SetViewportAndScissor(scaled_bounds); struct VRAMFillUBOData { @@ -2893,7 +2904,7 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) } // Get bounds with wrap-around handled. - Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + GSVector4i copy_rect = GetVRAMTransferBounds(x, y, width, height); // Has to be aligned to an even pixel for the download, due to 32-bit packing. 
if (copy_rect.left & 1) @@ -2901,14 +2912,14 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) if (copy_rect.right & 1) copy_rect.right++; - DebugAssert((copy_rect.left % 2) == 0 && (copy_rect.GetWidth() % 2) == 0); + DebugAssert((copy_rect.left % 2) == 0 && (copy_rect.width() % 2) == 0); const u32 encoded_left = copy_rect.left / 2; const u32 encoded_top = copy_rect.top; - const u32 encoded_width = copy_rect.GetWidth() / 2; - const u32 encoded_height = copy_rect.GetHeight(); + const u32 encoded_width = copy_rect.width() / 2; + const u32 encoded_height = copy_rect.height(); // Encode the 24-bit texture as 16-bit. - const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + const s32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.width(), copy_rect.height()}; g_gpu_device->SetRenderTarget(m_vram_readback_texture.get()); g_gpu_device->SetPipeline(m_vram_readback_pipeline.get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); @@ -2958,8 +2969,8 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b m_sw_renderer->PushCommand(cmd); } - const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); - DebugAssert(bounds.right <= VRAM_WIDTH && bounds.bottom <= VRAM_HEIGHT); + const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); + DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); IncludeVRAMDirtyRectangle(m_vram_dirty_write_rect, bounds); if (check_mask) @@ -2981,7 +2992,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b } void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, - bool check_mask, const Common::Rectangle& bounds) + bool check_mask, const GSVector4i bounds) { std::unique_ptr upload_texture; u32 map_index; @@ -3024,8 +3035,8 @@ void 
GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da height, map_index, (set_mask) ? 0x8000u : 0x00, GetCurrentNormalizedVertexDepth()}; // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; - g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); + const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale)); + g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height()); g_gpu_device->SetPipeline( m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer && NeedsDepthBuffer())].get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); @@ -3066,10 +3077,10 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT); - const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); - const bool intersect_with_draw = m_vram_dirty_draw_rect.Intersects(src_bounds); - const bool intersect_with_write = m_vram_dirty_write_rect.Intersects(src_bounds); + const GSVector4i src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); + const GSVector4i dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); + const bool intersect_with_draw = m_vram_dirty_draw_rect.rintersects(src_bounds); + const bool intersect_with_write = m_vram_dirty_write_rect.rintersects(src_bounds); if (use_shader || IsUsingMultisampling()) { @@ -3102,9 +3113,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 
GetCurrentNormalizedVertexDepth()}; // VRAM read texture should already be bound. - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); - g_gpu_device->SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), - dst_bounds_scaled.GetHeight()); + const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale)); + g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); g_gpu_device->SetPipeline( m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer && NeedsDepthBuffer())] .get()); @@ -3119,7 +3129,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 } GPUTexture* src_tex = m_vram_texture.get(); - const bool overlaps_with_self = src_bounds.Intersects(dst_bounds); + const bool overlaps_with_self = src_bounds.rintersects(dst_bounds); if (!g_gpu_device->GetFeatures().texture_copy_to_self || overlaps_with_self) { src_tex = m_vram_read_texture.get(); @@ -3127,7 +3137,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); } - Common::Rectangle* update_rect; + GSVector4i* update_rect; if (intersect_with_draw || intersect_with_write) { update_rect = intersect_with_draw ? &m_vram_dirty_draw_rect : &m_vram_dirty_write_rect; @@ -3135,8 +3145,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 else { const bool use_write = - (m_vram_dirty_write_rect.Valid() && m_vram_dirty_draw_rect.Valid() && - m_vram_dirty_write_rect.GetDistance(dst_bounds) < m_vram_dirty_draw_rect.GetDistance(dst_bounds)); + (!m_vram_dirty_write_rect.eq(INVALID_RECT) && !m_vram_dirty_draw_rect.eq(INVALID_RECT) && + RectDistance(m_vram_dirty_write_rect, dst_bounds) < RectDistance(m_vram_dirty_draw_rect, dst_bounds)); update_rect = use_write ? 
&m_vram_dirty_write_rect : &m_vram_dirty_draw_rect; } IncludeVRAMDirtyRectangle(*update_rect, dst_bounds); @@ -3184,10 +3194,9 @@ void GPU_HW::DispatchRenderCommand() if (m_draw_mode.mode_reg.IsUsingPalette()) { - const Common::Rectangle palette_rect = - m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode); - const bool update_drawn = palette_rect.Intersects(m_vram_dirty_draw_rect); - const bool update_written = palette_rect.Intersects(m_vram_dirty_write_rect); + const GSVector4i palette_rect = m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode); + const bool update_drawn = palette_rect.rintersects(m_vram_dirty_draw_rect); + const bool update_written = palette_rect.rintersects(m_vram_dirty_write_rect); if (update_drawn || update_written) { GL_INS("Palette in VRAM dirty area, flushing cache"); @@ -3198,16 +3207,18 @@ void GPU_HW::DispatchRenderCommand() } } - const Common::Rectangle page_rect = m_draw_mode.mode_reg.GetTexturePageRectangle(); - u8 new_texpage_dirty = m_vram_dirty_draw_rect.Intersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0; - new_texpage_dirty |= m_vram_dirty_write_rect.Intersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0; + const GSVector4i page_rect = m_draw_mode.mode_reg.GetTexturePageRectangle(); + m_current_texture_page_offset = page_rect.xy(); + + u8 new_texpage_dirty = m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0; + new_texpage_dirty |= m_vram_dirty_write_rect.rintersects(page_rect) ? 
TEXPAGE_DIRTY_WRITTEN_RECT : 0; if (new_texpage_dirty != 0) { GL_INS("Texpage is in dirty area, checking UV ranges"); m_texpage_dirty = new_texpage_dirty; m_compute_uv_range = true; - m_current_uv_range.SetInvalid(); + m_current_uv_range = INVALID_RECT; } else { @@ -3282,10 +3293,13 @@ void GPU_HW::DispatchRenderCommand() { m_draw_mode.ClearTextureWindowChangedFlag(); - m_batch_ubo_data.u_texture_window_and[0] = ZeroExtend32(m_draw_mode.texture_window.and_x); - m_batch_ubo_data.u_texture_window_and[1] = ZeroExtend32(m_draw_mode.texture_window.and_y); - m_batch_ubo_data.u_texture_window_or[0] = ZeroExtend32(m_draw_mode.texture_window.or_x); - m_batch_ubo_data.u_texture_window_or[1] = ZeroExtend32(m_draw_mode.texture_window.or_y); + m_batch_ubo_data.u_texture_window[0] = ZeroExtend32(m_draw_mode.texture_window.and_x); + m_batch_ubo_data.u_texture_window[1] = ZeroExtend32(m_draw_mode.texture_window.and_y); + m_batch_ubo_data.u_texture_window[2] = ZeroExtend32(m_draw_mode.texture_window.or_x); + m_batch_ubo_data.u_texture_window[3] = ZeroExtend32(m_draw_mode.texture_window.or_y); + + m_texture_window_active = ((m_draw_mode.texture_window.and_x & m_draw_mode.texture_window.and_y) != 0xFF || + ((m_draw_mode.texture_window.or_x | m_draw_mode.texture_window.or_y) != 0)); m_batch_ubo_dirty = true; } @@ -3343,8 +3357,8 @@ void GPU_HW::FlushRender() #endif GL_INS_FMT("Dirty draw area: {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.top, - m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.GetWidth(), - m_vram_dirty_draw_rect.GetHeight()); + m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.width(), + m_vram_dirty_draw_rect.height()); if (m_batch_ubo_dirty) { @@ -3631,7 +3645,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); g_gpu_device->SetRenderTarget(m_downsample_texture.get()); 
- g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height); + g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, level_width, level_height)); g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() : m_downsample_mid_pass_pipeline.get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); @@ -3661,7 +3675,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top g_gpu_device->InvalidateRenderTarget(weight_texture.get()); g_gpu_device->SetRenderTarget(weight_texture.get()); g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height); + g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, last_width, last_height)); g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); @@ -3681,7 +3695,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top g_gpu_device->SetRenderTarget(m_downsample_texture.get()); g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_composite_sampler.get()); g_gpu_device->SetTextureSampler(1, weight_texture.get(), m_downsample_lod_sampler.get()); - g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, width, height)); g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index aa149e818..00b96a7b6 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -9,10 +9,10 @@ #include "util/gpu_device.h" #include 
"common/dimensional_array.h" +#include "common/gsvector.h" #include "common/heap_array.h" -#include -#include +#include #include #include #include @@ -122,8 +122,7 @@ private: struct BatchUBOData { - u32 u_texture_window_and[2]; - u32 u_texture_window_or[2]; + u32 u_texture_window[4]; // and_x, and_y, or_x, or_y float u_src_alpha_factor; float u_dst_alpha_factor; u32 u_interlaced_displayed_field; @@ -137,6 +136,11 @@ private: u32 num_uniform_buffer_updates; }; + static constexpr GSVector4i VRAM_SIZE_RECT = GSVector4i::cxpr(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + static constexpr GSVector4i INVALID_RECT = + GSVector4i::cxpr(std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::min(), + std::numeric_limits::min()); + /// Returns true if a depth buffer should be created. bool NeedsDepthBuffer() const; GPUTexture::Format GetDepthBufferFormat() const; @@ -153,7 +157,6 @@ private: void PrintSettingsToLog(); void CheckSettings(); - void SetClampedDrawingArea(); void UpdateVRAMReadTexture(bool drawn, bool written); void UpdateDepthBufferFromMaskBit(); void CopyAndClearDepthBuffer(); @@ -172,9 +175,9 @@ private: void SetFullVRAMDirtyRectangle(); void ClearVRAMDirtyRectangle(); - void IncludeVRAMDirtyRectangle(Common::Rectangle& rect, const Common::Rectangle& new_rect); - void IncludeDrawnDirtyRectangle(s32 min_x, s32 min_y, s32 max_x, s32 max_y); - void CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v); + void IncludeVRAMDirtyRectangle(GSVector4i& rect, const GSVector4i new_rect); + void IncludeDrawnDirtyRectangle(const GSVector4i rect); + void CheckForTexPageOverlap(GSVector4i uv_rect); bool IsFlushed() const; void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices); @@ -205,7 +208,7 @@ private: void OnBufferSwapped() override; void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, - bool check_mask, const Common::Rectangle& bounds); + bool check_mask, const 
GSVector4i bounds); bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); /// Expands a line into two triangles. @@ -214,10 +217,10 @@ private: /// Handles quads with flipped texture coordinate directions. void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); bool IsPossibleSpritePolygon(const BatchVertex* vertices) const; - void ExpandLineTriangles(BatchVertex* vertices, u32 base_vertex); + bool ExpandLineTriangles(BatchVertex* vertices); - /// Computes polygon U/V boundaries. - void ComputePolygonUVLimits(u32 texpage, BatchVertex* vertices, u32 num_vertices); + /// Computes polygon U/V boundaries, and for overlap with the current texture page. + void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); /// Sets the depth test flag for PGXP depth buffering. void SetBatchDepthBuffer(bool enabled); @@ -271,9 +274,10 @@ private: bool m_compute_uv_range : 1 = false; bool m_allow_sprite_mode : 1 = false; bool m_allow_shader_blend : 1 = false; + bool m_depth_was_copied : 1 = false; + bool m_texture_window_active : 1 = false; u8 m_texpage_dirty = 0; - bool m_depth_was_copied = false; BatchConfig m_batch; @@ -282,10 +286,10 @@ private: BatchUBOData m_batch_ubo_data = {}; // Bounding box of VRAM area that the GPU has drawn into. 
- GPUDrawingArea m_clamped_drawing_area = {}; - Common::Rectangle m_vram_dirty_draw_rect; - Common::Rectangle m_vram_dirty_write_rect; - Common::Rectangle m_current_uv_range; + GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; + GSVector4i m_vram_dirty_write_rect = INVALID_RECT; + GSVector4i m_current_uv_range = INVALID_RECT; + GSVector2i m_current_texture_page_offset = {}; std::unique_ptr m_wireframe_pipeline; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 55338543f..7de702c54 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -523,6 +523,7 @@ void GPU_SW::DispatchRenderCommand() GPUBackendDrawPolygonCommand* cmd = m_backend.NewDrawPolygonCommand(num_vertices); FillDrawCommand(cmd, rc); + std::array positions; const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; const bool textured = rc.texture_enable; @@ -535,49 +536,55 @@ void GPU_SW::DispatchRenderCommand() vert->x = m_drawing_offset.x + vp.x; vert->y = m_drawing_offset.y + vp.y; vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + positions[i] = GSVector4i::loadl(&vert->x); } if (!IsDrawingAreaIsValid()) return; // Cull polygons which are too large. 
- const auto [min_x_12, max_x_12] = MinMax(cmd->vertices[1].x, cmd->vertices[2].x); - const auto [min_y_12, max_y_12] = MinMax(cmd->vertices[1].y, cmd->vertices[2].y); - const s32 min_x = std::min(min_x_12, cmd->vertices[0].x); - const s32 max_x = std::max(max_x_12, cmd->vertices[0].x); - const s32 min_y = std::min(min_y_12, cmd->vertices[0].y); - const s32 max_y = std::max(max_y_12, cmd->vertices[0].y); - - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + const GSVector4i min_pos_12 = positions[1].min_i32(positions[2]); + const GSVector4i max_pos_12 = positions[1].max_i32(positions[2]); + const GSVector4i draw_rect_012 = + min_pos_12.min_i32(positions[0]).upl64(max_pos_12.max_i32(positions[0])).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const bool first_tri_culled = + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || + !m_clamped_drawing_area.rintersects(draw_rect_012)); + if (first_tri_culled) { - DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); + + if (!rc.quad_polygon) + return; } else { - AddDrawTriangleTicks(cmd->vertices[0].x, cmd->vertices[0].y, cmd->vertices[1].x, cmd->vertices[1].y, - cmd->vertices[2].x, cmd->vertices[2].y, rc.shading_enable, rc.texture_enable, + AddDrawTriangleTicks(positions[0], positions[1], positions[2], rc.shading_enable, rc.texture_enable, rc.transparency_enable); } // quads if (rc.quad_polygon) { - const s32 min_x_123 = std::min(min_x_12, cmd->vertices[3].x); - const s32 max_x_123 = std::max(max_x_12, cmd->vertices[3].x); - const s32 min_y_123 = std::min(min_y_12, cmd->vertices[3].y); - const s32 max_y_123 = std::max(max_y_12, cmd->vertices[3].y); + const GSVector4i draw_rect_123 = + 
min_pos_12.min_i32(positions[3]).upl64(max_pos_12.max_i32(positions[3])).add32(GSVector4i::cxpr(0, 0, 1, 1)); // Cull polygons which are too large. - if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT) + const bool second_tri_culled = + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || + !m_clamped_drawing_area.rintersects(draw_rect_123)); + if (second_tri_culled) { DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); + + if (first_tri_culled) + return; } else { - AddDrawTriangleTicks(cmd->vertices[2].x, cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, - cmd->vertices[3].x, cmd->vertices[3].y, rc.shading_enable, rc.texture_enable, + AddDrawTriangleTicks(positions[2], positions[1], positions[3], rc.shading_enable, rc.texture_enable, rc.transparency_enable); } } @@ -627,12 +634,6 @@ void GPU_SW::DispatchRenderCommand() const u32 width_and_height = FifoPop(); cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); - - if (cmd->width >= MAX_PRIMITIVE_WIDTH || cmd->height >= MAX_PRIMITIVE_HEIGHT) - { - DEBUG_LOG("Culling too-large rectangle: {},{} {}x{}", cmd->x, cmd->y, cmd->width, cmd->height); - return; - } } break; } diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 5f6cfa9ae..ea41d4991 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -4,7 +4,7 @@ #pragma once #include "common/bitfield.h" #include "common/bitutils.h" -#include "common/rectangle.h" +#include "common/gsvector.h" #include "types.h" #include @@ -189,18 +189,19 @@ union GPUDrawModeReg BitField texture_x_flip; BitField texture_y_flip; - ALWAYS_INLINE u16 GetTexturePageBaseX() const { return ZeroExtend16(texture_page_x_base.GetValue()) * 64; } - 
ALWAYS_INLINE u16 GetTexturePageBaseY() const { return ZeroExtend16(texture_page_y_base.GetValue()) * 256; } + ALWAYS_INLINE u32 GetTexturePageBaseX() const { return ZeroExtend32(texture_page_x_base.GetValue()) * 64; } + ALWAYS_INLINE u32 GetTexturePageBaseY() const { return ZeroExtend32(texture_page_y_base.GetValue()) * 256; } /// Returns true if the texture mode requires a palette. ALWAYS_INLINE bool IsUsingPalette() const { return (bits & (2 << 7)) == 0; } /// Returns a rectangle comprising the texture page area. - ALWAYS_INLINE_RELEASE Common::Rectangle GetTexturePageRectangle() const + ALWAYS_INLINE_RELEASE GSVector4i GetTexturePageRectangle() const { - return Common::Rectangle::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(), - texture_page_widths[static_cast(texture_mode.GetValue())], - TEXTURE_PAGE_HEIGHT); + const u32 base_x = GetTexturePageBaseX(); + const u32 base_y = GetTexturePageBaseY(); + return GSVector4i(base_x, base_y, base_x + texture_page_widths[static_cast(texture_mode.GetValue())], + base_y + TEXTURE_PAGE_HEIGHT); } }; @@ -217,10 +218,12 @@ union GPUTexturePaletteReg ALWAYS_INLINE u32 GetYBase() const { return static_cast(y); } /// Returns a rectangle comprising the texture palette area. 
- ALWAYS_INLINE_RELEASE Common::Rectangle GetRectangle(GPUTextureMode mode) const + ALWAYS_INLINE_RELEASE GSVector4i GetRectangle(GPUTextureMode mode) const { static constexpr std::array palette_widths = {{16, 256, 0, 0}}; - return Common::Rectangle::FromExtents(GetXBase(), GetYBase(), palette_widths[static_cast(mode)], 1); + const u32 base_x = GetXBase(); + const u32 base_y = GetYBase(); + return GSVector4i(base_x, base_y, base_x + palette_widths[static_cast(mode)], base_y + 1); } }; diff --git a/src/core/system.cpp b/src/core/system.cpp index 72e8cfa90..83e828334 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -2727,8 +2727,8 @@ bool System::SaveStateToStream(ByteStream* state, Error* error, u32 screenshot_s u32 screenshot_stride; GPUTexture::Format screenshot_format; if (g_gpu->RenderScreenshotToBuffer(screenshot_width, screenshot_height, - Common::Rectangle::FromExtents(0, 0, screenshot_width, screenshot_height), - false, &screenshot_buffer, &screenshot_stride, &screenshot_format) && + GSVector4i(0, 0, screenshot_width, screenshot_height), false, + &screenshot_buffer, &screenshot_stride, &screenshot_format) && GPUTexture::ConvertTextureDataToRGBA8(screenshot_width, screenshot_height, screenshot_buffer, screenshot_stride, screenshot_format)) { diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index 4e056a667..1fa7e4e2e 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -14,7 +14,6 @@ #include "common/file_system.h" #include "common/log.h" #include "common/path.h" -#include "common/rectangle.h" #include "common/string_util.h" #include "fmt/format.h" @@ -1086,17 +1085,18 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex) } } -void D3D11Device::SetViewport(s32 x, s32 y, s32 width, s32 height) +void D3D11Device::SetViewport(const GSVector4i rc) { - const CD3D11_VIEWPORT vp(static_cast(x), static_cast(y), static_cast(width), - static_cast(height), 0.0f, 1.0f); + const CD3D11_VIEWPORT vp(static_cast(rc.left), 
static_cast(rc.top), static_cast(rc.width()), + static_cast(rc.height()), 0.0f, 1.0f); m_context->RSSetViewports(1, &vp); } -void D3D11Device::SetScissor(s32 x, s32 y, s32 width, s32 height) +void D3D11Device::SetScissor(const GSVector4i rc) { - const CD3D11_RECT rc(x, y, x + width, y + height); - m_context->RSSetScissorRects(1, &rc); + alignas(16) D3D11_RECT drc; + GSVector4i::store(&drc, rc); + m_context->RSSetScissorRects(1, &drc); } void D3D11Device::Draw(u32 vertex_count, u32 base_vertex) diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index babb8ec27..396c8a726 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -92,8 +92,8 @@ public: void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; - void SetViewport(s32 x, s32 y, s32 width, s32 height) override; - void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void SetViewport(const GSVector4i rc) override; + void SetScissor(const GSVector4i rc) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index dfe1eb7be..baa5191b8 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1927,8 +1927,8 @@ void D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist) { const D3D12_VIEWPORT vp = {static_cast(m_current_viewport.left), static_cast(m_current_viewport.top), - static_cast(m_current_viewport.GetWidth()), - static_cast(m_current_viewport.GetHeight()), + static_cast(m_current_viewport.width()), + static_cast(m_current_viewport.height()), 0.0f, 1.0f}; cmdlist->RSSetViewports(1, &vp); @@ -1936,9 +1936,8 @@ void 
D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist) void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist) { - const D3D12_RECT rc = {static_cast(m_current_scissor.left), static_cast(m_current_scissor.top), - static_cast(m_current_scissor.right), static_cast(m_current_scissor.bottom)}; - cmdlist->RSSetScissorRects(1, &rc); + static_assert(sizeof(GSVector4i) == sizeof(D3D12_RECT)); + cmdlist->RSSetScissorRects(1, reinterpret_cast(&m_current_scissor)); } void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) @@ -2027,10 +2026,9 @@ void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf) m_dirty_flags |= DIRTY_FLAG_TEXTURES; } -void D3D12Device::SetViewport(s32 x, s32 y, s32 width, s32 height) +void D3D12Device::SetViewport(const GSVector4i rc) { - const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); - if (m_current_viewport == rc) + if (m_current_viewport.eq(rc)) return; m_current_viewport = rc; @@ -2041,10 +2039,9 @@ void D3D12Device::SetViewport(s32 x, s32 y, s32 width, s32 height) SetViewport(GetCommandList()); } -void D3D12Device::SetScissor(s32 x, s32 y, s32 width, s32 height) +void D3D12Device::SetScissor(const GSVector4i rc) { - const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); - if (m_current_scissor == rc) + if (m_current_scissor.eq(rc)) return; m_current_scissor = rc; diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index f7d64a30a..56a7a7280 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -114,8 +114,8 @@ public: void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; - void SetViewport(s32 x, s32 y, s32 width, s32 height) override; - void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void SetViewport(const GSVector4i rc) override; + void 
SetScissor(const GSVector4i rc) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -344,6 +344,6 @@ private: std::array m_current_textures = {}; std::array m_current_samplers = {}; D3D12TextureBuffer* m_current_texture_buffer = nullptr; - Common::Rectangle m_current_viewport{0, 0, 1, 1}; - Common::Rectangle m_current_scissor{0, 0, 1, 1}; + GSVector4i m_current_viewport = GSVector4i::cxpr(0, 0, 1, 1); + GSVector4i m_current_scissor = {}; }; diff --git a/src/util/d3d_common.cpp b/src/util/d3d_common.cpp index b0e2802c8..50b3bf65b 100644 --- a/src/util/d3d_common.cpp +++ b/src/util/d3d_common.cpp @@ -7,8 +7,8 @@ #include "common/assert.h" #include "common/error.h" #include "common/file_system.h" +#include "common/gsvector.h" #include "common/log.h" -#include "common/rectangle.h" #include "common/string_util.h" #include "fmt/format.h" @@ -179,7 +179,7 @@ bool D3DCommon::GetRequestedExclusiveFullscreenModeDesc(IDXGIFactory5* factory, DXGI_MODE_DESC* fullscreen_mode, IDXGIOutput** output) { // We need to find which monitor the window is located on. - const Common::Rectangle client_rc_vec(window_rect.left, window_rect.top, window_rect.right, window_rect.bottom); + const GSVector4i client_rc_vec(window_rect.left, window_rect.top, window_rect.right, window_rect.bottom); // The window might be on a different adapter to which we are rendering.. so we have to enumerate them all. 
HRESULT hr; @@ -204,10 +204,9 @@ bool D3DCommon::GetRequestedExclusiveFullscreenModeDesc(IDXGIFactory5* factory, else if (FAILED(hr) || FAILED(this_output->GetDesc(&output_desc))) continue; - const Common::Rectangle output_rc(output_desc.DesktopCoordinates.left, output_desc.DesktopCoordinates.top, - output_desc.DesktopCoordinates.right, - output_desc.DesktopCoordinates.bottom); - if (!client_rc_vec.Intersects(output_rc)) + const GSVector4i output_rc(output_desc.DesktopCoordinates.left, output_desc.DesktopCoordinates.top, + output_desc.DesktopCoordinates.right, output_desc.DesktopCoordinates.bottom); + if (!client_rc_vec.rintersects(output_rc)) { intersecting_output = std::move(this_output); break; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index d2312c242..d49aad029 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -643,10 +643,25 @@ void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds, GPUPipeline::Ren SetRenderTargets(rt ? &rt : nullptr, rt ? 
1 : 0, ds, render_pass_flags); } +void GPUDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + SetViewport(GSVector4i(x, y, x + width, y + height)); +} + +void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) +{ + SetScissor(GSVector4i(x, y, x + width, y + height)); +} + void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) { - SetViewport(x, y, width, height); - SetScissor(x, y, width, height); + SetViewportAndScissor(GSVector4i(x, y, x + width, y + height)); +} + +void GPUDevice::SetViewportAndScissor(const GSVector4i rc) +{ + SetViewport(rc); + SetScissor(rc); } void GPUDevice::ClearRenderTarget(GPUTexture* t, u32 c) @@ -818,11 +833,13 @@ bool GPUDevice::UsesLowerLeftOrigin() const return (api == RenderAPI::OpenGL || api == RenderAPI::OpenGLES); } -Common::Rectangle GPUDevice::FlipToLowerLeft(const Common::Rectangle& rc, s32 target_height) +GSVector4i GPUDevice::FlipToLowerLeft(GSVector4i rc, s32 target_height) { - const s32 height = rc.GetHeight(); + const s32 height = rc.height(); const s32 flipped_y = target_height - rc.top - height; - return Common::Rectangle(rc.left, flipped_y, rc.right, flipped_y + height); + rc.top = flipped_y; + rc.bottom = flipped_y + height; + return rc; } bool GPUDevice::IsTexturePoolType(GPUTexture::Type type) diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index b3e5ffc51..5e4e94204 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -8,8 +8,8 @@ #include "window_info.h" #include "common/bitfield.h" +#include "common/gsvector.h" #include "common/heap_array.h" -#include "common/rectangle.h" #include "common/small_string.h" #include "common/types.h" @@ -676,11 +676,14 @@ public: virtual void SetPipeline(GPUPipeline* pipeline) = 0; virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0; virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0; - virtual void SetViewport(s32 x, s32 y, s32 width, s32 height) = 0; // TODO: 
Rectangle - virtual void SetScissor(s32 x, s32 y, s32 width, s32 height) = 0; + virtual void SetViewport(const GSVector4i rc) = 0; + virtual void SetScissor(const GSVector4i rc) = 0; void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr, GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags); + void SetViewport(s32 x, s32 y, s32 width, s32 height); + void SetScissor(s32 x, s32 y, s32 width, s32 height); void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height); + void SetViewportAndScissor(const GSVector4i rc); // Drawing abstraction. virtual void Draw(u32 vertex_count, u32 base_vertex) = 0; @@ -704,7 +707,7 @@ public: bool UpdateImGuiFontTexture(); bool UsesLowerLeftOrigin() const; - static Common::Rectangle FlipToLowerLeft(const Common::Rectangle& rc, s32 target_height); + static GSVector4i FlipToLowerLeft(GSVector4i rc, s32 target_height); bool ResizeTexture(std::unique_ptr* tex, u32 new_width, u32 new_height, GPUTexture::Type type, GPUTexture::Format format, bool preserve = true); bool ShouldSkipPresentingFrame(); diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index 0e018fa94..d9f29133b 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -1,8 +1,9 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once +#include "common/gsvector.h" #include "common/types.h" #include @@ -103,6 +104,10 @@ public: ALWAYS_INLINE u32 GetSamples() const { return m_samples; } ALWAYS_INLINE Type GetType() const { return m_type; } ALWAYS_INLINE Format GetFormat() const { return m_format; } + ALWAYS_INLINE GSVector4i GetRect() const + { + return GSVector4i(0, 0, static_cast(m_width), static_cast(m_height)); + } ALWAYS_INLINE bool IsTextureArray() const { return m_layers > 1; } ALWAYS_INLINE bool IsMultisampled() const { return m_samples > 1; } diff --git a/src/util/metal_device.h 
b/src/util/metal_device.h index 0ef9d0f59..1885b7f70 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -20,7 +20,6 @@ #include "metal_stream_buffer.h" #include "window_info.h" -#include "common/rectangle.h" #include "common/timer.h" #include @@ -255,8 +254,8 @@ public: void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; - void SetViewport(s32 x, s32 y, s32 width, s32 height) override; - void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void SetViewport(const GSVector4i rc) override; + void SetScissor(const GSVector4i rc) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -404,8 +403,8 @@ private: std::array, MAX_TEXTURE_SAMPLERS> m_current_textures = {}; std::array, MAX_TEXTURE_SAMPLERS> m_current_samplers = {}; id m_current_ssbo = nil; - Common::Rectangle m_current_viewport = {}; - Common::Rectangle m_current_scissor = {}; + GSVector4i m_current_viewport = {}; + GSVector4i m_current_scissor = {}; bool m_vsync_enabled = false; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index bff198081..ee2ccf5ce 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -766,14 +766,12 @@ bool OpenGLDevice::BeginPresent(bool skip_present) m_last_blend_state.write_a); glEnable(GL_SCISSOR_TEST); - const Common::Rectangle window_rc = - Common::Rectangle::FromExtents(0, 0, m_window_info.surface_width, m_window_info.surface_height); - m_current_fbo = 0; m_num_current_render_targets = 0; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_current_depth_target = nullptr; + const GSVector4i window_rc = GSVector4i(0, 0, 
m_window_info.surface_width, m_window_info.surface_height); m_last_viewport = window_rc; m_last_scissor = window_rc; UpdateViewport(); @@ -1220,20 +1218,18 @@ void OpenGLDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) } } -void OpenGLDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) +void OpenGLDevice::SetViewport(const GSVector4i rc) { - const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); - if (m_last_viewport == rc) + if (m_last_viewport.eq(rc)) return; m_last_viewport = rc; UpdateViewport(); } -void OpenGLDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) +void OpenGLDevice::SetScissor(const GSVector4i rc) { - const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); - if (m_last_scissor == rc) + if (m_last_scissor.eq(rc)) return; m_last_scissor = rc; @@ -1242,10 +1238,10 @@ void OpenGLDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) void OpenGLDevice::UpdateViewport() { - glViewport(m_last_viewport.left, m_last_viewport.top, m_last_viewport.GetWidth(), m_last_viewport.GetHeight()); + glViewport(m_last_viewport.left, m_last_viewport.top, m_last_viewport.width(), m_last_viewport.height()); } void OpenGLDevice::UpdateScissor() { - glScissor(m_last_scissor.left, m_last_scissor.top, m_last_scissor.GetWidth(), m_last_scissor.GetHeight()); + glScissor(m_last_scissor.left, m_last_scissor.top, m_last_scissor.width(), m_last_scissor.height()); } diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index e257f0e8b..63e6f195e 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -11,8 +11,6 @@ #include "opengl_pipeline.h" #include "opengl_texture.h" -#include "common/rectangle.h" - #include #include #include @@ -96,8 +94,8 @@ public: void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; - void SetViewport(s32 x, 
s32 y, s32 width, s32 height) override; - void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void SetViewport(const GSVector4i rc) override; + void SetScissor(const GSVector4i rc) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -203,8 +201,8 @@ private: u32 m_last_texture_unit = 0; std::array, MAX_TEXTURE_SAMPLERS> m_last_samplers = {}; GLuint m_last_ssbo = 0; - Common::Rectangle m_last_viewport{0, 0, 1, 1}; - Common::Rectangle m_last_scissor{0, 0, 1, 1}; + GSVector4i m_last_viewport = {}; + GSVector4i m_last_scissor = GSVector4i::cxpr(0, 0, 1, 1); // Misc framebuffers GLuint m_read_fbo = 0; diff --git a/src/util/postprocessing.cpp b/src/util/postprocessing.cpp index af2b1ea81..009b2b28f 100644 --- a/src/util/postprocessing.cpp +++ b/src/util/postprocessing.cpp @@ -620,8 +620,8 @@ void PostProcessing::Chain::DestroyTextures() } bool PostProcessing::Chain::Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, - s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, - s32 orig_height, s32 native_width, s32 native_height) + GSVector4i final_rect, s32 orig_width, s32 orig_height, s32 native_width, + s32 native_height) { GL_SCOPE_FMT("{} Apply", m_section); @@ -634,9 +634,8 @@ bool PostProcessing::Chain::Apply(GPUTexture* input_color, GPUTexture* input_dep { const bool is_final = (stage.get() == m_stages.back().get()); - if (!stage->Apply(input_color, input_depth, is_final ? final_target : output, final_left, final_top, final_width, - final_height, orig_width, orig_height, native_width, native_height, m_target_width, - m_target_height)) + if (!stage->Apply(input_color, input_depth, is_final ? 
final_target : output, final_rect, orig_width, orig_height, + native_width, native_height, m_target_width, m_target_height)) { return false; } diff --git a/src/util/postprocessing.h b/src/util/postprocessing.h index af4f0bb9d..1ab2f5c53 100644 --- a/src/util/postprocessing.h +++ b/src/util/postprocessing.h @@ -134,8 +134,8 @@ public: bool CheckTargets(GPUTexture::Format target_format, u32 target_width, u32 target_height, ProgressCallback* progress = nullptr); - bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height); + bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, const GSVector4i final_rect, + s32 orig_width, s32 orig_height, s32 native_width, s32 native_height); private: void ClearStagesWithError(const Error& error); diff --git a/src/util/postprocessing_shader.h b/src/util/postprocessing_shader.h index 3930d4629..5be75e6a0 100644 --- a/src/util/postprocessing_shader.h +++ b/src/util/postprocessing_shader.h @@ -7,7 +7,7 @@ #include "gpu_texture.h" -#include "common/rectangle.h" +#include "common/gsvector.h" #include "common/settings_interface.h" #include "common/timer.h" #include "common/types.h" @@ -49,9 +49,9 @@ public: virtual bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height, ProgressCallback* progress) = 0; - virtual bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, - s32 final_top, s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, - s32 native_width, s32 native_height, u32 target_width, u32 target_height) = 0; + virtual bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, GSVector4i final_rect, + s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, u32 target_width, + u32 target_height) = 0; protected: static 
void ParseKeyValue(std::string_view line, std::string_view* key, std::string_view* value); diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index 28def57d8..fc7659c69 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -1490,16 +1490,15 @@ bool PostProcessing::ReShadeFXShader::ResizeOutput(GPUTexture::Format format, u3 } bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, - s32 final_left, s32 final_top, s32 final_width, s32 final_height, - s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, - u32 target_width, u32 target_height) + GSVector4i final_rect, s32 orig_width, s32 orig_height, s32 native_width, + s32 native_height, u32 target_width, u32 target_height) { GL_PUSH_FMT("PostProcessingShaderFX {}", m_name); m_frame_count++; // Reshade always draws at full size. - g_gpu_device->SetViewportAndScissor(0, 0, target_width, target_height); + g_gpu_device->SetViewportAndScissor(final_rect); if (m_uniforms_size > 0) { @@ -1675,84 +1674,85 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input_color, GPUTexture* case SourceOptionType::ViewportX: { - const float value = static_cast(final_left); + const float value = static_cast(final_rect.left); std::memcpy(dst, &value, sizeof(value)); } break; case SourceOptionType::ViewportY: { - const float value = static_cast(final_top); + const float value = static_cast(final_rect.top); std::memcpy(dst, &value, sizeof(value)); } break; case SourceOptionType::ViewportWidth: { - const float value = static_cast(final_width); + const float value = static_cast(final_rect.width()); std::memcpy(dst, &value, sizeof(value)); } break; case SourceOptionType::ViewportHeight: { - const float value = static_cast(final_height); + const float value = static_cast(final_rect.height()); std::memcpy(dst, &value, sizeof(value)); } break; case 
SourceOptionType::ViewportOffset: { - const float value[2] = {static_cast(final_left), static_cast(final_top)}; - std::memcpy(dst, &value, sizeof(value)); + GSVector4::storel(dst, GSVector4(final_rect)); } break; case SourceOptionType::ViewportSize: { - const float value[2] = {static_cast(final_width), static_cast(final_height)}; + const float value[2] = {static_cast(final_rect.width()), static_cast(final_rect.height())}; std::memcpy(dst, &value, sizeof(value)); } break; case SourceOptionType::InternalPixelSize: { - const float value[2] = {static_cast(final_width) / static_cast(orig_width), - static_cast(final_height) / static_cast(orig_height)}; + const float value[2] = {static_cast(final_rect.width()) / static_cast(orig_width), + static_cast(final_rect.height()) / static_cast(orig_height)}; std::memcpy(dst, value, sizeof(value)); } break; case SourceOptionType::InternalNormPixelSize: { - const float value[2] = { - (static_cast(final_width) / static_cast(orig_width)) / static_cast(target_width), - (static_cast(final_height) / static_cast(orig_height)) / static_cast(target_height)}; + const float value[2] = {(static_cast(final_rect.width()) / static_cast(orig_width)) / + static_cast(target_width), + (static_cast(final_rect.height()) / static_cast(orig_height)) / + static_cast(target_height)}; std::memcpy(dst, value, sizeof(value)); } break; case SourceOptionType::NativePixelSize: { - const float value[2] = {static_cast(final_width) / static_cast(native_width), - static_cast(final_height) / static_cast(native_height)}; + const float value[2] = {static_cast(final_rect.width()) / static_cast(native_width), + static_cast(final_rect.height()) / static_cast(native_height)}; std::memcpy(dst, value, sizeof(value)); } break; case SourceOptionType::NativeNormPixelSize: { - const float value[2] = { - (static_cast(final_width) / static_cast(native_width)) / static_cast(target_width), - (static_cast(final_height) / static_cast(native_height)) / static_cast(target_height)}; + 
const float value[2] = {(static_cast(final_rect.width()) / static_cast(native_width)) / + static_cast(target_width), + (static_cast(final_rect.height()) / static_cast(native_height)) / + static_cast(target_height)}; std::memcpy(dst, value, sizeof(value)); } break; case SourceOptionType::BufferToViewportRatio: { - const float value[2] = {static_cast(target_width) / static_cast(final_width), - static_cast(target_height) / static_cast(final_height)}; + const float value[2] = {static_cast(target_width) / static_cast(final_rect.width()), + static_cast(target_height) / static_cast(final_rect.height())}; std::memcpy(dst, value, sizeof(value)); } break; diff --git a/src/util/postprocessing_shader_fx.h b/src/util/postprocessing_shader_fx.h index 0ff054b75..259352a50 100644 --- a/src/util/postprocessing_shader_fx.h +++ b/src/util/postprocessing_shader_fx.h @@ -31,9 +31,9 @@ public: bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override; bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height, ProgressCallback* progress) override; - bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, - u32 target_width, u32 target_height) override; + bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, GSVector4i final_rect, + s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, u32 target_width, + u32 target_height) override; private: using TextureID = s32; diff --git a/src/util/postprocessing_shader_glsl.cpp b/src/util/postprocessing_shader_glsl.cpp index 0861d3490..d2eb321c1 100644 --- a/src/util/postprocessing_shader_glsl.cpp +++ b/src/util/postprocessing_shader_glsl.cpp @@ -168,9 +168,8 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32 } bool PostProcessing::GLSLShader::Apply(GPUTexture* input_color, 
GPUTexture* input_depth, GPUTexture* final_target, - s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, - s32 orig_height, s32 native_width, s32 native_height, u32 target_width, - u32 target_height) + GSVector4i final_rect, s32 orig_width, s32 orig_height, s32 native_width, + s32 native_height, u32 target_width, u32 target_height) { GL_SCOPE_FMT("GLSL Shader {}", m_name); @@ -188,12 +187,12 @@ bool PostProcessing::GLSLShader::Apply(GPUTexture* input_color, GPUTexture* inpu g_gpu_device->SetPipeline(m_pipeline.get()); g_gpu_device->SetTextureSampler(0, input_color, m_sampler.get()); - g_gpu_device->SetViewportAndScissor(final_left, final_top, final_width, final_height); + g_gpu_device->SetViewportAndScissor(final_rect); const u32 uniforms_size = GetUniformsSize(); void* uniforms = g_gpu_device->MapUniformBuffer(uniforms_size); - FillUniformBuffer(uniforms, final_left, final_top, final_width, final_height, target_width, target_height, orig_width, - orig_height, native_width, native_height, + FillUniformBuffer(uniforms, final_rect.left, final_rect.top, final_rect.width(), final_rect.height(), target_width, + target_height, orig_width, orig_height, native_width, native_height, static_cast(PostProcessing::GetTimer().GetTimeSeconds())); g_gpu_device->UnmapUniformBuffer(uniforms_size); g_gpu_device->Draw(3, 0); diff --git a/src/util/postprocessing_shader_glsl.h b/src/util/postprocessing_shader_glsl.h index 108015c7a..1dd5f4126 100644 --- a/src/util/postprocessing_shader_glsl.h +++ b/src/util/postprocessing_shader_glsl.h @@ -24,9 +24,9 @@ public: bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override; bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height, ProgressCallback* progress) override; - bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 
native_height, - u32 target_width, u32 target_height) override; + bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, GSVector4i final_rect, + s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, u32 target_width, + u32 target_height) override; private: struct CommonUniforms diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 0bdb7da64..f6434672f 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -3611,15 +3611,14 @@ void VulkanDevice::SetInitialPipelineState() const VkViewport vp = {static_cast(m_current_viewport.left), static_cast(m_current_viewport.top), - static_cast(m_current_viewport.GetWidth()), - static_cast(m_current_viewport.GetHeight()), + static_cast(m_current_viewport.width()), + static_cast(m_current_viewport.height()), 0.0f, 1.0f}; vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp); - const VkRect2D vrc = { - {m_current_scissor.left, m_current_scissor.top}, - {static_cast(m_current_scissor.GetWidth()), static_cast(m_current_scissor.GetHeight())}}; + const VkRect2D vrc = {{m_current_scissor.left, m_current_scissor.top}, + {static_cast(m_current_scissor.width()), static_cast(m_current_scissor.height())}}; vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc); } @@ -3706,10 +3705,9 @@ void VulkanDevice::UnbindTextureBuffer(VulkanTextureBuffer* buf) m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; } -void VulkanDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) +void VulkanDevice::SetViewport(const GSVector4i rc) { - const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); - if (m_current_viewport == rc) + if (m_current_viewport.eq(rc)) return; m_current_viewport = rc; @@ -3717,15 +3715,18 @@ void VulkanDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) if (m_dirty_flags & DIRTY_FLAG_INITIAL) return; - const VkViewport vp = { - static_cast(x), static_cast(y), static_cast(width), static_cast(height), 0.0f, 1.0f}; + 
const VkViewport vp = {static_cast(rc.x), + static_cast(rc.y), + static_cast(rc.width()), + static_cast(rc.height()), + 0.0f, + 1.0f}; vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp); } -void VulkanDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) +void VulkanDevice::SetScissor(const GSVector4i rc) { - const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); - if (m_current_scissor == rc) + if (m_current_scissor.eq(rc)) return; m_current_scissor = rc; @@ -3733,7 +3734,7 @@ void VulkanDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) if (m_dirty_flags & DIRTY_FLAG_INITIAL) return; - const VkRect2D vrc = {{x, y}, {static_cast(width), static_cast(height)}}; + const VkRect2D vrc = {{rc.x, rc.y}, {static_cast(rc.width()), static_cast(rc.height())}}; vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc); } diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index e5cdfb475..cdf5ea819 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -122,8 +122,8 @@ public: void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; - void SetViewport(s32 x, s32 y, s32 width, s32 height) override; - void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void SetViewport(const GSVector4i rc) override; + void SetScissor(const GSVector4i rc) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -477,6 +477,6 @@ private: std::array m_current_textures = {}; std::array m_current_samplers = {}; VulkanTextureBuffer* m_current_texture_buffer = nullptr; - Common::Rectangle m_current_viewport{0, 0, 1, 1}; - Common::Rectangle m_current_scissor{0, 0, 1, 1}; + GSVector4i 
m_current_viewport = {}; + GSVector4i m_current_scissor = GSVector4i::cxpr(0, 0, 1, 1); };