From 6c1b98e46a2bbb2be57711363714de7ee852c1f5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 17 Nov 2024 16:09:03 +1000 Subject: [PATCH] GPU/SW: Fix 256-bit AVX2 path --- src/common/gsvector_sse.h | 14 +++++++------- src/core/gpu_sw_rasterizer.cpp | 7 ------- src/core/gpu_sw_rasterizer.inl | 6 +++--- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/common/gsvector_sse.h b/src/common/gsvector_sse.h index ae203d07d..86517ed56 100644 --- a/src/common/gsvector_sse.h +++ b/src/common/gsvector_sse.h @@ -2541,12 +2541,12 @@ public: ALWAYS_INLINE operator __m256i() const { return m; } - ALWAYS_INLINE GSVector8i min_i8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi8(m, v)); } - ALWAYS_INLINE GSVector8i max_i8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi8(m, v)); } - ALWAYS_INLINE GSVector8i min_i16(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi16(m, v)); } - ALWAYS_INLINE GSVector8i max_i16(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi16(m, v)); } - ALWAYS_INLINE GSVector8i min_i32(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi32(m, v)); } - ALWAYS_INLINE GSVector8i max_i32(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi32(m, v)); } + ALWAYS_INLINE GSVector8i min_s8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi8(m, v)); } + ALWAYS_INLINE GSVector8i max_s8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi8(m, v)); } + ALWAYS_INLINE GSVector8i min_s16(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi16(m, v)); } + ALWAYS_INLINE GSVector8i max_s16(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi16(m, v)); } + ALWAYS_INLINE GSVector8i min_s32(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi32(m, v)); } + ALWAYS_INLINE GSVector8i max_s32(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi32(m, v)); } ALWAYS_INLINE GSVector8i min_u8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epu8(m, v)); } ALWAYS_INLINE GSVector8i max_u8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epu8(m, v)); } @@ -2849,7 +2849,7 @@ public: return _mm256_extract_epi64(m, i); } - ALWAYS_INLINE static GSVector8i zext32(s32 v) { return GSVector8i(_mm256_castsi128_si256(GSVector4i::zext32(v))); } + ALWAYS_INLINE static GSVector8i zext32(s32 v) { return GSVector8i(_mm256_zextsi128_si256(GSVector4i::zext32(v))); } ALWAYS_INLINE static GSVector8i loadnt(const void* p) { diff --git a/src/core/gpu_sw_rasterizer.cpp b/src/core/gpu_sw_rasterizer.cpp index f2648dcd0..76cfc99db 100644 --- a/src/core/gpu_sw_rasterizer.cpp +++ b/src/core/gpu_sw_rasterizer.cpp @@ -12,13 +12,6 @@ LOG_CHANNEL(GPU_SW_Rasterizer); -// Disable 256-bit. We emit that path in a separate file. -// TODO: For those who are compiling with -march=native, probably only want to compile the 256-bit renderer. -// Once it's done, anyway.... -#ifdef GSVECTOR_HAS_256 -#undef GSVECTOR_HAS_256 -#endif - namespace GPU_SW_Rasterizer { constinit const DitherLUT g_dither_lut = []() constexpr { DitherLUT lut = {}; diff --git a/src/core/gpu_sw_rasterizer.inl b/src/core/gpu_sw_rasterizer.inl index 0a1ed95e3..128f3af9c 100644 --- a/src/core/gpu_sw_rasterizer.inl +++ b/src/core/gpu_sw_rasterizer.inl @@ -1143,9 +1143,9 @@ struct TriangleVectors : PixelVectors } // namespace template -static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, - const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep, - const TriangleVectors& tv) +ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, + UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep, + const TriangleVectors& tv) { s32 width = x_bound - x_start; s32 current_x = TruncateGPUVertexPosition(x_start);