GPU/SW: Fix 256-bit AVX2 path

This commit is contained in:
Stenzek 2024-11-17 16:09:03 +10:00
parent 122fe3d52f
commit 6c1b98e46a
No known key found for this signature in database
3 changed files with 10 additions and 17 deletions

View File

@ -2541,12 +2541,12 @@ public:
ALWAYS_INLINE operator __m256i() const { return m; } ALWAYS_INLINE operator __m256i() const { return m; }
ALWAYS_INLINE GSVector8i min_i8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi8(m, v)); } ALWAYS_INLINE GSVector8i min_s8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi8(m, v)); }
ALWAYS_INLINE GSVector8i max_i8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi8(m, v)); } ALWAYS_INLINE GSVector8i max_s8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi8(m, v)); }
ALWAYS_INLINE GSVector8i min_i16(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi16(m, v)); } ALWAYS_INLINE GSVector8i min_s16(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi16(m, v)); }
ALWAYS_INLINE GSVector8i max_i16(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi16(m, v)); } ALWAYS_INLINE GSVector8i max_s16(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi16(m, v)); }
ALWAYS_INLINE GSVector8i min_i32(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi32(m, v)); } ALWAYS_INLINE GSVector8i min_s32(const GSVector8i& v) const { return GSVector8i(_mm256_min_epi32(m, v)); }
ALWAYS_INLINE GSVector8i max_i32(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi32(m, v)); } ALWAYS_INLINE GSVector8i max_s32(const GSVector8i& v) const { return GSVector8i(_mm256_max_epi32(m, v)); }
ALWAYS_INLINE GSVector8i min_u8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epu8(m, v)); } ALWAYS_INLINE GSVector8i min_u8(const GSVector8i& v) const { return GSVector8i(_mm256_min_epu8(m, v)); }
ALWAYS_INLINE GSVector8i max_u8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epu8(m, v)); } ALWAYS_INLINE GSVector8i max_u8(const GSVector8i& v) const { return GSVector8i(_mm256_max_epu8(m, v)); }
@ -2849,7 +2849,7 @@ public:
return _mm256_extract_epi64(m, i); return _mm256_extract_epi64(m, i);
} }
ALWAYS_INLINE static GSVector8i zext32(s32 v) { return GSVector8i(_mm256_castsi128_si256(GSVector4i::zext32(v))); } ALWAYS_INLINE static GSVector8i zext32(s32 v) { return GSVector8i(_mm256_zextsi128_si256(GSVector4i::zext32(v))); }
ALWAYS_INLINE static GSVector8i loadnt(const void* p) ALWAYS_INLINE static GSVector8i loadnt(const void* p)
{ {

View File

@ -12,13 +12,6 @@
LOG_CHANNEL(GPU_SW_Rasterizer); LOG_CHANNEL(GPU_SW_Rasterizer);
// Disable 256-bit. We emit that path in a separate file.
// TODO: For those who are compiling with -march=native, probably only want to compile the 256-bit renderer.
// Once it's done, anyway....
#ifdef GSVECTOR_HAS_256
#undef GSVECTOR_HAS_256
#endif
namespace GPU_SW_Rasterizer { namespace GPU_SW_Rasterizer {
constinit const DitherLUT g_dither_lut = []() constexpr { constinit const DitherLUT g_dither_lut = []() constexpr {
DitherLUT lut = {}; DitherLUT lut = {};

View File

@ -1143,9 +1143,9 @@ struct TriangleVectors : PixelVectors<texture_enable>
} // namespace } // namespace
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable> template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound,
const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep, UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep,
const TriangleVectors<shading_enable, texture_enable>& tv) const TriangleVectors<shading_enable, texture_enable>& tv)
{ {
s32 width = x_bound - x_start; s32 width = x_bound - x_start;
s32 current_x = TruncateGPUVertexPosition(x_start); s32 current_x = TruncateGPUVertexPosition(x_start);