GPU/SW: Fix 256-bit AVX2 path
This commit is contained in:
parent
122fe3d52f
commit
6c1b98e46a
|
@ -2541,12 +2541,12 @@ public:
|
||||||
|
|
||||||
// Implicit conversion to the raw __m256i value: hands back the wrapped member `m`.
ALWAYS_INLINE operator __m256i() const
{
  return m;
}
|
// Lets a GSVector8i be passed directly where an __m256i is expected; returns member `m`.
ALWAYS_INLINE operator __m256i() const
{
  return m;
}
|
||||||
|
|
||||||
// Element-wise minimum of packed signed 8-bit integers (wraps _mm256_min_epi8).
// `v` is converted to __m256i implicitly when passed to the intrinsic.
ALWAYS_INLINE GSVector8i min_i8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epi8(m, v));
}
|
// Element-wise minimum of packed signed 8-bit integers (wraps _mm256_min_epi8).
// `v` is converted to __m256i implicitly when passed to the intrinsic.
ALWAYS_INLINE GSVector8i min_s8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epi8(m, v));
}
|
||||||
// Element-wise maximum of packed signed 8-bit integers (wraps _mm256_max_epi8).
ALWAYS_INLINE GSVector8i max_i8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epi8(m, v));
}
|
// Element-wise maximum of packed signed 8-bit integers (wraps _mm256_max_epi8).
ALWAYS_INLINE GSVector8i max_s8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epi8(m, v));
}
|
||||||
// Element-wise minimum of packed signed 16-bit integers (wraps _mm256_min_epi16).
ALWAYS_INLINE GSVector8i min_i16(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epi16(m, v));
}
|
// Element-wise minimum of packed signed 16-bit integers (wraps _mm256_min_epi16).
ALWAYS_INLINE GSVector8i min_s16(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epi16(m, v));
}
|
||||||
// Element-wise maximum of packed signed 16-bit integers (wraps _mm256_max_epi16).
ALWAYS_INLINE GSVector8i max_i16(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epi16(m, v));
}
|
// Element-wise maximum of packed signed 16-bit integers (wraps _mm256_max_epi16).
ALWAYS_INLINE GSVector8i max_s16(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epi16(m, v));
}
|
||||||
// Element-wise minimum of packed signed 32-bit integers (wraps _mm256_min_epi32).
ALWAYS_INLINE GSVector8i min_i32(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epi32(m, v));
}
|
// Element-wise minimum of packed signed 32-bit integers (wraps _mm256_min_epi32).
ALWAYS_INLINE GSVector8i min_s32(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epi32(m, v));
}
|
||||||
// Element-wise maximum of packed signed 32-bit integers (wraps _mm256_max_epi32).
ALWAYS_INLINE GSVector8i max_i32(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epi32(m, v));
}
|
// Element-wise maximum of packed signed 32-bit integers (wraps _mm256_max_epi32).
ALWAYS_INLINE GSVector8i max_s32(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epi32(m, v));
}
|
||||||
|
|
||||||
// Element-wise minimum of packed unsigned 8-bit integers (wraps _mm256_min_epu8).
ALWAYS_INLINE GSVector8i min_u8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epu8(m, v));
}
|
// Element-wise minimum of packed unsigned 8-bit integers (wraps _mm256_min_epu8).
ALWAYS_INLINE GSVector8i min_u8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_min_epu8(m, v));
}
|
||||||
// Element-wise maximum of packed unsigned 8-bit integers (wraps _mm256_max_epu8).
ALWAYS_INLINE GSVector8i max_u8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epu8(m, v));
}
|
// Element-wise maximum of packed unsigned 8-bit integers (wraps _mm256_max_epu8).
ALWAYS_INLINE GSVector8i max_u8(const GSVector8i& v) const
{
  return GSVector8i(_mm256_max_epu8(m, v));
}
|
||||||
|
@ -2849,7 +2849,7 @@ public:
|
||||||
return _mm256_extract_epi64(m, i);
|
return _mm256_extract_epi64(m, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Widens the 128-bit result of GSVector4i::zext32(v) to 256 bits with the upper half zeroed.
// _mm256_zextsi128_si256 is used rather than _mm256_castsi128_si256 because the cast
// intrinsic leaves the upper 128 bits undefined, which would break the "zero-extend"
// contract implied by this function's name.
ALWAYS_INLINE static GSVector8i zext32(s32 v)
{
  return GSVector8i(_mm256_zextsi128_si256(GSVector4i::zext32(v)));
}
|
// Builds a 256-bit vector from GSVector4i::zext32(v): that 128-bit value becomes the low
// half and _mm256_zextsi128_si256 zero-fills the upper 128 bits.
ALWAYS_INLINE static GSVector8i zext32(s32 v)
{
  return GSVector8i(_mm256_zextsi128_si256(GSVector4i::zext32(v)));
}
|
||||||
|
|
||||||
ALWAYS_INLINE static GSVector8i loadnt(const void* p)
|
ALWAYS_INLINE static GSVector8i loadnt(const void* p)
|
||||||
{
|
{
|
||||||
|
|
|
@ -12,13 +12,6 @@
|
||||||
|
|
||||||
LOG_CHANNEL(GPU_SW_Rasterizer);
|
LOG_CHANNEL(GPU_SW_Rasterizer);
|
||||||
|
|
||||||
// Disable 256-bit. We emit that path in a separate file.
|
|
||||||
// TODO: Those compiling with -march=native probably only want the 256-bit renderer compiled.
|
|
||||||
// Once it's done, anyway....
|
|
||||||
#ifdef GSVECTOR_HAS_256
|
|
||||||
#undef GSVECTOR_HAS_256
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace GPU_SW_Rasterizer {
|
namespace GPU_SW_Rasterizer {
|
||||||
constinit const DitherLUT g_dither_lut = []() constexpr {
|
constinit const DitherLUT g_dither_lut = []() constexpr {
|
||||||
DitherLUT lut = {};
|
DitherLUT lut = {};
|
||||||
|
|
|
@ -1143,9 +1143,9 @@ struct TriangleVectors : PixelVectors<texture_enable>
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
|
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
|
||||||
static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv,
|
ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound,
|
||||||
const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep,
|
UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep,
|
||||||
const TriangleVectors<shading_enable, texture_enable>& tv)
|
const TriangleVectors<shading_enable, texture_enable>& tv)
|
||||||
{
|
{
|
||||||
s32 width = x_bound - x_start;
|
s32 width = x_bound - x_start;
|
||||||
s32 current_x = TruncateGPUVertexPosition(x_start);
|
s32 current_x = TruncateGPUVertexPosition(x_start);
|
||||||
|
|
Loading…
Reference in New Issue