fix SpanSplit for .net8

removed the NETCOREAPP3_0 code because it didn't even compile; lots of internals missing and we probably wouldn't need the 1% speedup anyway (the function using it is unused currently too)
This commit is contained in:
Morilli 2025-02-14 18:28:42 +01:00
parent 24641433e4
commit 426c743da9
1 changed files with 1 additions and 125 deletions

View File

@ -5,16 +5,13 @@
* and https://github.com/dotnet/runtime/blob/v9.0.0/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/ValueListBuilder.cs
*/
#if !NET8_0_OR_GREATER
#if !NET9_0_OR_GREATER
#pragma warning disable RS0030 // `Debug.Assert` w/o message, breaks BizHawk convention
#pragma warning disable SA1514 // "Element documentation header should be preceded by blank line"
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
#if NETCOREAPP3_0_OR_GREATER
using System.Runtime.Intrinsics;
#endif
namespace BizHawk.Common
{
@ -534,13 +531,6 @@ namespace BizHawk.Common
sep0 = separators[0];
sep1 = separators.Length > 1 ? separators[1] : sep0;
sep2 = separators.Length > 2 ? separators[2] : sep1;
#if NETCOREAPP3_0_OR_GREATER
if (Vector128.IsHardwareAccelerated && source.Length >= Vector128<ushort>.Count * 2)
{
MakeSeparatorListVectorized(source, ref sepListBuilder, sep0, sep1, sep2);
return;
}
#endif
for (int i = 0; i < source.Length; i++)
{
@ -583,120 +573,6 @@ namespace BizHawk.Common
}
}
#if NETCOREAPP3_0_OR_GREATER
private static void MakeSeparatorListVectorized(ReadOnlySpan<char> sourceSpan, ref ValueListBuilder<int> sepListBuilder, char c, char c2, char c3)
{
// Redundant test so we won't prejit remainder of this method
// on platforms where it is not supported
if (!Vector128.IsHardwareAccelerated)
{
throw new PlatformNotSupportedException();
}
Debug.Assert(sourceSpan.Length >= Vector128<ushort>.Count);
nuint lengthToExamine = (uint)sourceSpan.Length;
nuint offset = 0;
ref char source = ref MemoryMarshal.GetReference(sourceSpan);
if (Vector512.IsHardwareAccelerated && lengthToExamine >= (uint)Vector512<ushort>.Count*2)
{
Vector512<ushort> v1 = Vector512.Create((ushort)c);
Vector512<ushort> v2 = Vector512.Create((ushort)c2);
Vector512<ushort> v3 = Vector512.Create((ushort)c3);
do
{
Vector512<ushort> vector = Vector512.LoadUnsafe(ref source, offset);
Vector512<ushort> v1Eq = Vector512.Equals(vector, v1);
Vector512<ushort> v2Eq = Vector512.Equals(vector, v2);
Vector512<ushort> v3Eq = Vector512.Equals(vector, v3);
Vector512<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
if (cmp != Vector512<byte>.Zero)
{
// Skip every other bit
ulong mask = cmp.ExtractMostSignificantBits() & 0x5555555555555555;
do
{
uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
sepListBuilder.Append((int)(offset + bitPos));
mask = BitOperations.ResetLowestSetBit(mask);
} while (mask != 0);
}
offset += (nuint)Vector512<ushort>.Count;
} while (offset <= lengthToExamine - (nuint)Vector512<ushort>.Count);
}
else if (Vector256.IsHardwareAccelerated && lengthToExamine >= (uint)Vector256<ushort>.Count*2)
{
Vector256<ushort> v1 = Vector256.Create((ushort)c);
Vector256<ushort> v2 = Vector256.Create((ushort)c2);
Vector256<ushort> v3 = Vector256.Create((ushort)c3);
do
{
Vector256<ushort> vector = Vector256.LoadUnsafe(ref source, offset);
Vector256<ushort> v1Eq = Vector256.Equals(vector, v1);
Vector256<ushort> v2Eq = Vector256.Equals(vector, v2);
Vector256<ushort> v3Eq = Vector256.Equals(vector, v3);
Vector256<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
if (cmp != Vector256<byte>.Zero)
{
// Skip every other bit
uint mask = cmp.ExtractMostSignificantBits() & 0x55555555;
do
{
uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
sepListBuilder.Append((int)(offset + bitPos));
mask = BitOperations.ResetLowestSetBit(mask);
} while (mask != 0);
}
offset += (nuint)Vector256<ushort>.Count;
} while (offset <= lengthToExamine - (nuint)Vector256<ushort>.Count);
}
else if (Vector128.IsHardwareAccelerated)
{
Vector128<ushort> v1 = Vector128.Create((ushort)c);
Vector128<ushort> v2 = Vector128.Create((ushort)c2);
Vector128<ushort> v3 = Vector128.Create((ushort)c3);
do
{
Vector128<ushort> vector = Vector128.LoadUnsafe(ref source, offset);
Vector128<ushort> v1Eq = Vector128.Equals(vector, v1);
Vector128<ushort> v2Eq = Vector128.Equals(vector, v2);
Vector128<ushort> v3Eq = Vector128.Equals(vector, v3);
Vector128<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
if (cmp != Vector128<byte>.Zero)
{
// Skip every other bit
uint mask = cmp.ExtractMostSignificantBits() & 0x5555;
do
{
uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
sepListBuilder.Append((int)(offset + bitPos));
mask = BitOperations.ResetLowestSetBit(mask);
} while (mask != 0);
}
offset += (nuint)Vector128<ushort>.Count;
} while (offset <= lengthToExamine - (nuint)Vector128<ushort>.Count);
}
while (offset < lengthToExamine)
{
char curr = Unsafe.Add(ref source, offset);
if (curr == c || curr == c2 || curr == c3)
{
sepListBuilder.Append((int)offset);
}
offset++;
}
}
#endif
/// <summary>
/// Uses ValueListBuilder to create list that holds indexes of separators in string.
/// </summary>