StringUtil: Add BytePatternSearch()
This commit is contained in:
parent
9e09f53566
commit
cbbfc2f11a
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "string_util.h"
|
||||
|
@ -9,6 +9,12 @@
|
|||
#include <cstdio>
|
||||
#include <sstream>
|
||||
|
||||
#ifndef __APPLE__
|
||||
#include <malloc.h> // alloca
|
||||
#else
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "windows_headers.h"
|
||||
#endif
|
||||
|
@ -427,6 +433,96 @@ void StringUtil::EllipsiseInPlace(std::string& str, u32 max_length, const char*
|
|||
}
|
||||
}
|
||||
|
||||
std::optional<size_t> StringUtil::BytePatternSearch(const std::span<const u8> bytes, const std::string_view pattern)
|
||||
{
|
||||
// Parse the pattern into a bytemask.
|
||||
size_t pattern_length = 0;
|
||||
bool hinibble = true;
|
||||
for (size_t i = 0; i < pattern.size(); i++)
|
||||
{
|
||||
if ((pattern[i] >= '0' && pattern[i] <= '9') || (pattern[i] >= 'a' && pattern[i] <= 'f') ||
|
||||
(pattern[i] >= 'A' && pattern[i] <= 'F') || pattern[i] == '?')
|
||||
{
|
||||
hinibble ^= true;
|
||||
if (hinibble)
|
||||
pattern_length++;
|
||||
}
|
||||
else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n')
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pattern_length == 0)
|
||||
return std::nullopt;
|
||||
|
||||
const bool allocate_on_heap = (pattern_length >= 512);
|
||||
u8* match_bytes = allocate_on_heap ? static_cast<u8*>(alloca(pattern_length * 2)) : new u8[pattern_length * 2];
|
||||
u8* match_masks = match_bytes + pattern_length;
|
||||
|
||||
hinibble = true;
|
||||
u8 match_byte = 0;
|
||||
u8 match_mask = 0;
|
||||
for (size_t i = 0, match_len = 0; i < pattern.size(); i++)
|
||||
{
|
||||
u8 nibble = 0, nibble_mask = 0xF;
|
||||
if (pattern[i] >= '0' && pattern[i] <= '9')
|
||||
nibble = pattern[i] - '0';
|
||||
else if (pattern[i] >= 'a' && pattern[i] <= 'f')
|
||||
nibble = pattern[i] - 'a' + 0xa;
|
||||
else if (pattern[i] >= 'A' && pattern[i] <= 'F')
|
||||
nibble = pattern[i] - 'A' + 0xa;
|
||||
else if (pattern[i] == '?')
|
||||
nibble_mask = 0;
|
||||
else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n')
|
||||
continue;
|
||||
else
|
||||
break;
|
||||
|
||||
hinibble ^= true;
|
||||
if (hinibble)
|
||||
{
|
||||
match_bytes[match_len] = nibble | (match_byte << 4);
|
||||
match_masks[match_len] = nibble_mask | (match_mask << 4);
|
||||
match_len++;
|
||||
}
|
||||
else
|
||||
{
|
||||
match_byte = nibble;
|
||||
match_mask = nibble_mask;
|
||||
}
|
||||
}
|
||||
if (pattern_length == 0)
|
||||
return std::nullopt;
|
||||
|
||||
std::optional<size_t> ret;
|
||||
const size_t max_search_offset = bytes.size() - pattern_length;
|
||||
for (size_t offset = 0; offset < max_search_offset; offset++)
|
||||
{
|
||||
const u8* start = bytes.data() + offset;
|
||||
for (size_t match_offset = 0;;)
|
||||
{
|
||||
if ((start[match_offset] & match_masks[match_offset]) != match_bytes[match_offset])
|
||||
break;
|
||||
|
||||
match_offset++;
|
||||
if (match_offset == pattern_length)
|
||||
{
|
||||
// found it!
|
||||
ret = offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (allocate_on_heap)
|
||||
delete[] match_bytes;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t StringUtil::DecodeUTF8(const std::string_view str, size_t offset, char32_t* ch)
|
||||
{
|
||||
return DecodeUTF8(str.data() + offset, str.length() - offset, ch);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
@ -8,6 +8,7 @@
|
|||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
@ -275,6 +276,9 @@ size_t DecodeUTF8(const std::string& str, size_t offset, char32_t* ch);
|
|||
std::string Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis = "...");
|
||||
void EllipsiseInPlace(std::string& str, u32 max_length, const char* ellipsis = "...");
|
||||
|
||||
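/// Searches the given memory span for a byte pattern written as hexadecimal digit pairs (e.g. "DE AD ?? EF").
/// Spaces and newlines between digits are ignored, and each '?' is a wildcard matching any value for that nibble.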
|
||||
std::optional<size_t> BytePatternSearch(const std::span<const u8> bytes, const std::string_view pattern);
|
||||
|
||||
/// Strided memcpy/memcmp.
|
||||
ALWAYS_INLINE static void StrideMemCpy(void* dst, std::size_t dst_stride, const void* src, std::size_t src_stride,
|
||||
std::size_t copy_size, std::size_t count)
|
||||
|
|
Loading…
Reference in New Issue