commit
939fa1ed1c
|
@ -136,9 +136,18 @@ add_library(common
|
||||||
WorkQueueThread.h
|
WorkQueueThread.h
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(MSVC AND _M_ARM_64)
|
||||||
|
# Workaround msvc arm64 optimizer bug
|
||||||
|
# TODO remove after updating to VS 17.4
|
||||||
|
set_source_files_properties(
|
||||||
|
Crypto/SHA1.cpp
|
||||||
|
PROPERTIES COMPILE_FLAGS "/d2ssa-peeps-post-color-")
|
||||||
|
endif()
|
||||||
|
|
||||||
if(NOT MSVC AND _M_ARM_64)
|
if(NOT MSVC AND _M_ARM_64)
|
||||||
set_source_files_properties(
|
set_source_files_properties(
|
||||||
Crypto/AES.cpp
|
Crypto/AES.cpp
|
||||||
|
Crypto/SHA1.cpp
|
||||||
PROPERTIES COMPILE_FLAGS "-march=armv8-a+crypto")
|
PROPERTIES COMPILE_FLAGS "-march=armv8-a+crypto")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -264,7 +264,7 @@ public:
|
||||||
template <size_t RoundIdx>
|
template <size_t RoundIdx>
|
||||||
inline constexpr void StoreRoundKey(const u32* rk)
|
inline constexpr void StoreRoundKey(const u32* rk)
|
||||||
{
|
{
|
||||||
const uint8x16_t rk_block = vld1q_u32(rk);
|
const uint8x16_t rk_block = vreinterpretq_u8_u32(vld1q_u32(rk));
|
||||||
if constexpr (AesMode == Mode::Encrypt)
|
if constexpr (AesMode == Mode::Encrypt)
|
||||||
round_keys[RoundIdx] = rk_block;
|
round_keys[RoundIdx] = rk_block;
|
||||||
else
|
else
|
||||||
|
|
|
@ -19,14 +19,6 @@
|
||||||
#ifdef _M_X86_64
|
#ifdef _M_X86_64
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#elif defined(_M_ARM_64)
|
#elif defined(_M_ARM_64)
|
||||||
#if defined(__clang__)
|
|
||||||
// This is a bit of a hack to get clang to accept the sha1 intrinsics without modifying cmdline
|
|
||||||
// flags. Note __ARM_FEATURE_CRYPTO is deprecated and "SHA2" flag is the lowest one which includes
|
|
||||||
// SHA1.
|
|
||||||
#define __ARM_FEATURE_SHA2
|
|
||||||
// ...needed for older clang before they made the switchover to more granular flags.
|
|
||||||
#define __ARM_FEATURE_CRYPTO
|
|
||||||
#endif
|
|
||||||
#include <arm_acle.h>
|
#include <arm_acle.h>
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
|
@ -259,17 +251,6 @@ private:
|
||||||
|
|
||||||
#ifdef _M_ARM_64
|
#ifdef _M_ARM_64
|
||||||
|
|
||||||
// The armv8 flags are very annoying:
|
|
||||||
// clang inserts "+" prefixes itself, gcc does not.
|
|
||||||
// clang has deprecated "crypto" (removed in clang 13), gcc has not.
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#define TARGET_ARMV8_SHA1
|
|
||||||
#elif defined(__clang__)
|
|
||||||
#define TARGET_ARMV8_SHA1 [[gnu::target("sha2")]]
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define TARGET_ARMV8_SHA1 [[gnu::target("+crypto")]]
|
|
||||||
#endif
|
|
||||||
|
|
||||||
class ContextNeon final : public BlockContext
|
class ContextNeon final : public BlockContext
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -290,7 +271,6 @@ private:
|
||||||
u32 e{};
|
u32 e{};
|
||||||
};
|
};
|
||||||
|
|
||||||
TARGET_ARMV8_SHA1
|
|
||||||
static inline uint32x4_t MsgSchedule(WorkBlock* wblock, size_t i)
|
static inline uint32x4_t MsgSchedule(WorkBlock* wblock, size_t i)
|
||||||
{
|
{
|
||||||
auto& w = *wblock;
|
auto& w = *wblock;
|
||||||
|
@ -302,7 +282,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t Func>
|
template <size_t Func>
|
||||||
TARGET_ARMV8_SHA1 static inline constexpr uint32x4_t f(State state, uint32x4_t w)
|
static inline constexpr uint32x4_t f(State state, uint32x4_t w)
|
||||||
{
|
{
|
||||||
const auto wk = vaddq_u32(w, vdupq_n_u32(K[Func]));
|
const auto wk = vaddq_u32(w, vdupq_n_u32(K[Func]));
|
||||||
if constexpr (Func == 0)
|
if constexpr (Func == 0)
|
||||||
|
@ -314,12 +294,8 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t Func>
|
template <size_t Func>
|
||||||
TARGET_ARMV8_SHA1 static inline constexpr State FourRounds(State state, uint32x4_t w)
|
static inline constexpr State FourRounds(State state, uint32x4_t w)
|
||||||
{
|
{
|
||||||
#ifdef _MSC_VER
|
|
||||||
// FIXME it seems the msvc optimizer gets a little too happy
|
|
||||||
_ReadBarrier();
|
|
||||||
#endif
|
|
||||||
return {f<Func>(state, w), vsha1h_u32(vgetq_lane_u32(state.abcd, 0))};
|
return {f<Func>(state, w), vsha1h_u32(vgetq_lane_u32(state.abcd, 0))};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -724,7 +724,12 @@
|
||||||
<ClCompile Include="Common\Crypto\AES.cpp" />
|
<ClCompile Include="Common\Crypto\AES.cpp" />
|
||||||
<ClCompile Include="Common\Crypto\bn.cpp" />
|
<ClCompile Include="Common\Crypto\bn.cpp" />
|
||||||
<ClCompile Include="Common\Crypto\ec.cpp" />
|
<ClCompile Include="Common\Crypto\ec.cpp" />
|
||||||
<ClCompile Include="Common\Crypto\SHA1.cpp" />
|
<ClCompile Include="Common\Crypto\SHA1.cpp">
|
||||||
|
<!--Workaround msvc arm64 optimizer bug
|
||||||
|
TODO remove after updating to VS 17.4
|
||||||
|
-->
|
||||||
|
<AdditionalOptions Condition="'$(Platform)'=='ARM64'">/d2ssa-peeps-post-color- %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="Common\Debug\MemoryPatches.cpp" />
|
<ClCompile Include="Common\Debug\MemoryPatches.cpp" />
|
||||||
<ClCompile Include="Common\Debug\Watches.cpp" />
|
<ClCompile Include="Common\Debug\Watches.cpp" />
|
||||||
<ClCompile Include="Common\DynamicLibrary.cpp" />
|
<ClCompile Include="Common\DynamicLibrary.cpp" />
|
||||||
|
|
Loading…
Reference in New Issue