From 0bc043a0bb908e296c66c5a391743367bfe3995a Mon Sep 17 00:00:00 2001 From: JordanTheToaster Date: Fri, 1 Nov 2024 23:04:27 +0000 Subject: [PATCH] 3rdparty: Update LZMA/7zipSDK to 24.08 --- 3rdparty/lzma/include/7zCrc.h | 5 +- 3rdparty/lzma/include/7zTypes.h | 14 +- 3rdparty/lzma/include/7zVersion.h | 10 +- 3rdparty/lzma/include/Alloc.h | 15 +- 3rdparty/lzma/include/Bra.h | 36 +- 3rdparty/lzma/include/Compiler.h | 91 ++++- 3rdparty/lzma/include/CpuArch.h | 146 +++++++- 3rdparty/lzma/include/LzFind.h | 5 +- 3rdparty/lzma/include/LzFindMt.h | 9 +- 3rdparty/lzma/include/Precomp.h | 123 ++++++- 3rdparty/lzma/include/Threads.h | 20 +- 3rdparty/lzma/include/Xz.h | 7 +- 3rdparty/lzma/include/XzCrc64.h | 8 +- 3rdparty/lzma/src/7zArcIn.c | 6 +- 3rdparty/lzma/src/7zCrc.c | 532 +++++++++++++++++------------- 3rdparty/lzma/src/7zCrcOpt.c | 244 +++++++++----- 3rdparty/lzma/src/7zDec.c | 46 ++- 3rdparty/lzma/src/Aes.c | 56 +++- 3rdparty/lzma/src/AesOpt.c | 229 +++++++++---- 3rdparty/lzma/src/Alloc.c | 174 +++++++--- 3rdparty/lzma/src/Bra.c | 325 +++++++++++++++++- 3rdparty/lzma/src/CpuArch.c | 110 ++++-- 3rdparty/lzma/src/DllSecur.c | 18 +- 3rdparty/lzma/src/LzFind.c | 127 ++++--- 3rdparty/lzma/src/LzFindMt.c | 58 ++-- 3rdparty/lzma/src/Lzma2Dec.c | 6 +- 3rdparty/lzma/src/LzmaEnc.c | 26 +- 3rdparty/lzma/src/MtCoder.c | 8 +- 3rdparty/lzma/src/MtDec.c | 18 +- 3rdparty/lzma/src/Ppmd7.c | 33 +- 3rdparty/lzma/src/Ppmd7Dec.c | 24 +- 3rdparty/lzma/src/Ppmd7Enc.c | 17 +- 3rdparty/lzma/src/Sha256.c | 52 ++- 3rdparty/lzma/src/Sha256Opt.c | 131 ++++++-- 3rdparty/lzma/src/SwapBytes.c | 63 +++- 3rdparty/lzma/src/Threads.c | 53 ++- 3rdparty/lzma/src/Xz.c | 4 +- 3rdparty/lzma/src/XzCrc64.c | 128 +++++-- 3rdparty/lzma/src/XzCrc64Opt.c | 254 ++++++++++++-- 3rdparty/lzma/src/XzDec.c | 84 ++--- 3rdparty/lzma/src/XzEnc.c | 60 ++-- 3rdparty/lzma/src/XzIn.c | 10 +- 42 files changed, 2471 insertions(+), 914 deletions(-) diff --git a/3rdparty/lzma/include/7zCrc.h b/3rdparty/lzma/include/7zCrc.h index 4afaeae4a8..3e6d408b92 100644 --- a/3rdparty/lzma/include/7zCrc.h +++ b/3rdparty/lzma/include/7zCrc.h @@ -1,5 +1,5 @@ /* 7zCrc.h -- CRC32 calculation -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_7Z_CRC_H #define ZIP7_INC_7Z_CRC_H @@ -20,7 +20,8 @@ void Z7_FASTCALL CrcGenerateTable(void); UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size); UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size); -typedef UInt32 (Z7_FASTCALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); +typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size); +Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo); EXTERN_C_END diff --git a/3rdparty/lzma/include/7zTypes.h b/3rdparty/lzma/include/7zTypes.h index 1fcb2473e1..5b77420a3b 100644 --- a/3rdparty/lzma/include/7zTypes.h +++ b/3rdparty/lzma/include/7zTypes.h @@ -1,5 +1,5 @@ /* 7zTypes.h -- Basic types -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-24 : Igor Pavlov : Public domain */ #ifndef ZIP7_7Z_TYPES_H #define ZIP7_7Z_TYPES_H @@ -530,20 +530,20 @@ struct ISzAlloc #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) */ #if defined (__clang__) || defined(__GNUC__) -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \ +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \ _Pragma("GCC diagnostic pop") #else -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL -#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL #endif #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ - Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ - Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL + Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) diff --git a/3rdparty/lzma/include/7zVersion.h b/3rdparty/lzma/include/7zVersion.h index 7549239614..1ddef80b08 100644 --- a/3rdparty/lzma/include/7zVersion.h +++ b/3rdparty/lzma/include/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 23 -#define MY_VER_MINOR 01 +#define MY_VER_MAJOR 24 +#define MY_VER_MINOR 8 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "23.01" +#define MY_VERSION_NUMBERS "24.08" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2023-06-20" +#define MY_DATE "2024-08-11" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2023 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/3rdparty/lzma/include/Alloc.h b/3rdparty/lzma/include/Alloc.h index fac5b62fb9..01bf6b7dd6 100644 --- a/3rdparty/lzma/include/Alloc.h +++ b/3rdparty/lzma/include/Alloc.h @@ -1,5 +1,5 @@ /* Alloc.h -- Memory allocation functions -2023-03-04 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_ALLOC_H #define ZIP7_INC_ALLOC_H @@ -22,6 +22,9 @@ void *MyAlloc(size_t size); void MyFree(void *address); void *MyRealloc(void *address, size_t size); +void *z7_AlignedAlloc(size_t size); +void z7_AlignedFree(void *p); + #ifdef _WIN32 #ifdef Z7_LARGE_PAGES @@ -33,12 +36,14 @@ void MidFree(void *address); void *BigAlloc(size_t size); void BigFree(void *address); +/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */ + #else -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) +#define MidAlloc(size) z7_AlignedAlloc(size) +#define MidFree(address) z7_AlignedFree(address) +#define BigAlloc(size) z7_AlignedAlloc(size) +#define BigFree(address) z7_AlignedFree(address) #endif diff --git a/3rdparty/lzma/include/Bra.h b/3rdparty/lzma/include/Bra.h index a4ee568e21..b47112cedc 100644 --- a/3rdparty/lzma/include/Bra.h +++ b/3rdparty/lzma/include/Bra.h @@ -1,5 +1,5 @@ /* Bra.h -- Branch converters for executables -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-20 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_BRA_H #define ZIP7_INC_BRA_H @@ -8,8 +8,12 @@ EXTERN_C_BEGIN -#define Z7_BRANCH_CONV_DEC(name) z7_BranchConv_ ## name ## _Dec -#define Z7_BRANCH_CONV_ENC(name) z7_BranchConv_ ## name ## _Enc +/* #define PPC BAD_PPC_11 // for debug */ + +#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec +#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc +#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name) +#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name) #define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec #define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc @@ -20,19 +24,20 @@ typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv)); typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt)); #define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0 -Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_DEC(X86)); -Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_ENC(X86)); +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86)); +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86)); #define Z7_BRANCH_FUNCS_DECL(name) \ -Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_DEC(name)); \ -Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_ENC(name)); +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \ +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name)); -Z7_BRANCH_FUNCS_DECL(ARM64) -Z7_BRANCH_FUNCS_DECL(ARM) -Z7_BRANCH_FUNCS_DECL(ARMT) -Z7_BRANCH_FUNCS_DECL(PPC) -Z7_BRANCH_FUNCS_DECL(SPARC) -Z7_BRANCH_FUNCS_DECL(IA64) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT) +Z7_BRANCH_FUNCS_DECL (BranchConv_PPC) +Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC) +Z7_BRANCH_FUNCS_DECL (BranchConv_IA64) +Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV) /* These functions convert data that contain CPU instructions. @@ -49,14 +54,14 @@ and one for decoding (_Enc/_Dec postfixes in function name). In params: data : data buffer size : size of data - pc : current virtual Program Counter (Instruction Pinter) value + pc : current virtual Program Counter (Instruction Pointer) value In/Out param: state : pointer to state variable (for X86 converter only) Return: The pointer to position in (data) buffer after last byte that was processed. If the caller calls converter again, it must call it starting with that position. - But the caller is allowed to move data in buffer. so pointer to + But the caller is allowed to move data in buffer. So pointer to current processed position also will be changed for next call. Also the caller must increase internal (pc) value for next call. @@ -65,6 +70,7 @@ Each converter has some characteristics: Endian, Alignment, LookAhead. X86 little 1 4 ARMT little 2 2 + RISCV little 2 6 ARM little 4 0 ARM64 little 4 0 PPC big 4 0 diff --git a/3rdparty/lzma/include/Compiler.h b/3rdparty/lzma/include/Compiler.h index 185a52deb5..2a9c2b7a08 100644 --- a/3rdparty/lzma/include/Compiler.h +++ b/3rdparty/lzma/include/Compiler.h @@ -1,5 +1,5 @@ /* Compiler.h : Compiler specific defines and pragmas -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_COMPILER_H #define ZIP7_INC_COMPILER_H @@ -25,11 +25,79 @@ #define Z7_MINGW #endif +#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__)) +#define Z7_MCST_LCC +#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__) +#endif + +/* +#if defined(__AVX2__) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \ + || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \ + || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) + #define Z7_COMPILER_AVX2_SUPPORTED + #endif +#endif +*/ + // #pragma GCC diagnostic ignored "-Wunknown-pragmas" #ifdef __clang__ // padding size of '' with 4 bytes to alignment boundary #pragma GCC diagnostic ignored "-Wpadded" + +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \ + && defined(__FreeBSD__) +// freebsd: +#pragma GCC diagnostic ignored "-Wexcess-padding" +#endif + +#if __clang_major__ >= 16 +#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" +#endif + +#if __clang_major__ == 13 +#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// cheri +#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast" +#endif +#endif + +#if __clang_major__ == 13 + // for + #pragma GCC diagnostic ignored "-Wreserved-identifier" +#endif + +#endif // __clang__ + +#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16 +// #pragma GCC diagnostic ignored "-Wcast-function-type-strict" +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION +#endif + +typedef void (*Z7_void_Function)(void); +#if defined(__clang__) || defined(__GNUC__) +#define Z7_CAST_FUNC_C (Z7_void_Function) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define Z7_CAST_FUNC_C (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define Z7_CAST_FUNC_C +#endif +/* +#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) + // #pragma GCC diagnostic ignored "-Wcast-function-type" +#endif +*/ +#ifdef __GNUC__ +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif #endif @@ -101,7 +169,8 @@ _Pragma("clang loop unroll(disable)") \ _Pragma("clang loop vectorize(disable)") #define Z7_ATTRIB_NO_VECTORIZE -#elif defined(__GNUC__) && (__GNUC__ >= 5) +#elif defined(__GNUC__) && (__GNUC__ >= 5) \ + && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610)) #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) // __attribute__((optimize("no-unroll-loops"))); #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE @@ -142,15 +211,23 @@ #endif -#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000)) -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600)) + +#if (Z7_CLANG_VERSION < 130000) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") -#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ +#endif + +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ _Pragma("GCC diagnostic pop") #else -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER -#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif #define UNUSED_VAR(x) (void)x; diff --git a/3rdparty/lzma/include/CpuArch.h b/3rdparty/lzma/include/CpuArch.h index 8e5d8a543f..683cfaa862 100644 --- a/3rdparty/lzma/include/CpuArch.h +++ b/3rdparty/lzma/include/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2023-04-02 : Igor Pavlov : Public domain */ +2024-06-17 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H @@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ +#if !defined(_M_ARM64EC) #if defined(_M_X64) \ || defined(_M_AMD64) \ || defined(__x86_64__) \ @@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #define MY_CPU_64BIT #endif +#endif #if defined(_M_IX86) \ @@ -47,17 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #if defined(_M_ARM64) \ + || defined(_M_ARM64EC) \ || defined(__AARCH64EL__) \ || defined(__AARCH64EB__) \ || defined(__aarch64__) #define MY_CPU_ARM64 - #ifdef __ILP32__ +#if defined(__ILP32__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) #define MY_CPU_NAME "arm64-32" #define MY_CPU_SIZEOF_POINTER 4 - #else +#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) + #define MY_CPU_NAME "arm64-128" + #define MY_CPU_SIZEOF_POINTER 16 +#else +#if defined(_M_ARM64EC) + #define MY_CPU_NAME "arm64ec" +#else #define MY_CPU_NAME "arm64" +#endif #define MY_CPU_SIZEOF_POINTER 8 - #endif +#endif #define MY_CPU_64BIT #endif @@ -133,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif +#if defined(__sparc__) \ + || defined(__sparc) + #define MY_CPU_SPARC + #if defined(__LP64__) \ + || defined(_LP64) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_NAME "sparcv9" + #define MY_CPU_SIZEOF_POINTER 8 + #define MY_CPU_64BIT + #elif defined(__sparc_v9__) \ + || defined(__sparcv9) + #define MY_CPU_64BIT + #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "sparcv9-32" + #else + #define MY_CPU_NAME "sparcv9m" + #endif + #elif defined(__sparc_v8__) \ + || defined(__sparcv8) + #define MY_CPU_NAME "sparcv8" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "sparc" + #endif +#endif + + #if defined(__riscv) \ || defined(__riscv__) + #define MY_CPU_RISCV #if __riscv_xlen == 32 #define MY_CPU_NAME "riscv32" #elif __riscv_xlen == 64 @@ -145,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif +#if defined(__loongarch__) + #define MY_CPU_LOONGARCH + #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64) + #define MY_CPU_64BIT + #endif + #if defined(__loongarch64) + #define MY_CPU_NAME "loongarch64" + #define MY_CPU_LOONGARCH64 + #else + #define MY_CPU_NAME "loongarch" + #endif +#endif + + +// #undef MY_CPU_NAME +// #undef MY_CPU_SIZEOF_POINTER +// #define __e2k__ +// #define __SIZEOF_POINTER__ 4 +#if defined(__e2k__) + #define MY_CPU_E2K + #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "e2k-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "e2k" + #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #endif + #define MY_CPU_64BIT +#endif + + #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) #define MY_CPU_X86_OR_AMD64 #endif @@ -175,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. || defined(MY_CPU_ARM_LE) \ || defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_IA64_LE) \ + || defined(_LITTLE_ENDIAN) \ || defined(__LITTLE_ENDIAN__) \ || defined(__ARMEL__) \ || defined(__THUMBEL__) \ @@ -251,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifndef MY_CPU_NAME + // #define MY_CPU_IS_UNKNOWN #ifdef MY_CPU_LE #define MY_CPU_NAME "LE" #elif defined(MY_CPU_BE) @@ -295,9 +369,19 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define Z7_BSWAP64(v) _byteswap_uint64(v) #define Z7_CPU_FAST_BSWAP_SUPPORTED -#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ - || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) - +/* GCC can generate slow code that calls function for __builtin_bswap32() for: + - GCC for RISCV, if Zbb/XTHeadBb extension is not used. + - GCC for SPARC. + The code from CLANG for SPARC also is not fastest. + So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. +*/ +#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ + && !defined(MY_CPU_SPARC) \ + && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \ + ) + #define Z7_BSWAP16(v) __builtin_bswap16(v) #define Z7_BSWAP32(v) __builtin_bswap32(v) #define Z7_BSWAP64(v) __builtin_bswap64(v) @@ -329,13 +413,48 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ - || defined(MY_CPU_ARM64) + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \ + || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6) #define MY_CPU_LE_UNALIGN #define MY_CPU_LE_UNALIGN_64 #elif defined(__ARM_FEATURE_UNALIGNED) - /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. - So we can't use unaligned 64-bit operations. */ - #define MY_CPU_LE_UNALIGN +/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions. + Description of problems: +problem-1 : 32-bit ARM architecture: + multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM) + require 32-bit (WORD) alignment (by 32-bit ARM architecture). + So there is "Alignment fault exception", if data is not aligned for 32-bit. + +problem-2 : 32-bit kernels and arm64 kernels: + 32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception". + So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux. + + But some arm64 kernels do not handle these faults in 32-bit programs. + So we have unhandled exception for such instructions. + Probably some new arm64 kernels have fixed it, and unaligned + paired-access instructions work in new kernels? + +problem-3 : compiler for 32-bit arm: + Compilers use LDRD/STRD/LDM/STM for UInt64 accesses + and for another cases where two 32-bit accesses are fused + to one multi-access instruction. + So UInt64 variables must be aligned for 32-bit, and each + 32-bit access must be aligned for 32-bit, if we want to + avoid "Alignment fault" exception (handled or unhandled). + +problem-4 : performace: + Even if unaligned access is handled by kernel, it will be slow. + So if we allow unaligned access, we can get fast unaligned + single-access, and slow unaligned paired-access. + + We don't allow unaligned access on 32-bit arm, because compiler + genarates paired-access instructions that require 32-bit alignment, + and some arm64 kernels have no handler for these instructions. + Also unaligned paired-access instructions will be slow, if kernel handles them. +*/ + // it must be disabled: + // #define MY_CPU_LE_UNALIGN #endif #endif @@ -439,11 +558,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #if defined(MY_CPU_BE) +#define GetBe64a(p) (*(const UInt64 *)(const void *)(p)) #define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) #define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) #define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define GetUi64a(p) GetUi64(p) #define GetUi32a(p) GetUi32(p) #define GetUi16a(p) GetUi16(p) #define SetUi32a(p, v) SetUi32(p, v) @@ -451,11 +572,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #elif defined(MY_CPU_LE) +#define GetUi64a(p) (*(const UInt64 *)(const void *)(p)) #define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) #define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) #define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define GetBe64a(p) GetBe64(p) #define GetBe32a(p) GetBe32(p) #define GetBe16a(p) GetBe16(p) #define SetBe32a(p, v) SetBe32(p, v) @@ -486,6 +609,7 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_CMOV(void); BoolInt CPU_IsSupported_SSE(void); diff --git a/3rdparty/lzma/include/LzFind.h b/3rdparty/lzma/include/LzFind.h index a3f72c9870..67e8a6e028 100644 --- a/3rdparty/lzma/include/LzFind.h +++ b/3rdparty/lzma/include/LzFind.h @@ -1,5 +1,5 @@ /* LzFind.h -- Match finder for LZ algorithms -2023-03-04 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_H #define ZIP7_INC_LZ_FIND_H @@ -144,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p); void MatchFinder_Init_4(CMatchFinder *p); -void MatchFinder_Init(CMatchFinder *p); +// void MatchFinder_Init(CMatchFinder *p); +void MatchFinder_Init(void *p); UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); diff --git a/3rdparty/lzma/include/LzFindMt.h b/3rdparty/lzma/include/LzFindMt.h index db5923ea05..fcb479da9e 100644 --- a/3rdparty/lzma/include/LzFindMt.h +++ b/3rdparty/lzma/include/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2023-03-05 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H @@ -31,7 +31,10 @@ typedef struct // UInt32 numBlocks_Sent; } CMtSync; -typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); + +struct CMatchFinderMt_; + +typedef UInt32 * (*Mf_Mix_Matches)(struct CMatchFinderMt_ *p, UInt32 matchMinPos, UInt32 *distances); /* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ #define kMtCacheLineDummy 128 @@ -39,7 +42,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); -typedef struct +typedef struct CMatchFinderMt_ { /* LZ */ const Byte *pointerToCurPos; diff --git a/3rdparty/lzma/include/Precomp.h b/3rdparty/lzma/include/Precomp.h index 69afb2ffd1..7747fdd74c 100644 --- a/3rdparty/lzma/include/Precomp.h +++ b/3rdparty/lzma/include/Precomp.h @@ -1,10 +1,127 @@ -/* Precomp.h -- StdAfx -2023-04-02 : Igor Pavlov : Public domain */ +/* Precomp.h -- precompilation file +2024-01-25 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_PRECOMP_H #define ZIP7_INC_PRECOMP_H +/* + this file must be included before another *.h files and before . + this file is included from the following files: + C\*.c + C\Util\*\Precomp.h <- C\Util\*\*.c + CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp + + this file can set the following macros: + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 +*/ + #include "Compiler.h" -/* #include "7zTypes.h" */ + +#ifdef _MSC_VER +// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#if _MSC_VER >= 1912 +// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception. +#endif +#endif + +/* +// for debug: +#define UNICODE 1 +#define _UNICODE 1 +#define _WIN32_WINNT 0x0500 // win2000 +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +*/ + +#ifdef _WIN32 +/* + this "Precomp.h" file must be included before , + if we want to define _WIN32_WINNT before . +*/ + +#ifndef Z7_LARGE_PAGES +#ifndef Z7_NO_LARGE_PAGES +#define Z7_LARGE_PAGES 1 +#endif +#endif + +#ifndef Z7_LONG_PATH +#ifndef Z7_NO_LONG_PATH +#define Z7_LONG_PATH 1 +#endif +#endif + +#ifndef Z7_DEVICE_FILE +#ifndef Z7_NO_DEVICE_FILE +// #define Z7_DEVICE_FILE 1 +#endif +#endif + +// we don't change macros if included after +#ifndef _WINDOWS_ + +#ifndef Z7_WIN32_WINNT_MIN + #if defined(_M_ARM64) || defined(__aarch64__) + // #define Z7_WIN32_WINNT_MIN 0x0a00 // win10 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT) + // #define Z7_WIN32_WINNT_MIN 0x0602 // win8 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64) + #define Z7_WIN32_WINNT_MIN 0x0503 // win2003 + // #elif defined(_M_IX86) || defined(__i386__) + // #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + #else // x86 and another(old) systems + #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + // #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug + #endif +#endif // Z7_WIN32_WINNT_MIN + + +#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT +#ifdef _WIN32_WINNT + // #error Stop_Compiling_Bad_WIN32_WINNT +#else + #ifndef Z7_NO_DEFINE_WIN32_WINNT +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define _WIN32_WINNT Z7_WIN32_WINNT_MIN +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #endif +#endif // _WIN32_WINNT + +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT + + +#ifndef _MBCS +#ifndef Z7_NO_UNICODE +// UNICODE and _UNICODE are used by and by 7-zip code. + +#ifndef UNICODE +#define UNICODE 1 +#endif + +#ifndef _UNICODE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _UNICODE 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // Z7_NO_UNICODE +#endif // _MBCS +#endif // _WINDOWS_ + +// #include "7zWindows.h" + +#endif // _WIN32 #endif diff --git a/3rdparty/lzma/include/Threads.h b/3rdparty/lzma/include/Threads.h index 4028464a33..c1484a2773 100644 --- a/3rdparty/lzma/include/Threads.h +++ b/3rdparty/lzma/include/Threads.h @@ -1,5 +1,5 @@ /* Threads.h -- multithreading library -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-28 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_THREADS_H #define ZIP7_INC_THREADS_H @@ -9,12 +9,21 @@ #else +#include "Compiler.h" + +// #define Z7_AFFINITY_DISABLE #if defined(__linux__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #ifndef Z7_AFFINITY_DISABLE #define Z7_AFFINITY_SUPPORTED // #pragma message(" ==== Z7_AFFINITY_SUPPORTED") -// #define _GNU_SOURCE +#if !defined(_GNU_SOURCE) +// #pragma message(" ==== _GNU_SOURCE set") +// we need _GNU_SOURCE for cpu_set_t, if we compile for MUSL +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif #endif #endif #endif @@ -173,7 +182,7 @@ WRes CriticalSection_Init(CCriticalSection *p); #else // _WIN32 -typedef struct _CEvent +typedef struct { int _created; int _manual_reset; @@ -199,7 +208,7 @@ WRes Event_Wait(CEvent *p); WRes Event_Close(CEvent *p); -typedef struct _CSemaphore +typedef struct { int _created; UInt32 _count; @@ -219,7 +228,7 @@ WRes Semaphore_Wait(CSemaphore *p); WRes Semaphore_Close(CSemaphore *p); -typedef struct _CCriticalSection +typedef struct { pthread_mutex_t _mutex; } CCriticalSection; @@ -230,6 +239,7 @@ void CriticalSection_Enter(CCriticalSection *cs); void CriticalSection_Leave(CCriticalSection *cs); LONG InterlockedIncrement(LONG volatile *addend); +LONG InterlockedDecrement(LONG volatile *addend); #endif // _WIN32 diff --git a/3rdparty/lzma/include/Xz.h b/3rdparty/lzma/include/Xz.h index d5001f6cac..42bc685341 100644 --- a/3rdparty/lzma/include/Xz.h +++ b/3rdparty/lzma/include/Xz.h @@ -1,5 +1,5 @@ /* Xz.h - Xz interface -2023-04-13 : Igor Pavlov : Public domain */ +2024-01-26 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_XZ_H #define ZIP7_INC_XZ_H @@ -18,6 +18,7 @@ EXTERN_C_BEGIN #define XZ_ID_ARMT 8 #define XZ_ID_SPARC 9 #define XZ_ID_ARM64 0xa +#define XZ_ID_RISCV 0xb #define XZ_ID_LZMA2 0x21 unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); @@ -233,13 +234,13 @@ typedef enum typedef struct { EXzState state; - UInt32 pos; + unsigned pos; unsigned alignPos; unsigned indexPreSize; CXzStreamFlags streamFlags; - UInt32 blockHeaderSize; + unsigned blockHeaderSize; UInt64 packSize; UInt64 unpackSize; diff --git a/3rdparty/lzma/include/XzCrc64.h b/3rdparty/lzma/include/XzCrc64.h index ca46869a64..04f8153df4 100644 --- a/3rdparty/lzma/include/XzCrc64.h +++ b/3rdparty/lzma/include/XzCrc64.h @@ -1,5 +1,5 @@ /* XzCrc64.h -- CRC64 calculation -2023-04-02 : Igor Pavlov : Public domain */ +2023-12-08 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_XZ_CRC64_H #define ZIP7_INC_XZ_CRC64_H @@ -10,16 +10,16 @@ EXTERN_C_BEGIN -extern UInt64 g_Crc64Table[]; +// extern UInt64 g_Crc64Table[]; void Z7_FASTCALL Crc64GenerateTable(void); #define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF) #define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL) -#define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +// #define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size); -UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size); +// UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size); EXTERN_C_END diff --git a/3rdparty/lzma/src/7zArcIn.c b/3rdparty/lzma/src/7zArcIn.c index 43fa7c2145..23f2949922 100644 --- a/3rdparty/lzma/src/7zArcIn.c +++ b/3rdparty/lzma/src/7zArcIn.c @@ -1,5 +1,5 @@ /* 7zArcIn.c -- 7z Input functions -2023-05-11 : Igor Pavlov : Public domain */ +2023-09-07 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -301,7 +301,7 @@ static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v) static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems) { - Byte b = 0; + unsigned b = 0; unsigned m = 0; UInt32 sum = 0; for (; numItems != 0; numItems--) @@ -312,7 +312,7 @@ static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems) m = 8; } m--; - sum += ((b >> m) & 1); + sum += (UInt32)((b >> m) & 1); } return sum; } diff --git a/3rdparty/lzma/src/7zCrc.c b/3rdparty/lzma/src/7zCrc.c index ccfd01f9e5..6e2db9eab1 100644 --- a/3rdparty/lzma/src/7zCrc.c +++ b/3rdparty/lzma/src/7zCrc.c @@ -1,93 +1,96 @@ /* 7zCrc.c -- CRC32 calculation and init -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "7zCrc.h" #include "CpuArch.h" -#define kCrcPoly 0xEDB88320 +// for debug: +// #define __ARM_FEATURE_CRC32 1 -#ifdef MY_CPU_LE - #define CRC_NUM_TABLES 8 +#ifdef __ARM_FEATURE_CRC32 +// #pragma message("__ARM_FEATURE_CRC32") +#define Z7_CRC_HW_FORCE +#endif + +// #define Z7_CRC_DEBUG_BE +#ifdef Z7_CRC_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +#ifdef Z7_CRC_HW_FORCE + #define Z7_CRC_NUM_TABLES_USE 1 #else - #define CRC_NUM_TABLES 9 - - UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table); - UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table); +#ifdef Z7_CRC_NUM_TABLES + #define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES +#else + #define Z7_CRC_NUM_TABLES_USE 12 +#endif #endif -#ifndef MY_CPU_BE - UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); - UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); +#if Z7_CRC_NUM_TABLES_USE < 1 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES #endif -/* -extern -CRC_FUNC g_CrcUpdateT4; -CRC_FUNC g_CrcUpdateT4; -*/ -extern -CRC_FUNC g_CrcUpdateT8; -CRC_FUNC g_CrcUpdateT8; -extern -CRC_FUNC g_CrcUpdateT0_32; -CRC_FUNC g_CrcUpdateT0_32; -extern -CRC_FUNC g_CrcUpdateT0_64; -CRC_FUNC g_CrcUpdateT0_64; -extern -CRC_FUNC g_CrcUpdate; -CRC_FUNC g_CrcUpdate; +#if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1) + #define Z7_CRC_NUM_TABLES_TOTAL Z7_CRC_NUM_TABLES_USE +#else + #define Z7_CRC_NUM_TABLES_TOTAL (Z7_CRC_NUM_TABLES_USE + 1) +#endif -UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; +#ifndef Z7_CRC_HW_FORCE -UInt32 Z7_FASTCALL CrcUpdate(UInt32 v, const void *data, size_t size) -{ - return g_CrcUpdate(v, data, size, g_CrcTable); -} - -UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size) -{ - return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL; -} - -#if CRC_NUM_TABLES < 4 \ - || (CRC_NUM_TABLES == 4 && defined(MY_CPU_BE)) \ +#if Z7_CRC_NUM_TABLES_USE == 1 \ || (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) -#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) -UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); -UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table) +#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +#define Z7_CRC_UPDATE_T1_FUNC_NAME CrcUpdateGT1 +static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size) { + const UInt32 *table = g_CrcTable; const Byte *p = (const Byte *)data; - const Byte *pEnd = p + size; - for (; p != pEnd; p++) + const Byte *lim = p + size; + for (; p != lim; p++) v = CRC_UPDATE_BYTE_2(v, *p); return v; } #endif + +#if Z7_CRC_NUM_TABLES_USE != 1 +#ifndef MY_CPU_BE + #define FUNC_NAME_LE_2(s) CrcUpdateT ## s + #define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s) + #define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE) + UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table); +#endif +#ifndef MY_CPU_LE + #define FUNC_NAME_BE_2(s) CrcUpdateT1_BeT ## s + #define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s) + #define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE) + UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table); +#endif +#endif + +#endif // Z7_CRC_HW_FORCE + /* ---------- hardware CRC ---------- */ #ifdef MY_CPU_LE #if defined(MY_CPU_ARM_OR_ARM64) - // #pragma message("ARM*") - #if defined(_MSC_VER) && !defined(__clang__) - #if defined(MY_CPU_ARM64) - #if (_MSC_VER >= 1910) - #ifndef __clang__ - #define USE_ARM64_CRC - #include - #endif - #endif - #endif - #elif (defined(__clang__) && (__clang_major__ >= 3)) \ - || (defined(__GNUC__) && (__GNUC__ > 4)) + #if (defined(__clang__) && (__clang_major__ >= 3)) \ + || defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \ + || defined(__GNUC__) && (__GNUC__ >= 8) #if !defined(__ARM_FEATURE_CRC32) +// #pragma message("!defined(__ARM_FEATURE_CRC32)") +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER #define __ARM_FEATURE_CRC32 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRC32_WAS_SET #if defined(__clang__) #if defined(MY_CPU_ARM64) #define ATTRIB_CRC __attribute__((__target__("crc"))) @@ -96,100 +99,120 @@ UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UI #endif #else #if defined(MY_CPU_ARM64) +#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000) #define ATTRIB_CRC __attribute__((__target__("+crc"))) +#endif #else +#if !defined(Z7_GCC_VERSION) || (__GNUC__ >= 8) +#if defined(__ARM_FP) && __GNUC__ >= 8 +// for -mfloat-abi=hard: similar to + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd"))) +#else #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) +#endif +#endif #endif #endif #endif #if defined(__ARM_FEATURE_CRC32) - #define USE_ARM64_CRC + // #pragma message("") +/* +arm_acle.h (GGC): + before Nov 17, 2017: +#ifdef __ARM_FEATURE_CRC32 + + Nov 17, 2017: gcc10.0 (gcc 9.2.0) checked" +#if __ARM_ARCH >= 8 +#pragma GCC target ("arch=armv8-a+crc") + + Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1: +#ifdef __ARM_FEATURE_CRC32 +#ifdef __ARM_FP +#pragma GCC target ("arch=armv8-a+crc+simd") +#else +#pragma GCC target ("arch=armv8-a+crc") +#endif +*/ +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 +#if defined(Z7_GCC_VERSION) && (__GNUC__ == 8) && (Z7_GCC_VERSION < 80400) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 9) && (Z7_GCC_VERSION < 90201) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 10) && (Z7_GCC_VERSION < 100100) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") +#undef __ARM_ARCH +#define __ARM_ARCH 8 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif + #define Z7_CRC_HW_USE #include #endif + #elif defined(_MSC_VER) + #if defined(MY_CPU_ARM64) + #if (_MSC_VER >= 1910) + #ifdef __clang__ + // #define Z7_CRC_HW_USE + // #include + #else + #define Z7_CRC_HW_USE + #include + #endif + #endif + #endif #endif -#else +#else // non-ARM* -// no hardware CRC - -// #define USE_CRC_EMU - -#ifdef USE_CRC_EMU - -#pragma message("ARM64 CRC emulation") - -Z7_FORCE_INLINE -UInt32 __crc32b(UInt32 v, UInt32 data) -{ - const UInt32 *table = g_CrcTable; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); - return v; -} - -Z7_FORCE_INLINE -UInt32 __crc32w(UInt32 v, UInt32 data) -{ - const UInt32 *table = g_CrcTable; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - return v; -} - -Z7_FORCE_INLINE -UInt32 __crc32d(UInt32 v, UInt64 data) -{ - const UInt32 *table = g_CrcTable; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; - return v; -} - -#endif // USE_CRC_EMU - -#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE) - - - -#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) - -#define T0_32_UNROLL_BYTES (4 * 4) -#define T0_64_UNROLL_BYTES (4 * 8) - -#ifndef ATTRIB_CRC -#define ATTRIB_CRC +// #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code +#ifdef Z7_CRC_HW_USE +#include "7zCrcEmu.h" #endif + +#endif // non-ARM* + + + +#if defined(Z7_CRC_HW_USE) + // #pragma message("USE ARM HW CRC") -ATTRIB_CRC -UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); -ATTRIB_CRC -UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) +#ifdef MY_CPU_64BIT + #define CRC_HW_WORD_TYPE UInt64 + #define CRC_HW_WORD_FUNC __crc32d +#else + #define CRC_HW_WORD_TYPE UInt32 + #define CRC_HW_WORD_FUNC __crc32w +#endif + +#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4) + +#ifdef ATTRIB_CRC + ATTRIB_CRC +#endif +Z7_NO_INLINE +#ifdef Z7_CRC_HW_FORCE + UInt32 Z7_FASTCALL CrcUpdate +#else + static UInt32 Z7_FASTCALL CrcUpdate_HW +#endif + (UInt32 v, const void *data, size_t size) { const Byte *p = (const Byte *)data; - UNUSED_VAR(table); - - for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--) + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--) v = __crc32b(v, *p++); - - if (size >= T0_32_UNROLL_BYTES) + if (size >= CRC_HW_UNROLL_BYTES) { const Byte *lim = p + size; - size &= (T0_32_UNROLL_BYTES - 1); + size &= CRC_HW_UNROLL_BYTES - 1; lim -= size; do { - v = __crc32w(v, *(const UInt32 *)(const void *)(p)); - v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; - v = __crc32w(v, *(const UInt32 *)(const void *)(p)); - v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p)); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE))); + p += 2 * sizeof(CRC_HW_WORD_TYPE); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p)); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE))); + p += 2 * sizeof(CRC_HW_WORD_TYPE); } while (p != lim); } @@ -200,141 +223,198 @@ UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const return v; } -ATTRIB_CRC -UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); -ATTRIB_CRC -UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) -{ - const Byte *p = (const Byte *)data; - UNUSED_VAR(table); - - for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--) - v = __crc32b(v, *p++); - - if (size >= T0_64_UNROLL_BYTES) - { - const Byte *lim = p + size; - size &= (T0_64_UNROLL_BYTES - 1); - lim -= size; - do - { - v = __crc32d(v, *(const UInt64 *)(const void *)(p)); - v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; - v = __crc32d(v, *(const UInt64 *)(const void *)(p)); - v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; - } - while (p != lim); - } - - for (; size != 0; size--) - v = __crc32b(v, *p++); - - return v; -} - -#undef T0_32_UNROLL_BYTES -#undef T0_64_UNROLL_BYTES - -#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) +#ifdef Z7_ARM_FEATURE_CRC32_WAS_SET +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#undef __ARM_FEATURE_CRC32 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#undef Z7_ARM_FEATURE_CRC32_WAS_SET +#endif +#endif // defined(Z7_CRC_HW_USE) #endif // MY_CPU_LE +#ifndef Z7_CRC_HW_FORCE + +#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) +/* +typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC) + (UInt32 v, const void *data, size_t size, const UInt32 *table); +Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate; +*/ +static unsigned g_Crc_Algo; +#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) +static unsigned g_Crc_Be; +#endif +#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) + + + +Z7_NO_INLINE +#ifdef Z7_CRC_HW_USE + static UInt32 Z7_FASTCALL CrcUpdate_Base +#else + UInt32 Z7_FASTCALL CrcUpdate +#endif + (UInt32 crc, const void *data, size_t size) +{ +#if Z7_CRC_NUM_TABLES_USE == 1 + return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size); +#else // Z7_CRC_NUM_TABLES_USE != 1 +#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME + if (g_Crc_Algo == 1) + return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size); +#endif + +#ifdef MY_CPU_LE + return FUNC_NAME_LE(crc, data, size, g_CrcTable); +#elif defined(MY_CPU_BE) + return FUNC_NAME_BE(crc, data, size, g_CrcTable); +#else + if (g_Crc_Be) + return FUNC_NAME_BE(crc, data, size, g_CrcTable); + else + return FUNC_NAME_LE(crc, data, size, g_CrcTable); +#endif +#endif // Z7_CRC_NUM_TABLES_USE != 1 +} + + +#ifdef Z7_CRC_HW_USE +Z7_NO_INLINE +UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size) +{ + if (g_Crc_Algo == 0) + return CrcUpdate_HW(crc, data, size); + return CrcUpdate_Base(crc, data, size); +} +#endif + +#endif // !defined(Z7_CRC_HW_FORCE) + + + +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size) +{ + return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL; +} + + +MY_ALIGN(64) +UInt32 g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL]; + void Z7_FASTCALL CrcGenerateTable(void) { UInt32 i; for (i = 0; i < 256; i++) { +#if defined(Z7_CRC_HW_FORCE) + g_CrcTable[i] = __crc32b(i, 0); +#else + #define kCrcPoly 0xEDB88320 UInt32 r = i; unsigned j; for (j = 0; j < 8; j++) r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); g_CrcTable[i] = r; +#endif } - for (i = 256; i < 256 * CRC_NUM_TABLES; i++) + for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++) { const UInt32 r = g_CrcTable[(size_t)i - 256]; g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8); } - #if CRC_NUM_TABLES < 4 - g_CrcUpdate = CrcUpdateT1; - #elif defined(MY_CPU_LE) - // g_CrcUpdateT4 = CrcUpdateT4; - #if CRC_NUM_TABLES < 8 - g_CrcUpdate = CrcUpdateT4; - #else // CRC_NUM_TABLES >= 8 - g_CrcUpdateT8 = CrcUpdateT8; - /* - #ifdef MY_CPU_X86_OR_AMD64 - if (!CPU_Is_InOrder()) - #endif - */ - g_CrcUpdate = CrcUpdateT8; - #endif - #else +#if !defined(Z7_CRC_HW_FORCE) && \ + (defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE)) + +#if Z7_CRC_NUM_TABLES_USE <= 1 + g_Crc_Algo = 1; +#else // Z7_CRC_NUM_TABLES_USE <= 1 + +#if defined(MY_CPU_LE) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; +#else // !defined(MY_CPU_LE) { - #ifndef MY_CPU_BE +#ifndef MY_CPU_BE UInt32 k = 0x01020304; const Byte *p = (const Byte *)&k; if (p[0] == 4 && p[1] == 3) - { - #if CRC_NUM_TABLES < 8 - // g_CrcUpdateT4 = CrcUpdateT4; - g_CrcUpdate = CrcUpdateT4; - #else // CRC_NUM_TABLES >= 8 - g_CrcUpdateT8 = CrcUpdateT8; - g_CrcUpdate = CrcUpdateT8; - #endif - } + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; else if (p[0] != 1 || p[1] != 2) - g_CrcUpdate = CrcUpdateT1; + g_Crc_Algo = 1; else - #endif // MY_CPU_BE +#endif // MY_CPU_BE { - for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--) + for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--) { const UInt32 x = g_CrcTable[(size_t)i - 256]; g_CrcTable[i] = Z7_BSWAP32(x); } - #if CRC_NUM_TABLES <= 4 - g_CrcUpdate = CrcUpdateT1; - #elif CRC_NUM_TABLES <= 8 - // g_CrcUpdateT4 = CrcUpdateT1_BeT4; - g_CrcUpdate = CrcUpdateT1_BeT4; - #else // CRC_NUM_TABLES > 8 - g_CrcUpdateT8 = CrcUpdateT1_BeT8; - g_CrcUpdate = CrcUpdateT1_BeT8; - #endif +#if defined(Z7_CRC_UPDATE_T1_FUNC_NAME) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; +#endif +#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) + g_Crc_Be = 1; +#endif } } - #endif // CRC_NUM_TABLES < 4 +#endif // !defined(MY_CPU_LE) - #ifdef MY_CPU_LE - #ifdef USE_ARM64_CRC - if (CPU_IsSupported_CRC32()) - { - g_CrcUpdateT0_32 = CrcUpdateT0_32; - g_CrcUpdateT0_64 = CrcUpdateT0_64; - g_CrcUpdate = - #if defined(MY_CPU_ARM) - CrcUpdateT0_32; - #else - CrcUpdateT0_64; - #endif - } - #endif - - #ifdef USE_CRC_EMU - g_CrcUpdateT0_32 = CrcUpdateT0_32; - g_CrcUpdateT0_64 = CrcUpdateT0_64; - g_CrcUpdate = CrcUpdateT0_64; - #endif +#ifdef MY_CPU_LE +#ifdef Z7_CRC_HW_USE + if (CPU_IsSupported_CRC32()) + g_Crc_Algo = 0; +#endif // Z7_CRC_HW_USE +#endif // MY_CPU_LE + +#endif // Z7_CRC_NUM_TABLES_USE <= 1 +#endif // g_Crc_Algo was declared +} + +Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo) +{ + if (algo == 0) + return &CrcUpdate; + +#if defined(Z7_CRC_HW_USE) + if (algo == sizeof(CRC_HW_WORD_TYPE) * 8) + { +#ifdef Z7_CRC_HW_FORCE + return &CrcUpdate; +#else + if (g_Crc_Algo == 0) + return &CrcUpdate_HW; +#endif + } +#endif + +#ifndef Z7_CRC_HW_FORCE + if (algo == Z7_CRC_NUM_TABLES_USE) + return + #ifdef Z7_CRC_HW_USE + &CrcUpdate_Base; + #else + &CrcUpdate; #endif +#endif + + return NULL; } #undef kCrcPoly -#undef CRC64_NUM_TABLES +#undef Z7_CRC_NUM_TABLES_USE +#undef Z7_CRC_NUM_TABLES_TOTAL #undef CRC_UPDATE_BYTE_2 +#undef FUNC_NAME_LE_2 +#undef FUNC_NAME_LE_1 +#undef FUNC_NAME_LE +#undef FUNC_NAME_BE_2 +#undef FUNC_NAME_BE_1 +#undef FUNC_NAME_BE + +#undef CRC_HW_UNROLL_BYTES +#undef CRC_HW_WORD_FUNC +#undef CRC_HW_WORD_TYPE diff --git a/3rdparty/lzma/src/7zCrcOpt.c b/3rdparty/lzma/src/7zCrcOpt.c index 9c649290df..9408017ed4 100644 --- a/3rdparty/lzma/src/7zCrcOpt.c +++ b/3rdparty/lzma/src/7zCrcOpt.c @@ -1,117 +1,199 @@ -/* 7zCrcOpt.c -- CRC32 calculation -2023-04-02 : Igor Pavlov : Public domain */ +/* 7zCrcOpt.c -- CRC32 calculation (optimized functions) +2023-12-07 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "CpuArch.h" +#if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1 + +// for debug only : define Z7_CRC_DEBUG_BE to test big-endian code in little-endian cpu +// #define Z7_CRC_DEBUG_BE +#ifdef Z7_CRC_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +// the value Z7_CRC_NUM_TABLES_USE must be defined to same value as in 7zCrc.c +#ifdef Z7_CRC_NUM_TABLES +#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES +#else +#define Z7_CRC_NUM_TABLES_USE 12 +#endif + +#if Z7_CRC_NUM_TABLES_USE % 4 || \ + Z7_CRC_NUM_TABLES_USE < 4 * 1 || \ + Z7_CRC_NUM_TABLES_USE > 4 * 6 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif + + #ifndef MY_CPU_BE -#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) -UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); -UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) +#define Q(n, d) \ + ( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) + +#define R(a) *((const UInt32 *)(const void *)p + (a)) + +#define CRC_FUNC_PRE_LE2(step) \ +UInt32 Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) + +#define CRC_FUNC_PRE_LE(step) \ + CRC_FUNC_PRE_LE2(step); \ + CRC_FUNC_PRE_LE2(step) + +CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE) { const Byte *p = (const Byte *)data; - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) + const Byte *lim; + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) v = CRC_UPDATE_BYTE_2(v, *p); - for (; size >= 4; size -= 4, p += 4) + lim = p + size; + if (size >= Z7_CRC_NUM_TABLES_USE) { - v ^= *(const UInt32 *)(const void *)p; - v = - (table + 0x300)[((v ) & 0xFF)] - ^ (table + 0x200)[((v >> 8) & 0xFF)] - ^ (table + 0x100)[((v >> 16) & 0xFF)] - ^ (table + 0x000)[((v >> 24))]; + lim -= Z7_CRC_NUM_TABLES_USE; + do + { + v ^= R(0); + { +#if Z7_CRC_NUM_TABLES_USE == 1 * 4 + v = Q(0, v); +#else +#define U2(r, op) \ + { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } + UInt32 d, x; + U2(1, =) +#if Z7_CRC_NUM_TABLES_USE >= 3 * 4 +#define U(r) U2(r, ^=) + U(2) +#if Z7_CRC_NUM_TABLES_USE >= 4 * 4 + U(3) +#if Z7_CRC_NUM_TABLES_USE >= 5 * 4 + U(4) +#if Z7_CRC_NUM_TABLES_USE >= 6 * 4 + U(5) +#if Z7_CRC_NUM_TABLES_USE >= 7 * 4 +#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif +#endif +#endif +#endif +#endif +#undef U +#undef U2 + v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); +#endif + } + p += Z7_CRC_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC_NUM_TABLES_USE; } - for (; size > 0; size--, p++) + for (; p < lim; p++) v = CRC_UPDATE_BYTE_2(v, *p); return v; } -UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); -UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) -{ - const Byte *p = (const Byte *)data; - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) - v = CRC_UPDATE_BYTE_2(v, *p); - for (; size >= 8; size -= 8, p += 8) - { - UInt32 d; - v ^= *(const UInt32 *)(const void *)p; - v = - (table + 0x700)[((v ) & 0xFF)] - ^ (table + 0x600)[((v >> 8) & 0xFF)] - ^ (table + 0x500)[((v >> 16) & 0xFF)] - ^ (table + 0x400)[((v >> 24))]; - d = *((const UInt32 *)(const void *)p + 1); - v ^= - (table + 0x300)[((d ) & 0xFF)] - ^ (table + 0x200)[((d >> 8) & 0xFF)] - ^ (table + 0x100)[((d >> 16) & 0xFF)] - ^ (table + 0x000)[((d >> 24))]; - } - for (; size > 0; size--, p++) - v = CRC_UPDATE_BYTE_2(v, *p); - return v; -} +#undef CRC_UPDATE_BYTE_2 +#undef R +#undef Q +#undef CRC_FUNC_PRE_LE +#undef CRC_FUNC_PRE_LE2 #endif + + #ifndef MY_CPU_LE -#define CRC_UINT32_SWAP(v) Z7_BSWAP32(v) +#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8)) -#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8)) +#define Q(n, d) \ + ( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) -UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table) +#ifdef Z7_CRC_DEBUG_BE + #define R(a) GetBe32a((const UInt32 *)(const void *)p + (a)) +#else + #define R(a) *((const UInt32 *)(const void *)p + (a)) +#endif + + +#define CRC_FUNC_PRE_BE2(step) \ +UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) + +#define CRC_FUNC_PRE_BE(step) \ + CRC_FUNC_PRE_BE2(step); \ + CRC_FUNC_PRE_BE2(step) + +CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE) { const Byte *p = (const Byte *)data; + const Byte *lim; table += 0x100; - v = CRC_UINT32_SWAP(v); - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) + v = Z7_BSWAP32(v); + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) v = CRC_UPDATE_BYTE_2_BE(v, *p); - for (; size >= 4; size -= 4, p += 4) + lim = p + size; + if (size >= Z7_CRC_NUM_TABLES_USE) { - v ^= *(const UInt32 *)(const void *)p; - v = - (table + 0x000)[((v ) & 0xFF)] - ^ (table + 0x100)[((v >> 8) & 0xFF)] - ^ (table + 0x200)[((v >> 16) & 0xFF)] - ^ (table + 0x300)[((v >> 24))]; + lim -= Z7_CRC_NUM_TABLES_USE; + do + { + v ^= R(0); + { +#if Z7_CRC_NUM_TABLES_USE == 1 * 4 + v = Q(0, v); +#else +#define U2(r, op) \ + { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } + UInt32 d, x; + U2(1, =) +#if Z7_CRC_NUM_TABLES_USE >= 3 * 4 +#define U(r) U2(r, ^=) + U(2) +#if Z7_CRC_NUM_TABLES_USE >= 4 * 4 + U(3) +#if Z7_CRC_NUM_TABLES_USE >= 5 * 4 + U(4) +#if Z7_CRC_NUM_TABLES_USE >= 6 * 4 + U(5) +#if Z7_CRC_NUM_TABLES_USE >= 7 * 4 +#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif +#endif +#endif +#endif +#endif +#undef U +#undef U2 + v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); +#endif + } + p += Z7_CRC_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC_NUM_TABLES_USE; } - for (; size > 0; size--, p++) + for (; p < lim; p++) v = CRC_UPDATE_BYTE_2_BE(v, *p); - return CRC_UINT32_SWAP(v); + return Z7_BSWAP32(v); } -UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table) -{ - const Byte *p = (const Byte *)data; - table += 0x100; - v = CRC_UINT32_SWAP(v); - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) - v = CRC_UPDATE_BYTE_2_BE(v, *p); - for (; size >= 8; size -= 8, p += 8) - { - UInt32 d; - v ^= *(const UInt32 *)(const void *)p; - v = - (table + 0x400)[((v ) & 0xFF)] - ^ (table + 0x500)[((v >> 8) & 0xFF)] - ^ (table + 0x600)[((v >> 16) & 0xFF)] - ^ (table + 0x700)[((v >> 24))]; - d = *((const UInt32 *)(const void *)p + 1); - v ^= - (table + 0x000)[((d ) & 0xFF)] - ^ (table + 0x100)[((d >> 8) & 0xFF)] - ^ (table + 0x200)[((d >> 16) & 0xFF)] - ^ (table + 0x300)[((d >> 24))]; - } - for (; size > 0; size--, p++) - v = CRC_UPDATE_BYTE_2_BE(v, *p); - return CRC_UINT32_SWAP(v); -} +#undef CRC_UPDATE_BYTE_2_BE +#undef R +#undef Q +#undef CRC_FUNC_PRE_BE +#undef CRC_FUNC_PRE_BE2 #endif +#undef Z7_CRC_NUM_TABLES_USE +#endif diff --git a/3rdparty/lzma/src/7zDec.c b/3rdparty/lzma/src/7zDec.c index 96c60359a4..c9b4064e3f 100644 --- a/3rdparty/lzma/src/7zDec.c +++ b/3rdparty/lzma/src/7zDec.c @@ -1,5 +1,5 @@ /* 7zDec.c -- Decoding from 7z folder -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -51,6 +51,7 @@ #ifndef Z7_NO_METHODS_FILTERS #define k_Delta 3 +#define k_RISCV 0xb #define k_BCJ 0x3030103 #define k_PPC 0x3030205 #define k_IA64 0x3030401 @@ -362,6 +363,7 @@ static SRes CheckSupportedFolder(const CSzFolder *f) case k_IA64: case k_SPARC: case k_ARM: + case k_RISCV: #endif #ifdef Z7_USE_FILTER_ARM64 case k_ARM64: @@ -535,10 +537,10 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, } } } - #if defined(Z7_USE_BRANCH_FILTER) +#if defined(Z7_USE_BRANCH_FILTER) else if (ci == 1) { - #if !defined(Z7_NO_METHODS_FILTERS) +#if !defined(Z7_NO_METHODS_FILTERS) if (coder->MethodID == k_Delta) { if (coder->PropsSize != 1) @@ -550,22 +552,43 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, } continue; } - #endif +#endif - #ifdef Z7_USE_FILTER_ARM64 +#ifdef Z7_USE_FILTER_ARM64 if (coder->MethodID == k_ARM64) { UInt32 pc = 0; if (coder->PropsSize == 4) + { pc = GetUi32(propsData + coder->PropsOffset); + if (pc & 3) + return SZ_ERROR_UNSUPPORTED; + } else if (coder->PropsSize != 0) return SZ_ERROR_UNSUPPORTED; z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc); continue; } - #endif - - #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) + if (coder->MethodID == k_RISCV) + { + UInt32 pc = 0; + if (coder->PropsSize == 4) + { + pc = GetUi32(propsData + coder->PropsOffset); + if (pc & 1) + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + z7_BranchConv_RISCV_Dec(outBuffer, outSize, pc); + continue; + } +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) { if (coder->PropsSize != 0) return SZ_ERROR_UNSUPPORTED; @@ -579,7 +602,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0 break; } - CASE_BRA_CONV(PPC) + case k_PPC: Z7_BRANCH_CONV_DEC_2(BranchConv_PPC)(outBuffer, outSize, 0); break; // pc = 0; + // CASE_BRA_CONV(PPC) CASE_BRA_CONV(IA64) CASE_BRA_CONV(SPARC) CASE_BRA_CONV(ARM) @@ -592,9 +616,9 @@ static SRes SzFolder_Decode2(const CSzFolder *folder, } continue; } - #endif +#endif } // (c == 1) - #endif +#endif // Z7_USE_BRANCH_FILTER else return SZ_ERROR_UNSUPPORTED; } diff --git a/3rdparty/lzma/src/Aes.c b/3rdparty/lzma/src/Aes.c index bcaafab118..abc5d24b48 100644 --- a/3rdparty/lzma/src/Aes.c +++ b/3rdparty/lzma/src/Aes.c @@ -1,5 +1,5 @@ /* Aes.c -- AES encryption / decryption -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -13,7 +13,9 @@ AES_CODE_FUNC g_AesCtr_Code; UInt32 g_Aes_SupportedFunctions_Flags; #endif +MY_ALIGN(64) static UInt32 T[256 * 4]; +MY_ALIGN(64) static const Byte Sbox[256] = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, @@ -33,7 +35,9 @@ static const Byte Sbox[256] = { 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; +MY_ALIGN(64) static UInt32 D[256 * 4]; +MY_ALIGN(64) static Byte InvS[256]; #define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF) @@ -54,24 +58,54 @@ static Byte InvS[256]; // #define Z7_SHOW_AES_STATUS #ifdef MY_CPU_X86_OR_AMD64 - #define USE_HW_AES -#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) - #if defined(__clang__) - #if (__clang_major__ >= 8) // fix that check - #define USE_HW_AES - #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 6) // fix that check + + #if defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1110) #define USE_HW_AES + #if (__INTEL_COMPILER >= 1900) + #define USE_HW_VAES + #endif #endif + #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400) + #define USE_HW_AES + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_HW_VAES + #endif #elif defined(_MSC_VER) - #if _MSC_VER >= 1910 + #define USE_HW_AES + #define USE_HW_VAES + #endif + +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_AES) \ + || defined(__ARM_FEATURE_CRYPTO) + #define USE_HW_AES + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) #define USE_HW_AES #endif + #endif + #endif #endif #endif #ifdef USE_HW_AES +// #pragma message("=== Aes.c USE_HW_AES === ") #ifdef Z7_SHOW_AES_STATUS #include #define PRF(x) x @@ -136,6 +170,7 @@ void AesGenTables(void) #endif #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_HW_VAES if (CPU_IsSupported_VAES_AVX2()) { PRF(printf("\n===vaes avx2\n")); @@ -146,6 +181,7 @@ void AesGenTables(void) #endif } #endif + #endif } #endif diff --git a/3rdparty/lzma/src/AesOpt.c b/3rdparty/lzma/src/AesOpt.c index e48a6d641f..58769ea059 100644 --- a/3rdparty/lzma/src/AesOpt.c +++ b/3rdparty/lzma/src/AesOpt.c @@ -1,5 +1,5 @@ /* AesOpt.c -- AES optimized code for x86 AES hardware instructions -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -15,8 +15,8 @@ #define USE_INTEL_VAES #endif #endif - #elif defined(__clang__) && (__clang_major__ > 3 || __clang_major__ == 3 && __clang_minor__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4) + #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400) #define USE_INTEL_AES #if !defined(__AES__) #define ATTRIB_AES __attribute__((__target__("aes"))) @@ -35,27 +35,37 @@ #define USE_INTEL_VAES #endif #endif + #ifndef USE_INTEL_AES + #define Z7_USE_AES_HW_STUB + #endif + #ifndef USE_INTEL_VAES + #define Z7_USE_VAES_HW_STUB + #endif #endif -#ifndef ATTRIB_AES - #define ATTRIB_AES -#endif -#ifndef ATTRIB_VAES - #define ATTRIB_VAES -#endif + #ifndef USE_INTEL_AES + // #define Z7_USE_AES_HW_STUB // for debug + #endif + #ifndef USE_INTEL_VAES + // #define Z7_USE_VAES_HW_STUB // for debug + #endif #ifdef USE_INTEL_AES #include -#ifndef USE_INTEL_VAES +#if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB) #define AES_TYPE_keys UInt32 #define AES_TYPE_data Byte // #define AES_TYPE_keys __m128i // #define AES_TYPE_data __m128i #endif +#ifndef ATTRIB_AES + #define ATTRIB_AES +#endif + #define AES_FUNC_START(name) \ void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks) // void Z7_FASTCALL name(__m128i *p, __m128i *data, size_t numBlocks) @@ -69,8 +79,6 @@ AES_FUNC_START (name) #define MM_OP_m(op, src) MM_OP(op, m, src) #define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src) -#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src) - AES_FUNC_START2 (AesCbc_Encode_HW) { @@ -139,11 +147,6 @@ AES_FUNC_START2 (AesCbc_Encode_HW) #define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]) #endif -#define AVX_DECLARE_VAR(reg, ii) __m256i reg; -#define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii]; -#define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg; -#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii])) - #define MM_OP_key(op, reg) MM_OP(op, reg, key); #define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg) @@ -152,27 +155,13 @@ AES_FUNC_START2 (AesCbc_Encode_HW) #define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg) #define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg) - -#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) -#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) -#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) -#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) -#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) - #define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one) reg = ctr; #define CTR_END( reg, ii) MM_XOR (data[ii], reg) -#define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key); -#define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg) - #define WOP_KEY(op, n) { \ const __m128i key = w[n]; \ WOP(op); } -#define AVX_WOP_KEY(op, n) { \ - const __m256i key = w[n]; \ - WOP(op); } - #define WIDE_LOOP_START \ dataEnd = data + numBlocks; \ @@ -190,6 +179,40 @@ AES_FUNC_START2 (AesCbc_Encode_HW) for (; data < dataEnd; data++) + +#ifdef USE_INTEL_VAES + +#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src) +#define AVX_DECLARE_VAR(reg, ii) __m256i reg; +#define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii]; +#define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg; +/* +AVX_XOR_data_M1() needs unaligned memory load +if (we don't use _mm256_loadu_si256() here) +{ + Most compilers with enabled optimizations generate fused AVX (LOAD + OP) + instruction that can load unaligned data. + But GCC and CLANG without -O2 or -O1 optimizations can generate separated + LOAD-ALIGNED (vmovdqa) instruction that will fail on execution. +} +Note: some compilers generate more instructions, if we use _mm256_loadu_si256() here. +v23.02: we use _mm256_loadu_si256() here, because we need compatibility with any compiler. +*/ +#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, _mm256_loadu_si256(&(((const __m256i *)(const void *)(data - 1))[ii]))) +// for debug only: the following code will fail on execution, if compiled by some compilers: +// #define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii])) + +#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) +#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) +#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) +#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) +#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) +#define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key); +#define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg) +#define AVX_WOP_KEY(op, n) { \ + const __m256i key = w[n]; \ + WOP(op); } + #define NUM_AES_KEYS_MAX 15 #define WIDE_LOOP_START_AVX(OP) \ @@ -214,6 +237,9 @@ AES_FUNC_START2 (AesCbc_Encode_HW) /* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified, MSVC still can insert vzeroupper instruction. */ +#endif + + AES_FUNC_START2 (AesCbc_Decode_HW) { @@ -380,6 +406,9 @@ required that must be included before . #endif #endif // __clang__ && _MSC_VER +#ifndef ATTRIB_VAES + #define ATTRIB_VAES +#endif #define VAES_FUNC_START2(name) \ AES_FUNC_START (name); \ @@ -519,10 +548,18 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256) /* no USE_INTEL_AES */ +#if defined(Z7_USE_AES_HW_STUB) +// We can compile this file with another C compiler, +// or we can compile asm version. +// So we can generate real code instead of this stub function. +// #if defined(_MSC_VER) #pragma message("AES HW_SW stub was used") +// #endif +#if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB) #define AES_TYPE_keys UInt32 #define AES_TYPE_data Byte +#endif #define AES_FUNC_START(name) \ void Z7_FASTCALL name(UInt32 *p, Byte *data, size_t numBlocks) \ @@ -535,13 +572,16 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256) AES_COMPAT_STUB (AesCbc_Encode) AES_COMPAT_STUB (AesCbc_Decode) AES_COMPAT_STUB (AesCtr_Code) +#endif // Z7_USE_AES_HW_STUB #endif // USE_INTEL_AES #ifndef USE_INTEL_VAES - +#if defined(Z7_USE_VAES_HW_STUB) +// #if defined(_MSC_VER) #pragma message("VAES HW_SW stub was used") +// #endif #define VAES_COMPAT_STUB(name) \ void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ @@ -550,36 +590,59 @@ AES_COMPAT_STUB (AesCtr_Code) VAES_COMPAT_STUB (AesCbc_Decode_HW) VAES_COMPAT_STUB (AesCtr_Code_HW) - +#endif #endif // ! USE_INTEL_VAES + + #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) - #if defined(__clang__) - #if (__clang_major__ >= 8) // fix that check + #if defined(__ARM_FEATURE_AES) \ + || defined(__ARM_FEATURE_CRYPTO) + #define USE_HW_AES + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) #define USE_HW_AES #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 6) // fix that check - #define USE_HW_AES #endif - #elif defined(_MSC_VER) - #if _MSC_VER >= 1910 - #define USE_HW_AES #endif #endif #ifdef USE_HW_AES // #pragma message("=== AES HW === ") +// __ARM_FEATURE_CRYPTO macro is deprecated in favor of the finer grained feature macro __ARM_FEATURE_AES #if defined(__clang__) || defined(__GNUC__) +#if !defined(__ARM_FEATURE_AES) && \ + !defined(__ARM_FEATURE_CRYPTO) #ifdef MY_CPU_ARM64 - #define ATTRIB_AES __attribute__((__target__("+crypto,aes"))) +#if defined(__clang__) + #define ATTRIB_AES __attribute__((__target__("crypto"))) +#else + #define ATTRIB_AES __attribute__((__target__("+crypto"))) +#endif #else +#if defined(__clang__) + #define ATTRIB_AES __attribute__((__target__("armv8-a,aes"))) +#else #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) +#endif #endif +#endif #else // _MSC_VER // for arm32 @@ -590,11 +653,59 @@ VAES_COMPAT_STUB (AesCtr_Code_HW) #define ATTRIB_AES #endif -#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64) +#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include #else -#include +/* + clang-17.0.1: error : Cannot select: intrinsic %llvm.arm.neon.aese + clang + 3.8.1 : __ARM_NEON : defined(__ARM_FEATURE_CRYPTO) + 7.0.1 : __ARM_NEON : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO) + 11.?.0 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO) + 13.0.1 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_AES) + 16 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 +*/ +#if defined(__clang__) && __clang_major__ < 16 +#if !defined(__ARM_FEATURE_AES) && \ + !defined(__ARM_FEATURE_CRYPTO) +// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ") + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1 +// #if defined(__clang__) && __clang_major__ < 13 + #define __ARM_FEATURE_CRYPTO 1 +// #else + #define __ARM_FEATURE_AES 1 +// #endif + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif +#endif // clang + +#if defined(__clang__) + +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") + #undef __ARM_ARCH + #define __ARM_ARCH 8 + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // clang + +#include + +#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \ + defined(__ARM_FEATURE_CRYPTO) && \ + defined(__ARM_FEATURE_AES) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #undef __ARM_FEATURE_CRYPTO + #undef __ARM_FEATURE_AES + #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ") +#endif + +#endif // Z7_MSC_VER_ORIGINAL typedef uint8x16_t v128; @@ -620,7 +731,7 @@ AES_FUNC_START (name) AES_FUNC_START2 (AesCbc_Encode_HW) { - v128 *p = (v128*)(void*)ivAes; + v128 * const p = (v128*)(void*)ivAes; v128 *data = (v128*)(void*)data8; v128 m = *p; const v128 k0 = p[2]; @@ -639,7 +750,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW) const v128 k_z0 = w[2]; for (; numBlocks != 0; numBlocks--, data++) { - MM_XOR_m (*data); + MM_XOR_m (*data) AES_E_MC_m (k0) AES_E_MC_m (k1) AES_E_MC_m (k2) @@ -660,7 +771,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW) } } AES_E_m (k_z1) - MM_XOR_m (k_z0); + MM_XOR_m (k_z0) *data = m; } *p = m; @@ -745,7 +856,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW) while (w != p); WOP_KEY (AES_D, 1) WOP_KEY (AES_XOR, 0) - MM_XOR (m0, iv); + MM_XOR (m0, iv) WOP_M1 (XOR_data_M1) iv = data[NUM_WAYS - 1]; WOP (STORE_data) @@ -759,14 +870,14 @@ AES_FUNC_START2 (AesCbc_Decode_HW) AES_D_IMC_m (w[2]) do { - AES_D_IMC_m (w[1]); - AES_D_IMC_m (w[0]); + AES_D_IMC_m (w[1]) + AES_D_IMC_m (w[0]) w -= 2; } while (w != p); - AES_D_m (w[1]); - MM_XOR_m (w[0]); - MM_XOR_m (iv); + AES_D_m (w[1]) + MM_XOR_m (w[0]) + MM_XOR_m (iv) iv = *data; *data = m; } @@ -783,6 +894,12 @@ AES_FUNC_START2 (AesCtr_Code_HW) const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; const v128 *dataEnd; uint64x2_t one = vdupq_n_u64(0); + +// the bug in clang: +// __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); +#if defined(__clang__) && (__clang_major__ <= 9) +#pragma GCC diagnostic ignored "-Wvector-conversion" +#endif one = vsetq_lane_u64(1, one, 0); p += 2; @@ -809,11 +926,11 @@ AES_FUNC_START2 (AesCtr_Code_HW) { const v128 *w = p; v128 m; - CTR_START (m, 0); + CTR_START (m, 0) do { - AES_E_MC_m (w[0]); - AES_E_MC_m (w[1]); + AES_E_MC_m (w[0]) + AES_E_MC_m (w[1]) w += 2; } while (w != wEnd); diff --git a/3rdparty/lzma/src/Alloc.c b/3rdparty/lzma/src/Alloc.c index d841bf20a3..63e1a121e7 100644 --- a/3rdparty/lzma/src/Alloc.c +++ b/3rdparty/lzma/src/Alloc.c @@ -1,5 +1,5 @@ /* Alloc.c -- Memory allocation functions -2023-04-02 : Igor Pavlov : Public domain */ +2024-02-18 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -10,19 +10,18 @@ #include "Alloc.h" -#ifdef _WIN32 -#ifdef Z7_LARGE_PAGES -#if defined(__clang__) || defined(__GNUC__) -typedef void (*Z7_voidFunction)(void); -#define MY_CAST_FUNC (Z7_voidFunction) -#elif defined(_MSC_VER) && _MSC_VER > 1920 -#define MY_CAST_FUNC (void *) -// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' -#else -#define MY_CAST_FUNC +#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \ + (!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64) + #define Z7_USE_DYN_GetLargePageMinimum +#endif + +// for debug: +#if 0 +#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ") +#define Z7_ALLOC_NO_OFFSET_ALLOCATOR +#endif #endif -#endif // Z7_LARGE_PAGES -#endif // _WIN32 // #define SZ_ALLOC_DEBUG /* #define SZ_ALLOC_DEBUG */ @@ -146,7 +145,9 @@ static void PrintAddr(void *p) #define PRINT_FREE(name, cnt, ptr) #define Print(s) #define PrintLn() +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR #define PrintHex(v, align) +#endif #define PrintAddr(p) #endif @@ -246,9 +247,9 @@ void MidFree(void *address) #ifdef Z7_LARGE_PAGES #ifdef MEM_LARGE_PAGES - #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES + #define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES #else - #define MY__MEM_LARGE_PAGES 0x20000000 + #define MY_MEM_LARGE_PAGES 0x20000000 #endif extern @@ -258,19 +259,23 @@ typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); void SetLargePageSize(void) { - #ifdef Z7_LARGE_PAGES SIZE_T size; +#ifdef Z7_USE_DYN_GetLargePageMinimum +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + const Func_GetLargePageMinimum fn = - (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); if (!fn) return; size = fn(); +#else + size = GetLargePageMinimum(); +#endif if (size == 0 || (size & (size - 1)) != 0) return; g_LargePageSize = size; - #endif } #endif // Z7_LARGE_PAGES @@ -292,7 +297,7 @@ void *BigAlloc(size_t size) size2 = (size + ps) & ~ps; if (size2 >= size) { - void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); + void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE); if (p) { PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) @@ -328,20 +333,7 @@ const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; #endif -/* - uintptr_t : C99 (optional) - : unsupported in VS6 -*/ - -#ifdef _WIN32 - typedef UINT_PTR UIntPtr; -#else - /* - typedef uintptr_t UIntPtr; - */ - typedef ptrdiff_t UIntPtr; -#endif - +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR #define ADJUST_ALLOC_SIZE 0 /* @@ -352,14 +344,36 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; MyAlloc() can return address that is NOT multiple of sizeof(void *). */ - /* -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) + uintptr_t : C99 (optional) + : unsupported in VS6 */ -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) +typedef + #ifdef _WIN32 + UINT_PTR + #elif 1 + uintptr_t + #else + ptrdiff_t + #endif + MY_uintptr_t; +#if 0 \ + || (defined(__CHERI__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8)) +// for 128-bit pointers (cheri): +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1)))) +#else +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1)))) +#endif -#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) +#endif + +#if !defined(_WIN32) \ + && (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \ + || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) #define USE_posix_memalign #endif @@ -399,14 +413,13 @@ static int posix_memalign(void **ptr, size_t align, size_t size) #define ALLOC_ALIGN_SIZE ((size_t)1 << 7) -static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +void *z7_AlignedAlloc(size_t size) { - #ifndef USE_posix_memalign +#ifndef USE_posix_memalign void *p; void *pAligned; size_t newSize; - UNUSED_VAR(pp) /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned block to prevent cache line sharing with another allocated blocks */ @@ -431,10 +444,9 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) return pAligned; - #else +#else void *p; - UNUSED_VAR(pp) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) return NULL; @@ -443,19 +455,37 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) return p; - #endif +#endif +} + + +void z7_AlignedFree(void *address) +{ +#ifndef USE_posix_memalign + if (address) + MyFree(((void **)address)[-1]); +#else + free(address); +#endif +} + + +static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +{ + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); } static void SzAlignedFree(ISzAllocPtr pp, void *address) { UNUSED_VAR(pp) - #ifndef USE_posix_memalign +#ifndef USE_posix_memalign if (address) MyFree(((void **)address)[-1]); - #else +#else free(address); - #endif +#endif } @@ -463,16 +493,44 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; -#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) - /* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ -#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] -/* -#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1] -*/ +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#if 1 + #define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] +#else + // we can use this simplified code, + // if (CAlignOffsetAlloc::offset == (k * sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1]) +#endif +#endif + + +#if 0 +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#include +static void PrintPtr(const char *s, const void *p) +{ + const Byte *p2 = (const Byte *)&p; + unsigned i; + printf("%s %p ", s, p); + for (i = sizeof(p); i != 0;) + { + i--; + printf("%02x", p2[i]); + } + printf("\n"); +} +#endif +#endif + static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) { +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); +#else const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); void *adr; void *pAligned; @@ -501,6 +559,12 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; +#if 0 + printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size); + PrintPtr("base", adr); + PrintPtr("alig", pAligned); +#endif + PrintLn(); Print("- Aligned: "); Print(" size="); PrintHex(size, 8); @@ -512,11 +576,16 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) REAL_BLOCK_PTR_VAR(pAligned) = adr; return pAligned; +#endif } static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) { +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + z7_AlignedFree(address); +#else if (address) { const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); @@ -525,6 +594,7 @@ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) PrintLn(); ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); } +#endif } diff --git a/3rdparty/lzma/src/Bra.c b/3rdparty/lzma/src/Bra.c index 22e0e478ec..e61edf8f12 100644 --- a/3rdparty/lzma/src/Bra.c +++ b/3rdparty/lzma/src/Bra.c @@ -1,11 +1,11 @@ /* Bra.c -- Branch converters for RISC code -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-20 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "Bra.h" -#include "CpuArch.h" #include "RotateDefs.h" +#include "CpuArch.h" #if defined(MY_CPU_SIZEOF_POINTER) \ && ( MY_CPU_SIZEOF_POINTER == 4 \ @@ -26,7 +26,7 @@ #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; -#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name +#define Z7_BRANCH_CONV(name) z7_ ## name #define Z7_BRANCH_FUNC_MAIN(name) \ static \ @@ -42,11 +42,11 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ #ifdef Z7_EXTRACT_ONLY #define Z7_BRANCH_FUNCS_IMP(name) \ - Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) #else #define Z7_BRANCH_FUNCS_IMP(name) \ - Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \ - Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1) + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1) #endif #if defined(__clang__) @@ -72,7 +72,7 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ #endif -Z7_BRANCH_FUNC_MAIN(ARM64) +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64) { // Byte *p = data; const Byte *lim; @@ -121,10 +121,10 @@ Z7_BRANCH_FUNC_MAIN(ARM64) } } } -Z7_BRANCH_FUNCS_IMP(ARM64) +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64) -Z7_BRANCH_FUNC_MAIN(ARM) +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM) { // Byte *p = data; const Byte *lim; @@ -152,10 +152,10 @@ Z7_BRANCH_FUNC_MAIN(ARM) } } } -Z7_BRANCH_FUNCS_IMP(ARM) +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM) -Z7_BRANCH_FUNC_MAIN(PPC) +Z7_BRANCH_FUNC_MAIN(BranchConv_PPC) { // Byte *p = data; const Byte *lim; @@ -192,14 +192,14 @@ Z7_BRANCH_FUNC_MAIN(PPC) } } } -Z7_BRANCH_FUNCS_IMP(PPC) +Z7_BRANCH_FUNCS_IMP(BranchConv_PPC) #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED #define BR_SPARC_USE_ROTATE #endif -Z7_BRANCH_FUNC_MAIN(SPARC) +Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC) { // Byte *p = data; const Byte *lim; @@ -254,10 +254,10 @@ Z7_BRANCH_FUNC_MAIN(SPARC) } } } -Z7_BRANCH_FUNCS_IMP(SPARC) +Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC) -Z7_BRANCH_FUNC_MAIN(ARMT) +Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT) { // Byte *p = data; Byte *lim; @@ -335,12 +335,12 @@ Z7_BRANCH_FUNC_MAIN(ARMT) // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); } -Z7_BRANCH_FUNCS_IMP(ARMT) +Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT) // #define BR_IA64_NO_INLINE -Z7_BRANCH_FUNC_MAIN(IA64) +Z7_BRANCH_FUNC_MAIN(BranchConv_IA64) { // Byte *p = data; const Byte *lim; @@ -417,4 +417,293 @@ Z7_BRANCH_FUNC_MAIN(IA64) } } } -Z7_BRANCH_FUNCS_IMP(IA64) +Z7_BRANCH_FUNCS_IMP(BranchConv_IA64) + + +#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET; +#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET; + +#if 1 && defined(MY_CPU_LE_UNALIGN) + #define RISCV_USE_UNALIGNED_LOAD +#endif + +#ifdef RISCV_USE_UNALIGNED_LOAD + #define RISCV_GET_UI32(p) GetUi32(p) + #define RISCV_SET_UI32(p, v) { SetUi32(p, v) } +#else + #define RISCV_GET_UI32(p) \ + ((UInt32)GetUi16a(p) + \ + ((UInt32)GetUi16a((p) + 2) << 16)) + #define RISCV_SET_UI32(p, v) { \ + SetUi16a(p, (UInt16)(v)) \ + SetUi16a((p) + 2, (UInt16)(v >> 16)) } +#endif + +#if 1 && defined(MY_CPU_LE) + #define RISCV_USE_16BIT_LOAD +#endif + +#ifdef RISCV_USE_16BIT_LOAD + #define RISCV_LOAD_VAL(p) GetUi16a(p) +#else + #define RISCV_LOAD_VAL(p) (*(p)) +#endif + +#define RISCV_INSTR_SIZE 2 +#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE) +#define RISCV_STEP_2 4 +#define RISCV_REG_VAL (2 << 7) +#define RISCV_CMD_VAL 3 +#if 1 + // for code size optimization: + #define RISCV_DELTA_7F 0x7f +#else + #define RISCV_DELTA_7F 0 +#endif + +#define RISCV_CHECK_1(v, b) \ + (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0) + +#if 1 + #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \ + << 18) \ + < ((r) & 0x1d)) +#else + // this branch gives larger code, because + // compilers generate larger code for big constants. + #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + < ((r) & 0x1d)) +#endif + + +#define RISCV_SCAN_LOOP \ + Byte *lim; \ + size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \ + if (size <= 6) return p; \ + size -= 6; \ + lim = p + size; \ + BR_PC_INIT \ + for (;;) \ + { \ + UInt32 a, v; \ + /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \ + for (;;) \ + { \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \ + if ((a & 0x77) == 0) break; \ + a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \ + p += RISCV_INSTR_SIZE * 2; \ + if ((a & 0x77) == 0) \ + { \ + p -= RISCV_INSTR_SIZE; \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + break; \ + } \ + } +// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL +// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL +// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC +// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC + +Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP + v = a; + a = RISCV_GET_UI32(p); +#ifndef RISCV_USE_16BIT_LOAD + v += (UInt32)p[1] << 8; +#endif + + if ((v & 8) == 0) // JAL + { + if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + v = ((a & 1u << 31) >> 11) + | ((a & 0x3ff << 21) >> 20) + | ((a & 1 << 20) >> 9) + | (a & 0xff << 12); + BR_CONVERT_VAL_ENC(v) + // ((v & 1) == 0) + // v: bits [1 : 20] contain offset bits +#if 0 && defined(RISCV_USE_UNALIGNED_LOAD) + a &= 0xfff; + a |= ((UInt32)(v << 23)) + | ((UInt32)(v << 7) & ((UInt32)0xff << 16)) + | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8)); + RISCV_SET_UI32(p, a) +#else // aligned +#if 0 + SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff))) +#else + p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf)); +#endif + +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v <<= 15; + v = Z7_BSWAP32(v); + SetUi16a(p + 2, (UInt16)v) +#else + p[2] = (Byte)(v >> 9); + p[3] = (Byte)(v >> 1); +#endif +#endif // aligned + } + p += 4; + continue; + } // JAL + + { + // AUIPC + if (v & 0xe80) // (not x0) and (not x2) + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (RISCV_CHECK_1(v, b)) + { + { + const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, temp) + } + a &= 0xfffff000; + { +#if 1 + const int t = -1 >> 1; + if (t != -1) + a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation + else +#endif + a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension). + } + BR_CONVERT_VAL_ENC(a) +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + a = Z7_BSWAP32(a); + RISCV_SET_UI32(p + 4, a) +#else + SetBe32(p + 4, a) +#endif + p += 8; + } + else + p += RISCV_STEP_1; + } + else + { + UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + v = RISCV_GET_UI32(p + 4); + r = (r << 7) + 0x17 + (v & 0xfffff000); + a = (a >> 12) | (v << 20); + RISCV_SET_UI32(p, r) + RISCV_SET_UI32(p + 4, a) + p += 8; + } + else + p += RISCV_STEP_2; + } + } + } // for +} + + +Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP +#ifdef RISCV_USE_16BIT_LOAD + if ((a & 8) == 0) + { +#else + v = a; + a += (UInt32)p[1] << 8; + if ((v & 8) == 0) + { +#endif + // JAL + a -= 0x100 - RISCV_DELTA_7F; + if (a & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff; +#if 0 // unaligned + a = GetUi32(p); + v = (UInt32)(a >> 23) & ((UInt32)0xff << 1) + | (UInt32)(a >> 7) & ((UInt32)0xff << 9) +#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v = GetUi16a(p + 2); + v = Z7_BSWAP32(v) >> 15 +#else + v = (UInt32)p[3] << 1 + | (UInt32)p[2] << 9 +#endif + | (UInt32)((a & 0xf000) << 5); + BR_CONVERT_VAL_DEC(v) + a = a_old + | (v << 11 & 1u << 31) + | (v << 20 & 0x3ff << 21) + | (v << 9 & 1 << 20) + | (v & 0xff << 12); + RISCV_SET_UI32(p, a) + } + p += 4; + continue; + } // JAL + + { + // AUIPC + v = a; +#if 1 && defined(RISCV_USE_UNALIGNED_LOAD) + a = GetUi32(p); +#else + a |= (UInt32)GetUi16a(p + 2) << 16; +#endif + if ((v & 0xe80) == 0) // x0/x2 + { + const UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + UInt32 b; +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + b = RISCV_GET_UI32(p + 4); + b = Z7_BSWAP32(b); +#else + b = GetBe32(p + 4); +#endif + v = a >> 12; + BR_CONVERT_VAL_DEC(b) + a = (r << 7) + 0x17; + a += (b + 0x800) & 0xfffff000; + v |= b << 20; + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + else + p += RISCV_STEP_2; + } + else + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (!RISCV_CHECK_1(v, b)) + p += RISCV_STEP_1; + else + { + v = (a & 0xfffff000) | (b >> 20); + a = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + } + } + } // for +} diff --git a/3rdparty/lzma/src/CpuArch.c b/3rdparty/lzma/src/CpuArch.c index 33f8a3ab4c..e792f39deb 100644 --- a/3rdparty/lzma/src/CpuArch.c +++ b/3rdparty/lzma/src/CpuArch.c @@ -1,5 +1,5 @@ /* CpuArch.c -- CPU specific code -2023-05-18 : Igor Pavlov : Public domain */ +2024-07-04 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -226,7 +226,7 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! */ static -Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo) +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo) { UNUSED_VAR(subFunction) __cpuid(CPUInfo, func); @@ -242,13 +242,13 @@ Z7_NO_INLINE #endif void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { - MY_cpuidex((int *)p, (int)func, 0); + MY_cpuidex((Int32 *)p, (Int32)func, 0); } Z7_NO_INLINE UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) { - int a[4]; + Int32 a[4]; MY_cpuidex(a, 0, 0); return a[0]; } @@ -384,7 +384,7 @@ BoolInt CPU_IsSupported_CMOV(void) UInt32 a[4]; if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 15) & 1; + return (BoolInt)(a[3] >> 15) & 1; } BoolInt CPU_IsSupported_SSE(void) @@ -393,7 +393,7 @@ BoolInt CPU_IsSupported_SSE(void) CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 25) & 1; + return (BoolInt)(a[3] >> 25) & 1; } BoolInt CPU_IsSupported_SSE2(void) @@ -402,7 +402,7 @@ BoolInt CPU_IsSupported_SSE2(void) CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 26) & 1; + return (BoolInt)(a[3] >> 26) & 1; } #endif @@ -419,17 +419,17 @@ static UInt32 x86cpuid_Func_1_ECX(void) BoolInt CPU_IsSupported_AES(void) { - return (x86cpuid_Func_1_ECX() >> 25) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1; } BoolInt CPU_IsSupported_SSSE3(void) { - return (x86cpuid_Func_1_ECX() >> 9) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1; } BoolInt CPU_IsSupported_SSE41(void) { - return (x86cpuid_Func_1_ECX() >> 19) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1; } BoolInt CPU_IsSupported_SHA(void) @@ -441,7 +441,7 @@ BoolInt CPU_IsSupported_SHA(void) { UInt32 d[4]; z7_x86_cpuid(d, 7); - return (d[1] >> 29) & 1; + return (BoolInt)(d[1] >> 29) & 1; } } @@ -638,10 +638,10 @@ BoolInt CPU_IsSupported_AVX(void) { const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); - // printf("\n=== XGetBV=%d\n", bm); + // printf("\n=== XGetBV=0x%x\n", bm); return 1 - & (bm >> 1) // SSE state is supported (set by OS) for storing/restoring - & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring } // since Win7SP1: we can use GetEnabledXStateFeatures(); } @@ -658,10 +658,39 @@ BoolInt CPU_IsSupported_AVX2(void) z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 - & (d[1] >> 5); // avx2 + & (BoolInt)(d[1] >> 5); // avx2 } } +#if 0 +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + BoolInt v; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + v = 1 + & (BoolInt)(d[1] >> 16) // avx512f + & (BoolInt)(d[1] >> 31); // avx512vl + if (!v) + return False; + } + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 5) // OPMASK + & (BoolInt)(bm >> 6) // ZMM upper 256-bit + & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 + } +} +#endif + BoolInt CPU_IsSupported_VAES_AVX2(void) { if (!CPU_IsSupported_AVX()) @@ -673,9 +702,9 @@ BoolInt CPU_IsSupported_VAES_AVX2(void) z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 - & (d[1] >> 5) // avx2 + & (BoolInt)(d[1] >> 5) // avx2 // & (d[1] >> 31) // avx512vl - & (d[2] >> 9); // vaes // VEX-256/EVEX + & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX } } @@ -688,7 +717,7 @@ BoolInt CPU_IsSupported_PageGB(void) if (d[0] < 0x80000001) return False; z7_x86_cpuid(d, 0x80000001); - return (d[3] >> 26) & 1; + return (BoolInt)(d[3] >> 26) & 1; } } @@ -760,33 +789,70 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #else // __APPLE__ -#include +#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) + #define Z7_GETAUXV_AVAILABLE +#else +// #pragma message("=== is not NEW GLIBC === ") + #if defined __has_include + #if __has_include () +// #pragma message("=== sys/auxv.h is avail=== ") + #define Z7_GETAUXV_AVAILABLE + #endif + #endif +#endif +#ifdef Z7_GETAUXV_AVAILABLE +// #pragma message("=== Z7_GETAUXV_AVAILABLE === ") +#include #define USE_HWCAP +#endif #ifdef USE_HWCAP +#if defined(__FreeBSD__) +static unsigned long MY_getauxval(int aux) +{ + unsigned long val; + if (elf_aux_info(aux, &val, sizeof(val))) + return 0; + return val; +} +#else +#define MY_getauxval getauxval + #if defined __has_include + #if __has_include () #include + #endif + #endif +#endif #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ - BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } + BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); } #ifdef MY_CPU_ARM64 #define MY_HWCAP_CHECK_FUNC(name) \ MY_HWCAP_CHECK_FUNC_2(name, name) +#if 1 || defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +#endif // MY_HWCAP_CHECK_FUNC (ASIMD) #elif defined(MY_CPU_ARM) #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } + BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); } MY_HWCAP_CHECK_FUNC_2(NEON, NEON) #endif #else // USE_HWCAP #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return 0; } + BoolInt CPU_IsSupported_ ## name(void) { return 0; } +#if defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC(NEON) +#endif #endif // USE_HWCAP diff --git a/3rdparty/lzma/src/DllSecur.c b/3rdparty/lzma/src/DllSecur.c index 02a0f977ea..bbbfc0a763 100644 --- a/3rdparty/lzma/src/DllSecur.c +++ b/3rdparty/lzma/src/DllSecur.c @@ -1,5 +1,5 @@ /* DllSecur.c -- DLL loading security -2023-04-02 : Igor Pavlov : Public domain */ +2023-12-03 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -11,19 +11,7 @@ #ifndef UNDER_CE -#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) - // #pragma GCC diagnostic ignored "-Wcast-function-type" -#endif - -#if defined(__clang__) || defined(__GNUC__) -typedef void (*Z7_voidFunction)(void); -#define MY_CAST_FUNC (Z7_voidFunction) -#elif defined(_MSC_VER) && _MSC_VER > 1920 -#define MY_CAST_FUNC (void *) -// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' -#else -#define MY_CAST_FUNC -#endif +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags); @@ -61,7 +49,7 @@ static const char * const g_Dlls = if ((UInt16)GetVersion() != 6) { \ const \ Func_SetDefaultDllDirectories setDllDirs = \ - (Func_SetDefaultDllDirectories) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \ + (Func_SetDefaultDllDirectories) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \ "SetDefaultDllDirectories"); \ if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; } diff --git a/3rdparty/lzma/src/LzFind.c b/3rdparty/lzma/src/LzFind.c index 799d6227ba..1ce404648e 100644 --- a/3rdparty/lzma/src/LzFind.c +++ b/3rdparty/lzma/src/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2023-03-14 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -108,9 +108,15 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all return (p->bufBase != NULL); } -static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static const Byte *MatchFinder_GetPointerToCurrentPos(void *p) +{ + return ((CMatchFinder *)p)->buffer; +} -static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } +static UInt32 MatchFinder_GetNumAvailableBytes(void *p) +{ + return GET_AVAIL_BYTES((CMatchFinder *)p); +} Z7_NO_INLINE @@ -571,8 +577,9 @@ void MatchFinder_Init_4(CMatchFinder *p) #define CYC_TO_POS_OFFSET 0 // #define CYC_TO_POS_OFFSET 1 // for debug -void MatchFinder_Init(CMatchFinder *p) +void MatchFinder_Init(void *_p) { + CMatchFinder *p = (CMatchFinder *)_p; MatchFinder_Init_HighHash(p); MatchFinder_Init_LowHash(p); MatchFinder_Init_4(p); @@ -607,16 +614,16 @@ void MatchFinder_Init(CMatchFinder *p) #endif #endif -// #elif defined(MY_CPU_ARM_OR_ARM64) -#elif defined(MY_CPU_ARM64) +#elif defined(MY_CPU_ARM64) \ + /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */ - #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) + #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) #define USE_LZFIND_SATUR_SUB_128 #ifdef MY_CPU_ARM64 // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) #else - // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon"))) #endif #elif defined(_MSC_VER) @@ -625,7 +632,7 @@ void MatchFinder_Init(CMatchFinder *p) #endif #endif - #if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64) + #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include #else #include @@ -1082,9 +1089,11 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const #define MOVE_POS \ - ++p->cyclicBufferPos; \ + p->cyclicBufferPos++; \ p->buffer++; \ - { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } + { const UInt32 pos1 = p->pos + 1; \ + p->pos = pos1; \ + if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } #define MOVE_POS_RET MOVE_POS return distances; @@ -1103,20 +1112,26 @@ static void MatchFinder_MovePos(CMatchFinder *p) } #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ - lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + UInt32 hv; const Byte *cur; UInt32 curMatch; \ + UInt32 lenLimit = p->lenLimit; \ + if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \ cur = p->buffer; #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) -#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) +#define SKIP_HEADER(minLen) \ + do { GET_MATCHES_HEADER2(minLen, continue) -#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \ + p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num); +#define SKIP_FOOTER \ + SkipMatchesSpec(MF_PARAMS(p)); \ + MOVE_POS \ + } while (--num); #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ - distances = func(MF_PARAMS(p), \ - distances, (UInt32)_maxLen_); MOVE_POS_RET + distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \ + MOVE_POS_RET #define GET_MATCHES_FOOTER_BT(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) @@ -1133,8 +1148,9 @@ static void MatchFinder_MovePos(CMatchFinder *p) for (; c != lim; c++) if (*(c + diff) != *c) break; \ maxLen = (unsigned)(c - cur); } -static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; GET_MATCHES_HEADER(2) HASH2_CALC curMatch = p->hash[hv]; @@ -1158,8 +1174,9 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) mmm = pos; -static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, d2, pos; unsigned maxLen; @@ -1199,8 +1216,9 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; @@ -1267,10 +1285,12 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -1339,8 +1359,9 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; @@ -1407,10 +1428,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -1466,7 +1489,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (*(cur - d2 + 3) != cur[3]) break; UPDATE_maxLen - distances[-2] = maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; @@ -1489,8 +1512,9 @@ UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt2_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(2) { HASH2_CALC @@ -1511,8 +1535,9 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_FOOTER } -static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt3_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(3) { UInt32 h2; @@ -1526,8 +1551,9 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_FOOTER } -static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(4) { UInt32 h2, h3; @@ -1542,8 +1568,9 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_FOOTER } -static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(5) { UInt32 h2, h3; @@ -1589,8 +1616,9 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) }} while(num); \ -static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(4) UInt32 h2, h3; @@ -1604,8 +1632,9 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) } -static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(5) UInt32 h2, h3; @@ -1634,41 +1663,41 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) { - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + vTable->Init = MatchFinder_Init; + vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { if (p->numHashBytes <= 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + vTable->GetMatches = Hc4_MatchFinder_GetMatches; + vTable->Skip = Hc4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + vTable->GetMatches = Hc5_MatchFinder_GetMatches; + vTable->Skip = Hc5_MatchFinder_Skip; } } else if (p->numHashBytes == 2) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + vTable->GetMatches = Bt2_MatchFinder_GetMatches; + vTable->Skip = Bt2_MatchFinder_Skip; } else if (p->numHashBytes == 3) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + vTable->GetMatches = Bt3_MatchFinder_GetMatches; + vTable->Skip = Bt3_MatchFinder_Skip; } else if (p->numHashBytes == 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + vTable->GetMatches = Bt4_MatchFinder_GetMatches; + vTable->Skip = Bt4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + vTable->GetMatches = Bt5_MatchFinder_GetMatches; + vTable->Skip = Bt5_MatchFinder_Skip; } } diff --git a/3rdparty/lzma/src/LzFindMt.c b/3rdparty/lzma/src/LzFindMt.c index 5253e6ebb3..ac9d59d0fd 100644 --- a/3rdparty/lzma/src/LzFindMt.c +++ b/3rdparty/lzma/src/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -94,7 +94,7 @@ static void MtSync_Construct(CMtSync *p) } -#define DEBUG_BUFFER_LOCK // define it to debug lock state +// #define DEBUG_BUFFER_LOCK // define it to debug lock state #ifdef DEBUG_BUFFER_LOCK #include @@ -877,8 +877,9 @@ SRes MatchFinderMt_InitMt(CMatchFinderMt *p) } -static void MatchFinderMt_Init(CMatchFinderMt *p) +static void MatchFinderMt_Init(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; CMatchFinder *mf = MF(p); p->btBufPos = @@ -981,8 +982,9 @@ static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) -static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) +static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; return p->pointerToCurPos; } @@ -990,8 +992,9 @@ static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); -static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) +static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; if (p->btBufPos != p->btBufPosLimit) return p->btNumAvailBytes; return MatchFinderMt_GetNextBlock_Bt(p); @@ -1243,8 +1246,9 @@ static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) } -static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; const UInt32 *bt = p->btBufPos; const UInt32 len = *bt++; const UInt32 *btLim = bt + len; @@ -1267,8 +1271,9 @@ static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) -static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; const UInt32 *bt = p->btBufPos; UInt32 len = *bt++; const UInt32 avail = p->btNumAvailBytes - 1; @@ -1315,14 +1320,16 @@ static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; #define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); -static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt0_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER2_MT { p->btNumAvailBytes--; SKIP_FOOTER_MT } -static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt2_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER_MT(2) UInt32 h2; MT_HASH2_CALC @@ -1330,8 +1337,9 @@ static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) SKIP_FOOTER_MT } -static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt3_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER_MT(3) UInt32 h2, h3; MT_HASH3_CALC @@ -1361,39 +1369,39 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) { - vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; + vTable->Init = MatchFinderMt_Init; + vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = MatchFinderMt_GetMatches; switch (MF(p)->numHashBytes) { case 2: p->GetHeadsFunc = GetHeads2; - p->MixMatchesFunc = (Mf_Mix_Matches)NULL; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; + p->MixMatchesFunc = NULL; + vTable->Skip = MatchFinderMt0_Skip; + vTable->GetMatches = MatchFinderMt2_GetMatches; break; case 3: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; + p->MixMatchesFunc = MixMatches2; + vTable->Skip = MatchFinderMt2_Skip; break; case 4: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; // it's fast inline version of GetMatches() - // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; + // vTable->GetMatches = MatchFinderMt_GetMatches_Bt4; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; + p->MixMatchesFunc = MixMatches3; + vTable->Skip = MatchFinderMt3_Skip; break; default: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; + p->MixMatchesFunc = MixMatches4; vTable->Skip = - (Mf_Skip_Func)MatchFinderMt3_Skip; - // (Mf_Skip_Func)MatchFinderMt4_Skip; + MatchFinderMt3_Skip; + // MatchFinderMt4_Skip; break; } } diff --git a/3rdparty/lzma/src/Lzma2Dec.c b/3rdparty/lzma/src/Lzma2Dec.c index 388cbc7173..8bf54e499e 100644 --- a/3rdparty/lzma/src/Lzma2Dec.c +++ b/3rdparty/lzma/src/Lzma2Dec.c @@ -1,5 +1,5 @@ /* Lzma2Dec.c -- LZMA2 Decoder -2023-03-03 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ /* #define SHOW_DEBUG_INFO */ @@ -157,8 +157,10 @@ static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) p->decoder.prop.lp = (Byte)lp; return LZMA2_STATE_DATA; } + + default: + return LZMA2_STATE_ERROR; } - return LZMA2_STATE_ERROR; } static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size) diff --git a/3rdparty/lzma/src/LzmaEnc.c b/3rdparty/lzma/src/LzmaEnc.c index 6d13cac8ce..37b2787db6 100644 --- a/3rdparty/lzma/src/LzmaEnc.c +++ b/3rdparty/lzma/src/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2023-04-13: Igor Pavlov : Public domain */ +2024-01-24: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -195,11 +195,11 @@ unsigned GetPosSlot1(UInt32 pos); unsigned GetPosSlot1(UInt32 pos) { unsigned res; - BSR2_RET(pos, res); + BSR2_RET(pos, res) return res; } -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) } #else // ! LZMA_LOG_BSR @@ -512,7 +512,7 @@ struct CLzmaEnc COPY_ARR(d, s, posEncoders) \ (d)->lenProbs = (s)->lenProbs; \ (d)->repLenProbs = (s)->repLenProbs; \ - memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb)); + memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp); void LzmaEnc_SaveState(CLzmaEncHandle p) { @@ -1040,14 +1040,14 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( UInt32 price = b; do { - unsigned bit = sym & 1; + const unsigned bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); } while (sym >= 2); { - unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); } @@ -1056,7 +1056,7 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( { unsigned posState; - size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); for (posState = 1; posState < numPosStates; posState++) memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); } @@ -2696,12 +2696,12 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, #endif { - unsigned lclp = p->lc + p->lp; + const unsigned lclp = p->lc + p->lp; if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) { LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); if (!p->litProbs || !p->saveState.litProbs) { LzmaEnc_FreeLits(p, alloc); @@ -2802,8 +2802,8 @@ static void LzmaEnc_Init(CLzmaEnc *p) } { - UInt32 num = (UInt32)0x300 << (p->lp + p->lc); - UInt32 k; + const size_t num = (size_t)0x300 << (p->lp + p->lc); + size_t k; CLzmaProb *probs = p->litProbs; for (k = 0; k < num; k++) probs[k] = kProbInitValue; diff --git a/3rdparty/lzma/src/MtCoder.c b/3rdparty/lzma/src/MtCoder.c index 6f58abb2a3..03959b6cad 100644 --- a/3rdparty/lzma/src/MtCoder.c +++ b/3rdparty/lzma/src/MtCoder.c @@ -1,5 +1,5 @@ /* MtCoder.c -- Multi-thread Coder -2023-04-13 : Igor Pavlov : Public domain */ +2023-09-07 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -430,7 +430,7 @@ SRes MtCoder_Code(CMtCoder *p) SRes res = SZ_OK; if (numThreads > MTCODER_THREADS_MAX) - numThreads = MTCODER_THREADS_MAX; + numThreads = MTCODER_THREADS_MAX; numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads); if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++; @@ -438,7 +438,7 @@ SRes MtCoder_Code(CMtCoder *p) if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++; if (numBlocksMax > MTCODER_BLOCKS_MAX) - numBlocksMax = MTCODER_BLOCKS_MAX; + numBlocksMax = MTCODER_BLOCKS_MAX; if (p->blockSize != p->allocatedBufsSize) { @@ -469,7 +469,7 @@ SRes MtCoder_Code(CMtCoder *p) { RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent)) - RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax)) + RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, (UInt32)numBlocksMax, (UInt32)numBlocksMax)) } for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++) diff --git a/3rdparty/lzma/src/MtDec.c b/3rdparty/lzma/src/MtDec.c index 782069926d..96274b6f3a 100644 --- a/3rdparty/lzma/src/MtDec.c +++ b/3rdparty/lzma/src/MtDec.c @@ -1,5 +1,5 @@ /* MtDec.c -- Multi-thread Decoder -2023-04-02 : Igor Pavlov : Public domain */ +2024-02-20 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -809,6 +809,16 @@ static WRes MtDec_ThreadFunc2(CMtDecThread *t) #endif +typedef + #ifdef _WIN32 + UINT_PTR + #elif 1 + uintptr_t + #else + ptrdiff_t + #endif + MY_uintptr_t; + static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp) { WRes res; @@ -821,7 +831,7 @@ static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp) res = MtDec_ThreadFunc2(t); p = t->mtDec; if (res == 0) - return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes; + return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)p->exitThreadWRes; { // it's unexpected situation for some threading function error if (p->exitThreadWRes == 0) @@ -832,7 +842,7 @@ static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp) Event_Set(&p->threads[0].canWrite); MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res)); } - return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res; + return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)res; } static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp) @@ -1072,7 +1082,7 @@ SRes MtDec_Code(CMtDec *p) if (wres == 0) { wres = Event_Set(&nextThread->canWrite); if (wres == 0) { wres = Event_Set(&nextThread->canRead); if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread); - wres = (WRes)(UINT_PTR)res; + wres = (WRes)(MY_uintptr_t)res; if (wres != 0) { p->needContinue = False; diff --git a/3rdparty/lzma/src/Ppmd7.c b/3rdparty/lzma/src/Ppmd7.c index 6e1307e2d3..efcc5d86e9 100644 --- a/3rdparty/lzma/src/Ppmd7.c +++ b/3rdparty/lzma/src/Ppmd7.c @@ -1,5 +1,5 @@ /* Ppmd7.c -- PPMdH codec -2023-04-02 : Igor Pavlov : Public domain +2023-09-07 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Precomp.h" @@ -302,8 +302,17 @@ static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx) #define MEM_12_CPY(dest, src, num) \ - { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ - do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + { UInt32 *d = (UInt32 *)(dest); \ + const UInt32 *z = (const UInt32 *)(src); \ + unsigned n = (num); \ + do { \ + d[0] = z[0]; \ + d[1] = z[1]; \ + d[2] = z[2]; \ + z += 3; \ + d += 3; \ + } while (--n); \ + } /* @@ -711,8 +720,8 @@ void Ppmd7_UpdateModel(CPpmd7 *p) if ((ns1 & 1) == 0) { /* Expand for one UNIT */ - unsigned oldNU = ns1 >> 1; - unsigned i = U2I(oldNU); + const unsigned oldNU = ns1 >> 1; + const unsigned i = U2I(oldNU); if (i != U2I((size_t)oldNU + 1)) { void *ptr = Ppmd7_AllocUnits(p, i + 1); @@ -731,7 +740,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p) sum = c->Union2.SummFreq; /* max increase of Escape_Freq is 3 here. total increase of Union2.SummFreq for all symbols is less than 256 here */ - sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)); + sum += (UInt32)(unsigned)((2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1))); /* original PPMdH uses 16-bit variable for (sum) here. But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ // sum = (UInt16)sum; @@ -761,7 +770,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p) // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context s->Freq = (Byte)freq; // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here - sum = freq + p->InitEsc + (ns > 3); + sum = (UInt32)(freq + p->InitEsc + (ns > 3)); } } @@ -933,10 +942,10 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) p->HiBitsFlag; { // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ - unsigned summ = (UInt16)see->Summ; // & 0xFFFF - unsigned r = (summ >> see->Shift); + const unsigned summ = (UInt16)see->Summ; // & 0xFFFF + const unsigned r = (summ >> see->Shift); see->Summ = (UInt16)(summ - r); - *escFreq = r + (r == 0); + *escFreq = (UInt32)(r + (r == 0)); } } else @@ -981,9 +990,9 @@ void Ppmd7_Update1_0(CPpmd7 *p) CPpmd_State *s = p->FoundState; CPpmd7_Context *mc = p->MinContext; unsigned freq = s->Freq; - unsigned summFreq = mc->Union2.SummFreq; + const unsigned summFreq = mc->Union2.SummFreq; p->PrevSuccess = (2 * freq > summFreq); - p->RunLength += (int)p->PrevSuccess; + p->RunLength += (Int32)p->PrevSuccess; mc->Union2.SummFreq = (UInt16)(summFreq + 4); freq += 4; s->Freq = (Byte)freq; diff --git a/3rdparty/lzma/src/Ppmd7Dec.c b/3rdparty/lzma/src/Ppmd7Dec.c index 832382828d..081ab89557 100644 --- a/3rdparty/lzma/src/Ppmd7Dec.c +++ b/3rdparty/lzma/src/Ppmd7Dec.c @@ -1,5 +1,5 @@ /* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder -2023-04-02 : Igor Pavlov : Public domain +2023-09-07 : Igor Pavlov : Public domain This code is based on: PPMd var.H (2001): Dmitry Shkarin : Public domain */ @@ -58,7 +58,7 @@ static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) void Ppmd7_UpdateModel(CPpmd7 *p); -#define MASK(sym) ((unsigned char *)charMask)[sym] +#define MASK(sym) ((Byte *)charMask)[sym] // Z7_FORCE_INLINE // static int Ppmd7z_DecodeSymbol(CPpmd7 *p) @@ -120,8 +120,8 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) MASK(s->Symbol) = 0; do { - unsigned sym0 = s2[0].Symbol; - unsigned sym1 = s2[1].Symbol; + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; s2 += 2; MASK(sym0) = 0; MASK(sym1) = 0; @@ -209,17 +209,17 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) unsigned num2 = num / 2; num &= 1; - hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num); s += num; p->MinContext = mc; do { - unsigned sym0 = s[0].Symbol; - unsigned sym1 = s[1].Symbol; + const unsigned sym0 = s[0].Symbol; + const unsigned sym1 = s[1].Symbol; s += 2; - hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); - hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0))); + hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1))); } while (--num2); } @@ -238,13 +238,13 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p) s = Ppmd7_GetStats(p, p->MinContext); hiCnt = count; - // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // count -= s->Freq & (UInt32)(MASK(s->Symbol)); // if ((Int32)count >= 0) { for (;;) { - count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; - // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; } } s--; diff --git a/3rdparty/lzma/src/Ppmd7Enc.c b/3rdparty/lzma/src/Ppmd7Enc.c index 41106bab45..49cbbe6499 100644 --- a/3rdparty/lzma/src/Ppmd7Enc.c +++ b/3rdparty/lzma/src/Ppmd7Enc.c @@ -1,5 +1,5 @@ /* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder -2023-04-02 : Igor Pavlov : Public domain +2023-09-07 : Igor Pavlov : Public domain This code is based on: PPMd var.H (2001): Dmitry Shkarin : Public domain */ @@ -82,7 +82,7 @@ void Ppmd7z_Flush_RangeEnc(CPpmd7 *p) void Ppmd7_UpdateModel(CPpmd7 *p); -#define MASK(sym) ((unsigned char *)charMask)[sym] +#define MASK(sym) ((Byte *)charMask)[sym] Z7_FORCE_INLINE static @@ -139,8 +139,8 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) MASK(s->Symbol) = 0; do { - unsigned sym0 = s2[0].Symbol; - unsigned sym1 = s2[1].Symbol; + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; s2 += 2; MASK(sym0) = 0; MASK(sym1) = 0; @@ -265,16 +265,15 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) if (num2 != 0) { s += i; - for (;;) + do { - unsigned sym0 = s[0].Symbol; - unsigned sym1 = s[1].Symbol; + const unsigned sym0 = s[0].Symbol; + const unsigned sym1 = s[1].Symbol; s += 2; sum += (s[-2].Freq & (unsigned)(MASK(sym0))); sum += (s[-1].Freq & (unsigned)(MASK(sym1))); - if (--num2 == 0) - break; } + while (--num2); } diff --git a/3rdparty/lzma/src/Sha256.c b/3rdparty/lzma/src/Sha256.c index 018cf6f4b4..14d3be9c6a 100644 --- a/3rdparty/lzma/src/Sha256.c +++ b/3rdparty/lzma/src/Sha256.c @@ -1,5 +1,5 @@ /* Sha256.c -- SHA-256 Hash -2023-04-02 : Igor Pavlov : Public domain +2024-03-01 : Igor Pavlov : Public domain This code is based on public domain code from Wei Dai's Crypto++ library. */ #include "Precomp.h" @@ -15,35 +15,35 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */ #endif #ifdef MY_CPU_X86_OR_AMD64 - #ifdef _MSC_VER - #if _MSC_VER >= 1200 + #if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \ + || defined(_MSC_VER) && (_MSC_VER >= 1200) #define Z7_COMPILER_SHA256_SUPPORTED - #endif - #elif defined(__clang__) - #if (__clang_major__ >= 8) // fix that check - #define Z7_COMPILER_SHA256_SUPPORTED - #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 8) // fix that check - #define Z7_COMPILER_SHA256_SUPPORTED - #endif - #elif defined(__INTEL_COMPILER) - #if (__INTEL_COMPILER >= 1800) // fix that check - #define Z7_COMPILER_SHA256_SUPPORTED - #endif #endif -#elif defined(MY_CPU_ARM_OR_ARM64) - #ifdef _MSC_VER - #if _MSC_VER >= 1910 +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_SHA2) \ + || defined(__ARM_FEATURE_CRYPTO) + #define Z7_COMPILER_SHA256_SUPPORTED + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) #define Z7_COMPILER_SHA256_SUPPORTED #endif - #elif defined(__clang__) - #if (__clang_major__ >= 8) // fix that check - #define Z7_COMPILER_SHA256_SUPPORTED #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 6) // fix that check - #define Z7_COMPILER_SHA256_SUPPORTED #endif #endif #endif @@ -224,8 +224,6 @@ void Sha256_Init(CSha256 *p) #endif -void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); - // static extern MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64]; diff --git a/3rdparty/lzma/src/Sha256Opt.c b/3rdparty/lzma/src/Sha256Opt.c index afb351d278..eb38166646 100644 --- a/3rdparty/lzma/src/Sha256Opt.c +++ b/3rdparty/lzma/src/Sha256Opt.c @@ -1,5 +1,5 @@ /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "Compiler.h" @@ -11,6 +11,8 @@ #endif #endif +// #define Z7_USE_HW_SHA_STUB // for debug + #ifdef MY_CPU_X86_OR_AMD64 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check #define USE_HW_SHA @@ -32,9 +34,14 @@ #endif #if (_MSC_VER >= USE_VER_MIN) #define USE_HW_SHA + #else + #define Z7_USE_HW_SHA_STUB #endif #endif // #endif // MY_CPU_X86_OR_AMD64 +#ifndef USE_HW_SHA + // #define Z7_USE_HW_SHA_STUB // for debug +#endif #ifdef USE_HW_SHA @@ -202,19 +209,28 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ #endif // USE_HW_SHA -#elif defined(MY_CPU_ARM_OR_ARM64) - - #if defined(__clang__) - #if (__clang_major__ >= 8) // fix that check +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_SHA2) \ + || defined(__ARM_FEATURE_CRYPTO) + #define USE_HW_SHA + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) #define USE_HW_SHA #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 6) // fix that check - #define USE_HW_SHA #endif - #elif defined(_MSC_VER) - #if _MSC_VER >= 1910 - #define USE_HW_SHA #endif #endif @@ -222,23 +238,87 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ // #pragma message("=== Sha256 HW === ") + #if defined(__clang__) || defined(__GNUC__) +#if !defined(__ARM_FEATURE_SHA2) && \ + !defined(__ARM_FEATURE_CRYPTO) #ifdef MY_CPU_ARM64 - #define ATTRIB_SHA __attribute__((__target__("+crypto,sha2"))) +#if defined(__clang__) + #define ATTRIB_SHA __attribute__((__target__("crypto"))) +#else + #define ATTRIB_SHA __attribute__((__target__("+crypto"))) +#endif #else +#if defined(__clang__) && (__clang_major__ >= 1) + #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2"))) +#else #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) +#endif #endif +#endif #else // _MSC_VER // for arm32 #define _ARM_USE_NEW_NEON_INTRINSICS #endif -#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64) + + + + +#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include #else -#include + + + + + + + + + +#if defined(__clang__) && __clang_major__ < 16 +#if !defined(__ARM_FEATURE_SHA2) && \ + !defined(__ARM_FEATURE_CRYPTO) +// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ") + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1 +// #if defined(__clang__) && __clang_major__ < 13 + #define __ARM_FEATURE_CRYPTO 1 +// #else + #define __ARM_FEATURE_SHA2 1 +// #endif + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif +#endif // clang + +#if defined(__clang__) + +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") + #undef __ARM_ARCH + #define __ARM_ARCH 8 + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // clang + +#include + +#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \ + defined(__ARM_FEATURE_CRYPTO) && \ + defined(__ARM_FEATURE_SHA2) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #undef __ARM_FEATURE_CRYPTO + #undef __ARM_FEATURE_SHA2 + #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ") +#endif + +#endif // Z7_MSC_VER_ORIGINAL typedef uint32x4_t v128; // typedef __n128 v128; // MSVC @@ -316,10 +396,10 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ LOAD_SHUFFLE (m2, 2) LOAD_SHUFFLE (m3, 3) - R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); - R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ) + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ) state0 = vaddq_u32(state0, state0_save); state1 = vaddq_u32(state1, state1_save); @@ -337,16 +417,17 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ #endif // MY_CPU_ARM_OR_ARM64 -#ifndef USE_HW_SHA - +#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB) // #error Stop_Compiling_UNSUPPORTED_SHA // #include - +// We can compile this file with another C compiler, +// or we can compile asm version. +// So we can generate real code instead of this stub function. // #include "Sha256.h" -void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); - +// #if defined(_MSC_VER) #pragma message("Sha256 HW-SW stub was used") - +// #endif +void Z7_FASTCALL Sha256_UpdateBlocks (UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { @@ -359,7 +440,6 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ return; */ } - #endif @@ -384,3 +464,4 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ #undef USE_HW_SHA #undef ATTRIB_SHA #undef USE_VER_MIN +#undef Z7_USE_HW_SHA_STUB diff --git a/3rdparty/lzma/src/SwapBytes.c b/3rdparty/lzma/src/SwapBytes.c index 7901bbaa87..9290592b7d 100644 --- a/3rdparty/lzma/src/SwapBytes.c +++ b/3rdparty/lzma/src/SwapBytes.c @@ -1,5 +1,5 @@ /* SwapBytes.c -- Byte Swap conversion filter -2023-04-07 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -305,11 +305,12 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr) msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want */ // _mm256_broadcastsi128_si256(*mask128_ptr); - /* +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 80000) #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) - MY_mm256_set_m128i - */ - _mm256_set_m128i( +#else + #define MY_mm256_set_m128i _mm256_set_m128i +#endif + MY_mm256_set_m128i( *(const __m128i *)mask128_ptr, *(const __m128i *)mask128_ptr); #endif @@ -330,32 +331,59 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr) // compile message "NEON intrinsics not available with the soft-float ABI" -#elif defined(MY_CPU_ARM_OR_ARM64) || \ - (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) -// #elif defined(MY_CPU_ARM64) +#elif defined(MY_CPU_ARM_OR_ARM64) \ + && defined(MY_CPU_LE) \ + && !defined(Z7_DISABLE_ARM_NEON) #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) - #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) \ + || defined(__GNUC__) && (__GNUC__ >= 6) + #if defined(__ARM_FP) + #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 4)) \ || defined(MY_CPU_ARM64) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) #define USE_SWAP_128 - #endif #ifdef MY_CPU_ARM64 // #define SWAP_ATTRIB_NEON __attribute__((__target__(""))) #else - // #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) - #endif +#if defined(Z7_CLANG_VERSION) + // #define SWAP_ATTRIB_NEON __attribute__((__target__("neon"))) +#else + // #pragma message("SWAP_ATTRIB_NEON __attribute__((__target__(fpu=neon))") + #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=neon"))) +#endif + #endif // MY_CPU_ARM64 + #endif // __ARM_NEON + #endif // __ARM_ARCH + #endif // __ARM_FP + #elif defined(_MSC_VER) #if (_MSC_VER >= 1910) #define USE_SWAP_128 #endif #endif - #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #ifdef USE_SWAP_128 + #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include #else + +/* +#if !defined(__ARM_NEON) +#if defined(Z7_GCC_VERSION) && (__GNUC__ < 5) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 90201) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 100100) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#pragma message("#define __ARM_NEON 1") +// #define __ARM_NEON 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif +*/ #include #endif + #endif #ifndef USE_SWAP_128 #define FORCE_SWAP_MODE @@ -464,6 +492,13 @@ Z7_ATTRIB_NO_VECTOR \ void Z7_FASTCALL +#if defined(MY_CPU_ARM_OR_ARM64) +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wlanguage-extension-token" +#endif +#endif + + #ifdef MY_CPU_64BIT #if defined(MY_CPU_ARM64) \ diff --git a/3rdparty/lzma/src/Threads.c b/3rdparty/lzma/src/Threads.c index cf52bd3030..464efeca49 100644 --- a/3rdparty/lzma/src/Threads.c +++ b/3rdparty/lzma/src/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2023-03-04 : Igor Pavlov : Public domain */ +2024-03-28 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -195,20 +195,19 @@ WRes CriticalSection_Init(CCriticalSection *p) // ---------- POSIX ---------- -#ifndef __APPLE__ +#if defined(__linux__) && !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #ifndef Z7_AFFINITY_DISABLE // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET // clang < 3.6 : unknown warning group '-Wreserved-id-macro' // clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier" // clang >= 13 : do not give warning #if !defined(_GNU_SOURCE) - #if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12) - #pragma GCC diagnostic ignored "-Wreserved-id-macro" - #endif -#define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif // !defined(_GNU_SOURCE) #endif // Z7_AFFINITY_DISABLE -#endif // __APPLE__ +#endif // __linux__ #include "Threads.h" @@ -244,8 +243,9 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, { if (cpuSet) { - #ifdef Z7_AFFINITY_SUPPORTED - + // pthread_attr_setaffinity_np() is not supported for MUSL compile. + // so we check for __GLIBC__ here +#if defined(Z7_AFFINITY_SUPPORTED) && defined( __GLIBC__) /* printf("\n affinity :"); unsigned i; @@ -267,7 +267,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, // ret2 = pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); // if (ret2) ret = ret2; - #endif +#endif } ret = pthread_create(&p->_tid, &attr, func, param); @@ -369,13 +369,20 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); } +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) +// freebsd: +#pragma GCC diagnostic ignored "-Wthread-safety-analysis" +#endif + WRes Event_Set(CEvent *p) { RINOK(pthread_mutex_lock(&p->_mutex)) p->_state = True; - int res1 = pthread_cond_broadcast(&p->_cond); - int res2 = pthread_mutex_unlock(&p->_mutex); - return (res2 ? res2 : res1); + { + const int res1 = pthread_cond_broadcast(&p->_cond); + const int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); + } } WRes Event_Reset(CEvent *p) @@ -408,8 +415,8 @@ WRes Event_Close(CEvent *p) return 0; p->_created = 0; { - int res1 = pthread_mutex_destroy(&p->_mutex); - int res2 = pthread_cond_destroy(&p->_cond); + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); return (res1 ? res1 : res2); } } @@ -487,8 +494,8 @@ WRes Semaphore_Close(CSemaphore *p) return 0; p->_created = 0; { - int res1 = pthread_mutex_destroy(&p->_mutex); - int res2 = pthread_cond_destroy(&p->_cond); + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); return (res1 ? res1 : res2); } } @@ -549,6 +556,18 @@ LONG InterlockedIncrement(LONG volatile *addend) #endif } +LONG InterlockedDecrement(LONG volatile *addend) +{ + // Print("InterlockedDecrement") + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend - 1; + *addend = val; + return val; + #else + return __sync_sub_and_fetch(addend, 1); + #endif +} + #endif // _WIN32 WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) diff --git a/3rdparty/lzma/src/Xz.c b/3rdparty/lzma/src/Xz.c index 4ad071060a..d07550d097 100644 --- a/3rdparty/lzma/src/Xz.c +++ b/3rdparty/lzma/src/Xz.c @@ -1,5 +1,5 @@ /* Xz.c - Xz -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -52,6 +52,7 @@ void XzCheck_Init(CXzCheck *p, unsigned mode) case XZ_CHECK_CRC32: p->crc = CRC_INIT_VAL; break; case XZ_CHECK_CRC64: p->crc64 = CRC64_INIT_VAL; break; case XZ_CHECK_SHA256: Sha256_Init(&p->sha); break; + default: break; } } @@ -62,6 +63,7 @@ void XzCheck_Update(CXzCheck *p, const void *data, size_t size) case XZ_CHECK_CRC32: p->crc = CrcUpdate(p->crc, data, size); break; case XZ_CHECK_CRC64: p->crc64 = Crc64Update(p->crc64, data, size); break; case XZ_CHECK_SHA256: Sha256_Update(&p->sha, (const Byte *)data, size); break; + default: break; } } diff --git a/3rdparty/lzma/src/XzCrc64.c b/3rdparty/lzma/src/XzCrc64.c index c2fad6cdaa..94fc1afb37 100644 --- a/3rdparty/lzma/src/XzCrc64.c +++ b/3rdparty/lzma/src/XzCrc64.c @@ -1,5 +1,5 @@ /* XzCrc64.c -- CRC64 calculation -2023-04-02 : Igor Pavlov : Public domain */ +2023-12-08 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -8,36 +8,76 @@ #define kCrc64Poly UINT64_CONST(0xC96C5795D7870F42) -#ifdef MY_CPU_LE - #define CRC64_NUM_TABLES 4 -#else - #define CRC64_NUM_TABLES 5 - - UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); +// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu +// #define Z7_CRC64_DEBUG_BE +#ifdef Z7_CRC64_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE #endif +#ifdef Z7_CRC64_NUM_TABLES + #define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES +#else + #define Z7_CRC64_NUM_TABLES_USE 12 +#endif + +#if Z7_CRC64_NUM_TABLES_USE < 1 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif + + +#if Z7_CRC64_NUM_TABLES_USE != 1 + #ifndef MY_CPU_BE - UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); + #define FUNC_NAME_LE_2(s) XzCrc64UpdateT ## s + #define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s) + #define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC64_NUM_TABLES_USE) + UInt64 Z7_FASTCALL FUNC_NAME_LE (UInt64 v, const void *data, size_t size, const UInt64 *table); +#endif +#ifndef MY_CPU_LE + #define FUNC_NAME_BE_2(s) XzCrc64UpdateBeT ## s + #define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s) + #define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC64_NUM_TABLES_USE) + UInt64 Z7_FASTCALL FUNC_NAME_BE (UInt64 v, const void *data, size_t size, const UInt64 *table); #endif -typedef UInt64 (Z7_FASTCALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table); +#if defined(MY_CPU_LE) + #define FUNC_REF FUNC_NAME_LE +#elif defined(MY_CPU_BE) + #define FUNC_REF FUNC_NAME_BE +#else + #define FUNC_REF g_Crc64Update + static UInt64 (Z7_FASTCALL *FUNC_REF)(UInt64 v, const void *data, size_t size, const UInt64 *table); +#endif + +#endif + + +MY_ALIGN(64) +static UInt64 g_Crc64Table[256 * Z7_CRC64_NUM_TABLES_USE]; -static CRC64_FUNC g_Crc64Update; -UInt64 g_Crc64Table[256 * CRC64_NUM_TABLES]; UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size) { - return g_Crc64Update(v, data, size, g_Crc64Table); +#if Z7_CRC64_NUM_TABLES_USE == 1 + #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + const UInt64 *table = g_Crc64Table; + const Byte *p = (const Byte *)data; + const Byte *lim = p + size; + for (; p != lim; p++) + v = CRC64_UPDATE_BYTE_2(v, *p); + return v; + #undef CRC64_UPDATE_BYTE_2 +#else + return FUNC_REF (v, data, size, g_Crc64Table); +#endif } -UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size) -{ - return g_Crc64Update(CRC64_INIT_VAL, data, size, g_Crc64Table) ^ CRC64_INIT_VAL; -} +Z7_NO_INLINE void Z7_FASTCALL Crc64GenerateTable(void) { - UInt32 i; + unsigned i; for (i = 0; i < 256; i++) { UInt64 r = i; @@ -46,35 +86,55 @@ void Z7_FASTCALL Crc64GenerateTable(void) r = (r >> 1) ^ (kCrc64Poly & ((UInt64)0 - (r & 1))); g_Crc64Table[i] = r; } - for (i = 256; i < 256 * CRC64_NUM_TABLES; i++) + +#if Z7_CRC64_NUM_TABLES_USE != 1 +#if 1 || 1 && defined(MY_CPU_X86) // low register count + for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i++) { - const UInt64 r = g_Crc64Table[(size_t)i - 256]; - g_Crc64Table[i] = g_Crc64Table[r & 0xFF] ^ (r >> 8); + const UInt64 r0 = g_Crc64Table[(size_t)i]; + g_Crc64Table[(size_t)i + 256] = g_Crc64Table[(Byte)r0] ^ (r0 >> 8); } - - #ifdef MY_CPU_LE - - g_Crc64Update = XzCrc64UpdateT4; - - #else +#else + for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i += 2) { - #ifndef MY_CPU_BE + UInt64 r0 = g_Crc64Table[(size_t)(i) ]; + UInt64 r1 = g_Crc64Table[(size_t)(i) + 1]; + r0 = g_Crc64Table[(Byte)r0] ^ (r0 >> 8); + r1 = g_Crc64Table[(Byte)r1] ^ (r1 >> 8); + g_Crc64Table[(size_t)i + 256 ] = r0; + g_Crc64Table[(size_t)i + 256 + 1] = r1; + } +#endif + +#ifndef MY_CPU_LE + { +#ifndef MY_CPU_BE UInt32 k = 1; if (*(const Byte *)&k == 1) - g_Crc64Update = XzCrc64UpdateT4; + FUNC_REF = FUNC_NAME_LE; else - #endif +#endif { - for (i = 256 * CRC64_NUM_TABLES - 1; i >= 256; i--) +#ifndef MY_CPU_BE + FUNC_REF = FUNC_NAME_BE; +#endif + for (i = 0; i < 256 * Z7_CRC64_NUM_TABLES_USE; i++) { - const UInt64 x = g_Crc64Table[(size_t)i - 256]; + const UInt64 x = g_Crc64Table[i]; g_Crc64Table[i] = Z7_BSWAP64(x); } - g_Crc64Update = XzCrc64UpdateT1_BeT4; } } - #endif +#endif // ndef MY_CPU_LE +#endif // Z7_CRC64_NUM_TABLES_USE != 1 } #undef kCrc64Poly -#undef CRC64_NUM_TABLES +#undef Z7_CRC64_NUM_TABLES_USE +#undef FUNC_REF +#undef FUNC_NAME_LE_2 +#undef FUNC_NAME_LE_1 +#undef FUNC_NAME_LE +#undef FUNC_NAME_BE_2 +#undef FUNC_NAME_BE_1 +#undef FUNC_NAME_BE diff --git a/3rdparty/lzma/src/XzCrc64Opt.c b/3rdparty/lzma/src/XzCrc64Opt.c index d03374c006..0c1fc2ffec 100644 --- a/3rdparty/lzma/src/XzCrc64Opt.c +++ b/3rdparty/lzma/src/XzCrc64Opt.c @@ -1,61 +1,261 @@ -/* XzCrc64Opt.c -- CRC64 calculation -2023-04-02 : Igor Pavlov : Public domain */ +/* XzCrc64Opt.c -- CRC64 calculation (optimized functions) +2023-12-08 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "CpuArch.h" +#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1 + +// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu +// #define Z7_CRC64_DEBUG_BE +#ifdef Z7_CRC64_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +#if defined(MY_CPU_64BIT) +#define Z7_CRC64_USE_64BIT +#endif + +// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c +#ifdef Z7_CRC64_NUM_TABLES +#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES +#else +#define Z7_CRC64_NUM_TABLES_USE 12 +#endif + +#if Z7_CRC64_NUM_TABLES_USE % 4 || \ + Z7_CRC64_NUM_TABLES_USE < 4 || \ + Z7_CRC64_NUM_TABLES_USE > 4 * 4 + #error Stop_Compiling_Bad_CRC64_NUM_TABLES +#endif + + #ifndef MY_CPU_BE -#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) -UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); -UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) +#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) + +#define Q64LE(n, d) \ + ( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \ + ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] ) + +#define R64(a) *((const UInt64 *)(const void *)p + (a)) + +#else + +#define Q32LE(n, d) \ + ( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) + +#define R32(a) *((const UInt32 *)(const void *)p + (a)) + +#endif + + +#define CRC64_FUNC_PRE_LE2(step) \ +UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) + +#define CRC64_FUNC_PRE_LE(step) \ + CRC64_FUNC_PRE_LE2(step); \ + CRC64_FUNC_PRE_LE2(step) + +CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE) { const Byte *p = (const Byte *)data; - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) + const Byte *lim; + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) v = CRC64_UPDATE_BYTE_2(v, *p); - for (; size >= 4; size -= 4, p += 4) + lim = p + size; + if (size >= Z7_CRC64_NUM_TABLES_USE) { - const UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; - v = (v >> 32) - ^ (table + 0x300)[((d ) & 0xFF)] - ^ (table + 0x200)[((d >> 8) & 0xFF)] - ^ (table + 0x100)[((d >> 16) & 0xFF)] - ^ (table + 0x000)[((d >> 24))]; + lim -= Z7_CRC64_NUM_TABLES_USE; + do + { +#if Z7_CRC64_NUM_TABLES_USE == 4 + const UInt32 d = (UInt32)v ^ R32(0); + v = (v >> 32) ^ Q32LE(0, d); +#elif Z7_CRC64_NUM_TABLES_USE == 8 +#ifdef Z7_CRC64_USE_64BIT + v ^= R64(0); + v = Q64LE(0, v); +#else + UInt32 v0, v1; + v0 = (UInt32)v ^ R32(0); + v1 = (UInt32)(v >> 32) ^ R32(1); + v = Q32LE(1, v0) ^ Q32LE(0, v1); +#endif +#elif Z7_CRC64_NUM_TABLES_USE == 12 + UInt32 w; + UInt32 v0, v1; + v0 = (UInt32)v ^ R32(0); + v1 = (UInt32)(v >> 32) ^ R32(1); + w = R32(2); + v = Q32LE(0, w); + v ^= Q32LE(2, v0) ^ Q32LE(1, v1); +#elif Z7_CRC64_NUM_TABLES_USE == 16 +#ifdef Z7_CRC64_USE_64BIT + UInt64 w; + UInt64 x; + w = R64(1); x = Q64LE(0, w); + v ^= R64(0); v = x ^ Q64LE(1, v); +#else + UInt32 v0, v1; + UInt32 r0, r1; + v0 = (UInt32)v ^ R32(0); + v1 = (UInt32)(v >> 32) ^ R32(1); + r0 = R32(2); + r1 = R32(3); + v = Q32LE(1, r0) ^ Q32LE(0, r1); + v ^= Q32LE(3, v0) ^ Q32LE(2, v1); +#endif +#else +#error Stop_Compiling_Bad_CRC64_NUM_TABLES +#endif + p += Z7_CRC64_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC64_NUM_TABLES_USE; } - for (; size > 0; size--, p++) + for (; p < lim; p++) v = CRC64_UPDATE_BYTE_2(v, *p); return v; } +#undef CRC64_UPDATE_BYTE_2 +#undef R32 +#undef R64 +#undef Q32LE +#undef Q64LE +#undef CRC64_FUNC_PRE_LE +#undef CRC64_FUNC_PRE_LE2 + #endif + + #ifndef MY_CPU_LE -#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) +#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8)) -UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); -UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) +#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) + +#define Q64BE(n, d) \ + ( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \ + ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] ) + +#ifdef Z7_CRC64_DEBUG_BE + #define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a)) +#else + #define R64BE(a) *((const UInt64 *)(const void *)p + (a)) +#endif + +#else + +#define Q32BE(n, d) \ + ( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) + +#ifdef Z7_CRC64_DEBUG_BE + #define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a)) +#else + #define R32BE(a) *((const UInt32 *)(const void *)p + (a)) +#endif + +#endif + +#define CRC64_FUNC_PRE_BE2(step) \ +UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) + +#define CRC64_FUNC_PRE_BE(step) \ + CRC64_FUNC_PRE_BE2(step); \ + CRC64_FUNC_PRE_BE2(step) + +CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE) { const Byte *p = (const Byte *)data; - table += 0x100; + const Byte *lim; v = Z7_BSWAP64(v); - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) v = CRC64_UPDATE_BYTE_2_BE(v, *p); - for (; size >= 4; size -= 4, p += 4) + lim = p + size; + if (size >= Z7_CRC64_NUM_TABLES_USE) { - const UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; - v = (v << 32) - ^ (table + 0x000)[((d ) & 0xFF)] - ^ (table + 0x100)[((d >> 8) & 0xFF)] - ^ (table + 0x200)[((d >> 16) & 0xFF)] - ^ (table + 0x300)[((d >> 24))]; + lim -= Z7_CRC64_NUM_TABLES_USE; + do + { +#if Z7_CRC64_NUM_TABLES_USE == 4 + const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0); + v = (v << 32) ^ Q32BE(0, d); +#elif Z7_CRC64_NUM_TABLES_USE == 12 + const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); + const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); + const UInt32 w = R32BE(2); + v = Q32BE(0, w); + v ^= Q32BE(2, d1) ^ Q32BE(1, d0); + +#elif Z7_CRC64_NUM_TABLES_USE == 8 + #ifdef Z7_CRC64_USE_64BIT + v ^= R64BE(0); + v = Q64BE(0, v); + #else + const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); + const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); + v = Q32BE(1, d1) ^ Q32BE(0, d0); + #endif +#elif Z7_CRC64_NUM_TABLES_USE == 16 + #ifdef Z7_CRC64_USE_64BIT + const UInt64 w = R64BE(1); + v ^= R64BE(0); + v = Q64BE(0, w) ^ Q64BE(1, v); + #else + const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); + const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); + const UInt32 w1 = R32BE(2); + const UInt32 w0 = R32BE(3); + v = Q32BE(1, w1) ^ Q32BE(0, w0); + v ^= Q32BE(3, d1) ^ Q32BE(2, d0); + #endif +#elif +#error Stop_Compiling_Bad_CRC64_NUM_TABLES +#endif + p += Z7_CRC64_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC64_NUM_TABLES_USE; } - for (; size > 0; size--, p++) + for (; p < lim; p++) v = CRC64_UPDATE_BYTE_2_BE(v, *p); return Z7_BSWAP64(v); } +#undef CRC64_UPDATE_BYTE_2_BE +#undef R32BE +#undef R64BE +#undef Q32BE +#undef Q64BE +#undef CRC64_FUNC_PRE_BE +#undef CRC64_FUNC_PRE_BE2 + +#endif +#undef Z7_CRC64_NUM_TABLES_USE #endif diff --git a/3rdparty/lzma/src/XzDec.c b/3rdparty/lzma/src/XzDec.c index a5f703966d..3d1c98e631 100644 --- a/3rdparty/lzma/src/XzDec.c +++ b/3rdparty/lzma/src/XzDec.c @@ -1,5 +1,5 @@ /* XzDec.c -- Xz Decode -2023-04-13 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -105,30 +105,32 @@ static SRes XzBcFilterState_SetProps(void *pp, const Byte *props, size_t propSiz { if (propSize != 1) return SZ_ERROR_UNSUPPORTED; - p->delta = (unsigned)props[0] + 1; + p->delta = (UInt32)props[0] + 1; } else { if (propSize == 4) { - UInt32 v = GetUi32(props); + const UInt32 v = GetUi32(props); switch (p->methodId) { case XZ_ID_PPC: case XZ_ID_ARM: case XZ_ID_SPARC: case XZ_ID_ARM64: - if ((v & 3) != 0) + if (v & 3) return SZ_ERROR_UNSUPPORTED; break; case XZ_ID_ARMT: - if ((v & 1) != 0) + case XZ_ID_RISCV: + if (v & 1) return SZ_ERROR_UNSUPPORTED; break; case XZ_ID_IA64: - if ((v & 0xF) != 0) + if (v & 0xf) return SZ_ERROR_UNSUPPORTED; break; + default: break; } p->ip = v; } @@ -151,12 +153,13 @@ static void XzBcFilterState_Init(void *pp) static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] = { - Z7_BRANCH_CONV_DEC(PPC), - Z7_BRANCH_CONV_DEC(IA64), - Z7_BRANCH_CONV_DEC(ARM), - Z7_BRANCH_CONV_DEC(ARMT), - Z7_BRANCH_CONV_DEC(SPARC), - Z7_BRANCH_CONV_DEC(ARM64) + Z7_BRANCH_CONV_DEC_2 (BranchConv_PPC), + Z7_BRANCH_CONV_DEC_2 (BranchConv_IA64), + Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM), + Z7_BRANCH_CONV_DEC_2 (BranchConv_ARMT), + Z7_BRANCH_CONV_DEC_2 (BranchConv_SPARC), + Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM64), + Z7_BRANCH_CONV_DEC_2 (BranchConv_RISCV) }; static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size) @@ -262,7 +265,7 @@ static SRes XzBcFilterState_Code2(void *pp, #define XZ_IS_SUPPORTED_FILTER_ID(id) \ - ((id) >= XZ_ID_Delta && (id) <= XZ_ID_ARM64) + ((id) >= XZ_ID_Delta && (id) <= XZ_ID_RISCV) SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc) @@ -541,13 +544,12 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met { IStateCoder *sc = &p->coders[coderIndex]; p->ids[coderIndex] = methodId; - switch (methodId) - { - case XZ_ID_LZMA2: return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc); - #ifdef USE_SUBBLOCK - case XZ_ID_Subblock: return SbState_SetFromMethod(sc, p->alloc); - #endif - } + if (methodId == XZ_ID_LZMA2) + return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc); +#ifdef USE_SUBBLOCK + if (methodId == XZ_ID_Subblock) + return SbState_SetFromMethod(sc, p->alloc); +#endif if (coderIndex == 0) return SZ_ERROR_UNSUPPORTED; return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId, @@ -558,10 +560,8 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met static SRes MixCoder_ResetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 methodId, Byte *outBuf, size_t outBufSize) { IStateCoder *sc = &p->coders[coderIndex]; - switch (methodId) - { - case XZ_ID_LZMA2: return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize); - } + if (methodId == XZ_ID_LZMA2) + return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize); return SZ_ERROR_UNSUPPORTED; } @@ -804,7 +804,7 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte } #define READ_VARINT_AND_CHECK(buf, pos, size, res) \ - { unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ + { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ if (s == 0) return SZ_ERROR_ARCHIVE; \ pos += s; } @@ -1034,7 +1034,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, SRes res; ECoderFinishMode finishMode2 = finishMode; - BoolInt srcFinished2 = srcFinished; + BoolInt srcFinished2 = (BoolInt)srcFinished; BoolInt destFinish = False; if (p->block.packSize != (UInt64)(Int64)-1) @@ -1127,7 +1127,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, return SZ_OK; } - switch (p->state) + switch ((int)p->state) { case XZ_STATE_STREAM_HEADER: { @@ -1172,15 +1172,15 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, p->state = XZ_STATE_STREAM_INDEX; break; } - p->blockHeaderSize = ((UInt32)p->buf[0] << 2) + 4; + p->blockHeaderSize = ((unsigned)p->buf[0] << 2) + 4; break; } if (p->pos != p->blockHeaderSize) { - UInt32 cur = p->blockHeaderSize - p->pos; + unsigned cur = p->blockHeaderSize - p->pos; if (cur > srcRem) - cur = (UInt32)srcRem; + cur = (unsigned)srcRem; memcpy(p->buf + p->pos, src, cur); p->pos += cur; (*srcLen) += cur; @@ -1222,8 +1222,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } else { - UInt32 checkSize = XzFlags_GetCheckSize(p->streamFlags); - UInt32 cur = checkSize - p->pos; + const unsigned checkSize = XzFlags_GetCheckSize(p->streamFlags); + unsigned cur = checkSize - p->pos; if (cur != 0) { if (srcRem == 0) @@ -1232,7 +1232,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, return SZ_OK; } if (cur > srcRem) - cur = (UInt32)srcRem; + cur = (unsigned)srcRem; memcpy(p->buf + p->pos, src, cur); p->pos += cur; (*srcLen) += cur; @@ -1321,9 +1321,9 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, case XZ_STATE_STREAM_FOOTER: { - UInt32 cur = XZ_STREAM_FOOTER_SIZE - p->pos; + unsigned cur = XZ_STREAM_FOOTER_SIZE - p->pos; if (cur > srcRem) - cur = (UInt32)srcRem; + cur = (unsigned)srcRem; memcpy(p->buf + p->pos, src, cur); p->pos += cur; (*srcLen) += cur; @@ -1358,6 +1358,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } case XZ_STATE_BLOCK: break; /* to disable GCC warning */ + + default: return SZ_ERROR_FAIL; } } /* @@ -1773,10 +1775,10 @@ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbac } } { - UInt64 packSize = block->packSize; - UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3); - UInt32 checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags); - UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize; + const UInt64 packSize = block->packSize; + const UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3); + const unsigned checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags); + const UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize; // if (blockPackSum <= me->props.inBlockMax) // unpackBlockMaxSize { @@ -2381,7 +2383,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p if (tMode) { XzDecMt_FreeOutBufs(p); - tMode = MtDec_PrepareRead(&p->mtc); + tMode = (BoolInt)MtDec_PrepareRead(&p->mtc); } #endif @@ -2644,7 +2646,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle p, p->outSize = *outDataSize; } - p->finishMode = finishMode; + p->finishMode = (BoolInt)finishMode; // p->outSize = 457; p->outSize_Defined = True; p->finishMode = False; // for test diff --git a/3rdparty/lzma/src/XzEnc.c b/3rdparty/lzma/src/XzEnc.c index 22408e2618..c1affadfa6 100644 --- a/3rdparty/lzma/src/XzEnc.c +++ b/3rdparty/lzma/src/XzEnc.c @@ -1,5 +1,5 @@ /* XzEnc.c -- Xz Encode -2023-04-13 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -29,8 +29,9 @@ #define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3) -/* max pack size for LZMA2 block + check-64bytrs: */ -#define XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize) ((unpackSize) + ((unpackSize) >> 10) + 16 + 64) +#define XZ_CHECK_SIZE_MAX 64 +/* max pack size for LZMA2 block + pad4 + check_size: */ +#define XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize) ((unpackSize) + ((unpackSize) >> 10) + 16 + XZ_CHECK_SIZE_MAX) #define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize)) @@ -325,12 +326,13 @@ typedef struct static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Enc[] = { - Z7_BRANCH_CONV_ENC(PPC), - Z7_BRANCH_CONV_ENC(IA64), - Z7_BRANCH_CONV_ENC(ARM), - Z7_BRANCH_CONV_ENC(ARMT), - Z7_BRANCH_CONV_ENC(SPARC), - Z7_BRANCH_CONV_ENC(ARM64) + Z7_BRANCH_CONV_ENC_2 (BranchConv_PPC), + Z7_BRANCH_CONV_ENC_2 (BranchConv_IA64), + Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM), + Z7_BRANCH_CONV_ENC_2 (BranchConv_ARMT), + Z7_BRANCH_CONV_ENC_2 (BranchConv_SPARC), + Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM64), + Z7_BRANCH_CONV_ENC_2 (BranchConv_RISCV) }; static SizeT XzBcFilterStateBase_Filter_Enc(CXzBcFilterStateBase *p, Byte *data, SizeT size) @@ -888,9 +890,9 @@ static SRes Xz_CompressBlock( blockSizes->unpackSize = checkInStream.processed; } { - Byte buf[4 + 64]; - unsigned padSize = XZ_GET_PAD_SIZE(seqSizeOutStream.processed); - UInt64 packSize = seqSizeOutStream.processed; + Byte buf[4 + XZ_CHECK_SIZE_MAX]; + const unsigned padSize = XZ_GET_PAD_SIZE(seqSizeOutStream.processed); + const UInt64 packSize = seqSizeOutStream.processed; buf[0] = 0; buf[1] = 0; @@ -898,7 +900,8 @@ static SRes Xz_CompressBlock( buf[3] = 0; SeqCheckInStream_GetDigest(&checkInStream, buf + 4); - RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId))) + RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), + padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId))) blockSizes->totalSize = seqSizeOutStream.processed - padSize; @@ -1083,18 +1086,19 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf CXzEnc *me = (CXzEnc *)pp; SRes res; CMtProgressThunk progressThunk; - - Byte *dest = me->outBufs[outBufIndex]; - + Byte *dest; UNUSED_VAR(finished) - { CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; bInfo->totalSize = 0; bInfo->unpackSize = 0; bInfo->headerSize = 0; + // v23.02: we don't compress empty blocks + // also we must ignore that empty block in XzEnc_MtCallback_Write() + if (srcSize == 0) + return SZ_OK; } - + dest = me->outBufs[outBufIndex]; if (!dest) { dest = (Byte *)ISzAlloc_Alloc(me->alloc, me->outBufSize); @@ -1140,18 +1144,20 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex) { CXzEnc *me = (CXzEnc *)pp; - const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; - const Byte *data = me->outBufs[outBufIndex]; - - RINOK(WriteBytes(me->outStream, data, bInfo->headerSize)) - + // v23.02: we don't write empty blocks + // note: if (bInfo->unpackSize == 0) then there is no compressed data of block + if (bInfo->unpackSize == 0) + return SZ_OK; { - UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize); - RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize)) + const Byte *data = me->outBufs[outBufIndex]; + RINOK(WriteBytes(me->outStream, data, bInfo->headerSize)) + { + const UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize); + RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize)) + } + return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc); } - - return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc); } #endif diff --git a/3rdparty/lzma/src/XzIn.c b/3rdparty/lzma/src/XzIn.c index d0fc763679..b68af965c1 100644 --- a/3rdparty/lzma/src/XzIn.c +++ b/3rdparty/lzma/src/XzIn.c @@ -1,5 +1,5 @@ /* XzIn.c - Xz input -2023-04-02 : Igor Pavlov : Public domain */ +2023-09-07 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -27,7 +27,7 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream) } #define READ_VARINT_AND_CHECK(buf, pos, size, res) \ - { unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ + { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ if (s == 0) return SZ_ERROR_ARCHIVE; \ pos += s; } @@ -37,7 +37,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, unsigned headerSize; *headerSizeRes = 0; RINOK(SeqInStream_ReadByte(inStream, &header[0])) - headerSize = (unsigned)header[0]; + headerSize = header[0]; if (headerSize == 0) { *headerSizeRes = 1; @@ -47,7 +47,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, *isIndex = False; headerSize = (headerSize << 2) + 4; - *headerSizeRes = headerSize; + *headerSizeRes = (UInt32)headerSize; { size_t processedSize = headerSize - 1; RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize)) @@ -58,7 +58,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, } #define ADD_SIZE_CHECK(size, val) \ - { UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; } + { const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; } UInt64 Xz_GetUnpackSize(const CXzStream *p) {