3rdparty: Update LZMA/7zipSDK to 24.08

This commit is contained in:
JordanTheToaster 2024-11-01 23:04:27 +00:00 committed by lightningterror
parent c936b7db29
commit 0bc043a0bb
42 changed files with 2471 additions and 914 deletions

View File

@ -1,5 +1,5 @@
/* 7zCrc.h -- CRC32 calculation /* 7zCrc.h -- CRC32 calculation
2023-04-02 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_7Z_CRC_H #ifndef ZIP7_INC_7Z_CRC_H
#define ZIP7_INC_7Z_CRC_H #define ZIP7_INC_7Z_CRC_H
@ -20,7 +20,8 @@ void Z7_FASTCALL CrcGenerateTable(void);
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size); UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size);
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size); UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size);
typedef UInt32 (Z7_FASTCALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size);
Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo);
EXTERN_C_END EXTERN_C_END

View File

@ -1,5 +1,5 @@
/* 7zTypes.h -- Basic types /* 7zTypes.h -- Basic types
2023-04-02 : Igor Pavlov : Public domain */ 2024-01-24 : Igor Pavlov : Public domain */
#ifndef ZIP7_7Z_TYPES_H #ifndef ZIP7_7Z_TYPES_H
#define ZIP7_7Z_TYPES_H #define ZIP7_7Z_TYPES_H
@ -530,20 +530,20 @@ struct ISzAlloc
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
*/ */
#if defined (__clang__) || defined(__GNUC__) #if defined (__clang__) || defined(__GNUC__)
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ #define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
_Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"") _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \ #define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
_Pragma("GCC diagnostic pop") _Pragma("GCC diagnostic pop")
#else #else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL #define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL #define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#endif #endif
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)

View File

@ -1,7 +1,7 @@
#define MY_VER_MAJOR 23 #define MY_VER_MAJOR 24
#define MY_VER_MINOR 01 #define MY_VER_MINOR 8
#define MY_VER_BUILD 0 #define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "23.01" #define MY_VERSION_NUMBERS "24.08"
#define MY_VERSION MY_VERSION_NUMBERS #define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME #ifdef MY_CPU_NAME
@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION #define MY_VERSION_CPU MY_VERSION
#endif #endif
#define MY_DATE "2023-06-20" #define MY_DATE "2024-08-11"
#undef MY_COPYRIGHT #undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE #undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov" #define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2023 Igor Pavlov" #define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR #ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR

View File

@ -1,5 +1,5 @@
/* Alloc.h -- Memory allocation functions /* Alloc.h -- Memory allocation functions
2023-03-04 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_ALLOC_H #ifndef ZIP7_INC_ALLOC_H
#define ZIP7_INC_ALLOC_H #define ZIP7_INC_ALLOC_H
@ -22,6 +22,9 @@ void *MyAlloc(size_t size);
void MyFree(void *address); void MyFree(void *address);
void *MyRealloc(void *address, size_t size); void *MyRealloc(void *address, size_t size);
void *z7_AlignedAlloc(size_t size);
void z7_AlignedFree(void *p);
#ifdef _WIN32 #ifdef _WIN32
#ifdef Z7_LARGE_PAGES #ifdef Z7_LARGE_PAGES
@ -33,12 +36,14 @@ void MidFree(void *address);
void *BigAlloc(size_t size); void *BigAlloc(size_t size);
void BigFree(void *address); void BigFree(void *address);
/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */
#else #else
#define MidAlloc(size) MyAlloc(size) #define MidAlloc(size) z7_AlignedAlloc(size)
#define MidFree(address) MyFree(address) #define MidFree(address) z7_AlignedFree(address)
#define BigAlloc(size) MyAlloc(size) #define BigAlloc(size) z7_AlignedAlloc(size)
#define BigFree(address) MyFree(address) #define BigFree(address) z7_AlignedFree(address)
#endif #endif

View File

@ -1,5 +1,5 @@
/* Bra.h -- Branch converters for executables /* Bra.h -- Branch converters for executables
2023-04-02 : Igor Pavlov : Public domain */ 2024-01-20 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_BRA_H #ifndef ZIP7_INC_BRA_H
#define ZIP7_INC_BRA_H #define ZIP7_INC_BRA_H
@ -8,8 +8,12 @@
EXTERN_C_BEGIN EXTERN_C_BEGIN
#define Z7_BRANCH_CONV_DEC(name) z7_BranchConv_ ## name ## _Dec /* #define PPC BAD_PPC_11 // for debug */
#define Z7_BRANCH_CONV_ENC(name) z7_BranchConv_ ## name ## _Enc
#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec
#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc
#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec #define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc #define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
@ -20,19 +24,20 @@ typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv));
typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt)); typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0 #define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_DEC(X86)); Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_ENC(X86)); Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
#define Z7_BRANCH_FUNCS_DECL(name) \ #define Z7_BRANCH_FUNCS_DECL(name) \
Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_DEC(name)); \ Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_ENC(name)); Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
Z7_BRANCH_FUNCS_DECL(ARM64) Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
Z7_BRANCH_FUNCS_DECL(ARM) Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
Z7_BRANCH_FUNCS_DECL(ARMT) Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
Z7_BRANCH_FUNCS_DECL(PPC) Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
Z7_BRANCH_FUNCS_DECL(SPARC) Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
Z7_BRANCH_FUNCS_DECL(IA64) Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
/* /*
These functions convert data that contain CPU instructions. These functions convert data that contain CPU instructions.
@ -49,14 +54,14 @@ and one for decoding (_Enc/_Dec postfixes in function name).
In params: In params:
data : data buffer data : data buffer
size : size of data size : size of data
pc : current virtual Program Counter (Instruction Pinter) value pc : current virtual Program Counter (Instruction Pointer) value
In/Out param: In/Out param:
state : pointer to state variable (for X86 converter only) state : pointer to state variable (for X86 converter only)
Return: Return:
The pointer to position in (data) buffer after last byte that was processed. The pointer to position in (data) buffer after last byte that was processed.
If the caller calls converter again, it must call it starting with that position. If the caller calls converter again, it must call it starting with that position.
But the caller is allowed to move data in buffer. so pointer to But the caller is allowed to move data in buffer. So pointer to
current processed position also will be changed for next call. current processed position also will be changed for next call.
Also the caller must increase internal (pc) value for next call. Also the caller must increase internal (pc) value for next call.
@ -65,6 +70,7 @@ Each converter has some characteristics: Endian, Alignment, LookAhead.
X86 little 1 4 X86 little 1 4
ARMT little 2 2 ARMT little 2 2
RISCV little 2 6
ARM little 4 0 ARM little 4 0
ARM64 little 4 0 ARM64 little 4 0
PPC big 4 0 PPC big 4 0

View File

@ -1,5 +1,5 @@
/* Compiler.h : Compiler specific defines and pragmas /* Compiler.h : Compiler specific defines and pragmas
2023-04-02 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_COMPILER_H #ifndef ZIP7_INC_COMPILER_H
#define ZIP7_INC_COMPILER_H #define ZIP7_INC_COMPILER_H
@ -25,11 +25,79 @@
#define Z7_MINGW #define Z7_MINGW
#endif #endif
#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
#define Z7_MCST_LCC
#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
#endif
/*
#if defined(__AVX2__) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
|| defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
|| defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
#define Z7_COMPILER_AVX2_SUPPORTED
#endif
#endif
*/
// #pragma GCC diagnostic ignored "-Wunknown-pragmas" // #pragma GCC diagnostic ignored "-Wunknown-pragmas"
#ifdef __clang__ #ifdef __clang__
// padding size of '' with 4 bytes to alignment boundary // padding size of '' with 4 bytes to alignment boundary
#pragma GCC diagnostic ignored "-Wpadded" #pragma GCC diagnostic ignored "-Wpadded"
#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
&& defined(__FreeBSD__)
// freebsd:
#pragma GCC diagnostic ignored "-Wexcess-padding"
#endif
#if __clang_major__ >= 16
#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
#endif
#if __clang_major__ == 13
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
// cheri
#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
#endif
#endif
#if __clang_major__ == 13
// for <arm_neon.h>
#pragma GCC diagnostic ignored "-Wreserved-identifier"
#endif
#endif // __clang__
#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
_Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
#endif
typedef void (*Z7_void_Function)(void);
#if defined(__clang__) || defined(__GNUC__)
#define Z7_CAST_FUNC_C (Z7_void_Function)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define Z7_CAST_FUNC_C (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define Z7_CAST_FUNC_C
#endif
/*
#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
// #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif
*/
#ifdef __GNUC__
#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif
#endif #endif
@ -101,7 +169,8 @@
_Pragma("clang loop unroll(disable)") \ _Pragma("clang loop unroll(disable)") \
_Pragma("clang loop vectorize(disable)") _Pragma("clang loop vectorize(disable)")
#define Z7_ATTRIB_NO_VECTORIZE #define Z7_ATTRIB_NO_VECTORIZE
#elif defined(__GNUC__) && (__GNUC__ >= 5) #elif defined(__GNUC__) && (__GNUC__ >= 5) \
&& (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
// __attribute__((optimize("no-unroll-loops"))); // __attribute__((optimize("no-unroll-loops")));
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
@ -142,15 +211,23 @@
#endif #endif
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000)) #if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
#if (Z7_CLANG_VERSION < 130000)
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ #endif
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic pop") _Pragma("GCC diagnostic pop")
#else #else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER #define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif #endif
#define UNUSED_VAR(x) (void)x; #define UNUSED_VAR(x) (void)x;

View File

@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code /* CpuArch.h -- CPU specific code
2023-04-02 : Igor Pavlov : Public domain */ 2024-06-17 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_CPU_ARCH_H #ifndef ZIP7_INC_CPU_ARCH_H
#define ZIP7_INC_CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H
@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/ */
#if !defined(_M_ARM64EC)
#if defined(_M_X64) \ #if defined(_M_X64) \
|| defined(_M_AMD64) \ || defined(_M_AMD64) \
|| defined(__x86_64__) \ || defined(__x86_64__) \
@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#define MY_CPU_64BIT #define MY_CPU_64BIT
#endif #endif
#endif
#if defined(_M_IX86) \ #if defined(_M_IX86) \
@ -47,17 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#if defined(_M_ARM64) \ #if defined(_M_ARM64) \
|| defined(_M_ARM64EC) \
|| defined(__AARCH64EL__) \ || defined(__AARCH64EL__) \
|| defined(__AARCH64EB__) \ || defined(__AARCH64EB__) \
|| defined(__aarch64__) || defined(__aarch64__)
#define MY_CPU_ARM64 #define MY_CPU_ARM64
#ifdef __ILP32__ #if defined(__ILP32__) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "arm64-32" #define MY_CPU_NAME "arm64-32"
#define MY_CPU_SIZEOF_POINTER 4 #define MY_CPU_SIZEOF_POINTER 4
#else #elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
#define MY_CPU_NAME "arm64-128"
#define MY_CPU_SIZEOF_POINTER 16
#else
#if defined(_M_ARM64EC)
#define MY_CPU_NAME "arm64ec"
#else
#define MY_CPU_NAME "arm64" #define MY_CPU_NAME "arm64"
#endif
#define MY_CPU_SIZEOF_POINTER 8 #define MY_CPU_SIZEOF_POINTER 8
#endif #endif
#define MY_CPU_64BIT #define MY_CPU_64BIT
#endif #endif
@ -133,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#if defined(__sparc__) \
|| defined(__sparc)
#define MY_CPU_SPARC
#if defined(__LP64__) \
|| defined(_LP64) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_NAME "sparcv9"
#define MY_CPU_SIZEOF_POINTER 8
#define MY_CPU_64BIT
#elif defined(__sparc_v9__) \
|| defined(__sparcv9)
#define MY_CPU_64BIT
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "sparcv9-32"
#else
#define MY_CPU_NAME "sparcv9m"
#endif
#elif defined(__sparc_v8__) \
|| defined(__sparcv8)
#define MY_CPU_NAME "sparcv8"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "sparc"
#endif
#endif
#if defined(__riscv) \ #if defined(__riscv) \
|| defined(__riscv__) || defined(__riscv__)
#define MY_CPU_RISCV
#if __riscv_xlen == 32 #if __riscv_xlen == 32
#define MY_CPU_NAME "riscv32" #define MY_CPU_NAME "riscv32"
#elif __riscv_xlen == 64 #elif __riscv_xlen == 64
@ -145,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#if defined(__loongarch__)
#define MY_CPU_LOONGARCH
#if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
#define MY_CPU_64BIT
#endif
#if defined(__loongarch64)
#define MY_CPU_NAME "loongarch64"
#define MY_CPU_LOONGARCH64
#else
#define MY_CPU_NAME "loongarch"
#endif
#endif
// #undef MY_CPU_NAME
// #undef MY_CPU_SIZEOF_POINTER
// #define __e2k__
// #define __SIZEOF_POINTER__ 4
#if defined(__e2k__)
#define MY_CPU_E2K
#if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "e2k-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "e2k"
#if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_SIZEOF_POINTER 8
#endif
#endif
#define MY_CPU_64BIT
#endif
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
#define MY_CPU_X86_OR_AMD64 #define MY_CPU_X86_OR_AMD64
#endif #endif
@ -175,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(MY_CPU_ARM_LE) \ || defined(MY_CPU_ARM_LE) \
|| defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_ARM64_LE) \
|| defined(MY_CPU_IA64_LE) \ || defined(MY_CPU_IA64_LE) \
|| defined(_LITTLE_ENDIAN) \
|| defined(__LITTLE_ENDIAN__) \ || defined(__LITTLE_ENDIAN__) \
|| defined(__ARMEL__) \ || defined(__ARMEL__) \
|| defined(__THUMBEL__) \ || defined(__THUMBEL__) \
@ -251,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifndef MY_CPU_NAME #ifndef MY_CPU_NAME
// #define MY_CPU_IS_UNKNOWN
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#define MY_CPU_NAME "LE" #define MY_CPU_NAME "LE"
#elif defined(MY_CPU_BE) #elif defined(MY_CPU_BE)
@ -295,8 +369,18 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define Z7_BSWAP64(v) _byteswap_uint64(v) #define Z7_BSWAP64(v) _byteswap_uint64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED #define Z7_CPU_FAST_BSWAP_SUPPORTED
#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ /* GCC can generate slow code that calls function for __builtin_bswap32() for:
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) - GCC for RISCV, if Zbb/XTHeadBb extension is not used.
- GCC for SPARC.
The code from CLANG for SPARC also is not fastest.
So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
*/
#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \
&& !defined(MY_CPU_SPARC) \
&& ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
)
#define Z7_BSWAP16(v) __builtin_bswap16(v) #define Z7_BSWAP16(v) __builtin_bswap16(v)
#define Z7_BSWAP32(v) __builtin_bswap32(v) #define Z7_BSWAP32(v) __builtin_bswap32(v)
@ -329,13 +413,48 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \ #if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64) || defined(MY_CPU_ARM64) \
|| defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
|| defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
#define MY_CPU_LE_UNALIGN #define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64 #define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED) #elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. /* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
So we can't use unaligned 64-bit operations. */ Description of problems:
#define MY_CPU_LE_UNALIGN problem-1 : 32-bit ARM architecture:
multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
require 32-bit (WORD) alignment (by 32-bit ARM architecture).
So there is "Alignment fault exception", if data is not aligned for 32-bit.
problem-2 : 32-bit kernels and arm64 kernels:
32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception".
So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux.
But some arm64 kernels do not handle these faults in 32-bit programs.
So we have unhandled exception for such instructions.
Probably some new arm64 kernels have fixed it, and unaligned
paired-access instructions work in new kernels?
problem-3 : compiler for 32-bit arm:
Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
and for another cases where two 32-bit accesses are fused
to one multi-access instruction.
So UInt64 variables must be aligned for 32-bit, and each
32-bit access must be aligned for 32-bit, if we want to
avoid "Alignment fault" exception (handled or unhandled).
problem-4 : performace:
Even if unaligned access is handled by kernel, it will be slow.
So if we allow unaligned access, we can get fast unaligned
single-access, and slow unaligned paired-access.
We don't allow unaligned access on 32-bit arm, because compiler
genarates paired-access instructions that require 32-bit alignment,
and some arm64 kernels have no handler for these instructions.
Also unaligned paired-access instructions will be slow, if kernel handles them.
*/
// it must be disabled:
// #define MY_CPU_LE_UNALIGN
#endif #endif
#endif #endif
@ -439,11 +558,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#if defined(MY_CPU_BE) #if defined(MY_CPU_BE)
#define GetBe64a(p) (*(const UInt64 *)(const void *)(p))
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) #define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) #define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetUi64a(p) GetUi64(p)
#define GetUi32a(p) GetUi32(p) #define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p) #define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v) #define SetUi32a(p, v) SetUi32(p, v)
@ -451,11 +572,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#elif defined(MY_CPU_LE) #elif defined(MY_CPU_LE)
#define GetUi64a(p) (*(const UInt64 *)(const void *)(p))
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) #define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) #define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetBe64a(p) GetBe64(p)
#define GetBe32a(p) GetBe32(p) #define GetBe32a(p) GetBe32(p)
#define GetBe16a(p) GetBe16(p) #define GetBe16a(p) GetBe16(p)
#define SetBe32a(p, v) SetBe32(p, v) #define SetBe32a(p, v) SetBe32(p, v)
@ -486,6 +609,7 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX(void);
BoolInt CPU_IsSupported_AVX2(void); BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_CMOV(void); BoolInt CPU_IsSupported_CMOV(void);
BoolInt CPU_IsSupported_SSE(void); BoolInt CPU_IsSupported_SSE(void);

View File

@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms /* LzFind.h -- Match finder for LZ algorithms
2023-03-04 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_FIND_H #ifndef ZIP7_INC_LZ_FIND_H
#define ZIP7_INC_LZ_FIND_H #define ZIP7_INC_LZ_FIND_H
@ -144,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_4(CMatchFinder *p); void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p); // void MatchFinder_Init(CMatchFinder *p);
void MatchFinder_Init(void *p);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);

View File

@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms /* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2023-03-05 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_FIND_MT_H #ifndef ZIP7_INC_LZ_FIND_MT_H
#define ZIP7_INC_LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H
@ -31,7 +31,10 @@ typedef struct
// UInt32 numBlocks_Sent; // UInt32 numBlocks_Sent;
} CMtSync; } CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
struct CMatchFinderMt_;
typedef UInt32 * (*Mf_Mix_Matches)(struct CMatchFinderMt_ *p, UInt32 matchMinPos, UInt32 *distances);
/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ /* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
#define kMtCacheLineDummy 128 #define kMtCacheLineDummy 128
@ -39,7 +42,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
typedef struct typedef struct CMatchFinderMt_
{ {
/* LZ */ /* LZ */
const Byte *pointerToCurPos; const Byte *pointerToCurPos;

View File

@ -1,10 +1,127 @@
/* Precomp.h -- StdAfx /* Precomp.h -- precompilation file
2023-04-02 : Igor Pavlov : Public domain */ 2024-01-25 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_PRECOMP_H #ifndef ZIP7_INC_PRECOMP_H
#define ZIP7_INC_PRECOMP_H #define ZIP7_INC_PRECOMP_H
/*
this file must be included before another *.h files and before <windows.h>.
this file is included from the following files:
C\*.c
C\Util\*\Precomp.h <- C\Util\*\*.c
CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp
this file can set the following macros:
Z7_LARGE_PAGES 1
Z7_LONG_PATH 1
Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip
_WIN32_WINNT 0x0500 (or higher)
WINVER _WIN32_WINNT
UNICODE 1
_UNICODE 1
*/
#include "Compiler.h" #include "Compiler.h"
/* #include "7zTypes.h" */
#ifdef _MSC_VER
// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
#if _MSC_VER >= 1912
// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
#endif
#endif
/*
// for debug:
#define UNICODE 1
#define _UNICODE 1
#define _WIN32_WINNT 0x0500 // win2000
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
*/
#ifdef _WIN32
/*
this "Precomp.h" file must be included before <windows.h>,
if we want to define _WIN32_WINNT before <windows.h>.
*/
#ifndef Z7_LARGE_PAGES
#ifndef Z7_NO_LARGE_PAGES
#define Z7_LARGE_PAGES 1
#endif
#endif
#ifndef Z7_LONG_PATH
#ifndef Z7_NO_LONG_PATH
#define Z7_LONG_PATH 1
#endif
#endif
#ifndef Z7_DEVICE_FILE
#ifndef Z7_NO_DEVICE_FILE
// #define Z7_DEVICE_FILE 1
#endif
#endif
// we don't change macros if included after <windows.h>
#ifndef _WINDOWS_
#ifndef Z7_WIN32_WINNT_MIN
#if defined(_M_ARM64) || defined(__aarch64__)
// #define Z7_WIN32_WINNT_MIN 0x0a00 // win10
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
// #define Z7_WIN32_WINNT_MIN 0x0602 // win8
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
#define Z7_WIN32_WINNT_MIN 0x0503 // win2003
// #elif defined(_M_IX86) || defined(__i386__)
// #define Z7_WIN32_WINNT_MIN 0x0500 // win2000
#else // x86 and another(old) systems
#define Z7_WIN32_WINNT_MIN 0x0500 // win2000
// #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug
#endif
#endif // Z7_WIN32_WINNT_MIN
#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifdef _WIN32_WINNT
// #error Stop_Compiling_Bad_WIN32_WINNT
#else
#ifndef Z7_NO_DEFINE_WIN32_WINNT
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _WIN32_WINNT Z7_WIN32_WINNT_MIN
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // _WIN32_WINNT
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifndef _MBCS
#ifndef Z7_NO_UNICODE
// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
#ifndef UNICODE
#define UNICODE 1
#endif
#ifndef _UNICODE
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _UNICODE 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // Z7_NO_UNICODE
#endif // _MBCS
#endif // _WINDOWS_
// #include "7zWindows.h"
#endif // _WIN32
#endif #endif

View File

@ -1,5 +1,5 @@
/* Threads.h -- multithreading library /* Threads.h -- multithreading library
2023-04-02 : Igor Pavlov : Public domain */ 2024-03-28 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_THREADS_H #ifndef ZIP7_INC_THREADS_H
#define ZIP7_INC_THREADS_H #define ZIP7_INC_THREADS_H
@ -9,12 +9,21 @@
#else #else
#include "Compiler.h"
// #define Z7_AFFINITY_DISABLE
#if defined(__linux__) #if defined(__linux__)
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef Z7_AFFINITY_DISABLE #ifndef Z7_AFFINITY_DISABLE
#define Z7_AFFINITY_SUPPORTED #define Z7_AFFINITY_SUPPORTED
// #pragma message(" ==== Z7_AFFINITY_SUPPORTED") // #pragma message(" ==== Z7_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE #if !defined(_GNU_SOURCE)
// #pragma message(" ==== _GNU_SOURCE set")
// we need _GNU_SOURCE for cpu_set_t, if we compile for MUSL
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _GNU_SOURCE
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif #endif
#endif #endif
#endif #endif
@ -173,7 +182,7 @@ WRes CriticalSection_Init(CCriticalSection *p);
#else // _WIN32 #else // _WIN32
typedef struct _CEvent typedef struct
{ {
int _created; int _created;
int _manual_reset; int _manual_reset;
@ -199,7 +208,7 @@ WRes Event_Wait(CEvent *p);
WRes Event_Close(CEvent *p); WRes Event_Close(CEvent *p);
typedef struct _CSemaphore typedef struct
{ {
int _created; int _created;
UInt32 _count; UInt32 _count;
@ -219,7 +228,7 @@ WRes Semaphore_Wait(CSemaphore *p);
WRes Semaphore_Close(CSemaphore *p); WRes Semaphore_Close(CSemaphore *p);
typedef struct _CCriticalSection typedef struct
{ {
pthread_mutex_t _mutex; pthread_mutex_t _mutex;
} CCriticalSection; } CCriticalSection;
@ -230,6 +239,7 @@ void CriticalSection_Enter(CCriticalSection *cs);
void CriticalSection_Leave(CCriticalSection *cs); void CriticalSection_Leave(CCriticalSection *cs);
LONG InterlockedIncrement(LONG volatile *addend); LONG InterlockedIncrement(LONG volatile *addend);
LONG InterlockedDecrement(LONG volatile *addend);
#endif // _WIN32 #endif // _WIN32

View File

@ -1,5 +1,5 @@
/* Xz.h - Xz interface /* Xz.h - Xz interface
2023-04-13 : Igor Pavlov : Public domain */ 2024-01-26 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_XZ_H #ifndef ZIP7_INC_XZ_H
#define ZIP7_INC_XZ_H #define ZIP7_INC_XZ_H
@ -18,6 +18,7 @@ EXTERN_C_BEGIN
#define XZ_ID_ARMT 8 #define XZ_ID_ARMT 8
#define XZ_ID_SPARC 9 #define XZ_ID_SPARC 9
#define XZ_ID_ARM64 0xa #define XZ_ID_ARM64 0xa
#define XZ_ID_RISCV 0xb
#define XZ_ID_LZMA2 0x21 #define XZ_ID_LZMA2 0x21
unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value);
@ -233,13 +234,13 @@ typedef enum
typedef struct typedef struct
{ {
EXzState state; EXzState state;
UInt32 pos; unsigned pos;
unsigned alignPos; unsigned alignPos;
unsigned indexPreSize; unsigned indexPreSize;
CXzStreamFlags streamFlags; CXzStreamFlags streamFlags;
UInt32 blockHeaderSize; unsigned blockHeaderSize;
UInt64 packSize; UInt64 packSize;
UInt64 unpackSize; UInt64 unpackSize;

View File

@ -1,5 +1,5 @@
/* XzCrc64.h -- CRC64 calculation /* XzCrc64.h -- CRC64 calculation
2023-04-02 : Igor Pavlov : Public domain */ 2023-12-08 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_XZ_CRC64_H #ifndef ZIP7_INC_XZ_CRC64_H
#define ZIP7_INC_XZ_CRC64_H #define ZIP7_INC_XZ_CRC64_H
@ -10,16 +10,16 @@
EXTERN_C_BEGIN EXTERN_C_BEGIN
extern UInt64 g_Crc64Table[]; // extern UInt64 g_Crc64Table[];
void Z7_FASTCALL Crc64GenerateTable(void); void Z7_FASTCALL Crc64GenerateTable(void);
#define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF) #define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF)
#define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL) #define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL)
#define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) // #define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size); UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size);
UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size); // UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size);
EXTERN_C_END EXTERN_C_END

View File

@ -1,5 +1,5 @@
/* 7zArcIn.c -- 7z Input functions /* 7zArcIn.c -- 7z Input functions
2023-05-11 : Igor Pavlov : Public domain */ 2023-09-07 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -301,7 +301,7 @@ static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v)
static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems) static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
{ {
Byte b = 0; unsigned b = 0;
unsigned m = 0; unsigned m = 0;
UInt32 sum = 0; UInt32 sum = 0;
for (; numItems != 0; numItems--) for (; numItems != 0; numItems--)
@ -312,7 +312,7 @@ static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
m = 8; m = 8;
} }
m--; m--;
sum += ((b >> m) & 1); sum += (UInt32)((b >> m) & 1);
} }
return sum; return sum;
} }

View File

@ -1,93 +1,96 @@
/* 7zCrc.c -- CRC32 calculation and init /* 7zCrc.c -- CRC32 calculation and init
2023-04-02 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "7zCrc.h" #include "7zCrc.h"
#include "CpuArch.h" #include "CpuArch.h"
#define kCrcPoly 0xEDB88320 // for debug:
// #define __ARM_FEATURE_CRC32 1
#ifdef MY_CPU_LE #ifdef __ARM_FEATURE_CRC32
#define CRC_NUM_TABLES 8 // #pragma message("__ARM_FEATURE_CRC32")
#define Z7_CRC_HW_FORCE
#endif
// #define Z7_CRC_DEBUG_BE
#ifdef Z7_CRC_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
#ifdef Z7_CRC_HW_FORCE
#define Z7_CRC_NUM_TABLES_USE 1
#else #else
#define CRC_NUM_TABLES 9 #ifdef Z7_CRC_NUM_TABLES
#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES
UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table); #else
UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table); #define Z7_CRC_NUM_TABLES_USE 12
#endif
#endif #endif
#ifndef MY_CPU_BE #if Z7_CRC_NUM_TABLES_USE < 1
UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif #endif
/* #if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1)
extern #define Z7_CRC_NUM_TABLES_TOTAL Z7_CRC_NUM_TABLES_USE
CRC_FUNC g_CrcUpdateT4; #else
CRC_FUNC g_CrcUpdateT4; #define Z7_CRC_NUM_TABLES_TOTAL (Z7_CRC_NUM_TABLES_USE + 1)
*/ #endif
extern
CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
extern
CRC_FUNC g_CrcUpdateT0_32;
CRC_FUNC g_CrcUpdateT0_32;
extern
CRC_FUNC g_CrcUpdateT0_64;
CRC_FUNC g_CrcUpdateT0_64;
extern
CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; #ifndef Z7_CRC_HW_FORCE
UInt32 Z7_FASTCALL CrcUpdate(UInt32 v, const void *data, size_t size) #if Z7_CRC_NUM_TABLES_USE == 1 \
{
return g_CrcUpdate(v, data, size, g_CrcTable);
}
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
{
return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL;
}
#if CRC_NUM_TABLES < 4 \
|| (CRC_NUM_TABLES == 4 && defined(MY_CPU_BE)) \
|| (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) || (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); #define Z7_CRC_UPDATE_T1_FUNC_NAME CrcUpdateGT1
UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table) static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size)
{ {
const UInt32 *table = g_CrcTable;
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
const Byte *pEnd = p + size; const Byte *lim = p + size;
for (; p != pEnd; p++) for (; p != lim; p++)
v = CRC_UPDATE_BYTE_2(v, *p); v = CRC_UPDATE_BYTE_2(v, *p);
return v; return v;
} }
#endif #endif
#if Z7_CRC_NUM_TABLES_USE != 1
#ifndef MY_CPU_BE
#define FUNC_NAME_LE_2(s) CrcUpdateT ## s
#define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s)
#define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE)
UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#ifndef MY_CPU_LE
#define FUNC_NAME_BE_2(s) CrcUpdateT1_BeT ## s
#define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s)
#define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE)
UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#endif
#endif // Z7_CRC_HW_FORCE
/* ---------- hardware CRC ---------- */ /* ---------- hardware CRC ---------- */
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#if defined(MY_CPU_ARM_OR_ARM64) #if defined(MY_CPU_ARM_OR_ARM64)
// #pragma message("ARM*") // #pragma message("ARM*")
#if defined(_MSC_VER) && !defined(__clang__) #if (defined(__clang__) && (__clang_major__ >= 3)) \
#if defined(MY_CPU_ARM64) || defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \
#if (_MSC_VER >= 1910) || defined(__GNUC__) && (__GNUC__ >= 8)
#ifndef __clang__
#define USE_ARM64_CRC
#include <intrin.h>
#endif
#endif
#endif
#elif (defined(__clang__) && (__clang_major__ >= 3)) \
|| (defined(__GNUC__) && (__GNUC__ > 4))
#if !defined(__ARM_FEATURE_CRC32) #if !defined(__ARM_FEATURE_CRC32)
// #pragma message("!defined(__ARM_FEATURE_CRC32)")
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define __ARM_FEATURE_CRC32 1 #define __ARM_FEATURE_CRC32 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#define Z7_ARM_FEATURE_CRC32_WAS_SET
#if defined(__clang__) #if defined(__clang__)
#if defined(MY_CPU_ARM64) #if defined(MY_CPU_ARM64)
#define ATTRIB_CRC __attribute__((__target__("crc"))) #define ATTRIB_CRC __attribute__((__target__("crc")))
@ -96,100 +99,120 @@ UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UI
#endif #endif
#else #else
#if defined(MY_CPU_ARM64) #if defined(MY_CPU_ARM64)
#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000)
#define ATTRIB_CRC __attribute__((__target__("+crc"))) #define ATTRIB_CRC __attribute__((__target__("+crc")))
#endif
#else #else
#if !defined(Z7_GCC_VERSION) || (__GNUC__ >= 8)
#if defined(__ARM_FP) && __GNUC__ >= 8
// for -mfloat-abi=hard: similar to <arm_acle.h>
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd")))
#else
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
#endif
#endif
#endif #endif
#endif #endif
#endif #endif
#if defined(__ARM_FEATURE_CRC32) #if defined(__ARM_FEATURE_CRC32)
#define USE_ARM64_CRC // #pragma message("<arm_acle.h>")
/*
arm_acle.h (GGC):
before Nov 17, 2017:
#ifdef __ARM_FEATURE_CRC32
Nov 17, 2017: gcc10.0 (gcc 9.2.0) checked"
#if __ARM_ARCH >= 8
#pragma GCC target ("arch=armv8-a+crc")
Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1:
#ifdef __ARM_FEATURE_CRC32
#ifdef __ARM_FP
#pragma GCC target ("arch=armv8-a+crc+simd")
#else
#pragma GCC target ("arch=armv8-a+crc")
#endif
*/
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
#if defined(Z7_GCC_VERSION) && (__GNUC__ == 8) && (Z7_GCC_VERSION < 80400) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 9) && (Z7_GCC_VERSION < 90201) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 10) && (Z7_GCC_VERSION < 100100)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #pragma message("#define __ARM_ARCH 8")
#undef __ARM_ARCH
#define __ARM_ARCH 8
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif
#define Z7_CRC_HW_USE
#include <arm_acle.h> #include <arm_acle.h>
#endif #endif
#elif defined(_MSC_VER)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#ifdef __clang__
// #define Z7_CRC_HW_USE
// #include <arm_acle.h>
#else
#define Z7_CRC_HW_USE
#include <intrin.h>
#endif
#endif
#endif
#endif #endif
#else #else // non-ARM*
// no hardware CRC // #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code
#ifdef Z7_CRC_HW_USE
// #define USE_CRC_EMU #include "7zCrcEmu.h"
#ifdef USE_CRC_EMU
#pragma message("ARM64 CRC emulation")
Z7_FORCE_INLINE
UInt32 __crc32b(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data);
return v;
}
Z7_FORCE_INLINE
UInt32 __crc32w(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
Z7_FORCE_INLINE
UInt32 __crc32d(UInt32 v, UInt64 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
#endif // USE_CRC_EMU
#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#define T0_32_UNROLL_BYTES (4 * 4)
#define T0_64_UNROLL_BYTES (4 * 8)
#ifndef ATTRIB_CRC
#define ATTRIB_CRC
#endif #endif
#endif // non-ARM*
#if defined(Z7_CRC_HW_USE)
// #pragma message("USE ARM HW CRC") // #pragma message("USE ARM HW CRC")
ATTRIB_CRC #ifdef MY_CPU_64BIT
UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); #define CRC_HW_WORD_TYPE UInt64
ATTRIB_CRC #define CRC_HW_WORD_FUNC __crc32d
UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) #else
#define CRC_HW_WORD_TYPE UInt32
#define CRC_HW_WORD_FUNC __crc32w
#endif
#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4)
#ifdef ATTRIB_CRC
ATTRIB_CRC
#endif
Z7_NO_INLINE
#ifdef Z7_CRC_HW_FORCE
UInt32 Z7_FASTCALL CrcUpdate
#else
static UInt32 Z7_FASTCALL CrcUpdate_HW
#endif
(UInt32 v, const void *data, size_t size)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
UNUSED_VAR(table); for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--)
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++); v = __crc32b(v, *p++);
if (size >= CRC_HW_UNROLL_BYTES)
if (size >= T0_32_UNROLL_BYTES)
{ {
const Byte *lim = p + size; const Byte *lim = p + size;
size &= (T0_32_UNROLL_BYTES - 1); size &= CRC_HW_UNROLL_BYTES - 1;
lim -= size; lim -= size;
do do
{ {
v = __crc32w(v, *(const UInt32 *)(const void *)(p)); v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
v = __crc32w(v, *(const UInt32 *)(const void *)(p)); p += 2 * sizeof(CRC_HW_WORD_TYPE);
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
p += 2 * sizeof(CRC_HW_WORD_TYPE);
} }
while (p != lim); while (p != lim);
} }
@ -200,141 +223,198 @@ UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const
return v; return v;
} }
ATTRIB_CRC #ifdef Z7_ARM_FEATURE_CRC32_WAS_SET
UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
ATTRIB_CRC #undef __ARM_FEATURE_CRC32
UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
{ #undef Z7_ARM_FEATURE_CRC32_WAS_SET
const Byte *p = (const Byte *)data; #endif
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_64_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_64_UNROLL_BYTES - 1);
lim -= size;
do
{
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
}
while (p != lim);
}
for (; size != 0; size--)
v = __crc32b(v, *p++);
return v;
}
#undef T0_32_UNROLL_BYTES
#undef T0_64_UNROLL_BYTES
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#endif // defined(Z7_CRC_HW_USE)
#endif // MY_CPU_LE #endif // MY_CPU_LE
#ifndef Z7_CRC_HW_FORCE
#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
/*
typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC)
(UInt32 v, const void *data, size_t size, const UInt32 *table);
Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate;
*/
static unsigned g_Crc_Algo;
#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
static unsigned g_Crc_Be;
#endif
#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
Z7_NO_INLINE
#ifdef Z7_CRC_HW_USE
static UInt32 Z7_FASTCALL CrcUpdate_Base
#else
UInt32 Z7_FASTCALL CrcUpdate
#endif
(UInt32 crc, const void *data, size_t size)
{
#if Z7_CRC_NUM_TABLES_USE == 1
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
#else // Z7_CRC_NUM_TABLES_USE != 1
#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME
if (g_Crc_Algo == 1)
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
#endif
#ifdef MY_CPU_LE
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
#elif defined(MY_CPU_BE)
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
#else
if (g_Crc_Be)
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
else
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
#endif
#endif // Z7_CRC_NUM_TABLES_USE != 1
}
#ifdef Z7_CRC_HW_USE
Z7_NO_INLINE
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size)
{
if (g_Crc_Algo == 0)
return CrcUpdate_HW(crc, data, size);
return CrcUpdate_Base(crc, data, size);
}
#endif
#endif // !defined(Z7_CRC_HW_FORCE)
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
{
return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL;
}
MY_ALIGN(64)
UInt32 g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL];
void Z7_FASTCALL CrcGenerateTable(void) void Z7_FASTCALL CrcGenerateTable(void)
{ {
UInt32 i; UInt32 i;
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
{ {
#if defined(Z7_CRC_HW_FORCE)
g_CrcTable[i] = __crc32b(i, 0);
#else
#define kCrcPoly 0xEDB88320
UInt32 r = i; UInt32 r = i;
unsigned j; unsigned j;
for (j = 0; j < 8; j++) for (j = 0; j < 8; j++)
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
g_CrcTable[i] = r; g_CrcTable[i] = r;
#endif
} }
for (i = 256; i < 256 * CRC_NUM_TABLES; i++) for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++)
{ {
const UInt32 r = g_CrcTable[(size_t)i - 256]; const UInt32 r = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8); g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
} }
#if CRC_NUM_TABLES < 4 #if !defined(Z7_CRC_HW_FORCE) && \
g_CrcUpdate = CrcUpdateT1; (defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE))
#elif defined(MY_CPU_LE)
// g_CrcUpdateT4 = CrcUpdateT4; #if Z7_CRC_NUM_TABLES_USE <= 1
#if CRC_NUM_TABLES < 8 g_Crc_Algo = 1;
g_CrcUpdate = CrcUpdateT4; #else // Z7_CRC_NUM_TABLES_USE <= 1
#else // CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8; #if defined(MY_CPU_LE)
/* g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
#ifdef MY_CPU_X86_OR_AMD64 #else // !defined(MY_CPU_LE)
if (!CPU_Is_InOrder())
#endif
*/
g_CrcUpdate = CrcUpdateT8;
#endif
#else
{ {
#ifndef MY_CPU_BE #ifndef MY_CPU_BE
UInt32 k = 0x01020304; UInt32 k = 0x01020304;
const Byte *p = (const Byte *)&k; const Byte *p = (const Byte *)&k;
if (p[0] == 4 && p[1] == 3) if (p[0] == 4 && p[1] == 3)
{ g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
#if CRC_NUM_TABLES < 8
// g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#else // CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
g_CrcUpdate = CrcUpdateT8;
#endif
}
else if (p[0] != 1 || p[1] != 2) else if (p[0] != 1 || p[1] != 2)
g_CrcUpdate = CrcUpdateT1; g_Crc_Algo = 1;
else else
#endif // MY_CPU_BE #endif // MY_CPU_BE
{ {
for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--) for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--)
{ {
const UInt32 x = g_CrcTable[(size_t)i - 256]; const UInt32 x = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = Z7_BSWAP32(x); g_CrcTable[i] = Z7_BSWAP32(x);
} }
#if CRC_NUM_TABLES <= 4 #if defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
g_CrcUpdate = CrcUpdateT1; g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
#elif CRC_NUM_TABLES <= 8 #endif
// g_CrcUpdateT4 = CrcUpdateT1_BeT4; #if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
g_CrcUpdate = CrcUpdateT1_BeT4; g_Crc_Be = 1;
#else // CRC_NUM_TABLES > 8 #endif
g_CrcUpdateT8 = CrcUpdateT1_BeT8;
g_CrcUpdate = CrcUpdateT1_BeT8;
#endif
} }
} }
#endif // CRC_NUM_TABLES < 4 #endif // !defined(MY_CPU_LE)
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#ifdef USE_ARM64_CRC #ifdef Z7_CRC_HW_USE
if (CPU_IsSupported_CRC32()) if (CPU_IsSupported_CRC32())
{ g_Crc_Algo = 0;
g_CrcUpdateT0_32 = CrcUpdateT0_32; #endif // Z7_CRC_HW_USE
g_CrcUpdateT0_64 = CrcUpdateT0_64; #endif // MY_CPU_LE
g_CrcUpdate =
#if defined(MY_CPU_ARM)
CrcUpdateT0_32;
#else
CrcUpdateT0_64;
#endif
}
#endif
#ifdef USE_CRC_EMU #endif // Z7_CRC_NUM_TABLES_USE <= 1
g_CrcUpdateT0_32 = CrcUpdateT0_32; #endif // g_Crc_Algo was declared
g_CrcUpdateT0_64 = CrcUpdateT0_64; }
g_CrcUpdate = CrcUpdateT0_64;
#endif Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo)
{
if (algo == 0)
return &CrcUpdate;
#if defined(Z7_CRC_HW_USE)
if (algo == sizeof(CRC_HW_WORD_TYPE) * 8)
{
#ifdef Z7_CRC_HW_FORCE
return &CrcUpdate;
#else
if (g_Crc_Algo == 0)
return &CrcUpdate_HW;
#endif
}
#endif
#ifndef Z7_CRC_HW_FORCE
if (algo == Z7_CRC_NUM_TABLES_USE)
return
#ifdef Z7_CRC_HW_USE
&CrcUpdate_Base;
#else
&CrcUpdate;
#endif #endif
#endif
return NULL;
} }
#undef kCrcPoly #undef kCrcPoly
#undef CRC64_NUM_TABLES #undef Z7_CRC_NUM_TABLES_USE
#undef Z7_CRC_NUM_TABLES_TOTAL
#undef CRC_UPDATE_BYTE_2 #undef CRC_UPDATE_BYTE_2
#undef FUNC_NAME_LE_2
#undef FUNC_NAME_LE_1
#undef FUNC_NAME_LE
#undef FUNC_NAME_BE_2
#undef FUNC_NAME_BE_1
#undef FUNC_NAME_BE
#undef CRC_HW_UNROLL_BYTES
#undef CRC_HW_WORD_FUNC
#undef CRC_HW_WORD_TYPE

View File

@ -1,117 +1,199 @@
/* 7zCrcOpt.c -- CRC32 calculation /* 7zCrcOpt.c -- CRC32 calculation (optimized functions)
2023-04-02 : Igor Pavlov : Public domain */ 2023-12-07 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "CpuArch.h" #include "CpuArch.h"
#if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1
// for debug only : define Z7_CRC_DEBUG_BE to test big-endian code in little-endian cpu
// #define Z7_CRC_DEBUG_BE
#ifdef Z7_CRC_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
// the value Z7_CRC_NUM_TABLES_USE must be defined to same value as in 7zCrc.c
#ifdef Z7_CRC_NUM_TABLES
#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES
#else
#define Z7_CRC_NUM_TABLES_USE 12
#endif
#if Z7_CRC_NUM_TABLES_USE % 4 || \
Z7_CRC_NUM_TABLES_USE < 4 * 1 || \
Z7_CRC_NUM_TABLES_USE > 4 * 6
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#ifndef MY_CPU_BE #ifndef MY_CPU_BE
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); #define Q(n, d) \
UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) ( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
#define R(a) *((const UInt32 *)(const void *)p + (a))
#define CRC_FUNC_PRE_LE2(step) \
UInt32 Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
#define CRC_FUNC_PRE_LE(step) \
CRC_FUNC_PRE_LE2(step); \
CRC_FUNC_PRE_LE2(step)
CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) const Byte *lim;
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p); v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4) lim = p + size;
if (size >= Z7_CRC_NUM_TABLES_USE)
{ {
v ^= *(const UInt32 *)(const void *)p; lim -= Z7_CRC_NUM_TABLES_USE;
v = do
(table + 0x300)[((v ) & 0xFF)] {
^ (table + 0x200)[((v >> 8) & 0xFF)] v ^= R(0);
^ (table + 0x100)[((v >> 16) & 0xFF)] {
^ (table + 0x000)[((v >> 24))]; #if Z7_CRC_NUM_TABLES_USE == 1 * 4
v = Q(0, v);
#else
#define U2(r, op) \
{ d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
UInt32 d, x;
U2(1, =)
#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
#define U(r) U2(r, ^=)
U(2)
#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
U(3)
#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
U(4)
#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
U(5)
#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#endif
#endif
#endif
#endif
#undef U
#undef U2
v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
#endif
} }
for (; size > 0; size--, p++) p += Z7_CRC_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC_NUM_TABLES_USE;
}
for (; p < lim; p++)
v = CRC_UPDATE_BYTE_2(v, *p); v = CRC_UPDATE_BYTE_2(v, *p);
return v; return v;
} }
UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); #undef CRC_UPDATE_BYTE_2
UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) #undef R
{ #undef Q
const Byte *p = (const Byte *)data; #undef CRC_FUNC_PRE_LE
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) #undef CRC_FUNC_PRE_LE2
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x700)[((v ) & 0xFF)]
^ (table + 0x600)[((v >> 8) & 0xFF)]
^ (table + 0x500)[((v >> 16) & 0xFF)]
^ (table + 0x400)[((v >> 24))];
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
^ (table + 0x100)[((d >> 16) & 0xFF)]
^ (table + 0x000)[((d >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
#endif #endif
#ifndef MY_CPU_LE #ifndef MY_CPU_LE
#define CRC_UINT32_SWAP(v) Z7_BSWAP32(v) #define CRC_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8))
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8)) #define Q(n, d) \
( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table) #ifdef Z7_CRC_DEBUG_BE
#define R(a) GetBe32a((const UInt32 *)(const void *)p + (a))
#else
#define R(a) *((const UInt32 *)(const void *)p + (a))
#endif
#define CRC_FUNC_PRE_BE2(step) \
UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
#define CRC_FUNC_PRE_BE(step) \
CRC_FUNC_PRE_BE2(step); \
CRC_FUNC_PRE_BE2(step)
CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
const Byte *lim;
table += 0x100; table += 0x100;
v = CRC_UINT32_SWAP(v); v = Z7_BSWAP32(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p); v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4) lim = p + size;
if (size >= Z7_CRC_NUM_TABLES_USE)
{ {
v ^= *(const UInt32 *)(const void *)p; lim -= Z7_CRC_NUM_TABLES_USE;
v = do
(table + 0x000)[((v ) & 0xFF)] {
^ (table + 0x100)[((v >> 8) & 0xFF)] v ^= R(0);
^ (table + 0x200)[((v >> 16) & 0xFF)] {
^ (table + 0x300)[((v >> 24))]; #if Z7_CRC_NUM_TABLES_USE == 1 * 4
v = Q(0, v);
#else
#define U2(r, op) \
{ d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
UInt32 d, x;
U2(1, =)
#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
#define U(r) U2(r, ^=)
U(2)
#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
U(3)
#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
U(4)
#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
U(5)
#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#endif
#endif
#endif
#endif
#undef U
#undef U2
v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
#endif
} }
for (; size > 0; size--, p++) p += Z7_CRC_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC_NUM_TABLES_USE;
}
for (; p < lim; p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p); v = CRC_UPDATE_BYTE_2_BE(v, *p);
return CRC_UINT32_SWAP(v); return Z7_BSWAP32(v);
} }
UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table) #undef CRC_UPDATE_BYTE_2_BE
{ #undef R
const Byte *p = (const Byte *)data; #undef Q
table += 0x100; #undef CRC_FUNC_PRE_BE
v = CRC_UINT32_SWAP(v); #undef CRC_FUNC_PRE_BE2
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x400)[((v ) & 0xFF)]
^ (table + 0x500)[((v >> 8) & 0xFF)]
^ (table + 0x600)[((v >> 16) & 0xFF)]
^ (table + 0x700)[((v >> 24))];
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
^ (table + 0x200)[((d >> 16) & 0xFF)]
^ (table + 0x300)[((d >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
return CRC_UINT32_SWAP(v);
}
#endif #endif
#undef Z7_CRC_NUM_TABLES_USE
#endif

View File

@ -1,5 +1,5 @@
/* 7zDec.c -- Decoding from 7z folder /* 7zDec.c -- Decoding from 7z folder
2023-04-02 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -51,6 +51,7 @@
#ifndef Z7_NO_METHODS_FILTERS #ifndef Z7_NO_METHODS_FILTERS
#define k_Delta 3 #define k_Delta 3
#define k_RISCV 0xb
#define k_BCJ 0x3030103 #define k_BCJ 0x3030103
#define k_PPC 0x3030205 #define k_PPC 0x3030205
#define k_IA64 0x3030401 #define k_IA64 0x3030401
@ -362,6 +363,7 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
case k_IA64: case k_IA64:
case k_SPARC: case k_SPARC:
case k_ARM: case k_ARM:
case k_RISCV:
#endif #endif
#ifdef Z7_USE_FILTER_ARM64 #ifdef Z7_USE_FILTER_ARM64
case k_ARM64: case k_ARM64:
@ -535,10 +537,10 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
} }
} }
} }
#if defined(Z7_USE_BRANCH_FILTER) #if defined(Z7_USE_BRANCH_FILTER)
else if (ci == 1) else if (ci == 1)
{ {
#if !defined(Z7_NO_METHODS_FILTERS) #if !defined(Z7_NO_METHODS_FILTERS)
if (coder->MethodID == k_Delta) if (coder->MethodID == k_Delta)
{ {
if (coder->PropsSize != 1) if (coder->PropsSize != 1)
@ -550,22 +552,43 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
} }
continue; continue;
} }
#endif #endif
#ifdef Z7_USE_FILTER_ARM64 #ifdef Z7_USE_FILTER_ARM64
if (coder->MethodID == k_ARM64) if (coder->MethodID == k_ARM64)
{ {
UInt32 pc = 0; UInt32 pc = 0;
if (coder->PropsSize == 4) if (coder->PropsSize == 4)
{
pc = GetUi32(propsData + coder->PropsOffset); pc = GetUi32(propsData + coder->PropsOffset);
if (pc & 3)
return SZ_ERROR_UNSUPPORTED;
}
else if (coder->PropsSize != 0) else if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc); z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc);
continue; continue;
} }
#endif #endif
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) #if !defined(Z7_NO_METHODS_FILTERS)
if (coder->MethodID == k_RISCV)
{
UInt32 pc = 0;
if (coder->PropsSize == 4)
{
pc = GetUi32(propsData + coder->PropsOffset);
if (pc & 1)
return SZ_ERROR_UNSUPPORTED;
}
else if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED;
z7_BranchConv_RISCV_Dec(outBuffer, outSize, pc);
continue;
}
#endif
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
{ {
if (coder->PropsSize != 0) if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
@ -579,7 +602,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0 z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0
break; break;
} }
CASE_BRA_CONV(PPC) case k_PPC: Z7_BRANCH_CONV_DEC_2(BranchConv_PPC)(outBuffer, outSize, 0); break; // pc = 0;
// CASE_BRA_CONV(PPC)
CASE_BRA_CONV(IA64) CASE_BRA_CONV(IA64)
CASE_BRA_CONV(SPARC) CASE_BRA_CONV(SPARC)
CASE_BRA_CONV(ARM) CASE_BRA_CONV(ARM)
@ -592,9 +616,9 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
} }
continue; continue;
} }
#endif #endif
} // (c == 1) } // (c == 1)
#endif #endif // Z7_USE_BRANCH_FILTER
else else
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
} }

View File

@ -1,5 +1,5 @@
/* Aes.c -- AES encryption / decryption /* Aes.c -- AES encryption / decryption
2023-04-02 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -13,7 +13,9 @@ AES_CODE_FUNC g_AesCtr_Code;
UInt32 g_Aes_SupportedFunctions_Flags; UInt32 g_Aes_SupportedFunctions_Flags;
#endif #endif
MY_ALIGN(64)
static UInt32 T[256 * 4]; static UInt32 T[256 * 4];
MY_ALIGN(64)
static const Byte Sbox[256] = { static const Byte Sbox[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
@ -33,7 +35,9 @@ static const Byte Sbox[256] = {
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
MY_ALIGN(64)
static UInt32 D[256 * 4]; static UInt32 D[256 * 4];
MY_ALIGN(64)
static Byte InvS[256]; static Byte InvS[256];
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF) #define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
@ -54,24 +58,54 @@ static Byte InvS[256];
// #define Z7_SHOW_AES_STATUS // #define Z7_SHOW_AES_STATUS
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1110)
#define USE_HW_AES #define USE_HW_AES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) #if (__INTEL_COMPILER >= 1900)
#if defined(__clang__) #define USE_HW_VAES
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_AES
#endif #endif
#elif defined(__GNUC__) #endif
#if (__GNUC__ >= 6) // fix that check #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400)
#define USE_HW_AES #define USE_HW_AES
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#define USE_HW_VAES
#endif #endif
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES #define USE_HW_AES
#define USE_HW_VAES
#endif
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__ARM_FEATURE_AES) \
|| defined(__ARM_FEATURE_CRYPTO)
#define USE_HW_AES
#else
#if defined(MY_CPU_ARM64) \
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|| defined(Z7_MSC_VER_ORIGINAL)
#if defined(__ARM_FP) && \
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define USE_HW_AES
#endif
#endif
#endif #endif
#endif #endif
#endif #endif
#ifdef USE_HW_AES #ifdef USE_HW_AES
// #pragma message("=== Aes.c USE_HW_AES === ")
#ifdef Z7_SHOW_AES_STATUS #ifdef Z7_SHOW_AES_STATUS
#include <stdio.h> #include <stdio.h>
#define PRF(x) x #define PRF(x) x
@ -136,6 +170,7 @@ void AesGenTables(void)
#endif #endif
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#ifdef USE_HW_VAES
if (CPU_IsSupported_VAES_AVX2()) if (CPU_IsSupported_VAES_AVX2())
{ {
PRF(printf("\n===vaes avx2\n")); PRF(printf("\n===vaes avx2\n"));
@ -146,6 +181,7 @@ void AesGenTables(void)
#endif #endif
} }
#endif #endif
#endif
} }
#endif #endif

View File

@ -1,5 +1,5 @@
/* AesOpt.c -- AES optimized code for x86 AES hardware instructions /* AesOpt.c -- AES optimized code for x86 AES hardware instructions
2023-04-02 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -15,8 +15,8 @@
#define USE_INTEL_VAES #define USE_INTEL_VAES
#endif #endif
#endif #endif
#elif defined(__clang__) && (__clang_major__ > 3 || __clang_major__ == 3 && __clang_minor__ >= 8) \ #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4) || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400)
#define USE_INTEL_AES #define USE_INTEL_AES
#if !defined(__AES__) #if !defined(__AES__)
#define ATTRIB_AES __attribute__((__target__("aes"))) #define ATTRIB_AES __attribute__((__target__("aes")))
@ -35,27 +35,37 @@
#define USE_INTEL_VAES #define USE_INTEL_VAES
#endif #endif
#endif #endif
#ifndef USE_INTEL_AES
#define Z7_USE_AES_HW_STUB
#endif
#ifndef USE_INTEL_VAES
#define Z7_USE_VAES_HW_STUB
#endif
#endif #endif
#ifndef ATTRIB_AES #ifndef USE_INTEL_AES
#define ATTRIB_AES // #define Z7_USE_AES_HW_STUB // for debug
#endif #endif
#ifndef ATTRIB_VAES #ifndef USE_INTEL_VAES
#define ATTRIB_VAES // #define Z7_USE_VAES_HW_STUB // for debug
#endif #endif
#ifdef USE_INTEL_AES #ifdef USE_INTEL_AES
#include <wmmintrin.h> #include <wmmintrin.h>
#ifndef USE_INTEL_VAES #if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB)
#define AES_TYPE_keys UInt32 #define AES_TYPE_keys UInt32
#define AES_TYPE_data Byte #define AES_TYPE_data Byte
// #define AES_TYPE_keys __m128i // #define AES_TYPE_keys __m128i
// #define AES_TYPE_data __m128i // #define AES_TYPE_data __m128i
#endif #endif
#ifndef ATTRIB_AES
#define ATTRIB_AES
#endif
#define AES_FUNC_START(name) \ #define AES_FUNC_START(name) \
void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks) void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks)
// void Z7_FASTCALL name(__m128i *p, __m128i *data, size_t numBlocks) // void Z7_FASTCALL name(__m128i *p, __m128i *data, size_t numBlocks)
@ -69,8 +79,6 @@ AES_FUNC_START (name)
#define MM_OP_m(op, src) MM_OP(op, m, src) #define MM_OP_m(op, src) MM_OP(op, m, src)
#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src) #define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src)
#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src)
AES_FUNC_START2 (AesCbc_Encode_HW) AES_FUNC_START2 (AesCbc_Encode_HW)
{ {
@ -139,11 +147,6 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]) #define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1])
#endif #endif
#define AVX_DECLARE_VAR(reg, ii) __m256i reg;
#define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
#define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]))
#define MM_OP_key(op, reg) MM_OP(op, reg, key); #define MM_OP_key(op, reg) MM_OP(op, reg, key);
#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg) #define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg)
@ -152,27 +155,13 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg) #define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg)
#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg) #define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg)
#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one) reg = ctr; #define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one) reg = ctr;
#define CTR_END( reg, ii) MM_XOR (data[ii], reg) #define CTR_END( reg, ii) MM_XOR (data[ii], reg)
#define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key);
#define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg)
#define WOP_KEY(op, n) { \ #define WOP_KEY(op, n) { \
const __m128i key = w[n]; \ const __m128i key = w[n]; \
WOP(op); } WOP(op); }
#define AVX_WOP_KEY(op, n) { \
const __m256i key = w[n]; \
WOP(op); }
#define WIDE_LOOP_START \ #define WIDE_LOOP_START \
dataEnd = data + numBlocks; \ dataEnd = data + numBlocks; \
@ -190,6 +179,40 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
for (; data < dataEnd; data++) for (; data < dataEnd; data++)
#ifdef USE_INTEL_VAES
#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src)
#define AVX_DECLARE_VAR(reg, ii) __m256i reg;
#define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
#define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
/*
AVX_XOR_data_M1() needs unaligned memory load
if (we don't use _mm256_loadu_si256() here)
{
Most compilers with enabled optimizations generate fused AVX (LOAD + OP)
instruction that can load unaligned data.
But GCC and CLANG without -O2 or -O1 optimizations can generate separated
LOAD-ALIGNED (vmovdqa) instruction that will fail on execution.
}
Note: some compilers generate more instructions, if we use _mm256_loadu_si256() here.
v23.02: we use _mm256_loadu_si256() here, because we need compatibility with any compiler.
*/
#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, _mm256_loadu_si256(&(((const __m256i *)(const void *)(data - 1))[ii])))
// for debug only: the following code will fail on execution, if compiled by some compilers:
// #define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]))
#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
#define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key);
#define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg)
#define AVX_WOP_KEY(op, n) { \
const __m256i key = w[n]; \
WOP(op); }
#define NUM_AES_KEYS_MAX 15 #define NUM_AES_KEYS_MAX 15
#define WIDE_LOOP_START_AVX(OP) \ #define WIDE_LOOP_START_AVX(OP) \
@ -214,6 +237,9 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified, /* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified,
MSVC still can insert vzeroupper instruction. */ MSVC still can insert vzeroupper instruction. */
#endif
AES_FUNC_START2 (AesCbc_Decode_HW) AES_FUNC_START2 (AesCbc_Decode_HW)
{ {
@ -380,6 +406,9 @@ required that <immintrin.h> must be included before <avxintrin.h>.
#endif #endif
#endif // __clang__ && _MSC_VER #endif // __clang__ && _MSC_VER
#ifndef ATTRIB_VAES
#define ATTRIB_VAES
#endif
#define VAES_FUNC_START2(name) \ #define VAES_FUNC_START2(name) \
AES_FUNC_START (name); \ AES_FUNC_START (name); \
@ -519,10 +548,18 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256)
/* no USE_INTEL_AES */ /* no USE_INTEL_AES */
#if defined(Z7_USE_AES_HW_STUB)
// We can compile this file with another C compiler,
// or we can compile asm version.
// So we can generate real code instead of this stub function.
// #if defined(_MSC_VER)
#pragma message("AES HW_SW stub was used") #pragma message("AES HW_SW stub was used")
// #endif
#if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB)
#define AES_TYPE_keys UInt32 #define AES_TYPE_keys UInt32
#define AES_TYPE_data Byte #define AES_TYPE_data Byte
#endif
#define AES_FUNC_START(name) \ #define AES_FUNC_START(name) \
void Z7_FASTCALL name(UInt32 *p, Byte *data, size_t numBlocks) \ void Z7_FASTCALL name(UInt32 *p, Byte *data, size_t numBlocks) \
@ -535,13 +572,16 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256)
AES_COMPAT_STUB (AesCbc_Encode) AES_COMPAT_STUB (AesCbc_Encode)
AES_COMPAT_STUB (AesCbc_Decode) AES_COMPAT_STUB (AesCbc_Decode)
AES_COMPAT_STUB (AesCtr_Code) AES_COMPAT_STUB (AesCtr_Code)
#endif // Z7_USE_AES_HW_STUB
#endif // USE_INTEL_AES #endif // USE_INTEL_AES
#ifndef USE_INTEL_VAES #ifndef USE_INTEL_VAES
#if defined(Z7_USE_VAES_HW_STUB)
// #if defined(_MSC_VER)
#pragma message("VAES HW_SW stub was used") #pragma message("VAES HW_SW stub was used")
// #endif
#define VAES_COMPAT_STUB(name) \ #define VAES_COMPAT_STUB(name) \
void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
@ -550,36 +590,59 @@ AES_COMPAT_STUB (AesCtr_Code)
VAES_COMPAT_STUB (AesCbc_Decode_HW) VAES_COMPAT_STUB (AesCbc_Decode_HW)
VAES_COMPAT_STUB (AesCtr_Code_HW) VAES_COMPAT_STUB (AesCtr_Code_HW)
#endif
#endif // ! USE_INTEL_VAES #endif // ! USE_INTEL_VAES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__clang__) #if defined(__ARM_FEATURE_AES) \
#if (__clang_major__ >= 8) // fix that check || defined(__ARM_FEATURE_CRYPTO)
#define USE_HW_AES
#else
#if defined(MY_CPU_ARM64) \
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|| defined(Z7_MSC_VER_ORIGINAL)
#if defined(__ARM_FP) && \
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define USE_HW_AES #define USE_HW_AES
#endif #endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_AES
#endif #endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES
#endif #endif
#endif #endif
#ifdef USE_HW_AES #ifdef USE_HW_AES
// #pragma message("=== AES HW === ") // #pragma message("=== AES HW === ")
// __ARM_FEATURE_CRYPTO macro is deprecated in favor of the finer grained feature macro __ARM_FEATURE_AES
#if defined(__clang__) || defined(__GNUC__) #if defined(__clang__) || defined(__GNUC__)
#if !defined(__ARM_FEATURE_AES) && \
!defined(__ARM_FEATURE_CRYPTO)
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
#define ATTRIB_AES __attribute__((__target__("+crypto,aes"))) #if defined(__clang__)
#define ATTRIB_AES __attribute__((__target__("crypto")))
#else
#define ATTRIB_AES __attribute__((__target__("+crypto")))
#endif
#else #else
#if defined(__clang__)
#define ATTRIB_AES __attribute__((__target__("armv8-a,aes")))
#else
#define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#endif #endif
#endif
#else #else
// _MSC_VER // _MSC_VER
// for arm32 // for arm32
@ -590,11 +653,59 @@ VAES_COMPAT_STUB (AesCtr_Code_HW)
#define ATTRIB_AES #define ATTRIB_AES
#endif #endif
#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64) #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h> #include <arm64_neon.h>
#else #else
#include <arm_neon.h> /*
clang-17.0.1: error : Cannot select: intrinsic %llvm.arm.neon.aese
clang
3.8.1 : __ARM_NEON : defined(__ARM_FEATURE_CRYPTO)
7.0.1 : __ARM_NEON : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)
11.?.0 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)
13.0.1 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_AES)
16 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8
*/
#if defined(__clang__) && __clang_major__ < 16
#if !defined(__ARM_FEATURE_AES) && \
!defined(__ARM_FEATURE_CRYPTO)
// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
// #if defined(__clang__) && __clang_major__ < 13
#define __ARM_FEATURE_CRYPTO 1
// #else
#define __ARM_FEATURE_AES 1
// #endif
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif #endif
#endif // clang
#if defined(__clang__)
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #pragma message("#define __ARM_ARCH 8")
#undef __ARM_ARCH
#define __ARM_ARCH 8
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // clang
#include <arm_neon.h>
#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
defined(__ARM_FEATURE_CRYPTO) && \
defined(__ARM_FEATURE_AES)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#undef __ARM_FEATURE_CRYPTO
#undef __ARM_FEATURE_AES
#undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
#endif
#endif // Z7_MSC_VER_ORIGINAL
typedef uint8x16_t v128; typedef uint8x16_t v128;
@ -620,7 +731,7 @@ AES_FUNC_START (name)
AES_FUNC_START2 (AesCbc_Encode_HW) AES_FUNC_START2 (AesCbc_Encode_HW)
{ {
v128 *p = (v128*)(void*)ivAes; v128 * const p = (v128*)(void*)ivAes;
v128 *data = (v128*)(void*)data8; v128 *data = (v128*)(void*)data8;
v128 m = *p; v128 m = *p;
const v128 k0 = p[2]; const v128 k0 = p[2];
@ -639,7 +750,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
const v128 k_z0 = w[2]; const v128 k_z0 = w[2];
for (; numBlocks != 0; numBlocks--, data++) for (; numBlocks != 0; numBlocks--, data++)
{ {
MM_XOR_m (*data); MM_XOR_m (*data)
AES_E_MC_m (k0) AES_E_MC_m (k0)
AES_E_MC_m (k1) AES_E_MC_m (k1)
AES_E_MC_m (k2) AES_E_MC_m (k2)
@ -660,7 +771,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
} }
} }
AES_E_m (k_z1) AES_E_m (k_z1)
MM_XOR_m (k_z0); MM_XOR_m (k_z0)
*data = m; *data = m;
} }
*p = m; *p = m;
@ -745,7 +856,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
while (w != p); while (w != p);
WOP_KEY (AES_D, 1) WOP_KEY (AES_D, 1)
WOP_KEY (AES_XOR, 0) WOP_KEY (AES_XOR, 0)
MM_XOR (m0, iv); MM_XOR (m0, iv)
WOP_M1 (XOR_data_M1) WOP_M1 (XOR_data_M1)
iv = data[NUM_WAYS - 1]; iv = data[NUM_WAYS - 1];
WOP (STORE_data) WOP (STORE_data)
@ -759,14 +870,14 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
AES_D_IMC_m (w[2]) AES_D_IMC_m (w[2])
do do
{ {
AES_D_IMC_m (w[1]); AES_D_IMC_m (w[1])
AES_D_IMC_m (w[0]); AES_D_IMC_m (w[0])
w -= 2; w -= 2;
} }
while (w != p); while (w != p);
AES_D_m (w[1]); AES_D_m (w[1])
MM_XOR_m (w[0]); MM_XOR_m (w[0])
MM_XOR_m (iv); MM_XOR_m (iv)
iv = *data; iv = *data;
*data = m; *data = m;
} }
@ -783,6 +894,12 @@ AES_FUNC_START2 (AesCtr_Code_HW)
const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd; const v128 *dataEnd;
uint64x2_t one = vdupq_n_u64(0); uint64x2_t one = vdupq_n_u64(0);
// the bug in clang:
// __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2);
#if defined(__clang__) && (__clang_major__ <= 9)
#pragma GCC diagnostic ignored "-Wvector-conversion"
#endif
one = vsetq_lane_u64(1, one, 0); one = vsetq_lane_u64(1, one, 0);
p += 2; p += 2;
@ -809,11 +926,11 @@ AES_FUNC_START2 (AesCtr_Code_HW)
{ {
const v128 *w = p; const v128 *w = p;
v128 m; v128 m;
CTR_START (m, 0); CTR_START (m, 0)
do do
{ {
AES_E_MC_m (w[0]); AES_E_MC_m (w[0])
AES_E_MC_m (w[1]); AES_E_MC_m (w[1])
w += 2; w += 2;
} }
while (w != wEnd); while (w != wEnd);

View File

@ -1,5 +1,5 @@
/* Alloc.c -- Memory allocation functions /* Alloc.c -- Memory allocation functions
2023-04-02 : Igor Pavlov : Public domain */ 2024-02-18 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -10,19 +10,18 @@
#include "Alloc.h" #include "Alloc.h"
#ifdef _WIN32 #if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \
#ifdef Z7_LARGE_PAGES (!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64)
#if defined(__clang__) || defined(__GNUC__) #define Z7_USE_DYN_GetLargePageMinimum
typedef void (*Z7_voidFunction)(void); #endif
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920 // for debug:
#define MY_CAST_FUNC (void *) #if 0
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' #if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
#else // #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ")
#define MY_CAST_FUNC #define Z7_ALLOC_NO_OFFSET_ALLOCATOR
#endif
#endif #endif
#endif // Z7_LARGE_PAGES
#endif // _WIN32
// #define SZ_ALLOC_DEBUG // #define SZ_ALLOC_DEBUG
/* #define SZ_ALLOC_DEBUG */ /* #define SZ_ALLOC_DEBUG */
@ -146,7 +145,9 @@ static void PrintAddr(void *p)
#define PRINT_FREE(name, cnt, ptr) #define PRINT_FREE(name, cnt, ptr)
#define Print(s) #define Print(s)
#define PrintLn() #define PrintLn()
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
#define PrintHex(v, align) #define PrintHex(v, align)
#endif
#define PrintAddr(p) #define PrintAddr(p)
#endif #endif
@ -246,9 +247,9 @@ void MidFree(void *address)
#ifdef Z7_LARGE_PAGES #ifdef Z7_LARGE_PAGES
#ifdef MEM_LARGE_PAGES #ifdef MEM_LARGE_PAGES
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES #define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES
#else #else
#define MY__MEM_LARGE_PAGES 0x20000000 #define MY_MEM_LARGE_PAGES 0x20000000
#endif #endif
extern extern
@ -258,19 +259,23 @@ typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
void SetLargePageSize(void) void SetLargePageSize(void)
{ {
#ifdef Z7_LARGE_PAGES
SIZE_T size; SIZE_T size;
#ifdef Z7_USE_DYN_GetLargePageMinimum
Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
const const
Func_GetLargePageMinimum fn = Func_GetLargePageMinimum fn =
(Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetLargePageMinimum"); "GetLargePageMinimum");
if (!fn) if (!fn)
return; return;
size = fn(); size = fn();
#else
size = GetLargePageMinimum();
#endif
if (size == 0 || (size & (size - 1)) != 0) if (size == 0 || (size & (size - 1)) != 0)
return; return;
g_LargePageSize = size; g_LargePageSize = size;
#endif
} }
#endif // Z7_LARGE_PAGES #endif // Z7_LARGE_PAGES
@ -292,7 +297,7 @@ void *BigAlloc(size_t size)
size2 = (size + ps) & ~ps; size2 = (size + ps) & ~ps;
if (size2 >= size) if (size2 >= size)
{ {
void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE);
if (p) if (p)
{ {
PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
@ -328,20 +333,7 @@ const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
#endif #endif
/* #ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
uintptr_t : <stdint.h> C99 (optional)
: unsupported in VS6
*/
#ifdef _WIN32
typedef UINT_PTR UIntPtr;
#else
/*
typedef uintptr_t UIntPtr;
*/
typedef ptrdiff_t UIntPtr;
#endif
#define ADJUST_ALLOC_SIZE 0 #define ADJUST_ALLOC_SIZE 0
/* /*
@ -352,14 +344,36 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
MyAlloc() can return address that is NOT multiple of sizeof(void *). MyAlloc() can return address that is NOT multiple of sizeof(void *).
*/ */
/* /*
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) uintptr_t : <stdint.h> C99 (optional)
: unsupported in VS6
*/ */
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) typedef
#ifdef _WIN32
UINT_PTR
#elif 1
uintptr_t
#else
ptrdiff_t
#endif
MY_uintptr_t;
#if 0 \
|| (defined(__CHERI__) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8))
// for 128-bit pointers (cheri):
#define MY_ALIGN_PTR_DOWN(p, align) \
((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1))))
#else
#define MY_ALIGN_PTR_DOWN(p, align) \
((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1))))
#endif
#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) #endif
#if !defined(_WIN32) \
&& (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \
|| defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L))
#define USE_posix_memalign #define USE_posix_memalign
#endif #endif
@ -399,14 +413,13 @@ static int posix_memalign(void **ptr, size_t align, size_t size)
#define ALLOC_ALIGN_SIZE ((size_t)1 << 7) #define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) void *z7_AlignedAlloc(size_t size)
{ {
#ifndef USE_posix_memalign #ifndef USE_posix_memalign
void *p; void *p;
void *pAligned; void *pAligned;
size_t newSize; size_t newSize;
UNUSED_VAR(pp)
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
block to prevent cache line sharing with another allocated blocks */ block to prevent cache line sharing with another allocated blocks */
@ -431,10 +444,9 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
return pAligned; return pAligned;
#else #else
void *p; void *p;
UNUSED_VAR(pp)
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
return NULL; return NULL;
@ -443,19 +455,37 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
return p; return p;
#endif #endif
}
void z7_AlignedFree(void *address)
{
#ifndef USE_posix_memalign
if (address)
MyFree(((void **)address)[-1]);
#else
free(address);
#endif
}
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
{
UNUSED_VAR(pp)
return z7_AlignedAlloc(size);
} }
static void SzAlignedFree(ISzAllocPtr pp, void *address) static void SzAlignedFree(ISzAllocPtr pp, void *address)
{ {
UNUSED_VAR(pp) UNUSED_VAR(pp)
#ifndef USE_posix_memalign #ifndef USE_posix_memalign
if (address) if (address)
MyFree(((void **)address)[-1]); MyFree(((void **)address)[-1]);
#else #else
free(address); free(address);
#endif #endif
} }
@ -463,16 +493,44 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ /* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] #ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
/* #if 1
#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1] #define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
*/ #define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
#else
// we can use this simplified code,
// if (CAlignOffsetAlloc::offset == (k * sizeof(void *))
#define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1])
#endif
#endif
#if 0
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
#include <stdio.h>
static void PrintPtr(const char *s, const void *p)
{
const Byte *p2 = (const Byte *)&p;
unsigned i;
printf("%s %p ", s, p);
for (i = sizeof(p); i != 0;)
{
i--;
printf("%02x", p2[i]);
}
printf("\n");
}
#endif
#endif
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
{ {
#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
UNUSED_VAR(pp)
return z7_AlignedAlloc(size);
#else
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
void *adr; void *adr;
void *pAligned; void *pAligned;
@ -501,6 +559,12 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
#if 0
printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size);
PrintPtr("base", adr);
PrintPtr("alig", pAligned);
#endif
PrintLn(); PrintLn();
Print("- Aligned: "); Print("- Aligned: ");
Print(" size="); PrintHex(size, 8); Print(" size="); PrintHex(size, 8);
@ -512,11 +576,16 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
REAL_BLOCK_PTR_VAR(pAligned) = adr; REAL_BLOCK_PTR_VAR(pAligned) = adr;
return pAligned; return pAligned;
#endif
} }
static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
{ {
#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
UNUSED_VAR(pp)
z7_AlignedFree(address);
#else
if (address) if (address)
{ {
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
@ -525,6 +594,7 @@ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
PrintLn(); PrintLn();
ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
} }
#endif
} }

View File

@ -1,11 +1,11 @@
/* Bra.c -- Branch converters for RISC code /* Bra.c -- Branch converters for RISC code
2023-04-02 : Igor Pavlov : Public domain */ 2024-01-20 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "Bra.h" #include "Bra.h"
#include "CpuArch.h"
#include "RotateDefs.h" #include "RotateDefs.h"
#include "CpuArch.h"
#if defined(MY_CPU_SIZEOF_POINTER) \ #if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \ && ( MY_CPU_SIZEOF_POINTER == 4 \
@ -26,7 +26,7 @@
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name #define Z7_BRANCH_CONV(name) z7_ ## name
#define Z7_BRANCH_FUNC_MAIN(name) \ #define Z7_BRANCH_FUNC_MAIN(name) \
static \ static \
@ -42,11 +42,11 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
#ifdef Z7_EXTRACT_ONLY #ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \ #define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
#else #else
#define Z7_BRANCH_FUNCS_IMP(name) \ #define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \ Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1) Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
#endif #endif
#if defined(__clang__) #if defined(__clang__)
@ -72,7 +72,7 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
#endif #endif
Z7_BRANCH_FUNC_MAIN(ARM64) Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
{ {
// Byte *p = data; // Byte *p = data;
const Byte *lim; const Byte *lim;
@ -121,10 +121,10 @@ Z7_BRANCH_FUNC_MAIN(ARM64)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(ARM64) Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
Z7_BRANCH_FUNC_MAIN(ARM) Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
{ {
// Byte *p = data; // Byte *p = data;
const Byte *lim; const Byte *lim;
@ -152,10 +152,10 @@ Z7_BRANCH_FUNC_MAIN(ARM)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(ARM) Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
Z7_BRANCH_FUNC_MAIN(PPC) Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
{ {
// Byte *p = data; // Byte *p = data;
const Byte *lim; const Byte *lim;
@ -192,14 +192,14 @@ Z7_BRANCH_FUNC_MAIN(PPC)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(PPC) Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
#define BR_SPARC_USE_ROTATE #define BR_SPARC_USE_ROTATE
#endif #endif
Z7_BRANCH_FUNC_MAIN(SPARC) Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
{ {
// Byte *p = data; // Byte *p = data;
const Byte *lim; const Byte *lim;
@ -254,10 +254,10 @@ Z7_BRANCH_FUNC_MAIN(SPARC)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(SPARC) Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
Z7_BRANCH_FUNC_MAIN(ARMT) Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
{ {
// Byte *p = data; // Byte *p = data;
Byte *lim; Byte *lim;
@ -335,12 +335,12 @@ Z7_BRANCH_FUNC_MAIN(ARMT)
// return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
// return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
} }
Z7_BRANCH_FUNCS_IMP(ARMT) Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
// #define BR_IA64_NO_INLINE // #define BR_IA64_NO_INLINE
Z7_BRANCH_FUNC_MAIN(IA64) Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
{ {
// Byte *p = data; // Byte *p = data;
const Byte *lim; const Byte *lim;
@ -417,4 +417,293 @@ Z7_BRANCH_FUNC_MAIN(IA64)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(IA64) Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET;
#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET;
#if 1 && defined(MY_CPU_LE_UNALIGN)
#define RISCV_USE_UNALIGNED_LOAD
#endif
#ifdef RISCV_USE_UNALIGNED_LOAD
#define RISCV_GET_UI32(p) GetUi32(p)
#define RISCV_SET_UI32(p, v) { SetUi32(p, v) }
#else
#define RISCV_GET_UI32(p) \
((UInt32)GetUi16a(p) + \
((UInt32)GetUi16a((p) + 2) << 16))
#define RISCV_SET_UI32(p, v) { \
SetUi16a(p, (UInt16)(v)) \
SetUi16a((p) + 2, (UInt16)(v >> 16)) }
#endif
#if 1 && defined(MY_CPU_LE)
#define RISCV_USE_16BIT_LOAD
#endif
#ifdef RISCV_USE_16BIT_LOAD
#define RISCV_LOAD_VAL(p) GetUi16a(p)
#else
#define RISCV_LOAD_VAL(p) (*(p))
#endif
#define RISCV_INSTR_SIZE 2
#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE)
#define RISCV_STEP_2 4
#define RISCV_REG_VAL (2 << 7)
#define RISCV_CMD_VAL 3
#if 1
// for code size optimization:
#define RISCV_DELTA_7F 0x7f
#else
#define RISCV_DELTA_7F 0
#endif
#define RISCV_CHECK_1(v, b) \
(((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
#if 1
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
<< 18) \
< ((r) & 0x1d))
#else
// this branch gives larger code, because
// compilers generate larger code for big constants.
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
& ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
< ((r) & 0x1d))
#endif
#define RISCV_SCAN_LOOP \
Byte *lim; \
size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
if (size <= 6) return p; \
size -= 6; \
lim = p + size; \
BR_PC_INIT \
for (;;) \
{ \
UInt32 a, v; \
/* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
for (;;) \
{ \
if Z7_UNLIKELY(p >= lim) { return p; } \
a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
if ((a & 0x77) == 0) break; \
a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
p += RISCV_INSTR_SIZE * 2; \
if ((a & 0x77) == 0) \
{ \
p -= RISCV_INSTR_SIZE; \
if Z7_UNLIKELY(p >= lim) { return p; } \
break; \
} \
}
// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
v = a;
a = RISCV_GET_UI32(p);
#ifndef RISCV_USE_16BIT_LOAD
v += (UInt32)p[1] << 8;
#endif
if ((v & 8) == 0) // JAL
{
if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
v = ((a & 1u << 31) >> 11)
| ((a & 0x3ff << 21) >> 20)
| ((a & 1 << 20) >> 9)
| (a & 0xff << 12);
BR_CONVERT_VAL_ENC(v)
// ((v & 1) == 0)
// v: bits [1 : 20] contain offset bits
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
a &= 0xfff;
a |= ((UInt32)(v << 23))
| ((UInt32)(v << 7) & ((UInt32)0xff << 16))
| ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
RISCV_SET_UI32(p, a)
#else // aligned
#if 0
SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
#else
p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
#endif
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v <<= 15;
v = Z7_BSWAP32(v);
SetUi16a(p + 2, (UInt16)v)
#else
p[2] = (Byte)(v >> 9);
p[3] = (Byte)(v >> 1);
#endif
#endif // aligned
}
p += 4;
continue;
} // JAL
{
// AUIPC
if (v & 0xe80) // (not x0) and (not x2)
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (RISCV_CHECK_1(v, b))
{
{
const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, temp)
}
a &= 0xfffff000;
{
#if 1
const int t = -1 >> 1;
if (t != -1)
a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
else
#endif
a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
}
BR_CONVERT_VAL_ENC(a)
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
a = Z7_BSWAP32(a);
RISCV_SET_UI32(p + 4, a)
#else
SetBe32(p + 4, a)
#endif
p += 8;
}
else
p += RISCV_STEP_1;
}
else
{
UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
v = RISCV_GET_UI32(p + 4);
r = (r << 7) + 0x17 + (v & 0xfffff000);
a = (a >> 12) | (v << 20);
RISCV_SET_UI32(p, r)
RISCV_SET_UI32(p + 4, a)
p += 8;
}
else
p += RISCV_STEP_2;
}
}
} // for
}
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
#ifdef RISCV_USE_16BIT_LOAD
if ((a & 8) == 0)
{
#else
v = a;
a += (UInt32)p[1] << 8;
if ((v & 8) == 0)
{
#endif
// JAL
a -= 0x100 - RISCV_DELTA_7F;
if (a & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
#if 0 // unaligned
a = GetUi32(p);
v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
| (UInt32)(a >> 7) & ((UInt32)0xff << 9)
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v = GetUi16a(p + 2);
v = Z7_BSWAP32(v) >> 15
#else
v = (UInt32)p[3] << 1
| (UInt32)p[2] << 9
#endif
| (UInt32)((a & 0xf000) << 5);
BR_CONVERT_VAL_DEC(v)
a = a_old
| (v << 11 & 1u << 31)
| (v << 20 & 0x3ff << 21)
| (v << 9 & 1 << 20)
| (v & 0xff << 12);
RISCV_SET_UI32(p, a)
}
p += 4;
continue;
} // JAL
{
// AUIPC
v = a;
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
a = GetUi32(p);
#else
a |= (UInt32)GetUi16a(p + 2) << 16;
#endif
if ((v & 0xe80) == 0) // x0/x2
{
const UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
UInt32 b;
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
b = RISCV_GET_UI32(p + 4);
b = Z7_BSWAP32(b);
#else
b = GetBe32(p + 4);
#endif
v = a >> 12;
BR_CONVERT_VAL_DEC(b)
a = (r << 7) + 0x17;
a += (b + 0x800) & 0xfffff000;
v |= b << 20;
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
else
p += RISCV_STEP_2;
}
else
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (!RISCV_CHECK_1(v, b))
p += RISCV_STEP_1;
else
{
v = (a & 0xfffff000) | (b >> 20);
a = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
}
}
} // for
}

View File

@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code /* CpuArch.c -- CPU specific code
2023-05-18 : Igor Pavlov : Public domain */ 2024-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -226,7 +226,7 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
*/ */
static static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo) Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
{ {
UNUSED_VAR(subFunction) UNUSED_VAR(subFunction)
__cpuid(CPUInfo, func); __cpuid(CPUInfo, func);
@ -242,13 +242,13 @@ Z7_NO_INLINE
#endif #endif
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{ {
MY_cpuidex((int *)p, (int)func, 0); MY_cpuidex((Int32 *)p, (Int32)func, 0);
} }
Z7_NO_INLINE Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{ {
int a[4]; Int32 a[4];
MY_cpuidex(a, 0, 0); MY_cpuidex(a, 0, 0);
return a[0]; return a[0];
} }
@ -384,7 +384,7 @@ BoolInt CPU_IsSupported_CMOV(void)
UInt32 a[4]; UInt32 a[4];
if (!x86cpuid_Func_1(&a[0])) if (!x86cpuid_Func_1(&a[0]))
return 0; return 0;
return (a[3] >> 15) & 1; return (BoolInt)(a[3] >> 15) & 1;
} }
BoolInt CPU_IsSupported_SSE(void) BoolInt CPU_IsSupported_SSE(void)
@ -393,7 +393,7 @@ BoolInt CPU_IsSupported_SSE(void)
CHECK_SYS_SSE_SUPPORT CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0])) if (!x86cpuid_Func_1(&a[0]))
return 0; return 0;
return (a[3] >> 25) & 1; return (BoolInt)(a[3] >> 25) & 1;
} }
BoolInt CPU_IsSupported_SSE2(void) BoolInt CPU_IsSupported_SSE2(void)
@ -402,7 +402,7 @@ BoolInt CPU_IsSupported_SSE2(void)
CHECK_SYS_SSE_SUPPORT CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0])) if (!x86cpuid_Func_1(&a[0]))
return 0; return 0;
return (a[3] >> 26) & 1; return (BoolInt)(a[3] >> 26) & 1;
} }
#endif #endif
@ -419,17 +419,17 @@ static UInt32 x86cpuid_Func_1_ECX(void)
BoolInt CPU_IsSupported_AES(void) BoolInt CPU_IsSupported_AES(void)
{ {
return (x86cpuid_Func_1_ECX() >> 25) & 1; return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
} }
BoolInt CPU_IsSupported_SSSE3(void) BoolInt CPU_IsSupported_SSSE3(void)
{ {
return (x86cpuid_Func_1_ECX() >> 9) & 1; return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
} }
BoolInt CPU_IsSupported_SSE41(void) BoolInt CPU_IsSupported_SSE41(void)
{ {
return (x86cpuid_Func_1_ECX() >> 19) & 1; return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
} }
BoolInt CPU_IsSupported_SHA(void) BoolInt CPU_IsSupported_SHA(void)
@ -441,7 +441,7 @@ BoolInt CPU_IsSupported_SHA(void)
{ {
UInt32 d[4]; UInt32 d[4];
z7_x86_cpuid(d, 7); z7_x86_cpuid(d, 7);
return (d[1] >> 29) & 1; return (BoolInt)(d[1] >> 29) & 1;
} }
} }
@ -638,10 +638,10 @@ BoolInt CPU_IsSupported_AVX(void)
{ {
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
// printf("\n=== XGetBV=%d\n", bm); // printf("\n=== XGetBV=0x%x\n", bm);
return 1 return 1
& (bm >> 1) // SSE state is supported (set by OS) for storing/restoring & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring
& (bm >> 2); // AVX state is supported (set by OS) for storing/restoring & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
} }
// since Win7SP1: we can use GetEnabledXStateFeatures(); // since Win7SP1: we can use GetEnabledXStateFeatures();
} }
@ -658,10 +658,39 @@ BoolInt CPU_IsSupported_AVX2(void)
z7_x86_cpuid(d, 7); z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1 return 1
& (d[1] >> 5); // avx2 & (BoolInt)(d[1] >> 5); // avx2
} }
} }
#if 0
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
{
if (!CPU_IsSupported_AVX())
return False;
if (z7_x86_cpuid_GetMaxFunc() < 7)
return False;
{
UInt32 d[4];
BoolInt v;
z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
v = 1
& (BoolInt)(d[1] >> 16) // avx512f
& (BoolInt)(d[1] >> 31); // avx512vl
if (!v)
return False;
}
{
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
// printf("\n=== XGetBV=0x%x\n", bm);
return 1
& (BoolInt)(bm >> 5) // OPMASK
& (BoolInt)(bm >> 6) // ZMM upper 256-bit
& (BoolInt)(bm >> 7); // ZMM16 ... ZMM31
}
}
#endif
BoolInt CPU_IsSupported_VAES_AVX2(void) BoolInt CPU_IsSupported_VAES_AVX2(void)
{ {
if (!CPU_IsSupported_AVX()) if (!CPU_IsSupported_AVX())
@ -673,9 +702,9 @@ BoolInt CPU_IsSupported_VAES_AVX2(void)
z7_x86_cpuid(d, 7); z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1 return 1
& (d[1] >> 5) // avx2 & (BoolInt)(d[1] >> 5) // avx2
// & (d[1] >> 31) // avx512vl // & (d[1] >> 31) // avx512vl
& (d[2] >> 9); // vaes // VEX-256/EVEX & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
} }
} }
@ -688,7 +717,7 @@ BoolInt CPU_IsSupported_PageGB(void)
if (d[0] < 0x80000001) if (d[0] < 0x80000001)
return False; return False;
z7_x86_cpuid(d, 0x80000001); z7_x86_cpuid(d, 0x80000001);
return (d[3] >> 26) & 1; return (BoolInt)(d[3] >> 26) & 1;
} }
} }
@ -760,33 +789,70 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#else // __APPLE__ #else // __APPLE__
#include <sys/auxv.h> #if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
#define Z7_GETAUXV_AVAILABLE
#else
// #pragma message("=== is not NEW GLIBC === ")
#if defined __has_include
#if __has_include (<sys/auxv.h>)
// #pragma message("=== sys/auxv.h is avail=== ")
#define Z7_GETAUXV_AVAILABLE
#endif
#endif
#endif
#ifdef Z7_GETAUXV_AVAILABLE
// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
#include <sys/auxv.h>
#define USE_HWCAP #define USE_HWCAP
#endif
#ifdef USE_HWCAP #ifdef USE_HWCAP
#if defined(__FreeBSD__)
static unsigned long MY_getauxval(int aux)
{
unsigned long val;
if (elf_aux_info(aux, &val, sizeof(val)))
return 0;
return val;
}
#else
#define MY_getauxval getauxval
#if defined __has_include
#if __has_include (<asm/hwcap.h>)
#include <asm/hwcap.h> #include <asm/hwcap.h>
#endif
#endif
#endif
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); }
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \ #define MY_HWCAP_CHECK_FUNC(name) \
MY_HWCAP_CHECK_FUNC_2(name, name) MY_HWCAP_CHECK_FUNC_2(name, name)
#if 1 || defined(__ARM_NEON)
BoolInt CPU_IsSupported_NEON(void) { return True; }
#else
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
#endif
// MY_HWCAP_CHECK_FUNC (ASIMD) // MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM) #elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \ #define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }
MY_HWCAP_CHECK_FUNC_2(NEON, NEON) MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif #endif
#else // USE_HWCAP #else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \ #define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; } BoolInt CPU_IsSupported_ ## name(void) { return 0; }
#if defined(__ARM_NEON)
BoolInt CPU_IsSupported_NEON(void) { return True; }
#else
MY_HWCAP_CHECK_FUNC(NEON) MY_HWCAP_CHECK_FUNC(NEON)
#endif
#endif // USE_HWCAP #endif // USE_HWCAP

View File

@ -1,5 +1,5 @@
/* DllSecur.c -- DLL loading security /* DllSecur.c -- DLL loading security
2023-04-02 : Igor Pavlov : Public domain */ 2023-12-03 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -11,19 +11,7 @@
#ifndef UNDER_CE #ifndef UNDER_CE
#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
// #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif
#if defined(__clang__) || defined(__GNUC__)
typedef void (*Z7_voidFunction)(void);
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define MY_CAST_FUNC (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define MY_CAST_FUNC
#endif
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags); typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
@ -61,7 +49,7 @@ static const char * const g_Dlls =
if ((UInt16)GetVersion() != 6) { \ if ((UInt16)GetVersion() != 6) { \
const \ const \
Func_SetDefaultDllDirectories setDllDirs = \ Func_SetDefaultDllDirectories setDllDirs = \
(Func_SetDefaultDllDirectories) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \ (Func_SetDefaultDllDirectories) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \
"SetDefaultDllDirectories"); \ "SetDefaultDllDirectories"); \
if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; } if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; }

View File

@ -1,5 +1,5 @@
/* LzFind.c -- Match finder for LZ algorithms /* LzFind.c -- Match finder for LZ algorithms
2023-03-14 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -108,9 +108,15 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
return (p->bufBase != NULL); return (p->bufBase != NULL);
} }
static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
{
return ((CMatchFinder *)p)->buffer;
}
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
{
return GET_AVAIL_BYTES((CMatchFinder *)p);
}
Z7_NO_INLINE Z7_NO_INLINE
@ -571,8 +577,9 @@ void MatchFinder_Init_4(CMatchFinder *p)
#define CYC_TO_POS_OFFSET 0 #define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug // #define CYC_TO_POS_OFFSET 1 // for debug
void MatchFinder_Init(CMatchFinder *p) void MatchFinder_Init(void *_p)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
MatchFinder_Init_HighHash(p); MatchFinder_Init_HighHash(p);
MatchFinder_Init_LowHash(p); MatchFinder_Init_LowHash(p);
MatchFinder_Init_4(p); MatchFinder_Init_4(p);
@ -607,16 +614,16 @@ void MatchFinder_Init(CMatchFinder *p)
#endif #endif
#endif #endif
// #elif defined(MY_CPU_ARM_OR_ARM64) #elif defined(MY_CPU_ARM64) \
#elif defined(MY_CPU_ARM64) /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
#if defined(__clang__) && (__clang_major__ >= 8) \ #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 8) || defined(__GNUC__) && (__GNUC__ >= 6)
#define USE_LZFIND_SATUR_SUB_128 #define USE_LZFIND_SATUR_SUB_128
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
#else #else
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
#endif #endif
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
@ -625,7 +632,7 @@ void MatchFinder_Init(CMatchFinder *p)
#endif #endif
#endif #endif
#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64) #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h> #include <arm64_neon.h>
#else #else
#include <arm_neon.h> #include <arm_neon.h>
@ -1082,9 +1089,11 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
#define MOVE_POS \ #define MOVE_POS \
++p->cyclicBufferPos; \ p->cyclicBufferPos++; \
p->buffer++; \ p->buffer++; \
{ const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } { const UInt32 pos1 = p->pos + 1; \
p->pos = pos1; \
if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
#define MOVE_POS_RET MOVE_POS return distances; #define MOVE_POS_RET MOVE_POS return distances;
@ -1103,20 +1112,26 @@ static void MatchFinder_MovePos(CMatchFinder *p)
} }
#define GET_MATCHES_HEADER2(minLen, ret_op) \ #define GET_MATCHES_HEADER2(minLen, ret_op) \
unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ UInt32 lenLimit = p->lenLimit; \
if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \
cur = p->buffer; cur = p->buffer;
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) #define SKIP_HEADER(minLen) \
do { GET_MATCHES_HEADER2(minLen, continue)
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue #define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \
p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num); #define SKIP_FOOTER \
SkipMatchesSpec(MF_PARAMS(p)); \
MOVE_POS \
} while (--num);
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
distances = func(MF_PARAMS(p), \ distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
distances, (UInt32)_maxLen_); MOVE_POS_RET MOVE_POS_RET
#define GET_MATCHES_FOOTER_BT(_maxLen_) \ #define GET_MATCHES_FOOTER_BT(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
@ -1133,8 +1148,9 @@ static void MatchFinder_MovePos(CMatchFinder *p)
for (; c != lim; c++) if (*(c + diff) != *c) break; \ for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (unsigned)(c - cur); } maxLen = (unsigned)(c - cur); }
static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
GET_MATCHES_HEADER(2) GET_MATCHES_HEADER(2)
HASH2_CALC HASH2_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
@ -1158,8 +1174,9 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
mmm = pos; mmm = pos;
static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm; UInt32 mmm;
UInt32 h2, d2, pos; UInt32 h2, d2, pos;
unsigned maxLen; unsigned maxLen;
@ -1199,8 +1216,9 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
} }
static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm; UInt32 mmm;
UInt32 h2, h3, d2, d3, pos; UInt32 h2, h3, d2, d3, pos;
unsigned maxLen; unsigned maxLen;
@ -1267,10 +1285,12 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
} }
static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm; UInt32 mmm;
UInt32 h2, h3, d2, d3, maxLen, pos; UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(5) GET_MATCHES_HEADER(5)
@ -1339,8 +1359,9 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
} }
static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm; UInt32 mmm;
UInt32 h2, h3, d2, d3, pos; UInt32 h2, h3, d2, d3, pos;
unsigned maxLen; unsigned maxLen;
@ -1407,10 +1428,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
} }
static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm; UInt32 mmm;
UInt32 h2, h3, d2, d3, maxLen, pos; UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(5) GET_MATCHES_HEADER(5)
@ -1466,7 +1489,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (*(cur - d2 + 3) != cur[3]) if (*(cur - d2 + 3) != cur[3])
break; break;
UPDATE_maxLen UPDATE_maxLen
distances[-2] = maxLen; distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
p->son[p->cyclicBufferPos] = curMatch; p->son[p->cyclicBufferPos] = curMatch;
@ -1489,8 +1512,9 @@ UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
} }
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(2) SKIP_HEADER(2)
{ {
HASH2_CALC HASH2_CALC
@ -1511,8 +1535,9 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
SKIP_FOOTER SKIP_FOOTER
} }
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(3) SKIP_HEADER(3)
{ {
UInt32 h2; UInt32 h2;
@ -1526,8 +1551,9 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
SKIP_FOOTER SKIP_FOOTER
} }
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(4) SKIP_HEADER(4)
{ {
UInt32 h2, h3; UInt32 h2, h3;
@ -1542,8 +1568,9 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
SKIP_FOOTER SKIP_FOOTER
} }
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(5) SKIP_HEADER(5)
{ {
UInt32 h2, h3; UInt32 h2, h3;
@ -1589,8 +1616,9 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
}} while(num); \ }} while(num); \
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
HC_SKIP_HEADER(4) HC_SKIP_HEADER(4)
UInt32 h2, h3; UInt32 h2, h3;
@ -1604,8 +1632,9 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
} }
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
{ {
CMatchFinder *p = (CMatchFinder *)_p;
HC_SKIP_HEADER(5) HC_SKIP_HEADER(5)
UInt32 h2, h3; UInt32 h2, h3;
@ -1634,41 +1663,41 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
{ {
vTable->Init = (Mf_Init_Func)MatchFinder_Init; vTable->Init = MatchFinder_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;
if (!p->btMode) if (!p->btMode)
{ {
if (p->numHashBytes <= 4) if (p->numHashBytes <= 4)
{ {
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; vTable->GetMatches = Hc4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; vTable->Skip = Hc4_MatchFinder_Skip;
} }
else else
{ {
vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; vTable->GetMatches = Hc5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; vTable->Skip = Hc5_MatchFinder_Skip;
} }
} }
else if (p->numHashBytes == 2) else if (p->numHashBytes == 2)
{ {
vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; vTable->GetMatches = Bt2_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; vTable->Skip = Bt2_MatchFinder_Skip;
} }
else if (p->numHashBytes == 3) else if (p->numHashBytes == 3)
{ {
vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; vTable->GetMatches = Bt3_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; vTable->Skip = Bt3_MatchFinder_Skip;
} }
else if (p->numHashBytes == 4) else if (p->numHashBytes == 4)
{ {
vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; vTable->GetMatches = Bt4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; vTable->Skip = Bt4_MatchFinder_Skip;
} }
else else
{ {
vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; vTable->GetMatches = Bt5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; vTable->Skip = Bt5_MatchFinder_Skip;
} }
} }

View File

@ -1,5 +1,5 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms /* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2023-04-02 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -94,7 +94,7 @@ static void MtSync_Construct(CMtSync *p)
} }
#define DEBUG_BUFFER_LOCK // define it to debug lock state // #define DEBUG_BUFFER_LOCK // define it to debug lock state
#ifdef DEBUG_BUFFER_LOCK #ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h> #include <stdlib.h>
@ -877,8 +877,9 @@ SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
} }
static void MatchFinderMt_Init(CMatchFinderMt *p) static void MatchFinderMt_Init(void *_p)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
CMatchFinder *mf = MF(p); CMatchFinder *mf = MF(p);
p->btBufPos = p->btBufPos =
@ -981,8 +982,9 @@ static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
return p->pointerToCurPos; return p->pointerToCurPos;
} }
@ -990,8 +992,9 @@ static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
if (p->btBufPos != p->btBufPosLimit) if (p->btBufPos != p->btBufPosLimit)
return p->btNumAvailBytes; return p->btNumAvailBytes;
return MatchFinderMt_GetNextBlock_Bt(p); return MatchFinderMt_GetNextBlock_Bt(p);
@ -1243,8 +1246,9 @@ static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
} }
static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
const UInt32 *bt = p->btBufPos; const UInt32 *bt = p->btBufPos;
const UInt32 len = *bt++; const UInt32 len = *bt++;
const UInt32 *btLim = bt + len; const UInt32 *btLim = bt + len;
@ -1267,8 +1271,9 @@ static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
const UInt32 *bt = p->btBufPos; const UInt32 *bt = p->btBufPos;
UInt32 len = *bt++; UInt32 len = *bt++;
const UInt32 avail = p->btNumAvailBytes - 1; const UInt32 avail = p->btNumAvailBytes - 1;
@ -1315,14 +1320,16 @@ static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); #define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) static void MatchFinderMt0_Skip(void *_p, UInt32 num)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
SKIP_HEADER2_MT { p->btNumAvailBytes--; SKIP_HEADER2_MT { p->btNumAvailBytes--;
SKIP_FOOTER_MT SKIP_FOOTER_MT
} }
static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) static void MatchFinderMt2_Skip(void *_p, UInt32 num)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
SKIP_HEADER_MT(2) SKIP_HEADER_MT(2)
UInt32 h2; UInt32 h2;
MT_HASH2_CALC MT_HASH2_CALC
@ -1330,8 +1337,9 @@ static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
SKIP_FOOTER_MT SKIP_FOOTER_MT
} }
static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) static void MatchFinderMt3_Skip(void *_p, UInt32 num)
{ {
CMatchFinderMt *p = (CMatchFinderMt *)_p;
SKIP_HEADER_MT(3) SKIP_HEADER_MT(3)
UInt32 h2, h3; UInt32 h2, h3;
MT_HASH3_CALC MT_HASH3_CALC
@ -1361,39 +1369,39 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{ {
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; vTable->Init = MatchFinderMt_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; vTable->GetMatches = MatchFinderMt_GetMatches;
switch (MF(p)->numHashBytes) switch (MF(p)->numHashBytes)
{ {
case 2: case 2:
p->GetHeadsFunc = GetHeads2; p->GetHeadsFunc = GetHeads2;
p->MixMatchesFunc = (Mf_Mix_Matches)NULL; p->MixMatchesFunc = NULL;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; vTable->Skip = MatchFinderMt0_Skip;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; vTable->GetMatches = MatchFinderMt2_GetMatches;
break; break;
case 3: case 3:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; p->MixMatchesFunc = MixMatches2;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; vTable->Skip = MatchFinderMt2_Skip;
break; break;
case 4: case 4:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
// it's fast inline version of GetMatches() // it's fast inline version of GetMatches()
// vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; // vTable->GetMatches = MatchFinderMt_GetMatches_Bt4;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; p->MixMatchesFunc = MixMatches3;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; vTable->Skip = MatchFinderMt3_Skip;
break; break;
default: default:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; p->MixMatchesFunc = MixMatches4;
vTable->Skip = vTable->Skip =
(Mf_Skip_Func)MatchFinderMt3_Skip; MatchFinderMt3_Skip;
// (Mf_Skip_Func)MatchFinderMt4_Skip; // MatchFinderMt4_Skip;
break; break;
} }
} }

View File

@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder /* Lzma2Dec.c -- LZMA2 Decoder
2023-03-03 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */ /* #define SHOW_DEBUG_INFO */
@ -157,8 +157,10 @@ static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
p->decoder.prop.lp = (Byte)lp; p->decoder.prop.lp = (Byte)lp;
return LZMA2_STATE_DATA; return LZMA2_STATE_DATA;
} }
}
default:
return LZMA2_STATE_ERROR; return LZMA2_STATE_ERROR;
}
} }
static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size) static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)

View File

@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder /* LzmaEnc.c -- LZMA Encoder
2023-04-13: Igor Pavlov : Public domain */ 2024-01-24: Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -195,11 +195,11 @@ unsigned GetPosSlot1(UInt32 pos);
unsigned GetPosSlot1(UInt32 pos) unsigned GetPosSlot1(UInt32 pos)
{ {
unsigned res; unsigned res;
BSR2_RET(pos, res); BSR2_RET(pos, res)
return res; return res;
} }
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot2(pos, res) { BSR2_RET(pos, res) }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) }
#else // ! LZMA_LOG_BSR #else // ! LZMA_LOG_BSR
@ -512,7 +512,7 @@ struct CLzmaEnc
COPY_ARR(d, s, posEncoders) \ COPY_ARR(d, s, posEncoders) \
(d)->lenProbs = (s)->lenProbs; \ (d)->lenProbs = (s)->lenProbs; \
(d)->repLenProbs = (s)->repLenProbs; \ (d)->repLenProbs = (s)->repLenProbs; \
memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb)); memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp);
void LzmaEnc_SaveState(CLzmaEncHandle p) void LzmaEnc_SaveState(CLzmaEncHandle p)
{ {
@ -1040,14 +1040,14 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
UInt32 price = b; UInt32 price = b;
do do
{ {
unsigned bit = sym & 1; const unsigned bit = sym & 1;
sym >>= 1; sym >>= 1;
price += GET_PRICEa(probs[sym], bit); price += GET_PRICEa(probs[sym], bit);
} }
while (sym >= 2); while (sym >= 2);
{ {
unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
} }
@ -1056,7 +1056,7 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
{ {
unsigned posState; unsigned posState;
size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
for (posState = 1; posState < numPosStates; posState++) for (posState = 1; posState < numPosStates; posState++)
memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
} }
@ -2696,12 +2696,12 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
#endif #endif
{ {
unsigned lclp = p->lc + p->lp; const unsigned lclp = p->lc + p->lp;
if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
{ {
LzmaEnc_FreeLits(p, alloc); LzmaEnc_FreeLits(p, alloc);
p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
if (!p->litProbs || !p->saveState.litProbs) if (!p->litProbs || !p->saveState.litProbs)
{ {
LzmaEnc_FreeLits(p, alloc); LzmaEnc_FreeLits(p, alloc);
@ -2802,8 +2802,8 @@ static void LzmaEnc_Init(CLzmaEnc *p)
} }
{ {
UInt32 num = (UInt32)0x300 << (p->lp + p->lc); const size_t num = (size_t)0x300 << (p->lp + p->lc);
UInt32 k; size_t k;
CLzmaProb *probs = p->litProbs; CLzmaProb *probs = p->litProbs;
for (k = 0; k < num; k++) for (k = 0; k < num; k++)
probs[k] = kProbInitValue; probs[k] = kProbInitValue;

View File

@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder /* MtCoder.c -- Multi-thread Coder
2023-04-13 : Igor Pavlov : Public domain */ 2023-09-07 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -469,7 +469,7 @@ SRes MtCoder_Code(CMtCoder *p)
{ {
RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent)) RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent))
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax)) RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, (UInt32)numBlocksMax, (UInt32)numBlocksMax))
} }
for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++) for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++)

View File

@ -1,5 +1,5 @@
/* MtDec.c -- Multi-thread Decoder /* MtDec.c -- Multi-thread Decoder
2023-04-02 : Igor Pavlov : Public domain */ 2024-02-20 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -809,6 +809,16 @@ static WRes MtDec_ThreadFunc2(CMtDecThread *t)
#endif #endif
typedef
#ifdef _WIN32
UINT_PTR
#elif 1
uintptr_t
#else
ptrdiff_t
#endif
MY_uintptr_t;
static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp) static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
{ {
WRes res; WRes res;
@ -821,7 +831,7 @@ static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
res = MtDec_ThreadFunc2(t); res = MtDec_ThreadFunc2(t);
p = t->mtDec; p = t->mtDec;
if (res == 0) if (res == 0)
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes; return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)p->exitThreadWRes;
{ {
// it's unexpected situation for some threading function error // it's unexpected situation for some threading function error
if (p->exitThreadWRes == 0) if (p->exitThreadWRes == 0)
@ -832,7 +842,7 @@ static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
Event_Set(&p->threads[0].canWrite); Event_Set(&p->threads[0].canWrite);
MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res)); MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));
} }
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res; return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)res;
} }
static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp) static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp)
@ -1072,7 +1082,7 @@ SRes MtDec_Code(CMtDec *p)
if (wres == 0) { wres = Event_Set(&nextThread->canWrite); if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
if (wres == 0) { wres = Event_Set(&nextThread->canRead); if (wres == 0) { wres = Event_Set(&nextThread->canRead);
if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread); if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread);
wres = (WRes)(UINT_PTR)res; wres = (WRes)(MY_uintptr_t)res;
if (wres != 0) if (wres != 0)
{ {
p->needContinue = False; p->needContinue = False;

View File

@ -1,5 +1,5 @@
/* Ppmd7.c -- PPMdH codec /* Ppmd7.c -- PPMdH codec
2023-04-02 : Igor Pavlov : Public domain 2023-09-07 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -302,8 +302,17 @@ static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx)
#define MEM_12_CPY(dest, src, num) \ #define MEM_12_CPY(dest, src, num) \
{ UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ { UInt32 *d = (UInt32 *)(dest); \
do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } const UInt32 *z = (const UInt32 *)(src); \
unsigned n = (num); \
do { \
d[0] = z[0]; \
d[1] = z[1]; \
d[2] = z[2]; \
z += 3; \
d += 3; \
} while (--n); \
}
/* /*
@ -711,8 +720,8 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
if ((ns1 & 1) == 0) if ((ns1 & 1) == 0)
{ {
/* Expand for one UNIT */ /* Expand for one UNIT */
unsigned oldNU = ns1 >> 1; const unsigned oldNU = ns1 >> 1;
unsigned i = U2I(oldNU); const unsigned i = U2I(oldNU);
if (i != U2I((size_t)oldNU + 1)) if (i != U2I((size_t)oldNU + 1))
{ {
void *ptr = Ppmd7_AllocUnits(p, i + 1); void *ptr = Ppmd7_AllocUnits(p, i + 1);
@ -731,7 +740,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
sum = c->Union2.SummFreq; sum = c->Union2.SummFreq;
/* max increase of Escape_Freq is 3 here. /* max increase of Escape_Freq is 3 here.
total increase of Union2.SummFreq for all symbols is less than 256 here */ total increase of Union2.SummFreq for all symbols is less than 256 here */
sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)); sum += (UInt32)(unsigned)((2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)));
/* original PPMdH uses 16-bit variable for (sum) here. /* original PPMdH uses 16-bit variable for (sum) here.
But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
// sum = (UInt16)sum; // sum = (UInt16)sum;
@ -761,7 +770,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
// (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
s->Freq = (Byte)freq; s->Freq = (Byte)freq;
// max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
sum = freq + p->InitEsc + (ns > 3); sum = (UInt32)(freq + p->InitEsc + (ns > 3));
} }
} }
@ -933,10 +942,10 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
p->HiBitsFlag; p->HiBitsFlag;
{ {
// if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
unsigned summ = (UInt16)see->Summ; // & 0xFFFF const unsigned summ = (UInt16)see->Summ; // & 0xFFFF
unsigned r = (summ >> see->Shift); const unsigned r = (summ >> see->Shift);
see->Summ = (UInt16)(summ - r); see->Summ = (UInt16)(summ - r);
*escFreq = r + (r == 0); *escFreq = (UInt32)(r + (r == 0));
} }
} }
else else
@ -981,9 +990,9 @@ void Ppmd7_Update1_0(CPpmd7 *p)
CPpmd_State *s = p->FoundState; CPpmd_State *s = p->FoundState;
CPpmd7_Context *mc = p->MinContext; CPpmd7_Context *mc = p->MinContext;
unsigned freq = s->Freq; unsigned freq = s->Freq;
unsigned summFreq = mc->Union2.SummFreq; const unsigned summFreq = mc->Union2.SummFreq;
p->PrevSuccess = (2 * freq > summFreq); p->PrevSuccess = (2 * freq > summFreq);
p->RunLength += (int)p->PrevSuccess; p->RunLength += (Int32)p->PrevSuccess;
mc->Union2.SummFreq = (UInt16)(summFreq + 4); mc->Union2.SummFreq = (UInt16)(summFreq + 4);
freq += 4; freq += 4;
s->Freq = (Byte)freq; s->Freq = (Byte)freq;

View File

@ -1,5 +1,5 @@
/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder /* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
2023-04-02 : Igor Pavlov : Public domain 2023-09-07 : Igor Pavlov : Public domain
This code is based on: This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */ PPMd var.H (2001): Dmitry Shkarin : Public domain */
@ -58,7 +58,7 @@ static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p); void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym] #define MASK(sym) ((Byte *)charMask)[sym]
// Z7_FORCE_INLINE // Z7_FORCE_INLINE
// static // static
int Ppmd7z_DecodeSymbol(CPpmd7 *p) int Ppmd7z_DecodeSymbol(CPpmd7 *p)
@ -120,8 +120,8 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
MASK(s->Symbol) = 0; MASK(s->Symbol) = 0;
do do
{ {
unsigned sym0 = s2[0].Symbol; const unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol; const unsigned sym1 = s2[1].Symbol;
s2 += 2; s2 += 2;
MASK(sym0) = 0; MASK(sym0) = 0;
MASK(sym1) = 0; MASK(sym1) = 0;
@ -209,17 +209,17 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
unsigned num2 = num / 2; unsigned num2 = num / 2;
num &= 1; num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num; s += num;
p->MinContext = mc; p->MinContext = mc;
do do
{ {
unsigned sym0 = s[0].Symbol; const unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol; const unsigned sym1 = s[1].Symbol;
s += 2; s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1)));
} }
while (--num2); while (--num2);
} }
@ -238,13 +238,13 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
s = Ppmd7_GetStats(p, p->MinContext); s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count; hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol)); // count -= s->Freq & (UInt32)(MASK(s->Symbol));
// if ((Int32)count >= 0) // if ((Int32)count >= 0)
{ {
for (;;) for (;;)
{ {
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; // count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
} }
} }
s--; s--;

View File

@ -1,5 +1,5 @@
/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder /* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
2023-04-02 : Igor Pavlov : Public domain 2023-09-07 : Igor Pavlov : Public domain
This code is based on: This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */ PPMd var.H (2001): Dmitry Shkarin : Public domain */
@ -82,7 +82,7 @@ void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
void Ppmd7_UpdateModel(CPpmd7 *p); void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym] #define MASK(sym) ((Byte *)charMask)[sym]
Z7_FORCE_INLINE Z7_FORCE_INLINE
static static
@ -139,8 +139,8 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
MASK(s->Symbol) = 0; MASK(s->Symbol) = 0;
do do
{ {
unsigned sym0 = s2[0].Symbol; const unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol; const unsigned sym1 = s2[1].Symbol;
s2 += 2; s2 += 2;
MASK(sym0) = 0; MASK(sym0) = 0;
MASK(sym1) = 0; MASK(sym1) = 0;
@ -265,16 +265,15 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
if (num2 != 0) if (num2 != 0)
{ {
s += i; s += i;
for (;;) do
{ {
unsigned sym0 = s[0].Symbol; const unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol; const unsigned sym1 = s[1].Symbol;
s += 2; s += 2;
sum += (s[-2].Freq & (unsigned)(MASK(sym0))); sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
sum += (s[-1].Freq & (unsigned)(MASK(sym1))); sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
if (--num2 == 0)
break;
} }
while (--num2);
} }

View File

@ -1,5 +1,5 @@
/* Sha256.c -- SHA-256 Hash /* Sha256.c -- SHA-256 Hash
2023-04-02 : Igor Pavlov : Public domain 2024-03-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */ This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "Precomp.h" #include "Precomp.h"
@ -15,35 +15,35 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
#endif #endif
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER #if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
#if _MSC_VER >= 1200 || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
|| defined(_MSC_VER) && (_MSC_VER >= 1200)
#define Z7_COMPILER_SHA256_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#elif defined(__clang__) #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if (__clang_major__ >= 8) // fix that check
#if defined(__ARM_FEATURE_SHA2) \
|| defined(__ARM_FEATURE_CRYPTO)
#define Z7_COMPILER_SHA256_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #else
#elif defined(__GNUC__) #if defined(MY_CPU_ARM64) \
#if (__GNUC__ >= 8) // fix that check || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
#define Z7_COMPILER_SHA256_SUPPORTED || defined(Z7_MSC_VER_ORIGINAL)
#endif #if defined(__ARM_FP) && \
#elif defined(__INTEL_COMPILER) ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
#if (__INTEL_COMPILER >= 1800) // fix that check || defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define Z7_COMPILER_SHA256_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#endif #endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#endif #endif
#endif #endif
@ -224,8 +224,6 @@ void Sha256_Init(CSha256 *p)
#endif #endif
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
// static // static
extern MY_ALIGN(64) extern MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64]; const UInt32 SHA256_K_ARRAY[64];

View File

@ -1,5 +1,5 @@
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
2023-04-02 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "Compiler.h" #include "Compiler.h"
@ -11,6 +11,8 @@
#endif #endif
#endif #endif
// #define Z7_USE_HW_SHA_STUB // for debug
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
#define USE_HW_SHA #define USE_HW_SHA
@ -32,9 +34,14 @@
#endif #endif
#if (_MSC_VER >= USE_VER_MIN) #if (_MSC_VER >= USE_VER_MIN)
#define USE_HW_SHA #define USE_HW_SHA
#else
#define Z7_USE_HW_SHA_STUB
#endif #endif
#endif #endif
// #endif // MY_CPU_X86_OR_AMD64 // #endif // MY_CPU_X86_OR_AMD64
#ifndef USE_HW_SHA
// #define Z7_USE_HW_SHA_STUB // for debug
#endif
#ifdef USE_HW_SHA #ifdef USE_HW_SHA
@ -202,19 +209,28 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
#endif // USE_HW_SHA #endif // USE_HW_SHA
#elif defined(MY_CPU_ARM_OR_ARM64) #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__clang__) #if defined(__ARM_FEATURE_SHA2) \
#if (__clang_major__ >= 8) // fix that check || defined(__ARM_FEATURE_CRYPTO)
#define USE_HW_SHA
#else
#if defined(MY_CPU_ARM64) \
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|| defined(Z7_MSC_VER_ORIGINAL)
#if defined(__ARM_FP) && \
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define USE_HW_SHA #define USE_HW_SHA
#endif #endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_SHA
#endif #endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif #endif
#endif #endif
@ -222,23 +238,87 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
// #pragma message("=== Sha256 HW === ") // #pragma message("=== Sha256 HW === ")
#if defined(__clang__) || defined(__GNUC__) #if defined(__clang__) || defined(__GNUC__)
#if !defined(__ARM_FEATURE_SHA2) && \
!defined(__ARM_FEATURE_CRYPTO)
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
#define ATTRIB_SHA __attribute__((__target__("+crypto,sha2"))) #if defined(__clang__)
#define ATTRIB_SHA __attribute__((__target__("crypto")))
#else
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
#endif
#else #else
#if defined(__clang__) && (__clang_major__ >= 1)
#define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
#else
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#endif #endif
#endif
#else #else
// _MSC_VER // _MSC_VER
// for arm32 // for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS #define _ARM_USE_NEW_NEON_INTRINSICS
#endif #endif
#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64)
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h> #include <arm64_neon.h>
#else #else
#include <arm_neon.h>
#if defined(__clang__) && __clang_major__ < 16
#if !defined(__ARM_FEATURE_SHA2) && \
!defined(__ARM_FEATURE_CRYPTO)
// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
// #if defined(__clang__) && __clang_major__ < 13
#define __ARM_FEATURE_CRYPTO 1
// #else
#define __ARM_FEATURE_SHA2 1
// #endif
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif #endif
#endif // clang
#if defined(__clang__)
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #pragma message("#define __ARM_ARCH 8")
#undef __ARM_ARCH
#define __ARM_ARCH 8
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // clang
#include <arm_neon.h>
#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
defined(__ARM_FEATURE_CRYPTO) && \
defined(__ARM_FEATURE_SHA2)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#undef __ARM_FEATURE_CRYPTO
#undef __ARM_FEATURE_SHA2
#undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
#endif
#endif // Z7_MSC_VER_ORIGINAL
typedef uint32x4_t v128; typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC // typedef __n128 v128; // MSVC
@ -316,10 +396,10 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
LOAD_SHUFFLE (m2, 2) LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3) LOAD_SHUFFLE (m3, 3)
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
state0 = vaddq_u32(state0, state0_save); state0 = vaddq_u32(state0, state0_save);
state1 = vaddq_u32(state1, state1_save); state1 = vaddq_u32(state1, state1_save);
@ -337,16 +417,17 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
#endif // MY_CPU_ARM_OR_ARM64 #endif // MY_CPU_ARM_OR_ARM64
#ifndef USE_HW_SHA #if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
// #error Stop_Compiling_UNSUPPORTED_SHA // #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h> // #include <stdlib.h>
// We can compile this file with another C compiler,
// or we can compile asm version.
// So we can generate real code instead of this stub function.
// #include "Sha256.h" // #include "Sha256.h"
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); // #if defined(_MSC_VER)
#pragma message("Sha256 HW-SW stub was used") #pragma message("Sha256 HW-SW stub was used")
// #endif
void Z7_FASTCALL Sha256_UpdateBlocks (UInt32 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{ {
@ -359,7 +440,6 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
return; return;
*/ */
} }
#endif #endif
@ -384,3 +464,4 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
#undef USE_HW_SHA #undef USE_HW_SHA
#undef ATTRIB_SHA #undef ATTRIB_SHA
#undef USE_VER_MIN #undef USE_VER_MIN
#undef Z7_USE_HW_SHA_STUB

View File

@ -1,5 +1,5 @@
/* SwapBytes.c -- Byte Swap conversion filter /* SwapBytes.c -- Byte Swap conversion filter
2023-04-07 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -305,11 +305,12 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want
*/ */
// _mm256_broadcastsi128_si256(*mask128_ptr); // _mm256_broadcastsi128_si256(*mask128_ptr);
/* #if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 80000)
#define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
MY_mm256_set_m128i #else
*/ #define MY_mm256_set_m128i _mm256_set_m128i
_mm256_set_m128i( #endif
MY_mm256_set_m128i(
*(const __m128i *)mask128_ptr, *(const __m128i *)mask128_ptr,
*(const __m128i *)mask128_ptr); *(const __m128i *)mask128_ptr);
#endif #endif
@ -330,32 +331,59 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
// compile message "NEON intrinsics not available with the soft-float ABI" // compile message "NEON intrinsics not available with the soft-float ABI"
#elif defined(MY_CPU_ARM_OR_ARM64) || \ #elif defined(MY_CPU_ARM_OR_ARM64) \
(defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) && defined(MY_CPU_LE) \
// #elif defined(MY_CPU_ARM64) && !defined(Z7_DISABLE_ARM_NEON)
#if defined(__clang__) && (__clang_major__ >= 8) \ #if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8) || defined(__GNUC__) && (__GNUC__ >= 6)
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) \ #if defined(__ARM_FP)
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 4)) \
|| defined(MY_CPU_ARM64) || defined(MY_CPU_ARM64)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON)
#define USE_SWAP_128 #define USE_SWAP_128
#endif
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
// #define SWAP_ATTRIB_NEON __attribute__((__target__(""))) // #define SWAP_ATTRIB_NEON __attribute__((__target__("")))
#else #else
// #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) #if defined(Z7_CLANG_VERSION)
#endif // #define SWAP_ATTRIB_NEON __attribute__((__target__("neon")))
#else
// #pragma message("SWAP_ATTRIB_NEON __attribute__((__target__(fpu=neon))")
#define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=neon")))
#endif
#endif // MY_CPU_ARM64
#endif // __ARM_NEON
#endif // __ARM_ARCH
#endif // __ARM_FP
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#if (_MSC_VER >= 1910) #if (_MSC_VER >= 1910)
#define USE_SWAP_128 #define USE_SWAP_128
#endif #endif
#endif #endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64) #ifdef USE_SWAP_128
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h> #include <arm64_neon.h>
#else #else
/*
#if !defined(__ARM_NEON)
#if defined(Z7_GCC_VERSION) && (__GNUC__ < 5) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 90201) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 100100)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#pragma message("#define __ARM_NEON 1")
// #define __ARM_NEON 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif
*/
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#endif
#ifndef USE_SWAP_128 #ifndef USE_SWAP_128
#define FORCE_SWAP_MODE #define FORCE_SWAP_MODE
@ -464,6 +492,13 @@ Z7_ATTRIB_NO_VECTOR \
void Z7_FASTCALL void Z7_FASTCALL
#if defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
#endif
#ifdef MY_CPU_64BIT #ifdef MY_CPU_64BIT
#if defined(MY_CPU_ARM64) \ #if defined(MY_CPU_ARM64) \

View File

@ -1,5 +1,5 @@
/* Threads.c -- multithreading library /* Threads.c -- multithreading library
2023-03-04 : Igor Pavlov : Public domain */ 2024-03-28 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -195,20 +195,19 @@ WRes CriticalSection_Init(CCriticalSection *p)
// ---------- POSIX ---------- // ---------- POSIX ----------
#ifndef __APPLE__ #if defined(__linux__) && !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef Z7_AFFINITY_DISABLE #ifndef Z7_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
// clang < 3.6 : unknown warning group '-Wreserved-id-macro' // clang < 3.6 : unknown warning group '-Wreserved-id-macro'
// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier" // clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier"
// clang >= 13 : do not give warning // clang >= 13 : do not give warning
#if !defined(_GNU_SOURCE) #if !defined(_GNU_SOURCE)
#if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12) Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#pragma GCC diagnostic ignored "-Wreserved-id-macro" // #define _GNU_SOURCE
#endif Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#define _GNU_SOURCE
#endif // !defined(_GNU_SOURCE) #endif // !defined(_GNU_SOURCE)
#endif // Z7_AFFINITY_DISABLE #endif // Z7_AFFINITY_DISABLE
#endif // __APPLE__ #endif // __linux__
#include "Threads.h" #include "Threads.h"
@ -244,8 +243,9 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
{ {
if (cpuSet) if (cpuSet)
{ {
#ifdef Z7_AFFINITY_SUPPORTED // pthread_attr_setaffinity_np() is not supported for MUSL compile.
// so we check for __GLIBC__ here
#if defined(Z7_AFFINITY_SUPPORTED) && defined( __GLIBC__)
/* /*
printf("\n affinity :"); printf("\n affinity :");
unsigned i; unsigned i;
@ -267,7 +267,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
// ret2 = // ret2 =
pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2; // if (ret2) ret = ret2;
#endif #endif
} }
ret = pthread_create(&p->_tid, &attr, func, param); ret = pthread_create(&p->_tid, &attr, func, param);
@ -369,13 +369,20 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
{ return AutoResetEvent_Create(p, 0); } { return AutoResetEvent_Create(p, 0); }
#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13)
// freebsd:
#pragma GCC diagnostic ignored "-Wthread-safety-analysis"
#endif
WRes Event_Set(CEvent *p) WRes Event_Set(CEvent *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)) RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = True; p->_state = True;
int res1 = pthread_cond_broadcast(&p->_cond); {
int res2 = pthread_mutex_unlock(&p->_mutex); const int res1 = pthread_cond_broadcast(&p->_cond);
const int res2 = pthread_mutex_unlock(&p->_mutex);
return (res2 ? res2 : res1); return (res2 ? res2 : res1);
}
} }
WRes Event_Reset(CEvent *p) WRes Event_Reset(CEvent *p)
@ -408,8 +415,8 @@ WRes Event_Close(CEvent *p)
return 0; return 0;
p->_created = 0; p->_created = 0;
{ {
int res1 = pthread_mutex_destroy(&p->_mutex); const int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond); const int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2); return (res1 ? res1 : res2);
} }
} }
@ -487,8 +494,8 @@ WRes Semaphore_Close(CSemaphore *p)
return 0; return 0;
p->_created = 0; p->_created = 0;
{ {
int res1 = pthread_mutex_destroy(&p->_mutex); const int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond); const int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2); return (res1 ? res1 : res2);
} }
} }
@ -549,6 +556,18 @@ LONG InterlockedIncrement(LONG volatile *addend)
#endif #endif
} }
LONG InterlockedDecrement(LONG volatile *addend)
{
// Print("InterlockedDecrement")
#ifdef USE_HACK_UNSAFE_ATOMIC
LONG val = *addend - 1;
*addend = val;
return val;
#else
return __sync_sub_and_fetch(addend, 1);
#endif
}
#endif // _WIN32 #endif // _WIN32
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)

View File

@ -1,5 +1,5 @@
/* Xz.c - Xz /* Xz.c - Xz
2023-04-02 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -52,6 +52,7 @@ void XzCheck_Init(CXzCheck *p, unsigned mode)
case XZ_CHECK_CRC32: p->crc = CRC_INIT_VAL; break; case XZ_CHECK_CRC32: p->crc = CRC_INIT_VAL; break;
case XZ_CHECK_CRC64: p->crc64 = CRC64_INIT_VAL; break; case XZ_CHECK_CRC64: p->crc64 = CRC64_INIT_VAL; break;
case XZ_CHECK_SHA256: Sha256_Init(&p->sha); break; case XZ_CHECK_SHA256: Sha256_Init(&p->sha); break;
default: break;
} }
} }
@ -62,6 +63,7 @@ void XzCheck_Update(CXzCheck *p, const void *data, size_t size)
case XZ_CHECK_CRC32: p->crc = CrcUpdate(p->crc, data, size); break; case XZ_CHECK_CRC32: p->crc = CrcUpdate(p->crc, data, size); break;
case XZ_CHECK_CRC64: p->crc64 = Crc64Update(p->crc64, data, size); break; case XZ_CHECK_CRC64: p->crc64 = Crc64Update(p->crc64, data, size); break;
case XZ_CHECK_SHA256: Sha256_Update(&p->sha, (const Byte *)data, size); break; case XZ_CHECK_SHA256: Sha256_Update(&p->sha, (const Byte *)data, size); break;
default: break;
} }
} }

View File

@ -1,5 +1,5 @@
/* XzCrc64.c -- CRC64 calculation /* XzCrc64.c -- CRC64 calculation
2023-04-02 : Igor Pavlov : Public domain */ 2023-12-08 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -8,36 +8,76 @@
#define kCrc64Poly UINT64_CONST(0xC96C5795D7870F42) #define kCrc64Poly UINT64_CONST(0xC96C5795D7870F42)
#ifdef MY_CPU_LE // for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
#define CRC64_NUM_TABLES 4 // #define Z7_CRC64_DEBUG_BE
#else #ifdef Z7_CRC64_DEBUG_BE
#define CRC64_NUM_TABLES 5 #undef MY_CPU_LE
#define MY_CPU_BE
UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif #endif
#ifdef Z7_CRC64_NUM_TABLES
#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES
#else
#define Z7_CRC64_NUM_TABLES_USE 12
#endif
#if Z7_CRC64_NUM_TABLES_USE < 1
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#if Z7_CRC64_NUM_TABLES_USE != 1
#ifndef MY_CPU_BE #ifndef MY_CPU_BE
UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); #define FUNC_NAME_LE_2(s) XzCrc64UpdateT ## s
#define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s)
#define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC64_NUM_TABLES_USE)
UInt64 Z7_FASTCALL FUNC_NAME_LE (UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif
#ifndef MY_CPU_LE
#define FUNC_NAME_BE_2(s) XzCrc64UpdateBeT ## s
#define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s)
#define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC64_NUM_TABLES_USE)
UInt64 Z7_FASTCALL FUNC_NAME_BE (UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif #endif
typedef UInt64 (Z7_FASTCALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table); #if defined(MY_CPU_LE)
#define FUNC_REF FUNC_NAME_LE
#elif defined(MY_CPU_BE)
#define FUNC_REF FUNC_NAME_BE
#else
#define FUNC_REF g_Crc64Update
static UInt64 (Z7_FASTCALL *FUNC_REF)(UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif
#endif
MY_ALIGN(64)
static UInt64 g_Crc64Table[256 * Z7_CRC64_NUM_TABLES_USE];
static CRC64_FUNC g_Crc64Update;
UInt64 g_Crc64Table[256 * CRC64_NUM_TABLES];
UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size) UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size)
{ {
return g_Crc64Update(v, data, size, g_Crc64Table); #if Z7_CRC64_NUM_TABLES_USE == 1
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
const UInt64 *table = g_Crc64Table;
const Byte *p = (const Byte *)data;
const Byte *lim = p + size;
for (; p != lim; p++)
v = CRC64_UPDATE_BYTE_2(v, *p);
return v;
#undef CRC64_UPDATE_BYTE_2
#else
return FUNC_REF (v, data, size, g_Crc64Table);
#endif
} }
UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size)
{
return g_Crc64Update(CRC64_INIT_VAL, data, size, g_Crc64Table) ^ CRC64_INIT_VAL;
}
Z7_NO_INLINE
void Z7_FASTCALL Crc64GenerateTable(void) void Z7_FASTCALL Crc64GenerateTable(void)
{ {
UInt32 i; unsigned i;
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
{ {
UInt64 r = i; UInt64 r = i;
@ -46,35 +86,55 @@ void Z7_FASTCALL Crc64GenerateTable(void)
r = (r >> 1) ^ (kCrc64Poly & ((UInt64)0 - (r & 1))); r = (r >> 1) ^ (kCrc64Poly & ((UInt64)0 - (r & 1)));
g_Crc64Table[i] = r; g_Crc64Table[i] = r;
} }
for (i = 256; i < 256 * CRC64_NUM_TABLES; i++)
#if Z7_CRC64_NUM_TABLES_USE != 1
#if 1 || 1 && defined(MY_CPU_X86) // low register count
for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i++)
{ {
const UInt64 r = g_Crc64Table[(size_t)i - 256]; const UInt64 r0 = g_Crc64Table[(size_t)i];
g_Crc64Table[i] = g_Crc64Table[r & 0xFF] ^ (r >> 8); g_Crc64Table[(size_t)i + 256] = g_Crc64Table[(Byte)r0] ^ (r0 >> 8);
} }
#else
#ifdef MY_CPU_LE for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i += 2)
g_Crc64Update = XzCrc64UpdateT4;
#else
{ {
#ifndef MY_CPU_BE UInt64 r0 = g_Crc64Table[(size_t)(i) ];
UInt64 r1 = g_Crc64Table[(size_t)(i) + 1];
r0 = g_Crc64Table[(Byte)r0] ^ (r0 >> 8);
r1 = g_Crc64Table[(Byte)r1] ^ (r1 >> 8);
g_Crc64Table[(size_t)i + 256 ] = r0;
g_Crc64Table[(size_t)i + 256 + 1] = r1;
}
#endif
#ifndef MY_CPU_LE
{
#ifndef MY_CPU_BE
UInt32 k = 1; UInt32 k = 1;
if (*(const Byte *)&k == 1) if (*(const Byte *)&k == 1)
g_Crc64Update = XzCrc64UpdateT4; FUNC_REF = FUNC_NAME_LE;
else else
#endif #endif
{ {
for (i = 256 * CRC64_NUM_TABLES - 1; i >= 256; i--) #ifndef MY_CPU_BE
FUNC_REF = FUNC_NAME_BE;
#endif
for (i = 0; i < 256 * Z7_CRC64_NUM_TABLES_USE; i++)
{ {
const UInt64 x = g_Crc64Table[(size_t)i - 256]; const UInt64 x = g_Crc64Table[i];
g_Crc64Table[i] = Z7_BSWAP64(x); g_Crc64Table[i] = Z7_BSWAP64(x);
} }
g_Crc64Update = XzCrc64UpdateT1_BeT4;
} }
} }
#endif #endif // ndef MY_CPU_LE
#endif // Z7_CRC64_NUM_TABLES_USE != 1
} }
#undef kCrc64Poly #undef kCrc64Poly
#undef CRC64_NUM_TABLES #undef Z7_CRC64_NUM_TABLES_USE
#undef FUNC_REF
#undef FUNC_NAME_LE_2
#undef FUNC_NAME_LE_1
#undef FUNC_NAME_LE
#undef FUNC_NAME_BE_2
#undef FUNC_NAME_BE_1
#undef FUNC_NAME_BE

View File

@ -1,61 +1,261 @@
/* XzCrc64Opt.c -- CRC64 calculation /* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
2023-04-02 : Igor Pavlov : Public domain */ 2023-12-08 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "CpuArch.h" #include "CpuArch.h"
#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1
// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
// #define Z7_CRC64_DEBUG_BE
#ifdef Z7_CRC64_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
#if defined(MY_CPU_64BIT)
#define Z7_CRC64_USE_64BIT
#endif
// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c
#ifdef Z7_CRC64_NUM_TABLES
#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES
#else
#define Z7_CRC64_NUM_TABLES_USE 12
#endif
#if Z7_CRC64_NUM_TABLES_USE % 4 || \
Z7_CRC64_NUM_TABLES_USE < 4 || \
Z7_CRC64_NUM_TABLES_USE > 4 * 4
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
#ifndef MY_CPU_BE #ifndef MY_CPU_BE
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); #if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
#define Q64LE(n, d) \
( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \
^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \
^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \
^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \
^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \
^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] )
#define R64(a) *((const UInt64 *)(const void *)p + (a))
#else
#define Q32LE(n, d) \
( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
#define R32(a) *((const UInt32 *)(const void *)p + (a))
#endif
#define CRC64_FUNC_PRE_LE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
#define CRC64_FUNC_PRE_LE(step) \
CRC64_FUNC_PRE_LE2(step); \
CRC64_FUNC_PRE_LE2(step)
CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) const Byte *lim;
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2(v, *p); v = CRC64_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4) lim = p + size;
if (size >= Z7_CRC64_NUM_TABLES_USE)
{ {
const UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; lim -= Z7_CRC64_NUM_TABLES_USE;
v = (v >> 32) do
^ (table + 0x300)[((d ) & 0xFF)] {
^ (table + 0x200)[((d >> 8) & 0xFF)] #if Z7_CRC64_NUM_TABLES_USE == 4
^ (table + 0x100)[((d >> 16) & 0xFF)] const UInt32 d = (UInt32)v ^ R32(0);
^ (table + 0x000)[((d >> 24))]; v = (v >> 32) ^ Q32LE(0, d);
#elif Z7_CRC64_NUM_TABLES_USE == 8
#ifdef Z7_CRC64_USE_64BIT
v ^= R64(0);
v = Q64LE(0, v);
#else
UInt32 v0, v1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
v = Q32LE(1, v0) ^ Q32LE(0, v1);
#endif
#elif Z7_CRC64_NUM_TABLES_USE == 12
UInt32 w;
UInt32 v0, v1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
w = R32(2);
v = Q32LE(0, w);
v ^= Q32LE(2, v0) ^ Q32LE(1, v1);
#elif Z7_CRC64_NUM_TABLES_USE == 16
#ifdef Z7_CRC64_USE_64BIT
UInt64 w;
UInt64 x;
w = R64(1); x = Q64LE(0, w);
v ^= R64(0); v = x ^ Q64LE(1, v);
#else
UInt32 v0, v1;
UInt32 r0, r1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
r0 = R32(2);
r1 = R32(3);
v = Q32LE(1, r0) ^ Q32LE(0, r1);
v ^= Q32LE(3, v0) ^ Q32LE(2, v1);
#endif
#else
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
p += Z7_CRC64_NUM_TABLES_USE;
} }
for (; size > 0; size--, p++) while (p <= lim);
lim += Z7_CRC64_NUM_TABLES_USE;
}
for (; p < lim; p++)
v = CRC64_UPDATE_BYTE_2(v, *p); v = CRC64_UPDATE_BYTE_2(v, *p);
return v; return v;
} }
#undef CRC64_UPDATE_BYTE_2
#undef R32
#undef R64
#undef Q32LE
#undef Q64LE
#undef CRC64_FUNC_PRE_LE
#undef CRC64_FUNC_PRE_LE2
#endif #endif
#ifndef MY_CPU_LE #ifndef MY_CPU_LE
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8))
UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); #if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
#define Q64BE(n, d) \
( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \
^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \
^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \
^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \
^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] )
#ifdef Z7_CRC64_DEBUG_BE
#define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a))
#else
#define R64BE(a) *((const UInt64 *)(const void *)p + (a))
#endif
#else
#define Q32BE(n, d) \
( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
#ifdef Z7_CRC64_DEBUG_BE
#define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a))
#else
#define R32BE(a) *((const UInt32 *)(const void *)p + (a))
#endif
#endif
#define CRC64_FUNC_PRE_BE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
#define CRC64_FUNC_PRE_BE(step) \
CRC64_FUNC_PRE_BE2(step); \
CRC64_FUNC_PRE_BE2(step)
CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
table += 0x100; const Byte *lim;
v = Z7_BSWAP64(v); v = Z7_BSWAP64(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p); v = CRC64_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4) lim = p + size;
if (size >= Z7_CRC64_NUM_TABLES_USE)
{ {
const UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; lim -= Z7_CRC64_NUM_TABLES_USE;
v = (v << 32) do
^ (table + 0x000)[((d ) & 0xFF)] {
^ (table + 0x100)[((d >> 8) & 0xFF)] #if Z7_CRC64_NUM_TABLES_USE == 4
^ (table + 0x200)[((d >> 16) & 0xFF)] const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
^ (table + 0x300)[((d >> 24))]; v = (v << 32) ^ Q32BE(0, d);
#elif Z7_CRC64_NUM_TABLES_USE == 12
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
const UInt32 w = R32BE(2);
v = Q32BE(0, w);
v ^= Q32BE(2, d1) ^ Q32BE(1, d0);
#elif Z7_CRC64_NUM_TABLES_USE == 8
#ifdef Z7_CRC64_USE_64BIT
v ^= R64BE(0);
v = Q64BE(0, v);
#else
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
v = Q32BE(1, d1) ^ Q32BE(0, d0);
#endif
#elif Z7_CRC64_NUM_TABLES_USE == 16
#ifdef Z7_CRC64_USE_64BIT
const UInt64 w = R64BE(1);
v ^= R64BE(0);
v = Q64BE(0, w) ^ Q64BE(1, v);
#else
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
const UInt32 w1 = R32BE(2);
const UInt32 w0 = R32BE(3);
v = Q32BE(1, w1) ^ Q32BE(0, w0);
v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
#endif
#elif
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
p += Z7_CRC64_NUM_TABLES_USE;
} }
for (; size > 0; size--, p++) while (p <= lim);
lim += Z7_CRC64_NUM_TABLES_USE;
}
for (; p < lim; p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p); v = CRC64_UPDATE_BYTE_2_BE(v, *p);
return Z7_BSWAP64(v); return Z7_BSWAP64(v);
} }
#undef CRC64_UPDATE_BYTE_2_BE
#undef R32BE
#undef R64BE
#undef Q32BE
#undef Q64BE
#undef CRC64_FUNC_PRE_BE
#undef CRC64_FUNC_PRE_BE2
#endif
#undef Z7_CRC64_NUM_TABLES_USE
#endif #endif

View File

@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode /* XzDec.c -- Xz Decode
2023-04-13 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -105,30 +105,32 @@ static SRes XzBcFilterState_SetProps(void *pp, const Byte *props, size_t propSiz
{ {
if (propSize != 1) if (propSize != 1)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
p->delta = (unsigned)props[0] + 1; p->delta = (UInt32)props[0] + 1;
} }
else else
{ {
if (propSize == 4) if (propSize == 4)
{ {
UInt32 v = GetUi32(props); const UInt32 v = GetUi32(props);
switch (p->methodId) switch (p->methodId)
{ {
case XZ_ID_PPC: case XZ_ID_PPC:
case XZ_ID_ARM: case XZ_ID_ARM:
case XZ_ID_SPARC: case XZ_ID_SPARC:
case XZ_ID_ARM64: case XZ_ID_ARM64:
if ((v & 3) != 0) if (v & 3)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
break; break;
case XZ_ID_ARMT: case XZ_ID_ARMT:
if ((v & 1) != 0) case XZ_ID_RISCV:
if (v & 1)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
break; break;
case XZ_ID_IA64: case XZ_ID_IA64:
if ((v & 0xF) != 0) if (v & 0xf)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
break; break;
default: break;
} }
p->ip = v; p->ip = v;
} }
@ -151,12 +153,13 @@ static void XzBcFilterState_Init(void *pp)
static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] = static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] =
{ {
Z7_BRANCH_CONV_DEC(PPC), Z7_BRANCH_CONV_DEC_2 (BranchConv_PPC),
Z7_BRANCH_CONV_DEC(IA64), Z7_BRANCH_CONV_DEC_2 (BranchConv_IA64),
Z7_BRANCH_CONV_DEC(ARM), Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM),
Z7_BRANCH_CONV_DEC(ARMT), Z7_BRANCH_CONV_DEC_2 (BranchConv_ARMT),
Z7_BRANCH_CONV_DEC(SPARC), Z7_BRANCH_CONV_DEC_2 (BranchConv_SPARC),
Z7_BRANCH_CONV_DEC(ARM64) Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM64),
Z7_BRANCH_CONV_DEC_2 (BranchConv_RISCV)
}; };
static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size) static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size)
@ -262,7 +265,7 @@ static SRes XzBcFilterState_Code2(void *pp,
#define XZ_IS_SUPPORTED_FILTER_ID(id) \ #define XZ_IS_SUPPORTED_FILTER_ID(id) \
((id) >= XZ_ID_Delta && (id) <= XZ_ID_ARM64) ((id) >= XZ_ID_Delta && (id) <= XZ_ID_RISCV)
SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc) Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc)
@ -541,13 +544,12 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met
{ {
IStateCoder *sc = &p->coders[coderIndex]; IStateCoder *sc = &p->coders[coderIndex];
p->ids[coderIndex] = methodId; p->ids[coderIndex] = methodId;
switch (methodId) if (methodId == XZ_ID_LZMA2)
{ return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc);
case XZ_ID_LZMA2: return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc); #ifdef USE_SUBBLOCK
#ifdef USE_SUBBLOCK if (methodId == XZ_ID_Subblock)
case XZ_ID_Subblock: return SbState_SetFromMethod(sc, p->alloc); return SbState_SetFromMethod(sc, p->alloc);
#endif #endif
}
if (coderIndex == 0) if (coderIndex == 0)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId, return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId,
@ -558,10 +560,8 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met
static SRes MixCoder_ResetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 methodId, Byte *outBuf, size_t outBufSize) static SRes MixCoder_ResetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 methodId, Byte *outBuf, size_t outBufSize)
{ {
IStateCoder *sc = &p->coders[coderIndex]; IStateCoder *sc = &p->coders[coderIndex];
switch (methodId) if (methodId == XZ_ID_LZMA2)
{ return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize);
case XZ_ID_LZMA2: return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize);
}
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
} }
@ -804,7 +804,7 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte
} }
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \ #define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
if (s == 0) return SZ_ERROR_ARCHIVE; \ if (s == 0) return SZ_ERROR_ARCHIVE; \
pos += s; } pos += s; }
@ -1034,7 +1034,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
SRes res; SRes res;
ECoderFinishMode finishMode2 = finishMode; ECoderFinishMode finishMode2 = finishMode;
BoolInt srcFinished2 = srcFinished; BoolInt srcFinished2 = (BoolInt)srcFinished;
BoolInt destFinish = False; BoolInt destFinish = False;
if (p->block.packSize != (UInt64)(Int64)-1) if (p->block.packSize != (UInt64)(Int64)-1)
@ -1127,7 +1127,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
return SZ_OK; return SZ_OK;
} }
switch (p->state) switch ((int)p->state)
{ {
case XZ_STATE_STREAM_HEADER: case XZ_STATE_STREAM_HEADER:
{ {
@ -1172,15 +1172,15 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
p->state = XZ_STATE_STREAM_INDEX; p->state = XZ_STATE_STREAM_INDEX;
break; break;
} }
p->blockHeaderSize = ((UInt32)p->buf[0] << 2) + 4; p->blockHeaderSize = ((unsigned)p->buf[0] << 2) + 4;
break; break;
} }
if (p->pos != p->blockHeaderSize) if (p->pos != p->blockHeaderSize)
{ {
UInt32 cur = p->blockHeaderSize - p->pos; unsigned cur = p->blockHeaderSize - p->pos;
if (cur > srcRem) if (cur > srcRem)
cur = (UInt32)srcRem; cur = (unsigned)srcRem;
memcpy(p->buf + p->pos, src, cur); memcpy(p->buf + p->pos, src, cur);
p->pos += cur; p->pos += cur;
(*srcLen) += cur; (*srcLen) += cur;
@ -1222,8 +1222,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
} }
else else
{ {
UInt32 checkSize = XzFlags_GetCheckSize(p->streamFlags); const unsigned checkSize = XzFlags_GetCheckSize(p->streamFlags);
UInt32 cur = checkSize - p->pos; unsigned cur = checkSize - p->pos;
if (cur != 0) if (cur != 0)
{ {
if (srcRem == 0) if (srcRem == 0)
@ -1232,7 +1232,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
return SZ_OK; return SZ_OK;
} }
if (cur > srcRem) if (cur > srcRem)
cur = (UInt32)srcRem; cur = (unsigned)srcRem;
memcpy(p->buf + p->pos, src, cur); memcpy(p->buf + p->pos, src, cur);
p->pos += cur; p->pos += cur;
(*srcLen) += cur; (*srcLen) += cur;
@ -1321,9 +1321,9 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
case XZ_STATE_STREAM_FOOTER: case XZ_STATE_STREAM_FOOTER:
{ {
UInt32 cur = XZ_STREAM_FOOTER_SIZE - p->pos; unsigned cur = XZ_STREAM_FOOTER_SIZE - p->pos;
if (cur > srcRem) if (cur > srcRem)
cur = (UInt32)srcRem; cur = (unsigned)srcRem;
memcpy(p->buf + p->pos, src, cur); memcpy(p->buf + p->pos, src, cur);
p->pos += cur; p->pos += cur;
(*srcLen) += cur; (*srcLen) += cur;
@ -1358,6 +1358,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
} }
case XZ_STATE_BLOCK: break; /* to disable GCC warning */ case XZ_STATE_BLOCK: break; /* to disable GCC warning */
default: return SZ_ERROR_FAIL;
} }
} }
/* /*
@ -1773,10 +1775,10 @@ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbac
} }
} }
{ {
UInt64 packSize = block->packSize; const UInt64 packSize = block->packSize;
UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3); const UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3);
UInt32 checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags); const unsigned checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags);
UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize; const UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize;
// if (blockPackSum <= me->props.inBlockMax) // if (blockPackSum <= me->props.inBlockMax)
// unpackBlockMaxSize // unpackBlockMaxSize
{ {
@ -2381,7 +2383,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (tMode) if (tMode)
{ {
XzDecMt_FreeOutBufs(p); XzDecMt_FreeOutBufs(p);
tMode = MtDec_PrepareRead(&p->mtc); tMode = (BoolInt)MtDec_PrepareRead(&p->mtc);
} }
#endif #endif
@ -2644,7 +2646,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
p->outSize = *outDataSize; p->outSize = *outDataSize;
} }
p->finishMode = finishMode; p->finishMode = (BoolInt)finishMode;
// p->outSize = 457; p->outSize_Defined = True; p->finishMode = False; // for test // p->outSize = 457; p->outSize_Defined = True; p->finishMode = False; // for test

View File

@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode /* XzEnc.c -- Xz Encode
2023-04-13 : Igor Pavlov : Public domain */ 2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -29,8 +29,9 @@
#define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3) #define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3)
/* max pack size for LZMA2 block + check-64bytrs: */ #define XZ_CHECK_SIZE_MAX 64
#define XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize) ((unpackSize) + ((unpackSize) >> 10) + 16 + 64) /* max pack size for LZMA2 block + pad4 + check_size: */
#define XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize) ((unpackSize) + ((unpackSize) >> 10) + 16 + XZ_CHECK_SIZE_MAX)
#define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize)) #define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize))
@ -325,12 +326,13 @@ typedef struct
static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Enc[] = static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Enc[] =
{ {
Z7_BRANCH_CONV_ENC(PPC), Z7_BRANCH_CONV_ENC_2 (BranchConv_PPC),
Z7_BRANCH_CONV_ENC(IA64), Z7_BRANCH_CONV_ENC_2 (BranchConv_IA64),
Z7_BRANCH_CONV_ENC(ARM), Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM),
Z7_BRANCH_CONV_ENC(ARMT), Z7_BRANCH_CONV_ENC_2 (BranchConv_ARMT),
Z7_BRANCH_CONV_ENC(SPARC), Z7_BRANCH_CONV_ENC_2 (BranchConv_SPARC),
Z7_BRANCH_CONV_ENC(ARM64) Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM64),
Z7_BRANCH_CONV_ENC_2 (BranchConv_RISCV)
}; };
static SizeT XzBcFilterStateBase_Filter_Enc(CXzBcFilterStateBase *p, Byte *data, SizeT size) static SizeT XzBcFilterStateBase_Filter_Enc(CXzBcFilterStateBase *p, Byte *data, SizeT size)
@ -888,9 +890,9 @@ static SRes Xz_CompressBlock(
blockSizes->unpackSize = checkInStream.processed; blockSizes->unpackSize = checkInStream.processed;
} }
{ {
Byte buf[4 + 64]; Byte buf[4 + XZ_CHECK_SIZE_MAX];
unsigned padSize = XZ_GET_PAD_SIZE(seqSizeOutStream.processed); const unsigned padSize = XZ_GET_PAD_SIZE(seqSizeOutStream.processed);
UInt64 packSize = seqSizeOutStream.processed; const UInt64 packSize = seqSizeOutStream.processed;
buf[0] = 0; buf[0] = 0;
buf[1] = 0; buf[1] = 0;
@ -898,7 +900,8 @@ static SRes Xz_CompressBlock(
buf[3] = 0; buf[3] = 0;
SeqCheckInStream_GetDigest(&checkInStream, buf + 4); SeqCheckInStream_GetDigest(&checkInStream, buf + 4);
RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId))) RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize),
padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId)))
blockSizes->totalSize = seqSizeOutStream.processed - padSize; blockSizes->totalSize = seqSizeOutStream.processed - padSize;
@ -1083,18 +1086,19 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf
CXzEnc *me = (CXzEnc *)pp; CXzEnc *me = (CXzEnc *)pp;
SRes res; SRes res;
CMtProgressThunk progressThunk; CMtProgressThunk progressThunk;
Byte *dest;
Byte *dest = me->outBufs[outBufIndex];
UNUSED_VAR(finished) UNUSED_VAR(finished)
{ {
CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex];
bInfo->totalSize = 0; bInfo->totalSize = 0;
bInfo->unpackSize = 0; bInfo->unpackSize = 0;
bInfo->headerSize = 0; bInfo->headerSize = 0;
// v23.02: we don't compress empty blocks
// also we must ignore that empty block in XzEnc_MtCallback_Write()
if (srcSize == 0)
return SZ_OK;
} }
dest = me->outBufs[outBufIndex];
if (!dest) if (!dest)
{ {
dest = (Byte *)ISzAlloc_Alloc(me->alloc, me->outBufSize); dest = (Byte *)ISzAlloc_Alloc(me->alloc, me->outBufSize);
@ -1140,18 +1144,20 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf
static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex) static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex)
{ {
CXzEnc *me = (CXzEnc *)pp; CXzEnc *me = (CXzEnc *)pp;
const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex];
const Byte *data = me->outBufs[outBufIndex]; // v23.02: we don't write empty blocks
// note: if (bInfo->unpackSize == 0) then there is no compressed data of block
RINOK(WriteBytes(me->outStream, data, bInfo->headerSize)) if (bInfo->unpackSize == 0)
return SZ_OK;
{ {
UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize); const Byte *data = me->outBufs[outBufIndex];
RINOK(WriteBytes(me->outStream, data, bInfo->headerSize))
{
const UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize);
RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize)) RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize))
} }
return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc); return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc);
}
} }
#endif #endif

View File

@ -1,5 +1,5 @@
/* XzIn.c - Xz input /* XzIn.c - Xz input
2023-04-02 : Igor Pavlov : Public domain */ 2023-09-07 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -27,7 +27,7 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream)
} }
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \ #define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
if (s == 0) return SZ_ERROR_ARCHIVE; \ if (s == 0) return SZ_ERROR_ARCHIVE; \
pos += s; } pos += s; }
@ -37,7 +37,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex,
unsigned headerSize; unsigned headerSize;
*headerSizeRes = 0; *headerSizeRes = 0;
RINOK(SeqInStream_ReadByte(inStream, &header[0])) RINOK(SeqInStream_ReadByte(inStream, &header[0]))
headerSize = (unsigned)header[0]; headerSize = header[0];
if (headerSize == 0) if (headerSize == 0)
{ {
*headerSizeRes = 1; *headerSizeRes = 1;
@ -47,7 +47,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex,
*isIndex = False; *isIndex = False;
headerSize = (headerSize << 2) + 4; headerSize = (headerSize << 2) + 4;
*headerSizeRes = headerSize; *headerSizeRes = (UInt32)headerSize;
{ {
size_t processedSize = headerSize - 1; size_t processedSize = headerSize - 1;
RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize)) RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize))
@ -58,7 +58,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex,
} }
#define ADD_SIZE_CHECK(size, val) \ #define ADD_SIZE_CHECK(size, val) \
{ UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; } { const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; }
UInt64 Xz_GetUnpackSize(const CXzStream *p) UInt64 Xz_GetUnpackSize(const CXzStream *p)
{ {